From 175f072eb1acf041a86c46b464dc573c601f330f Mon Sep 17 00:00:00 2001 From: Kirk McKusick Date: Thu, 6 Dec 1990 02:08:52 -0800 Subject: [PATCH] adopted from Mach 2.5 SCCS-vsn: sys/vm/pmap.h 7.1 SCCS-vsn: sys/vm/vm_fault.c 7.1 SCCS-vsn: sys/vm/vm_glue.c 7.1 SCCS-vsn: sys/vm/vm_inherit.h 7.1 SCCS-vsn: sys/vm/vm_init.c 7.1 SCCS-vsn: sys/vm/vm_kern.c 7.1 SCCS-vsn: sys/vm/vm_kern.h 7.1 SCCS-vsn: sys/vm/vm_map.c 7.1 SCCS-vsn: sys/vm/vm_map.h 7.1 SCCS-vsn: sys/vm/vm_object.c 7.1 SCCS-vsn: sys/vm/vm_object.h 7.1 SCCS-vsn: sys/vm/vm_page.c 7.1 SCCS-vsn: sys/vm/vm_page.h 7.1 SCCS-vsn: sys/vm/vm_pageout.c 7.1 SCCS-vsn: sys/vm/vm_pageout.h 7.1 SCCS-vsn: sys/vm/vm_pager.c 7.1 SCCS-vsn: sys/vm/vm_param.h 7.1 SCCS-vsn: sys/vm/vm_prot.h 7.1 SCCS-vsn: sys/vm/vm_user.c 7.1 SCCS-vsn: sys/vm/vm_user.h 7.1 --- usr/src/sys/vm/pmap.h | 61 + usr/src/sys/vm/vm_fault.c | 1036 +++++++++++++++ usr/src/sys/vm/vm_glue.c | 453 +++++++ usr/src/sys/vm/vm_inherit.h | 42 + usr/src/sys/vm/vm_init.c | 55 + usr/src/sys/vm/vm_kern.c | 497 ++++++++ usr/src/sys/vm/vm_kern.h | 36 + usr/src/sys/vm/vm_map.c | 2367 +++++++++++++++++++++++++++++++++++ usr/src/sys/vm/vm_map.h | 176 +++ usr/src/sys/vm/vm_object.c | 1406 +++++++++++++++++++++ usr/src/sys/vm/vm_object.h | 134 ++ usr/src/sys/vm/vm_page.c | 698 +++++++++++ usr/src/sys/vm/vm_page.h | 219 ++++ usr/src/sys/vm/vm_pageout.c | 335 +++++ usr/src/sys/vm/vm_pageout.h | 43 + usr/src/sys/vm/vm_pager.c | 229 ++++ usr/src/sys/vm/vm_param.h | 102 ++ usr/src/sys/vm/vm_prot.h | 53 + usr/src/sys/vm/vm_user.c | 202 +++ usr/src/sys/vm/vm_user.h | 29 + 20 files changed, 8173 insertions(+) create mode 100644 usr/src/sys/vm/pmap.h create mode 100644 usr/src/sys/vm/vm_fault.c create mode 100644 usr/src/sys/vm/vm_glue.c create mode 100644 usr/src/sys/vm/vm_inherit.h create mode 100644 usr/src/sys/vm/vm_init.c create mode 100644 usr/src/sys/vm/vm_kern.c create mode 100644 usr/src/sys/vm/vm_kern.h create mode 100644 usr/src/sys/vm/vm_map.c create mode 100644 usr/src/sys/vm/vm_map.h create mode 100644 usr/src/sys/vm/vm_object.c create mode 100644 usr/src/sys/vm/vm_object.h create mode 100644 usr/src/sys/vm/vm_page.c create mode 100644 usr/src/sys/vm/vm_page.h create mode 100644 usr/src/sys/vm/vm_pageout.c create mode 100644 usr/src/sys/vm/vm_pageout.h create mode 100644 usr/src/sys/vm/vm_pager.c create mode 100644 usr/src/sys/vm/vm_param.h create mode 100644 usr/src/sys/vm/vm_prot.h create mode 100644 usr/src/sys/vm/vm_user.c create mode 100644 usr/src/sys/vm/vm_user.h diff --git a/usr/src/sys/vm/pmap.h b/usr/src/sys/vm/pmap.h new file mode 100644 index 0000000000..83629cc7a0 --- /dev/null +++ b/usr/src/sys/vm/pmap.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr. + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)pmap.h 7.1 (Berkeley) %G% + */ + +/* + * Machine address mapping definitions -- machine-independent + * section. [For machine-dependent section, see "machine/pmap.h".] 
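 *
 * (Editorial illustration, not part of the original commit: the
 *  machine-independent VM layer drives this interface.  For example,
 *  vm_fault() in vm_fault.c establishes a translation with
 *
 *	pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m),
 *		prot & ~(m->page_lock), wired);
 *
 *  where "m" is the resident page that has just been faulted in.)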
+ */ + +#ifndef _PMAP_VM_ +#define _PMAP_VM_ + +#ifdef KERNEL +#include "types.h" +#include "machine/pmap.h" +#else +#include +#include +#endif + +#ifdef KERNEL +void pmap_bootstrap(); +void pmap_init(); +vm_offset_t pmap_map(); +pmap_t pmap_create(); +pmap_t pmap_kernel(); +void pmap_destroy(); +void pmap_reference(); +void pmap_remove(); +void pmap_remove_all(); +void pmap_copy_on_write(); +void pmap_protect(); +void pmap_enter(); +vm_offset_t pmap_extract(); +void pmap_update(); +void pmap_collect(); +void pmap_activate(); +void pmap_deactivate(); +void pmap_copy(); +void pmap_statistics(); +void pmap_clear_reference(); +boolean_t pmap_is_referenced(); + +void pmap_redzone(); +boolean_t pmap_access(); + +extern pmap_t kernel_pmap; +#endif + +#endif _PMAP_VM_ diff --git a/usr/src/sys/vm/vm_fault.c b/usr/src/sys/vm/vm_fault.c new file mode 100644 index 0000000000..9ec08b23e6 --- /dev/null +++ b/usr/src/sys/vm/vm_fault.c @@ -0,0 +1,1036 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_fault.c 7.1 (Berkeley) %G% + */ + +/* + * Page fault handling module. + */ + +#include "param.h" +#include "../vm/vm_param.h" +#include "../vm/vm_map.h" +#include "../vm/vm_object.h" +#include "../vm/vm_page.h" +#include "../vm/pmap.h" +#include "../vm/vm_statistics.h" +#include "../vm/vm_pageout.h" + +/* + * vm_fault: + * + * Handle a page fault occuring at the given address, + * requiring the given permissions, in the map specified. + * If successful, the page is inserted into the + * associated physical map. + * + * NOTE: the given address should be truncated to the + * proper page address. + * + * KERN_SUCCESS is returned if the page fault is handled; otherwise, + * a standard error specifying why the fault is fatal is returned. + * + * + * The map in question must be referenced, and remains so. + * Caller may hold no locks. 
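 *
 * (Editorial illustration, not part of the original commit.  A
 *  machine-dependent trap handler is expected to call this routine
 *  roughly as follows; everything except vm_fault(), trunc_page()
 *  and the VM_PROT_* codes is an assumption made for the example:
 *
 *	rv = vm_fault(map, trunc_page(vaddr),
 *		write_fault ? VM_PROT_WRITE : VM_PROT_READ, FALSE);
 *
 *  Any return other than KERN_SUCCESS is then treated as a fatal
 *  fault, typically by posting a segmentation-violation signal.)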
+ */ +vm_fault(map, vaddr, fault_type, change_wiring) + vm_map_t map; + vm_offset_t vaddr; + vm_prot_t fault_type; + boolean_t change_wiring; +{ + vm_object_t first_object; + vm_offset_t first_offset; + vm_map_entry_t entry; + register vm_object_t object; + register vm_offset_t offset; + register vm_page_t m; + vm_page_t first_m; + vm_prot_t prot; + int result; + boolean_t wired; + boolean_t su; + boolean_t lookup_still_valid; + boolean_t page_exists; + vm_page_t old_m; + vm_object_t next_object; + + vm_stat.faults++; /* needs lock XXX */ +/* + * Recovery actions + */ +#define FREE_PAGE(m) { \ + PAGE_WAKEUP(m); \ + vm_page_lock_queues(); \ + vm_page_free(m); \ + vm_page_unlock_queues(); \ +} + +#define RELEASE_PAGE(m) { \ + PAGE_WAKEUP(m); \ + vm_page_lock_queues(); \ + vm_page_activate(m); \ + vm_page_unlock_queues(); \ +} + +#define UNLOCK_MAP { \ + if (lookup_still_valid) { \ + vm_map_lookup_done(map, entry); \ + lookup_still_valid = FALSE; \ + } \ +} + +#define UNLOCK_THINGS { \ + object->paging_in_progress--; \ + vm_object_unlock(object); \ + if (object != first_object) { \ + vm_object_lock(first_object); \ + FREE_PAGE(first_m); \ + first_object->paging_in_progress--; \ + vm_object_unlock(first_object); \ + } \ + UNLOCK_MAP; \ +} + +#define UNLOCK_AND_DEALLOCATE { \ + UNLOCK_THINGS; \ + vm_object_deallocate(first_object); \ +} + + RetryFault: ; + + /* + * Find the backing store object and offset into + * it to begin the search. + */ + + if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry, + &first_object, &first_offset, + &prot, &wired, &su)) != KERN_SUCCESS) { + return(result); + } + lookup_still_valid = TRUE; + + if (wired) + fault_type = prot; + + first_m = VM_PAGE_NULL; + + /* + * Make a reference to this object to + * prevent its disposal while we are messing with + * it. Once we have the reference, the map is free + * to be diddled. Since objects reference their + * shadows (and copies), they will stay around as well. + */ + + vm_object_lock(first_object); + + first_object->ref_count++; + first_object->paging_in_progress++; + + /* + * INVARIANTS (through entire routine): + * + * 1) At all times, we must either have the object + * lock or a busy page in some object to prevent + * some other thread from trying to bring in + * the same page. + * + * Note that we cannot hold any locks during the + * pager access or when waiting for memory, so + * we use a busy page then. + * + * Note also that we aren't as concerned about + * more than one thead attempting to pager_data_unlock + * the same page at once, so we don't hold the page + * as busy then, but do record the highest unlock + * value so far. [Unlock requests may also be delivered + * out of order.] + * + * 2) Once we have a busy page, we must remove it from + * the pageout queues, so that the pageout daemon + * will not grab it away. + * + * 3) To prevent another thread from racing us down the + * shadow chain and entering a new page in the top + * object before we do, we must keep a busy page in + * the top object while following the shadow chain. + * + * 4) We must increment paging_in_progress on any object + * for which we have a busy page, to prevent + * vm_object_collapse from removing the busy page + * without our noticing. + */ + + /* + * Search for the page at object/offset. 
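 *
 * (Editorial note, not part of the original commit: the recovery
 *  macros defined above nest.  UNLOCK_MAP drops only the map lookup;
 *  UNLOCK_THINGS additionally unlocks the current object, drops its
 *  paging_in_progress count and, if a separate first_object is
 *  involved, frees the spare first_m page; UNLOCK_AND_DEALLOCATE
 *  additionally releases the reference taken on first_object.  The
 *  retry and error paths below use whichever of these matches the
 *  amount of state they hold.)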
+ */ + + object = first_object; + offset = first_offset; + + /* + * See whether this page is resident + */ + + while (TRUE) { + m = vm_page_lookup(object, offset); + if (m != VM_PAGE_NULL) { + /* + * If the page is being brought in, + * wait for it and then retry. + */ + if (m->busy) { +#ifdef DOTHREADS + int wait_result; + + PAGE_ASSERT_WAIT(m, !change_wiring); + UNLOCK_THINGS; + thread_block(); + wait_result = current_thread()->wait_result; + vm_object_deallocate(first_object); + if (wait_result != THREAD_AWAKENED) + return(KERN_SUCCESS); + goto RetryFault; +#else + PAGE_ASSERT_WAIT(m, !change_wiring); + UNLOCK_THINGS; + thread_block(); + vm_object_deallocate(first_object); + goto RetryFault; +#endif + } + + if (m->absent) + panic("vm_fault: absent"); + + /* + * If the desired access to this page has + * been locked out, request that it be unlocked. + */ + + if (fault_type & m->page_lock) { +#ifdef DOTHREADS + int wait_result; + + if ((fault_type & m->unlock_request) != fault_type) + panic("vm_fault: pager_data_unlock"); + + PAGE_ASSERT_WAIT(m, !change_wiring); + UNLOCK_THINGS; + thread_block(); + wait_result = current_thread()->wait_result; + vm_object_deallocate(first_object); + if (wait_result != THREAD_AWAKENED) + return(KERN_SUCCESS); + goto RetryFault; +#else + if ((fault_type & m->unlock_request) != fault_type) + panic("vm_fault: pager_data_unlock"); + + PAGE_ASSERT_WAIT(m, !change_wiring); + UNLOCK_THINGS; + thread_block(); + vm_object_deallocate(first_object); + goto RetryFault; +#endif + } + + /* + * Remove the page from the pageout daemon's + * reach while we play with it. + */ + + vm_page_lock_queues(); + if (m->inactive) { + queue_remove(&vm_page_queue_inactive, m, + vm_page_t, pageq); + m->inactive = FALSE; + vm_page_inactive_count--; + vm_stat.reactivations++; + } + + if (m->active) { + queue_remove(&vm_page_queue_active, m, + vm_page_t, pageq); + m->active = FALSE; + vm_page_active_count--; + } + vm_page_unlock_queues(); + + /* + * Mark page busy for other threads. + */ + m->busy = TRUE; + m->absent = FALSE; + break; + } + + if (((object->pager != vm_pager_null) && + (!change_wiring || wired)) + || (object == first_object)) { + + /* + * Allocate a new page for this object/offset + * pair. + */ + + m = vm_page_alloc(object, offset); + + if (m == VM_PAGE_NULL) { + UNLOCK_AND_DEALLOCATE; + VM_WAIT; + goto RetryFault; + } + } + + if ((object->pager != vm_pager_null) && + (!change_wiring || wired)) { + int rv; + + /* + * Now that we have a busy page, we can + * release the object lock. + */ + vm_object_unlock(object); + + /* + * Call the pager to retrieve the data, if any, + * after releasing the lock on the map. + */ + UNLOCK_MAP; + + rv = vm_pager_get(object->pager, m, TRUE); + if (rv == VM_PAGER_OK) { + /* + * Found the page. + * Leave it busy while we play with it. + */ + vm_object_lock(object); + + /* + * Relookup in case pager changed page. + * Pager is responsible for disposition + * of old page if moved. + */ + m = vm_page_lookup(object, offset); + + vm_stat.pageins++; + m->fake = FALSE; + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + break; + } + + /* + * Remove the bogus page (which does not + * exist at this object/offset); before + * doing so, we must get back our object + * lock to preserve our invariant. + * + * Also wake up any other thread that may want + * to bring in this page. 
+ * + * If this is the top-level object, we must + * leave the busy page to prevent another + * thread from rushing past us, and inserting + * the page in that object at the same time + * that we are. + */ + + vm_object_lock(object); + /* + * Data outside the range of the pager; an error + */ + if (rv == VM_PAGER_BAD) { + FREE_PAGE(m); + UNLOCK_AND_DEALLOCATE; + return(KERN_PROTECTION_FAILURE); /* XXX */ + } + if (object != first_object) { + FREE_PAGE(m); + /* + * XXX - we cannot just fall out at this + * point, m has been freed and is invalid! + */ + } + } + + /* + * We get here if the object has no pager (or unwiring) + * or the pager doesn't have the page. + */ + if (object == first_object) + first_m = m; + + /* + * Move on to the next object. Lock the next + * object before unlocking the current one. + */ + + offset += object->shadow_offset; + next_object = object->shadow; + if (next_object == VM_OBJECT_NULL) { + /* + * If there's no object left, fill the page + * in the top object with zeros. + */ + if (object != first_object) { + object->paging_in_progress--; + vm_object_unlock(object); + + object = first_object; + offset = first_offset; + m = first_m; + vm_object_lock(object); + } + first_m = VM_PAGE_NULL; + + vm_page_zero_fill(m); + vm_stat.zero_fill_count++; + m->fake = FALSE; + m->absent = FALSE; + break; + } + else { + vm_object_lock(next_object); + if (object != first_object) + object->paging_in_progress--; + vm_object_unlock(object); + object = next_object; + object->paging_in_progress++; + } + } + + if (m->absent || m->active || m->inactive || !m->busy) + panic("vm_fault: absent or active or inactive or not busy after main loop"); + + /* + * PAGE HAS BEEN FOUND. + * [Loop invariant still holds -- the object lock + * is held.] + */ + + old_m = m; /* save page that would be copied */ + + /* + * If the page is being written, but isn't + * already owned by the top-level object, + * we have to copy it into a new page owned + * by the top-level object. + */ + + if (object != first_object) { + /* + * We only really need to copy if we + * want to write it. + */ + + if (fault_type & VM_PROT_WRITE) { + + /* + * If we try to collapse first_object at this + * point, we may deadlock when we try to get + * the lock on an intermediate object (since we + * have the bottom object locked). We can't + * unlock the bottom object, because the page + * we found may move (by collapse) if we do. + * + * Instead, we first copy the page. Then, when + * we have no more use for the bottom object, + * we unlock it and try to collapse. + * + * Note that we copy the page even if we didn't + * need to... that's the breaks. + */ + + /* + * We already have an empty page in + * first_object - use it. + */ + + vm_page_copy(m, first_m); + first_m->fake = FALSE; + first_m->absent = FALSE; + + /* + * If another map is truly sharing this + * page with us, we have to flush all + * uses of the original page, since we + * can't distinguish those which want the + * original from those which need the + * new copy. + */ + + vm_page_lock_queues(); + if (!su) { + /* + * Also, once it's no longer in + * use by any maps, move it to + * the inactive queue instead. + */ + + vm_page_deactivate(m); + pmap_remove_all(VM_PAGE_TO_PHYS(m)); + } + else { + /* + * Old page is only (possibly) + * in use by faulting map. We + * should do a pmap_remove on + * that mapping, but we know + * that pmap_enter will remove + * the old mapping before + * inserting the new one. 
+ */ + vm_page_activate(m); + } + vm_page_unlock_queues(); + + /* + * We no longer need the old page or object. + */ + PAGE_WAKEUP(m); + object->paging_in_progress--; + vm_object_unlock(object); + + /* + * Only use the new page below... + */ + + vm_stat.cow_faults++; + m = first_m; + object = first_object; + offset = first_offset; + + /* + * Now that we've gotten the copy out of the + * way, let's try to collapse the top object. + */ + vm_object_lock(object); + /* + * But we have to play ugly games with + * paging_in_progress to do that... + */ + object->paging_in_progress--; + vm_object_collapse(object); + object->paging_in_progress++; + } + else { + prot &= (~VM_PROT_WRITE); + m->copy_on_write = TRUE; + } + } + + if (m->active || m->inactive) + panic("vm_fault: active or inactive before copy object handling"); + + /* + * If the page is being written, but hasn't been + * copied to the copy-object, we have to copy it there. + */ + RetryCopy: + if (first_object->copy != VM_OBJECT_NULL) { + vm_object_t copy_object = first_object->copy; + vm_offset_t copy_offset; + vm_page_t copy_m; + + /* + * We only need to copy if we want to write it. + */ + if ((fault_type & VM_PROT_WRITE) == 0) { + prot &= ~VM_PROT_WRITE; + m->copy_on_write = TRUE; + } + else { + /* + * Try to get the lock on the copy_object. + */ + if (!vm_object_lock_try(copy_object)) { + vm_object_unlock(object); + /* should spin a bit here... */ + vm_object_lock(object); + goto RetryCopy; + } + + /* + * Make another reference to the copy-object, + * to keep it from disappearing during the + * copy. + */ + copy_object->ref_count++; + + /* + * Does the page exist in the copy? + */ + copy_offset = first_offset + - copy_object->shadow_offset; + copy_m = vm_page_lookup(copy_object, copy_offset); + if (page_exists = (copy_m != VM_PAGE_NULL)) { + if (copy_m->busy) { +#ifdef DOTHREADS + int wait_result; + + /* + * If the page is being brought + * in, wait for it and then retry. + */ + PAGE_ASSERT_WAIT(copy_m, !change_wiring); + RELEASE_PAGE(m); + copy_object->ref_count--; + vm_object_unlock(copy_object); + UNLOCK_THINGS; + thread_block(); + wait_result = current_thread()->wait_result; + vm_object_deallocate(first_object); + if (wait_result != THREAD_AWAKENED) + return(KERN_SUCCESS); + goto RetryFault; +#else + /* + * If the page is being brought + * in, wait for it and then retry. + */ + PAGE_ASSERT_WAIT(copy_m, !change_wiring); + RELEASE_PAGE(m); + copy_object->ref_count--; + vm_object_unlock(copy_object); + UNLOCK_THINGS; + thread_block(); + vm_object_deallocate(first_object); + goto RetryFault; +#endif + } + } + + /* + * If the page is not in memory (in the object) + * and the object has a pager, we have to check + * if the pager has the data in secondary + * storage. + */ + if (!page_exists) { + + /* + * If we don't allocate a (blank) page + * here... another thread could try + * to page it in, allocate a page, and + * then block on the busy page in its + * shadow (first_object). Then we'd + * trip over the busy page after we + * found that the copy_object's pager + * doesn't have the page... + */ + copy_m = vm_page_alloc(copy_object, + copy_offset); + if (copy_m == VM_PAGE_NULL) { + /* + * Wait for a page, then retry. 
+ */ + RELEASE_PAGE(m); + copy_object->ref_count--; + vm_object_unlock(copy_object); + UNLOCK_AND_DEALLOCATE; + VM_WAIT; + goto RetryFault; + } + + if (copy_object->pager != vm_pager_null) { + vm_object_unlock(object); + vm_object_unlock(copy_object); + UNLOCK_MAP; + + page_exists = vm_pager_has_page( + copy_object->pager, + (copy_offset + copy_object->paging_offset)); + + vm_object_lock(copy_object); + + /* + * Since the map is unlocked, someone + * else could have copied this object + * and put a different copy_object + * between the two. Or, the last + * reference to the copy-object (other + * than the one we have) may have + * disappeared - if that has happened, + * we don't need to make the copy. + */ + if (copy_object->shadow != object || + copy_object->ref_count == 1) { + /* + * Gaah... start over! + */ + FREE_PAGE(copy_m); + vm_object_unlock(copy_object); + vm_object_deallocate(copy_object); + /* may block */ + vm_object_lock(object); + goto RetryCopy; + } + vm_object_lock(object); + + if (page_exists) { + /* + * We didn't need the page + */ + FREE_PAGE(copy_m); + } + } + } + if (!page_exists) { + /* + * Must copy page into copy-object. + */ + vm_page_copy(m, copy_m); + copy_m->fake = FALSE; + copy_m->absent = FALSE; + + /* + * Things to remember: + * 1. The copied page must be marked 'dirty' + * so it will be paged out to the copy + * object. + * 2. If the old page was in use by any users + * of the copy-object, it must be removed + * from all pmaps. (We can't know which + * pmaps use it.) + */ + vm_page_lock_queues(); + pmap_remove_all(VM_PAGE_TO_PHYS(old_m)); + copy_m->clean = FALSE; + vm_page_activate(copy_m); /* XXX */ + vm_page_unlock_queues(); + + PAGE_WAKEUP(copy_m); + } + /* + * The reference count on copy_object must be + * at least 2: one for our extra reference, + * and at least one from the outside world + * (we checked that when we last locked + * copy_object). + */ + copy_object->ref_count--; + vm_object_unlock(copy_object); + m->copy_on_write = FALSE; + } + } + + if (m->active || m->inactive) + panic("vm_fault: active or inactive before retrying lookup"); + + /* + * We must verify that the maps have not changed + * since our last lookup. + */ + + if (!lookup_still_valid) { + vm_object_t retry_object; + vm_offset_t retry_offset; + vm_prot_t retry_prot; + + /* + * Since map entries may be pageable, make sure we can + * take a page fault on them. + */ + vm_object_unlock(object); + + /* + * To avoid trying to write_lock the map while another + * thread has it read_locked (in vm_map_pageable), we + * do not try for write permission. If the page is + * still writable, we will get write permission. If it + * is not, or has been marked needs_copy, we enter the + * mapping without write permission, and will merely + * take another fault. + */ + result = vm_map_lookup(&map, vaddr, + fault_type & ~VM_PROT_WRITE, &entry, + &retry_object, &retry_offset, &retry_prot, + &wired, &su); + + vm_object_lock(object); + + /* + * If we don't need the page any longer, put it on the + * active list (the easiest thing to do here). If no + * one needs it, pageout will grab it eventually. + */ + + if (result != KERN_SUCCESS) { + RELEASE_PAGE(m); + UNLOCK_AND_DEALLOCATE; + return(result); + } + + lookup_still_valid = TRUE; + + if ((retry_object != first_object) || + (retry_offset != first_offset)) { + RELEASE_PAGE(m); + UNLOCK_AND_DEALLOCATE; + goto RetryFault; + } + + /* + * Check whether the protection has changed or the object + * has been copied while we left the map unlocked. 
+ * Changing from read to write permission is OK - we leave + * the page write-protected, and catch the write fault. + * Changing from write to read permission means that we + * can't mark the page write-enabled after all. + */ + prot &= retry_prot; + if (m->copy_on_write) + prot &= ~VM_PROT_WRITE; + } + + /* + * (the various bits we're fiddling with here are locked by + * the object's lock) + */ + + /* XXX This distorts the meaning of the copy_on_write bit */ + + if (prot & VM_PROT_WRITE) + m->copy_on_write = FALSE; + + /* + * It's critically important that a wired-down page be faulted + * only once in each map for which it is wired. + */ + + if (m->active || m->inactive) + panic("vm_fault: active or inactive before pmap_enter"); + + vm_object_unlock(object); + + /* + * Put this page into the physical map. + * We had to do the unlock above because pmap_enter + * may cause other faults. We don't put the + * page back on the active queue until later so + * that the page-out daemon won't find us (yet). + */ + + pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), + prot & ~(m->page_lock), wired); + + /* + * If the page is not wired down, then put it where the + * pageout daemon can find it. + */ + vm_object_lock(object); + vm_page_lock_queues(); + if (change_wiring) { + if (wired) + vm_page_wire(m); + else + vm_page_unwire(m); + } + else + vm_page_activate(m); + vm_page_unlock_queues(); + + /* + * Unlock everything, and return + */ + + PAGE_WAKEUP(m); + UNLOCK_AND_DEALLOCATE; + + return(KERN_SUCCESS); + +} + +/* + * vm_fault_wire: + * + * Wire down a range of virtual addresses in a map. + */ +void vm_fault_wire(map, start, end) + vm_map_t map; + vm_offset_t start, end; +{ + + register vm_offset_t va; + register pmap_t pmap; + + pmap = vm_map_pmap(map); + + /* + * Inform the physical mapping system that the + * range of addresses may not fault, so that + * page tables and such can be locked down as well. + */ + + pmap_pageable(pmap, start, end, FALSE); + + /* + * We simulate a fault to get the page and enter it + * in the physical map. + */ + + for (va = start; va < end; va += PAGE_SIZE) { + (void) vm_fault(map, va, VM_PROT_NONE, TRUE); + } +} + + +/* + * vm_fault_unwire: + * + * Unwire a range of virtual addresses in a map. + */ +void vm_fault_unwire(map, start, end) + vm_map_t map; + vm_offset_t start, end; +{ + + register vm_offset_t va, pa; + register pmap_t pmap; + + pmap = vm_map_pmap(map); + + /* + * Since the pages are wired down, we must be able to + * get their mappings from the physical map system. + */ + + vm_page_lock_queues(); + + for (va = start; va < end; va += PAGE_SIZE) { + pa = pmap_extract(pmap, va); + if (pa == (vm_offset_t) 0) { + panic("unwire: page not in pmap"); + } + pmap_change_wiring(pmap, va, FALSE); + vm_page_unwire(PHYS_TO_VM_PAGE(pa)); + } + vm_page_unlock_queues(); + + /* + * Inform the physical mapping system that the range + * of addresses may fault, so that page tables and + * such may be unwired themselves. + */ + + pmap_pageable(pmap, start, end, TRUE); + +} + +/* + * Routine: + * vm_fault_copy_entry + * Function: + * Copy all of the pages from a wired-down map entry to another. + * + * In/out conditions: + * The source and destination maps must be locked for write. + * The source map entry must be wired down (or be a sharing map + * entry corresponding to a main map entry that is wired down). 
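 *
 * (Editorial note, not part of the original commit: this routine exists
 *  because a wired-down entry cannot be copied lazily.  Copy-on-write
 *  would require taking faults on pages that must remain resident and
 *  mapped, so the map-copy code copies such entries eagerly, page by
 *  page, in the loop below.)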
+ */ + +void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) + vm_map_t dst_map; + vm_map_t src_map; + vm_map_entry_t dst_entry; + vm_map_entry_t src_entry; +{ + + vm_object_t dst_object; + vm_object_t src_object; + vm_offset_t dst_offset; + vm_offset_t src_offset; + vm_prot_t prot; + vm_offset_t vaddr; + vm_page_t dst_m; + vm_page_t src_m; + +#ifdef lint + src_map++; +#endif lint + + src_object = src_entry->object.vm_object; + src_offset = src_entry->offset; + + /* + * Create the top-level object for the destination entry. + * (Doesn't actually shadow anything - we copy the pages + * directly.) + */ + dst_object = vm_object_allocate( + (vm_size_t) (dst_entry->end - dst_entry->start)); + + dst_entry->object.vm_object = dst_object; + dst_entry->offset = 0; + + prot = dst_entry->max_protection; + + /* + * Loop through all of the pages in the entry's range, copying + * each one from the source object (it should be there) to the + * destination object. + */ + for (vaddr = dst_entry->start, dst_offset = 0; + vaddr < dst_entry->end; + vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { + + /* + * Allocate a page in the destination object + */ + vm_object_lock(dst_object); + do { + dst_m = vm_page_alloc(dst_object, dst_offset); + if (dst_m == VM_PAGE_NULL) { + vm_object_unlock(dst_object); + VM_WAIT; + vm_object_lock(dst_object); + } + } while (dst_m == VM_PAGE_NULL); + + /* + * Find the page in the source object, and copy it in. + * (Because the source is wired down, the page will be + * in memory.) + */ + vm_object_lock(src_object); + src_m = vm_page_lookup(src_object, dst_offset + src_offset); + if (src_m == VM_PAGE_NULL) + panic("vm_fault_copy_wired: page missing"); + + vm_page_copy(src_m, dst_m); + + /* + * Enter it in the pmap... + */ + vm_object_unlock(src_object); + vm_object_unlock(dst_object); + + pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), + prot, FALSE); + + /* + * Mark it no longer busy, and put it on the active list. + */ + vm_object_lock(dst_object); + vm_page_lock_queues(); + vm_page_activate(dst_m); + vm_page_unlock_queues(); + PAGE_WAKEUP(dst_m); + vm_object_unlock(dst_object); + } + +} diff --git a/usr/src/sys/vm/vm_glue.c b/usr/src/sys/vm/vm_glue.c new file mode 100644 index 0000000000..ddb74c0306 --- /dev/null +++ b/usr/src/sys/vm/vm_glue.c @@ -0,0 +1,453 @@ +/* + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_glue.c 7.1 (Berkeley) %G% + */ + +#include "param.h" +#include "systm.h" +#include "user.h" +#include "proc.h" +#include "buf.h" + +#include "../vm/vm_param.h" +#include "../vm/vm_map.h" +#include "../vm/vm_page.h" +#include "../vm/vm_kern.h" + +int avefree = 0; /* XXX */ +unsigned maxdmap = MAXDSIZ; /* XXX */ + +kernacc(addr, len, rw) + caddr_t addr; + int len, rw; +{ + boolean_t rv; + vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; + + rv = vm_map_check_protection(kernel_map, trunc_page(addr), + round_page(addr+len-1), prot); + return(rv == TRUE); +} + +useracc(addr, len, rw) + caddr_t addr; + int len, rw; +{ + boolean_t rv; + vm_prot_t prot = rw == B_READ ? 
VM_PROT_READ : VM_PROT_WRITE; + + rv = vm_map_check_protection(u.u_procp->p_map, trunc_page(addr), + round_page(addr+len-1), prot); + return(rv == TRUE); +} + +#ifdef KGDB +/* + * Change protections on kernel pages from addr to addr+size + * (presumably so debugger can plant a breakpoint). + * All addresses are assumed to reside in the Sysmap, + */ +chgkprot(addr, len, rw) + register caddr_t addr; + int len, rw; +{ + vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; + + vm_map_protect(kernel_map, trunc_page(addr), + round_page(addr+len-1), prot, FALSE); +} +#endif + +vslock(addr, len) + caddr_t addr; + u_int len; +{ + vm_map_pageable(u.u_procp->p_map, trunc_page(addr), + round_page(addr+len-1), FALSE); +} + +vsunlock(addr, len, dirtied) + caddr_t addr; + u_int len; + int dirtied; +{ +#ifdef lint + dirtied++; +#endif lint + vm_map_pageable(u.u_procp->p_map, trunc_page(addr), + round_page(addr+len-1), TRUE); +} + +procdup(p, isvfork) + register struct proc *p; + int isvfork; +{ + register struct user *up; + vm_offset_t addr; + vm_size_t size; + +#if 0 + /* + * Duplicate the process address space. + * XXX if this is a vfork we arrange to share data/stack to + * preserve brain-dead semantics of vfork(). + * XXX this doesn't work due to a bug in the VM code. + * Once a process has done a vfork setting up sharing maps, + * any future forks may fail as the source VM range doesn't + * properly get write-protected. This causes the parent to + * not create copies and instead modifies the originals. + * If the parent activates before the child, the child will + * get a corrupted address space. + */ + if (isvfork) { + addr = trunc_page(u.u_daddr); + size = ctob(u.u_dsize); + (void) vm_map_inherit(u.u_procp->p_map, addr, + addr + size, VM_INHERIT_SHARE); + (void) vm_map_inherit(u.u_procp->p_map, u.u_maxsaddr, + VM_MAX_ADDRESS, VM_INHERIT_SHARE); + } +#endif + p->p_map = vm_map_fork(u.u_procp->p_map); +#if 0 + if (isvfork) { + (void) vm_map_inherit(u.u_procp->p_map, addr, + addr + size, VM_INHERIT_COPY); + (void) vm_map_inherit(u.u_procp->p_map, u.u_maxsaddr, + VM_MAX_ADDRESS, VM_INHERIT_COPY); + } +#endif + /* + * Allocate a wired-down (for now) u-area for the process + */ + size = round_page(ctob(UPAGES)); + addr = kmem_alloc_pageable(kernel_map, size); + vm_map_pageable(kernel_map, addr, addr+size, FALSE); + p->p_addr = (caddr_t)addr; + up = (struct user *)addr; + + /* + * Update the current u-area and copy it to the new one + */ + resume(pcbb(u.u_procp)); + bcopy(u.u_procp->p_addr, p->p_addr, size); + up->u_procp = p; + PMAP_ACTIVATE(p->p_map->pmap, (struct pcb *)p->p_addr); + + /* + * Arrange for a non-local goto when the new process + * is started, to resume here, returning nonzero from setjmp. + */ + up->u_pcb.pcb_sswap = (int *)&u.u_ssave; + if (savectx(&up->u_ssave)) { + /* + * Return 1 in child. + */ + return (1); + } + + /* + * Clear vm statistics of new process. + */ + bzero((caddr_t)&up->u_ru, sizeof (struct rusage)); + bzero((caddr_t)&up->u_cru, sizeof (struct rusage)); + up->u_outime = 0; + return (0); +} + +/* + * XXX Scaled down version from vm_page.c + */ +vminit() +{ + /* + * Set up the initial limits on process VM. + * Set the maximum resident set size to be all + * of (reasonably) available memory. This causes + * any single, large process to start random page + * replacement once it fills memory. 
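 *
 * (Editorial note, not part of the original commit: ptoa() below
 *  converts the page count vm_page_free_count into bytes, so the
 *  default RSS limit is simply all memory that is free at the time
 *  vminit() runs; proc[0] gets the same figure, kept in pages.)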
+ */ + u.u_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ; + u.u_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; + u.u_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; + u.u_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; + u.u_rlimit[RLIMIT_RSS].rlim_cur = u.u_rlimit[RLIMIT_RSS].rlim_max = + ptoa(vm_page_free_count); + proc[0].p_maxrss = vm_page_free_count; +} + +#include "../vm/vm_pageout.h" + +#ifdef DEBUG +int enableswap = 1; +int swapdebug = 0; +#define SDB_FOLLOW 1 +#define SDB_SWAPIN 2 +#define SDB_SWAPOUT 4 +#endif + +/* + * Brutally simple: + * 1. Attempt to swapin every swaped-out, runnable process in + * order of priority. + * 2. If not enough memory, wake the pageout daemon and let it + * clear some space. + */ +sched() +{ + register struct proc *rp; + register int rppri; + struct proc *inp; + int inpri; + vm_offset_t addr; + vm_size_t size; + +loop: +#ifdef DEBUG + if (!enableswap) { + inp = NULL; + goto noswap; + } +#endif + wantin = 0; + inp = NULL; + inpri = -20000; + for (rp = allproc; rp != NULL; rp = rp->p_nxt) + if (rp->p_stat == SRUN && (rp->p_flag & SLOAD) == 0) { + rppri = rp->p_time + + rp->p_slptime - (rp->p_nice-NZERO)*8; + if (rppri > inpri) { + inp = rp; + inpri = rppri; + } + } +#ifdef DEBUG + if (swapdebug & SDB_FOLLOW) + printf("sched: running, procp %x pri %d\n", inp, inpri); +noswap: +#endif + /* + * Nothing to do, back to sleep + */ + if ((rp = inp) == NULL) { + (void) splhigh(); + runout++; + sleep((caddr_t)&runout, PVM); + (void) spl0(); + goto loop; + } + /* + * We would like to bring someone in. + * This part is really bogus cuz we could deadlock on memory + * despite our feeble check. + */ + size = round_page(ctob(UPAGES)); + addr = (vm_offset_t) rp->p_addr; + if (vm_page_free_count > atop(size)) { +#ifdef DEBUG + if (swapdebug & SDB_SWAPIN) + printf("swapin: pid %d(%s)@%x, pri %d free %d\n", + rp->p_pid, rp->p_comm, rp->p_addr, + inpri, vm_page_free_count); +#endif + vm_map_pageable(kernel_map, addr, addr+size, FALSE); + (void) splclock(); + if (rp->p_stat == SRUN) + setrq(rp); + rp->p_flag |= SLOAD; + (void) spl0(); + rp->p_time = 0; + goto loop; + } + /* + * Not enough memory, jab the pageout daemon and wait til the + * coast is clear. + */ +#ifdef DEBUG + if (swapdebug & SDB_FOLLOW) + printf("sched: no room for pid %d(%s), free %d\n", + rp->p_pid, rp->p_comm, vm_page_free_count); +#endif + (void) splhigh(); + VM_WAIT; + (void) spl0(); +#ifdef DEBUG + if (swapdebug & SDB_FOLLOW) + printf("sched: room again, free %d\n", vm_page_free_count); +#endif + goto loop; +} + +#define swappable(p) \ + (((p)->p_flag & (SSYS|SULOCK|SLOAD|SKEEP|SWEXIT|SPHYSIO)) == SLOAD) + +/* + * Swapout is driven by the pageout daemon. Very simple, we find eligible + * procs and unwire their u-areas. We try to always "swap" at least one + * process in case we need the room for a swapin. 
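 *
 * (Editorial note, not part of the original commit: the swappable()
 *  test above requires SLOAD to be set and SSYS, SULOCK, SKEEP, SWEXIT
 *  and SPHYSIO to be clear, i.e. only an ordinary, resident process
 *  that is not exiting and not locked for physical I/O is eligible.)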
+ */ +swapout_threads() +{ + register struct proc *rp; + struct proc *outp, *outp2; + int outpri, outpri2; + int didswap = 0; + extern int maxslp; + +#ifdef DEBUG + if (!enableswap) + return; +#endif + outp = outp2 = NULL; + outpri = outpri2 = -20000; + for (rp = allproc; rp != NULL; rp = rp->p_nxt) { + if (!swappable(rp)) + continue; + switch(rp->p_stat) { + case SRUN: + if (rp->p_slptime > outpri2) { + outp2 = rp; + outpri2 = rp->p_slptime; + } + continue; + + case SSLEEP: + case SSTOP: + if (rp->p_slptime > maxslp) { + swapout(rp); + didswap++; + } else if (rp->p_slptime > outpri) { + outp = rp; + outpri = rp->p_slptime; + } + continue; + } + } + /* + * If we didn't get rid of any real duds, toss out the next most + * likely sleeping/stopped or running candidate. We only do this + * if we are real low on memory since we don't gain much by doing + * it (UPAGES pages). + */ + if (didswap == 0 && + vm_page_free_count <= atop(round_page(ctob(UPAGES)))) { + if ((rp = outp) == 0) + rp = outp2; +#ifdef DEBUG + if (swapdebug & SDB_SWAPOUT) + printf("swapout_threads: no duds, try procp %x\n", rp); +#endif + if (rp) + swapout(rp); + } +} + +swapout(p) + register struct proc *p; +{ + vm_offset_t addr; + vm_size_t size; + +#ifdef DEBUG + if (swapdebug & SDB_SWAPOUT) + printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n", + p->p_pid, p->p_comm, p->p_addr, p->p_stat, + p->p_slptime, vm_page_free_count); +#endif + size = round_page(ctob(UPAGES)); + addr = (vm_offset_t) p->p_addr; + vm_map_pageable(kernel_map, addr, addr+size, TRUE); + pmap_collect(vm_map_pmap(p->p_map)); + (void) splhigh(); + p->p_flag &= ~SLOAD; + if (p->p_stat == SRUN) + remrq(p); + (void) spl0(); + p->p_time = 0; +} + +/* + * The rest of these routines fake thread handling + */ + +void +assert_wait(event, ruptible) + int event; + boolean_t ruptible; +{ +#ifdef lint + ruptible++; +#endif + u.u_procp->p_thread = event; +} + +void +thread_block() +{ + int s = splhigh(); + + if (u.u_procp->p_thread) + sleep((caddr_t)u.u_procp->p_thread, PVM); + splx(s); +} + +void +thread_sleep(event, lock, ruptible) + int event; + simple_lock_t lock; + boolean_t ruptible; +{ +#ifdef lint + ruptible++; +#endif + int s = splhigh(); + + u.u_procp->p_thread = event; + simple_unlock(lock); + if (u.u_procp->p_thread) + sleep((caddr_t)u.u_procp->p_thread, PVM); + splx(s); +} + +void +thread_wakeup(event) + int event; +{ + int s = splhigh(); + + wakeup((caddr_t)event); + splx(s); +} + +/* + * DEBUG stuff + */ + +int indent = 0; + +/*ARGSUSED2*/ +iprintf(a, b, c, d, e, f, g, h) + char *a; +{ + register int i; + + for (i = indent; i > 0; ) { + if (i >= 8) { + putchar('\t', 1, (caddr_t)0); + i -= 8; + } else { + putchar(' ', 1, (caddr_t)0); + i--; + } + } + printf(a, b, c, d, e, f, g, h); +} diff --git a/usr/src/sys/vm/vm_inherit.h b/usr/src/sys/vm/vm_inherit.h new file mode 100644 index 0000000000..30ca2937d5 --- /dev/null +++ b/usr/src/sys/vm/vm_inherit.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_inherit.h 7.1 (Berkeley) %G% + */ + +/* + * Virtual memory map inheritance definitions. 
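 *
 * (Editorial illustration, not part of the original commit: these codes
 *  are consumed by vm_map_inherit().  The disabled vfork support in
 *  vm_glue.c, for instance, marks the parent's data segment as shared
 *  with the child,
 *
 *	(void) vm_map_inherit(u.u_procp->p_map, addr, addr + size,
 *		VM_INHERIT_SHARE);
 *
 *  and switches it back to VM_INHERIT_COPY once the child is set up.)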
+ */ + +#ifndef _VM_INHERIT_ +#define _VM_INHERIT_ + +/* + * Types defined: + * + * vm_inherit_t inheritance codes. + */ + +typedef int vm_inherit_t; /* might want to change this */ + +/* + * Enumeration of valid values for vm_inherit_t. + */ + +#define VM_INHERIT_SHARE ((vm_inherit_t) 0) /* share with child */ +#define VM_INHERIT_COPY ((vm_inherit_t) 1) /* copy into child */ +#define VM_INHERIT_NONE ((vm_inherit_t) 2) /* absent from child */ +#define VM_INHERIT_DONATE_COPY ((vm_inherit_t) 3) /* copy and delete */ + +#define VM_INHERIT_DEFAULT VM_INHERIT_COPY + +#endif _VM_INHERIT_ diff --git a/usr/src/sys/vm/vm_init.c b/usr/src/sys/vm/vm_init.c new file mode 100644 index 0000000000..051fedc805 --- /dev/null +++ b/usr/src/sys/vm/vm_init.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_init.c 7.1 (Berkeley) %G% + */ + +/* + * Initialize the Virtual Memory subsystem. + */ + +#include "types.h" +#include "../vm/vm_param.h" +#include "lock.h" +#include "../vm/vm_object.h" +#include "../vm/vm_map.h" +#include "../vm/vm_page.h" +#include "../vm/vm_kern.h" + +/* + * vm_init initializes the virtual memory system. + * This is done only by the first cpu up. + * + * The start and end address of physical memory is passed in. + */ + +void vm_mem_init() +{ + extern vm_offset_t avail_start, avail_end; + extern vm_offset_t virtual_avail, virtual_end; + + /* + * Initializes resident memory structures. + * From here on, all physical memory is accounted for, + * and we use only virtual addresses. + */ + + virtual_avail = vm_page_startup(avail_start, avail_end, virtual_avail); + /* + * Initialize other VM packages + */ + vm_object_init(); + vm_map_init(); + kmem_init(virtual_avail, virtual_end); + pmap_init(avail_start, avail_end); + vm_pager_init(); +} diff --git a/usr/src/sys/vm/vm_kern.c b/usr/src/sys/vm/vm_kern.c new file mode 100644 index 0000000000..61f0a0717e --- /dev/null +++ b/usr/src/sys/vm/vm_kern.c @@ -0,0 +1,497 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_kern.c 7.1 (Berkeley) %G% + */ + +/* + * Kernel memory management. + */ + +#include "types.h" + +#include "../vm/vm_param.h" +#include "../vm/vm_map.h" +#include "../vm/vm_page.h" +#include "../vm/vm_pageout.h" +#include "../vm/vm_kern.h" + +/* + * kmem_alloc_pageable: + * + * Allocate pageable memory to the kernel's address map. + * map must be "kernel_map" below. 
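 *
 * (Editorial illustration, not part of the original commit: procdup()
 *  in vm_glue.c uses this routine to allocate the new process's u-area
 *  and then wires it down,
 *
 *	size = round_page(ctob(UPAGES));
 *	addr = kmem_alloc_pageable(kernel_map, size);
 *	vm_map_pageable(kernel_map, addr, addr + size, FALSE);
 *  )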
+ */ + +vm_offset_t kmem_alloc_pageable(map, size) + vm_map_t map; + register vm_size_t size; +{ + vm_offset_t addr; + register int result; + +#if 0 + if (map != kernel_map) + panic("kmem_alloc_pageable: not called with kernel_map"); +#endif 0 + + size = round_page(size); + + addr = vm_map_min(map); + result = vm_map_find(map, VM_OBJECT_NULL, (vm_offset_t) 0, + &addr, size, TRUE); + if (result != KERN_SUCCESS) { + return(0); + } + + return(addr); +} + +/* + * Allocate wired-down memory in the kernel's address map + * or a submap. + */ +vm_offset_t kmem_alloc(map, size) + register vm_map_t map; + register vm_size_t size; +{ + vm_offset_t addr; + register int result; + register vm_offset_t offset; + extern vm_object_t kernel_object; + vm_offset_t i; + + size = round_page(size); + + /* + * Use the kernel object for wired-down kernel pages. + * Assume that no region of the kernel object is + * referenced more than once. + */ + + addr = vm_map_min(map); + result = vm_map_find(map, VM_OBJECT_NULL, (vm_offset_t) 0, + &addr, size, TRUE); + if (result != KERN_SUCCESS) { + return(0); + } + + /* + * Since we didn't know where the new region would + * start, we couldn't supply the correct offset into + * the kernel object. Re-allocate that address + * region with the correct offset. + */ + + offset = addr - VM_MIN_KERNEL_ADDRESS; + vm_object_reference(kernel_object); + + vm_map_lock(map); + vm_map_delete(map, addr, addr + size); + vm_map_insert(map, kernel_object, offset, addr, addr + size); + vm_map_unlock(map); + + /* + * Guarantee that there are pages already in this object + * before calling vm_map_pageable. This is to prevent the + * following scenario: + * + * 1) Threads have swapped out, so that there is a + * pager for the kernel_object. + * 2) The kmsg zone is empty, and so we are kmem_allocing + * a new page for it. + * 3) vm_map_pageable calls vm_fault; there is no page, + * but there is a pager, so we call + * pager_data_request. But the kmsg zone is empty, + * so we must kmem_alloc. + * 4) goto 1 + * 5) Even if the kmsg zone is not empty: when we get + * the data back from the pager, it will be (very + * stale) non-zero data. kmem_alloc is defined to + * return zero-filled memory. + * + * We're intentionally not activating the pages we allocate + * to prevent a race with page-out. vm_map_pageable will wire + * the pages. + */ + + vm_object_lock(kernel_object); + for (i = 0 ; i < size; i+= PAGE_SIZE) { + vm_page_t mem; + + while ((mem = vm_page_alloc(kernel_object, offset+i)) + == VM_PAGE_NULL) { + vm_object_unlock(kernel_object); + VM_WAIT; + vm_object_lock(kernel_object); + } + vm_page_zero_fill(mem); + mem->busy = FALSE; + } + vm_object_unlock(kernel_object); + + /* + * And finally, mark the data as non-pageable. + */ + + (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE); + + /* + * Try to coalesce the map + */ + + vm_map_simplify(map, addr); + + return(addr); +} + +/* + * kmem_free: + * + * Release a region of kernel virtual memory allocated + * with kmem_alloc, and return the physical pages + * associated with that region. + */ +void kmem_free(map, addr, size) + vm_map_t map; + register vm_offset_t addr; + vm_size_t size; +{ + (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size)); +} + +/* + * kmem_suballoc: + * + * Allocates a map to manage a subrange + * of the kernel virtual address space. 
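 *
 * (Editorial illustration, not part of the original commit: the submaps
 *  declared in vm_kern.h are created this way during bootstrap.  A
 *  hypothetical call, with the size and the minaddr/maxaddr variables
 *  chosen purely for illustration, would look like
 *
 *	mb_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 *		64 * PAGE_SIZE, TRUE);
 *  )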
+ * + * Arguments are as follows: + * + * parent Map to take range from + * size Size of range to find + * min, max Returned endpoints of map + * pageable Can the region be paged + */ +vm_map_t kmem_suballoc(parent, min, max, size, pageable) + register vm_map_t parent; + vm_offset_t *min, *max; + register vm_size_t size; + boolean_t pageable; +{ + register int ret; + vm_map_t result; + + size = round_page(size); + + *min = (vm_offset_t) vm_map_min(parent); + ret = vm_map_find(parent, VM_OBJECT_NULL, (vm_offset_t) 0, + min, size, TRUE); + if (ret != KERN_SUCCESS) { + printf("kmem_suballoc: bad status return of %d.\n", ret); + panic("kmem_suballoc"); + } + *max = *min + size; + pmap_reference(vm_map_pmap(parent)); + result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable); + if (result == VM_MAP_NULL) + panic("kmem_suballoc: cannot create submap"); + if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS) + panic("kmem_suballoc: unable to change range to submap"); + return(result); +} + +/* + * vm_move: + * + * Move memory from source to destination map, possibly deallocating + * the source map reference to the memory. + * + * Parameters are as follows: + * + * src_map Source address map + * src_addr Address within source map + * dst_map Destination address map + * num_bytes Amount of data (in bytes) to copy/move + * src_dealloc Should source be removed after copy? + * + * Assumes the src and dst maps are not already locked. + * + * Returns new destination address or 0 (if a failure occurs). + */ +vm_offset_t vm_move(src_map,src_addr,dst_map,num_bytes,src_dealloc) + vm_map_t src_map; + register vm_offset_t src_addr; + register vm_map_t dst_map; + vm_offset_t num_bytes; + boolean_t src_dealloc; +{ + register vm_offset_t src_start; /* Beginning of region */ + register vm_size_t src_size; /* Size of rounded region */ + vm_offset_t dst_start; /* destination address */ + register int result; + + /* + * Page-align the source region + */ + + src_start = trunc_page(src_addr); + src_size = round_page(src_addr + num_bytes) - src_start; + + /* + * If there's no destination, we can be at most deallocating + * the source range. + */ + if (dst_map == VM_MAP_NULL) { + if (src_dealloc) + if (vm_deallocate(src_map, src_start, src_size) + != KERN_SUCCESS) { + printf("vm_move: deallocate of source"); + printf(" failed, dealloc_only clause\n"); + } + return(0); + } + + /* + * Allocate a place to put the copy + */ + + dst_start = (vm_offset_t) 0; + if ((result = vm_allocate(dst_map, &dst_start, src_size, TRUE)) + == KERN_SUCCESS) { + /* + * Perform the copy, asking for deallocation if desired + */ + result = vm_map_copy(dst_map, src_map, dst_start, src_size, + src_start, FALSE, src_dealloc); + } + + /* + * Return the destination address corresponding to + * the source address given (rather than the front + * of the newly-allocated page). + */ + + if (result == KERN_SUCCESS) + return(dst_start + (src_addr - src_start)); + return(0); +} + +/* + * Allocate wired-down memory in the kernel's address map for the higher + * level kernel memory allocator (kern/kern_malloc.c). We cannot use + * kmem_alloc() because we may need to allocate memory at interrupt + * level where we cannot block (canwait == FALSE). + * + * This routine has its own private kernel submap (kmem_map) and object + * (kmem_object). This, combined with the fact that only malloc uses + * this routine, ensures that we will never block in map or object waits. 
+ * + * Note that this still only works in a uni-processor environment and + * when called at splhigh(). + * + * We don't worry about expanding the map (adding entries) since entries + * for wired maps are statically allocated. + */ +vm_offset_t +kmem_malloc(map, size, canwait) + register vm_map_t map; + register vm_size_t size; + boolean_t canwait; +{ + register vm_offset_t offset, i; + vm_map_entry_t entry; + vm_offset_t addr; + vm_page_t m; + extern vm_object_t kmem_object; + + if (map != kmem_map && map != mb_map) + panic("kern_malloc_alloc: map != {kmem,mb}_map"); + + size = round_page(size); + addr = vm_map_min(map); + + if (vm_map_find(map, VM_OBJECT_NULL, (vm_offset_t)0, + &addr, size, TRUE) != KERN_SUCCESS) + return(0); + + /* + * Since we didn't know where the new region would start, + * we couldn't supply the correct offset into the kmem object. + * Re-allocate that address region with the correct offset. + */ + offset = addr - vm_map_min(kmem_map); + vm_object_reference(kmem_object); + + vm_map_lock(map); + vm_map_delete(map, addr, addr + size); + vm_map_insert(map, kmem_object, offset, addr, addr + size); + + /* + * If we can wait, just mark the range as wired + * (will fault pages as necessary). + */ + if (canwait) { + vm_map_unlock(map); + (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, + FALSE); + vm_map_simplify(map, addr); + return(addr); + } + + /* + * If we cannot wait then we must allocate all memory up front, + * pulling it off the active queue to prevent pageout. + */ + vm_object_lock(kmem_object); + for (i = 0; i < size; i += PAGE_SIZE) { + m = vm_page_alloc(kmem_object, offset + i); + + /* + * Ran out of space, free everything up and return. + * Don't need to lock page queues here as we know + * that the pages we got aren't on any queues. + */ + if (m == VM_PAGE_NULL) { + while (i != 0) { + i -= PAGE_SIZE; + m = vm_page_lookup(kmem_object, offset + i); + vm_page_free(m); + } + vm_object_unlock(kmem_object); + vm_map_delete(map, addr, addr + size); + vm_map_unlock(map); + return(0); + } +#if 0 + vm_page_zero_fill(m); +#endif + m->busy = FALSE; + } + vm_object_unlock(kmem_object); + + /* + * Mark map entry as non-pageable. + * Assert: vm_map_insert() will never be able to extend the previous + * entry so there will be a new entry exactly corresponding to this + * address range and it will have wired_count == 0. + */ + if (!vm_map_lookup_entry(map, addr, &entry) || + entry->start != addr || entry->end != addr + size || + entry->wired_count) + panic("kmem_malloc: entry not found or misaligned"); + entry->wired_count++; + + /* + * Loop thru pages, entering them in the pmap. + * (We cannot add them to the wired count without + * wrapping the vm_page_queue_lock in splimp...) + */ + for (i = 0; i < size; i += PAGE_SIZE) { + vm_object_lock(kmem_object); + m = vm_page_lookup(kmem_object, offset + i); + vm_object_unlock(kmem_object); + pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m), + VM_PROT_DEFAULT, TRUE); + } + vm_map_unlock(map); + + vm_map_simplify(map, addr); + return(addr); +} + +/* + * kmem_alloc_wait + * + * Allocates pageable memory from a sub-map of the kernel. If the submap + * has no room, the caller sleeps waiting for more memory in the submap. + * + */ +vm_offset_t kmem_alloc_wait(map, size) + vm_map_t map; + vm_size_t size; +{ + vm_offset_t addr; + int result; + + size = round_page(size); + + do { + /* + * To make this work for more than one map, + * use the map's lock to lock out sleepers/wakers. 
+ * Unfortunately, vm_map_find also grabs the map lock. + */ + vm_map_lock(map); + lock_set_recursive(&map->lock); + + addr = vm_map_min(map); + result = vm_map_find(map, VM_OBJECT_NULL, (vm_offset_t) 0, + &addr, size, TRUE); + + lock_clear_recursive(&map->lock); + if (result != KERN_SUCCESS) { + + if ( (vm_map_max(map) - vm_map_min(map)) < size ) { + vm_map_unlock(map); + return(0); + } + + assert_wait((int)map, TRUE); + vm_map_unlock(map); + thread_block(); + } + else { + vm_map_unlock(map); + } + + } while (result != KERN_SUCCESS); + + return(addr); +} + +/* + * kmem_free_wakeup + * + * Returns memory to a submap of the kernel, and wakes up any threads + * waiting for memory in that map. + */ +void kmem_free_wakeup(map, addr, size) + vm_map_t map; + vm_offset_t addr; + vm_size_t size; +{ + vm_map_lock(map); + (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); + thread_wakeup((int)map); + vm_map_unlock(map); +} + +/* + * kmem_init: + * + * Initialize the kernel's virtual memory map, taking + * into account all memory allocated up to this time. + */ +void kmem_init(start, end) + vm_offset_t start; + vm_offset_t end; +{ + vm_offset_t addr; + extern vm_map_t kernel_map; + + addr = VM_MIN_KERNEL_ADDRESS; + kernel_map = vm_map_create(pmap_kernel(), addr, end, FALSE); + (void) vm_map_find(kernel_map, VM_OBJECT_NULL, (vm_offset_t) 0, + &addr, (start - VM_MIN_KERNEL_ADDRESS), + FALSE); +} diff --git a/usr/src/sys/vm/vm_kern.h b/usr/src/sys/vm/vm_kern.h new file mode 100644 index 0000000000..dcbd9bd62c --- /dev/null +++ b/usr/src/sys/vm/vm_kern.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_kern.h 7.1 (Berkeley) %G% + */ + +/* + * Kernel memory management definitions. + */ + +void kmem_init(); +vm_offset_t kmem_alloc(); +vm_offset_t kmem_alloc_pageable(); +void kmem_free(); +vm_map_t kmem_suballoc(); + +vm_offset_t vm_move(); + +vm_offset_t kmem_alloc_wait(); +void kmem_free_wakeup(); + +vm_map_t kernel_map; +vm_map_t mb_map; +vm_map_t kmem_map; +vm_map_t exec_map; +vm_map_t phys_map; +vm_map_t buffer_map; diff --git a/usr/src/sys/vm/vm_map.c b/usr/src/sys/vm/vm_map.c new file mode 100644 index 0000000000..966966239b --- /dev/null +++ b/usr/src/sys/vm/vm_map.c @@ -0,0 +1,2367 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_map.c 7.1 (Berkeley) %G% + */ + +/* + * Virtual memory mapping module. + */ + +#include "types.h" +#include "malloc.h" +#include "../vm/vm_param.h" +#include "../vm/vm_map.h" +#include "../vm/vm_page.h" +#include "../vm/vm_object.h" + +/* + * Virtual memory maps provide for the mapping, protection, + * and sharing of virtual memory objects. 
In addition, + * this module provides for an efficient virtual copy of + * memory from one map to another. + * + * Synchronization is required prior to most operations. + * + * Maps consist of an ordered doubly-linked list of simple + * entries; a single hint is used to speed up lookups. + * + * In order to properly represent the sharing of virtual + * memory regions among maps, the map structure is bi-level. + * Top-level ("address") maps refer to regions of sharable + * virtual memory. These regions are implemented as + * ("sharing") maps, which then refer to the actual virtual + * memory objects. When two address maps "share" memory, + * their top-level maps both have references to the same + * sharing map. When memory is virtual-copied from one + * address map to another, the references in the sharing + * maps are actually copied -- no copying occurs at the + * virtual memory object level. + * + * Since portions of maps are specified by start/end addreses, + * which may not align with existing map entries, all + * routines merely "clip" entries to these start/end values. + * [That is, an entry is split into two, bordering at a + * start or end value.] Note that these clippings may not + * always be necessary (as the two resulting entries are then + * not changed); however, the clipping is done for convenience. + * No attempt is currently made to "glue back together" two + * abutting entries. + * + * As mentioned above, virtual copy operations are performed + * by copying VM object references from one sharing map to + * another, and then marking both regions as copy-on-write. + * It is important to note that only one writeable reference + * to a VM object region exists in any map -- this means that + * shadow object creation can be delayed until a write operation + * occurs. + */ + +/* + * vm_map_init: + * + * Initialize the vm_map module. Must be called before + * any other vm_map routines. + * + * Map and entry structures are allocated from the general + * purpose memory pool with some exceptions: + * + * - The kernel map and kmem submap are allocated statically. + * - Kernel map entries are allocated out of a static pool. + * + * These restrictions are necessary since malloc() uses the + * maps and requires map entries. + */ + +vm_offset_t kentry_data; +vm_size_t kentry_data_size; +vm_map_entry_t kentry_free; +vm_map_t kmap_free; + +void vm_map_init() +{ + register int i; + register vm_map_entry_t mep; + vm_map_t mp; + + /* + * Static map structures for allocation before initialization of + * kernel map or kmem map. vm_map_create knows how to deal with them. + */ + kmap_free = mp = (vm_map_t) kentry_data; + i = MAX_KMAP; + while (--i > 0) { + mp->header.next = (vm_map_entry_t) (mp + 1); + mp++; + } + mp++->header.next = VM_MAP_ENTRY_NULL; + + /* + * Form a free list of statically allocated kernel map entries + * with the rest. + */ + kentry_free = mep = (vm_map_entry_t) mp; + i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep; + while (--i > 0) { + mep->next = mep + 1; + mep++; + } + mep->next = VM_MAP_ENTRY_NULL; +} + +/* + * vm_map_create: + * + * Creates and returns a new empty VM map with + * the given physical map structure, and having + * the given lower and upper address bounds. 
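 *
 * (Editorial illustration, not part of the original commit: kmem_init()
 *  in vm_kern.c creates the kernel map itself with
 *
 *	kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_ADDRESS,
 *		end, FALSE);
 *
 *  and kmem_suballoc() uses the same routine to build submaps that
 *  share the kernel pmap.)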
+ */ +vm_map_t vm_map_create(pmap, min, max, pageable) + pmap_t pmap; + vm_offset_t min, max; + boolean_t pageable; +{ + register vm_map_t result; + extern vm_map_t kernel_map, kmem_map; + + if (kmem_map == VM_MAP_NULL) { + result = kmap_free; + kmap_free = (vm_map_t) result->header.next; + } else + MALLOC(result, vm_map_t, sizeof(struct vm_map), + M_VMMAP, M_WAITOK); + + if (result == VM_MAP_NULL) + panic("vm_map_create: out of maps"); + + result->header.next = result->header.prev = &result->header; + result->nentries = 0; + result->size = 0; + result->ref_count = 1; + result->pmap = pmap; + result->is_main_map = TRUE; + result->min_offset = min; + result->max_offset = max; + result->entries_pageable = pageable; + result->first_free = &result->header; + result->hint = &result->header; + result->timestamp = 0; + lock_init(&result->lock, TRUE); + simple_lock_init(&result->ref_lock); + simple_lock_init(&result->hint_lock); + return(result); +} + +/* + * vm_map_entry_create: [ internal use only ] + * + * Allocates a VM map entry for insertion. + * No entry fields are filled in. This routine is + */ +vm_map_entry_t vm_map_entry_create(map) + vm_map_t map; +{ + vm_map_entry_t entry; + extern vm_map_t kernel_map, kmem_map, mb_map; + + if (map == kernel_map || map == kmem_map || map == mb_map) { + if (entry = kentry_free) + kentry_free = kentry_free->next; + } else + MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry), + M_VMMAPENT, M_WAITOK); + if (entry == VM_MAP_ENTRY_NULL) + panic("vm_map_entry_create: out of map entries"); + + return(entry); +} + +/* + * vm_map_entry_dispose: [ internal use only ] + * + * Inverse of vm_map_entry_create. + */ +void vm_map_entry_dispose(map, entry) + vm_map_t map; + vm_map_entry_t entry; +{ + extern vm_map_t kernel_map, kmem_map, mb_map; + + if (map == kernel_map || map == kmem_map || map == mb_map) { + entry->next = kentry_free; + kentry_free = entry; + } else + FREE(entry, M_VMMAPENT); +} + +/* + * vm_map_entry_{un,}link: + * + * Insert/remove entries from maps. + */ +#define vm_map_entry_link(map, after_where, entry) \ + { \ + (map)->nentries++; \ + (entry)->prev = (after_where); \ + (entry)->next = (after_where)->next; \ + (entry)->prev->next = (entry); \ + (entry)->next->prev = (entry); \ + } +#define vm_map_entry_unlink(map, entry) \ + { \ + (map)->nentries--; \ + (entry)->next->prev = (entry)->prev; \ + (entry)->prev->next = (entry)->next; \ + } + +/* + * vm_map_reference: + * + * Creates another valid reference to the given map. + * + */ +void vm_map_reference(map) + register vm_map_t map; +{ + if (map == VM_MAP_NULL) + return; + + simple_lock(&map->ref_lock); + map->ref_count++; + simple_unlock(&map->ref_lock); +} + +/* + * vm_map_deallocate: + * + * Removes a reference from the specified map, + * destroying it if no references remain. + * The map should not be locked. + */ +void vm_map_deallocate(map) + register vm_map_t map; +{ + register int c; + + if (map == VM_MAP_NULL) + return; + + simple_lock(&map->ref_lock); + c = --map->ref_count; + simple_unlock(&map->ref_lock); + + if (c > 0) { + return; + } + + /* + * Lock the map, to wait out all other references + * to it. + */ + + vm_map_lock(map); + + (void) vm_map_delete(map, map->min_offset, map->max_offset); + + pmap_destroy(map->pmap); + + FREE(map, M_VMMAP); +} + +/* + * vm_map_insert: [ internal use only ] + * + * Inserts the given whole VM object into the target + * map at the specified address range. The object's + * size should match that of the address range. 
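vm_map_entry_create above falls back to a static free list (kentry_free) for the kernel, kmem and mbuf maps because malloc() itself consumes map entries and so cannot be used while servicing those maps. A sketch of that bootstrap free-list technique; the names here (static_pool, entry_alloc, entry_dispose) are hypothetical:

#include <stddef.h>

#define NSTATIC 64

struct map_entry { struct map_entry *next; /* ... other fields ... */ };

static struct map_entry static_pool[NSTATIC];
static struct map_entry *entry_free;		/* head of the free list */

static void pool_init(void)
{
	int i;

	for (i = 0; i < NSTATIC - 1; i++)
		static_pool[i].next = &static_pool[i + 1];
	static_pool[NSTATIC - 1].next = NULL;
	entry_free = &static_pool[0];
}

static struct map_entry *entry_alloc(void)
{
	struct map_entry *e = entry_free;

	if (e != NULL)
		entry_free = e->next;		/* pop from the free list */
	return (e);				/* NULL means the static pool is exhausted */
}

static void entry_dispose(struct map_entry *e)
{
	e->next = entry_free;			/* push back onto the free list */
	entry_free = e;
}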
+ * + * Requires that the map be locked, and leaves it so. + */ +vm_map_insert(map, object, offset, start, end) + vm_map_t map; + vm_object_t object; + vm_offset_t offset; + vm_offset_t start; + vm_offset_t end; +{ + register vm_map_entry_t new_entry; + register vm_map_entry_t prev_entry; + vm_map_entry_t temp_entry; + + /* + * Check that the start and end points are not bogus. + */ + + if ((start < map->min_offset) || (end > map->max_offset) || + (start >= end)) + return(KERN_INVALID_ADDRESS); + + /* + * Find the entry prior to the proposed + * starting address; if it's part of an + * existing entry, this range is bogus. + */ + + if (vm_map_lookup_entry(map, start, &temp_entry)) + return(KERN_NO_SPACE); + + prev_entry = temp_entry; + + /* + * Assert that the next entry doesn't overlap the + * end point. + */ + + if ((prev_entry->next != &map->header) && + (prev_entry->next->start < end)) + return(KERN_NO_SPACE); + + /* + * See if we can avoid creating a new entry by + * extending one of our neighbors. + */ + + if (object == VM_OBJECT_NULL) { + if ((prev_entry != &map->header) && + (prev_entry->end == start) && + (map->is_main_map) && + (prev_entry->is_a_map == FALSE) && + (prev_entry->is_sub_map == FALSE) && + (prev_entry->inheritance == VM_INHERIT_DEFAULT) && + (prev_entry->protection == VM_PROT_DEFAULT) && + (prev_entry->max_protection == VM_PROT_DEFAULT) && + (prev_entry->wired_count == 0)) { + + if (vm_object_coalesce(prev_entry->object.vm_object, + VM_OBJECT_NULL, + prev_entry->offset, + (vm_offset_t) 0, + (vm_size_t)(prev_entry->end + - prev_entry->start), + (vm_size_t)(end - prev_entry->end))) { + /* + * Coalesced the two objects - can extend + * the previous map entry to include the + * new range. + */ + map->size += (end - prev_entry->end); + prev_entry->end = end; + return(KERN_SUCCESS); + } + } + } + + /* + * Create a new entry + */ + + new_entry = vm_map_entry_create(map); + new_entry->start = start; + new_entry->end = end; + + new_entry->is_a_map = FALSE; + new_entry->is_sub_map = FALSE; + new_entry->object.vm_object = object; + new_entry->offset = offset; + + new_entry->copy_on_write = FALSE; + new_entry->needs_copy = FALSE; + + if (map->is_main_map) { + new_entry->inheritance = VM_INHERIT_DEFAULT; + new_entry->protection = VM_PROT_DEFAULT; + new_entry->max_protection = VM_PROT_DEFAULT; + new_entry->wired_count = 0; + } + + /* + * Insert the new entry into the list + */ + + vm_map_entry_link(map, prev_entry, new_entry); + map->size += new_entry->end - new_entry->start; + + /* + * Update the free space hint + */ + + if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start)) + map->first_free = new_entry; + + return(KERN_SUCCESS); +} + +/* + * SAVE_HINT: + * + * Saves the specified entry as the hint for + * future lookups. Performs necessary interlocks. + */ +#define SAVE_HINT(map,value) \ + simple_lock(&(map)->hint_lock); \ + (map)->hint = (value); \ + simple_unlock(&(map)->hint_lock); + +/* + * vm_map_lookup_entry: [ internal use only ] + * + * Finds the map entry containing (or + * immediately preceding) the specified address + * in the given map; the entry is returned + * in the "entry" parameter. The boolean + * result indicates whether the address is + * actually contained in the map. 
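The neighbor-extension path in vm_map_insert fires only when the previous entry ends exactly where the new range begins and all of its attributes are still defaults; the entry is then stretched rather than a new one created. A toy version of just the abutment test (hypothetical names; objects, protections and wiring omitted):

#include <stddef.h>

struct range { unsigned long start, end; };	/* half-open [start, end) */

/*
 * If 'prev' ends exactly where the new allocation begins, stretch it
 * instead of creating a new entry.  The real test also requires default
 * protection/inheritance, no wiring, and a successful vm_object_coalesce.
 */
static int try_extend(struct range *prev, unsigned long start, unsigned long end)
{
	if (prev != NULL && prev->end == start) {
		prev->end = end;		/* one entry now covers both ranges */
		return (1);			/* caller can skip vm_map_entry_create */
	}
	return (0);
}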
+ */ +boolean_t vm_map_lookup_entry(map, address, entry) + register vm_map_t map; + register vm_offset_t address; + vm_map_entry_t *entry; /* OUT */ +{ + register vm_map_entry_t cur; + register vm_map_entry_t last; + + /* + * Start looking either from the head of the + * list, or from the hint. + */ + + simple_lock(&map->hint_lock); + cur = map->hint; + simple_unlock(&map->hint_lock); + + if (cur == &map->header) + cur = cur->next; + + if (address >= cur->start) { + /* + * Go from hint to end of list. + * + * But first, make a quick check to see if + * we are already looking at the entry we + * want (which is usually the case). + * Note also that we don't need to save the hint + * here... it is the same hint (unless we are + * at the header, in which case the hint didn't + * buy us anything anyway). + */ + last = &map->header; + if ((cur != last) && (cur->end > address)) { + *entry = cur; + return(TRUE); + } + } + else { + /* + * Go from start to hint, *inclusively* + */ + last = cur->next; + cur = map->header.next; + } + + /* + * Search linearly + */ + + while (cur != last) { + if (cur->end > address) { + if (address >= cur->start) { + /* + * Save this lookup for future + * hints, and return + */ + + *entry = cur; + SAVE_HINT(map, cur); + return(TRUE); + } + break; + } + cur = cur->next; + } + *entry = cur->prev; + SAVE_HINT(map, *entry); + return(FALSE); +} + +/* + * vm_map_find finds an unallocated region in the target address + * map with the given length. The search is defined to be + * first-fit from the specified address; the region found is + * returned in the same parameter. + * + */ +vm_map_find(map, object, offset, addr, length, find_space) + vm_map_t map; + vm_object_t object; + vm_offset_t offset; + vm_offset_t *addr; /* IN/OUT */ + vm_size_t length; + boolean_t find_space; +{ + register vm_map_entry_t entry; + register vm_offset_t start; + register vm_offset_t end; + int result; + + start = *addr; + + vm_map_lock(map); + + if (find_space) { + /* + * Calculate the first possible address. + */ + + if (start < map->min_offset) + start = map->min_offset; + if (start > map->max_offset) { + vm_map_unlock(map); + return (KERN_NO_SPACE); + } + + /* + * Look for the first possible address; + * if there's already something at this + * address, we have to start after it. + */ + + if (start == map->min_offset) { + if ((entry = map->first_free) != &map->header) + start = entry->end; + } else { + vm_map_entry_t tmp_entry; + if (vm_map_lookup_entry(map, start, &tmp_entry)) + start = tmp_entry->end; + entry = tmp_entry; + } + + /* + * In any case, the "entry" always precedes + * the proposed new region throughout the + * loop: + */ + + while (TRUE) { + register vm_map_entry_t next; + + /* + * Find the end of the proposed new region. + * Be sure we didn't go beyond the end, or + * wrap around the address. + */ + + end = start + length; + + if ((end > map->max_offset) || (end < start)) { + vm_map_unlock(map); + return (KERN_NO_SPACE); + } + + /* + * If there are no more entries, we must win. + */ + + next = entry->next; + if (next == &map->header) + break; + + /* + * If there is another entry, it must be + * after the end of the potential new region. + */ + + if (next->start >= end) + break; + + /* + * Didn't fit -- move to the next entry. 
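The loop above is a first-fit scan over the sorted entry list: propose [start, start+length), and whenever an existing entry overlaps, restart just past it. The same shape over a plain array of allocated ranges, as an illustrative sketch with hypothetical names (the real code also uses the first_free hint and runs under the map lock):

#include <stddef.h>

struct alloc { unsigned long start, end; };	/* sorted, non-overlapping */

/*
 * Return the first address >= min where a hole of 'length' bytes fits,
 * or (unsigned long)-1 if no such hole exists below 'max'.
 */
static unsigned long
first_fit(const struct alloc *a, size_t n, unsigned long min,
    unsigned long max, unsigned long length)
{
	unsigned long start = min;
	size_t i;

	for (i = 0; i < n; i++) {
		if (start + length <= a[i].start)	/* hole before a[i] is big enough */
			break;
		if (a[i].end > start)			/* overlaps: skip past this entry */
			start = a[i].end;
	}
	if (start + length > max || start + length < start)	/* out of room, or wrapped */
		return ((unsigned long)-1);
	return (start);
}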
+ */ + + entry = next; + start = entry->end; + } + *addr = start; + + SAVE_HINT(map, entry); + } + + result = vm_map_insert(map, object, offset, start, start + length); + + vm_map_unlock(map); + return(result); +} + +/* + * vm_map_simplify_entry: [ internal use only ] + * + * Simplify the given map entry by: + * removing extra sharing maps + * [XXX maybe later] merging with a neighbor + */ +void vm_map_simplify_entry(map, entry) + vm_map_t map; + vm_map_entry_t entry; +{ +#ifdef lint + map++; +#endif lint + + /* + * If this entry corresponds to a sharing map, then + * see if we can remove the level of indirection. + * If it's not a sharing map, then it points to + * a VM object, so see if we can merge with either + * of our neighbors. + */ + + if (entry->is_sub_map) + return; + if (entry->is_a_map) { +#if 0 + vm_map_t my_share_map; + int count; + + my_share_map = entry->object.share_map; + simple_lock(&my_share_map->ref_lock); + count = my_share_map->ref_count; + simple_unlock(&my_share_map->ref_lock); + + if (count == 1) { + /* Can move the region from + * entry->start to entry->end (+ entry->offset) + * in my_share_map into place of entry. + * Later. + */ + } +#endif 0 + } + else { + /* + * Try to merge with our neighbors. + * + * Conditions for merge are: + * + * 1. entries are adjacent. + * 2. both entries point to objects + * with null pagers. + * + * If a merge is possible, we replace the two + * entries with a single entry, then merge + * the two objects into a single object. + * + * Now, all that is left to do is write the + * code! + */ + } +} + +/* + * vm_map_clip_start: [ internal use only ] + * + * Asserts that the given entry begins at or after + * the specified address; if necessary, + * it splits the entry into two. + */ +#define vm_map_clip_start(map, entry, startaddr) \ +{ \ + if (startaddr > entry->start) \ + _vm_map_clip_start(map, entry, startaddr); \ +} + +/* + * This routine is called only when it is known that + * the entry must be split. + */ +void _vm_map_clip_start(map, entry, start) + register vm_map_t map; + register vm_map_entry_t entry; + register vm_offset_t start; +{ + register vm_map_entry_t new_entry; + + /* + * See if we can simplify this entry first + */ + + vm_map_simplify_entry(map, entry); + + /* + * Split off the front portion -- + * note that we must insert the new + * entry BEFORE this one, so that + * this entry has the specified starting + * address. + */ + + new_entry = vm_map_entry_create(map); + *new_entry = *entry; + + new_entry->end = start; + entry->offset += (start - entry->start); + entry->start = start; + + vm_map_entry_link(map, entry->prev, new_entry); + + if (entry->is_a_map || entry->is_sub_map) + vm_map_reference(new_entry->object.share_map); + else + vm_object_reference(new_entry->object.vm_object); +} + +/* + * vm_map_clip_end: [ internal use only ] + * + * Asserts that the given entry ends at or before + * the specified address; if necessary, + * it splits the entry into two. + */ + +void _vm_map_clip_end(); +#define vm_map_clip_end(map, entry, endaddr) \ +{ \ + if (endaddr < entry->end) \ + _vm_map_clip_end(map, entry, endaddr); \ +} + +/* + * This routine is called only when it is known that + * the entry must be split. 
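Clipping splits one entry into two at an address: the original entry keeps the upper half (so the caller's pointer stays useful) and its object offset advances by the amount clipped off, while a copy covering the lower half is linked in front of it. A minimal sketch of that split for a bare interval-plus-offset record (hypothetical names, no object reference counting):

#include <stdlib.h>

struct ent {
	struct ent *prev, *next;
	unsigned long start, end;	/* half-open [start, end) */
	unsigned long offset;		/* offset of 'start' within the backing object */
};

/* Split 'e' at 'addr' (start < addr < end); 'e' keeps the upper half. */
static struct ent *clip_start(struct ent *e, unsigned long addr)
{
	struct ent *lower = malloc(sizeof(*lower));

	if (lower == NULL)
		return (NULL);
	*lower = *e;			/* copy all attributes */
	lower->end = addr;		/* new node covers [start, addr) */

	e->offset += addr - e->start;	/* upper half starts deeper in the object */
	e->start = addr;		/* original node now covers [addr, end) */

	/* Link 'lower' immediately before 'e'. */
	lower->prev = e->prev;
	lower->next = e;
	lower->prev->next = lower;
	e->prev = lower;
	return (lower);
}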
+ */ +void _vm_map_clip_end(map, entry, end) + register vm_map_t map; + register vm_map_entry_t entry; + register vm_offset_t end; +{ + register vm_map_entry_t new_entry; + + /* + * Create a new entry and insert it + * AFTER the specified entry + */ + + new_entry = vm_map_entry_create(map); + *new_entry = *entry; + + new_entry->start = entry->end = end; + new_entry->offset += (end - entry->start); + + vm_map_entry_link(map, entry, new_entry); + + if (entry->is_a_map || entry->is_sub_map) + vm_map_reference(new_entry->object.share_map); + else + vm_object_reference(new_entry->object.vm_object); +} + +/* + * VM_MAP_RANGE_CHECK: [ internal use only ] + * + * Asserts that the starting and ending region + * addresses fall within the valid range of the map. + */ +#define VM_MAP_RANGE_CHECK(map, start, end) \ + { \ + if (start < vm_map_min(map)) \ + start = vm_map_min(map); \ + if (end > vm_map_max(map)) \ + end = vm_map_max(map); \ + if (start > end) \ + start = end; \ + } + +/* + * vm_map_submap: [ kernel use only ] + * + * Mark the given range as handled by a subordinate map. + * + * This range must have been created with vm_map_find, + * and no other operations may have been performed on this + * range prior to calling vm_map_submap. + * + * Only a limited number of operations can be performed + * within this rage after calling vm_map_submap: + * vm_fault + * [Don't try vm_map_copy!] + * + * To remove a submapping, one must first remove the + * range from the superior map, and then destroy the + * submap (if desired). [Better yet, don't try it.] + */ +vm_map_submap(map, start, end, submap) + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; + vm_map_t submap; +{ + vm_map_entry_t entry; + register int result = KERN_INVALID_ARGUMENT; + + vm_map_lock(map); + + VM_MAP_RANGE_CHECK(map, start, end); + + if (vm_map_lookup_entry(map, start, &entry)) { + vm_map_clip_start(map, entry, start); + } + else + entry = entry->next; + + vm_map_clip_end(map, entry, end); + + if ((entry->start == start) && (entry->end == end) && + (!entry->is_a_map) && + (entry->object.vm_object == VM_OBJECT_NULL) && + (!entry->copy_on_write)) { + entry->is_a_map = FALSE; + entry->is_sub_map = TRUE; + vm_map_reference(entry->object.sub_map = submap); + result = KERN_SUCCESS; + } + vm_map_unlock(map); + + return(result); +} + +/* + * vm_map_protect: + * + * Sets the protection of the specified address + * region in the target map. If "set_max" is + * specified, the maximum protection is to be set; + * otherwise, only the current protection is affected. + */ +vm_map_protect(map, start, end, new_prot, set_max) + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; + register vm_prot_t new_prot; + register boolean_t set_max; +{ + register vm_map_entry_t current; + vm_map_entry_t entry; + + vm_map_lock(map); + + VM_MAP_RANGE_CHECK(map, start, end); + + if (vm_map_lookup_entry(map, start, &entry)) { + vm_map_clip_start(map, entry, start); + } + else + entry = entry->next; + + /* + * Make a first pass to check for protection + * violations. + */ + + current = entry; + while ((current != &map->header) && (current->start < end)) { + if (current->is_sub_map) + return(KERN_INVALID_ARGUMENT); + if ((new_prot & current->max_protection) != new_prot) { + vm_map_unlock(map); + return(KERN_PROTECTION_FAILURE); + } + + current = current->next; + } + + /* + * Go back and fix up protections. + * [Note that clipping is not necessary the second time.] 
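vm_map_protect is organized as check-then-commit: the first pass only verifies that every entry in the range tolerates the new protection, and the second pass performs the change, so the call either fails without side effects or applies to the whole range. A compact sketch of that shape (hypothetical names; protections as plain bit masks over a sorted list):

struct seg {
	struct seg *next;
	unsigned long start, end;
	unsigned prot, max_prot;	/* e.g. bit 0 = read, bit 1 = write */
};

/* Returns 0 on success, -1 if any segment in [start, end) forbids new_prot. */
static int
protect_range(struct seg *head, unsigned long start, unsigned long end,
    unsigned new_prot)
{
	struct seg *s;

	/* Pass 1: validate only; touch nothing. */
	for (s = head; s != (struct seg *)0 && s->start < end; s = s->next)
		if (s->end > start && (new_prot & s->max_prot) != new_prot)
			return (-1);

	/* Pass 2: no failure is possible now, so commit the change. */
	for (s = head; s != (struct seg *)0 && s->start < end; s = s->next)
		if (s->end > start)
			s->prot = new_prot;
	return (0);
}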
+ */ + + current = entry; + + while ((current != &map->header) && (current->start < end)) { + vm_prot_t old_prot; + + vm_map_clip_end(map, current, end); + + old_prot = current->protection; + if (set_max) + current->protection = + (current->max_protection = new_prot) & + old_prot; + else + current->protection = new_prot; + + /* + * Update physical map if necessary. + * Worry about copy-on-write here -- CHECK THIS XXX + */ + + if (current->protection != old_prot) { + +#define MASK(entry) ((entry)->copy_on_write ? ~VM_PROT_WRITE : \ + VM_PROT_ALL) +#define max(a,b) ((a) > (b) ? (a) : (b)) + + if (current->is_a_map) { + vm_map_entry_t share_entry; + vm_offset_t share_end; + + vm_map_lock(current->object.share_map); + (void) vm_map_lookup_entry( + current->object.share_map, + current->offset, + &share_entry); + share_end = current->offset + + (current->end - current->start); + while ((share_entry != + ¤t->object.share_map->header) && + (share_entry->start < share_end)) { + + pmap_protect(map->pmap, + (max(share_entry->start, + current->offset) - + current->offset + + current->start), + min(share_entry->end, + share_end) - + current->offset + + current->start, + current->protection & + MASK(share_entry)); + + share_entry = share_entry->next; + } + vm_map_unlock(current->object.share_map); + } + else + pmap_protect(map->pmap, current->start, + current->end, + current->protection & MASK(entry)); +#undef max +#undef MASK + } + current = current->next; + } + + vm_map_unlock(map); + return(KERN_SUCCESS); +} + +/* + * vm_map_inherit: + * + * Sets the inheritance of the specified address + * range in the target map. Inheritance + * affects how the map will be shared with + * child maps at the time of vm_map_fork. + */ +vm_map_inherit(map, start, end, new_inheritance) + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; + register vm_inherit_t new_inheritance; +{ + register vm_map_entry_t entry; + vm_map_entry_t temp_entry; + + switch (new_inheritance) { + case VM_INHERIT_NONE: + case VM_INHERIT_COPY: + case VM_INHERIT_SHARE: + break; + default: + return(KERN_INVALID_ARGUMENT); + } + + vm_map_lock(map); + + VM_MAP_RANGE_CHECK(map, start, end); + + if (vm_map_lookup_entry(map, start, &temp_entry)) { + entry = temp_entry; + vm_map_clip_start(map, entry, start); + } + else + entry = temp_entry->next; + + while ((entry != &map->header) && (entry->start < end)) { + vm_map_clip_end(map, entry, end); + + entry->inheritance = new_inheritance; + + entry = entry->next; + } + + vm_map_unlock(map); + return(KERN_SUCCESS); +} + +/* + * vm_map_pageable: + * + * Sets the pageability of the specified address + * range in the target map. Regions specified + * as not pageable require locked-down physical + * memory and physical page maps. + * + * The map must not be locked, but a reference + * must remain to the map throughout the call. + */ +vm_map_pageable(map, start, end, new_pageable) + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; + register boolean_t new_pageable; +{ + register vm_map_entry_t entry; + vm_map_entry_t temp_entry; + + vm_map_lock(map); + + VM_MAP_RANGE_CHECK(map, start, end); + + /* + * Only one pageability change may take place at one + * time, since vm_fault assumes it will be called + * only once for each wiring/unwiring. Therefore, we + * have to make sure we're actually changing the pageability + * for the entire region. We do so before making any changes. 
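The MASK(entry) macro above keeps copy-on-write regions write-protected at the hardware level even when their logical protection permits writing, so the first write still faults and triggers the copy. A tiny illustration of that masking with hypothetical protection constants:

#define PROT_READ   0x1
#define PROT_WRITE  0x2
#define PROT_EXEC   0x4
#define PROT_ALL    (PROT_READ | PROT_WRITE | PROT_EXEC)

/*
 * Protection to install in the physical map: a copy-on-write entry must
 * fault on its first write, so write permission is withheld there even
 * when the logical protection grants it.
 */
static unsigned hw_prot(unsigned logical_prot, int copy_on_write)
{
	unsigned mask = copy_on_write ? (PROT_ALL & ~PROT_WRITE) : PROT_ALL;

	return (logical_prot & mask);
}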
+ */ + + if (vm_map_lookup_entry(map, start, &temp_entry)) { + entry = temp_entry; + vm_map_clip_start(map, entry, start); + } + else + entry = temp_entry->next; + temp_entry = entry; + + /* + * Actions are rather different for wiring and unwiring, + * so we have two separate cases. + */ + + if (new_pageable) { + + /* + * Unwiring. First ensure that the range to be + * unwired is really wired down. + */ + while ((entry != &map->header) && (entry->start < end)) { + + if (entry->wired_count == 0) { + vm_map_unlock(map); + return(KERN_INVALID_ARGUMENT); + } + entry = entry->next; + } + + /* + * Now decrement the wiring count for each region. + * If a region becomes completely unwired, + * unwire its physical pages and mappings. + */ + lock_set_recursive(&map->lock); + + entry = temp_entry; + while ((entry != &map->header) && (entry->start < end)) { + vm_map_clip_end(map, entry, end); + + entry->wired_count--; + if (entry->wired_count == 0) + vm_fault_unwire(map, entry->start, entry->end); + + entry = entry->next; + } + lock_clear_recursive(&map->lock); + } + + else { + /* + * Wiring. We must do this in two passes: + * + * 1. Holding the write lock, we increment the + * wiring count. For any area that is not already + * wired, we create any shadow objects that need + * to be created. + * + * 2. We downgrade to a read lock, and call + * vm_fault_wire to fault in the pages for any + * newly wired area (wired_count is 1). + * + * Downgrading to a read lock for vm_fault_wire avoids + * a possible deadlock with another thread that may have + * faulted on one of the pages to be wired (it would mark + * the page busy, blocking us, then in turn block on the + * map lock that we hold). Because of problems in the + * recursive lock package, we cannot upgrade to a write + * lock in vm_map_lookup. Thus, any actions that require + * the write lock must be done beforehand. Because we + * keep the read lock on the map, the copy-on-write status + * of the entries we modify here cannot change. + */ + + /* + * Pass 1. + */ + entry = temp_entry; + while ((entry != &map->header) && (entry->start < end)) { + vm_map_clip_end(map, entry, end); + + entry->wired_count++; + if (entry->wired_count == 1) { + + /* + * Perform actions of vm_map_lookup that need + * the write lock on the map: create a shadow + * object for a copy-on-write region, or an + * object for a zero-fill region. + * + * We don't have to do this for entries that + * point to sharing maps, because we won't hold + * the lock on the sharing map. + */ + if (!entry->is_a_map) { + if (entry->needs_copy && + ((entry->protection & VM_PROT_WRITE) != 0)) { + + vm_object_shadow(&entry->object.vm_object, + &entry->offset, + (vm_size_t)(entry->end + - entry->start)); + entry->needs_copy = FALSE; + } + else if (entry->object.vm_object == VM_OBJECT_NULL) { + entry->object.vm_object = + vm_object_allocate((vm_size_t)(entry->end + - entry->start)); + entry->offset = (vm_offset_t)0; + } + } + } + + entry = entry->next; + } + + /* + * Pass 2. + */ + + /* + * HACK HACK HACK HACK + * + * If we are wiring in the kernel map or a submap of it, + * unlock the map to avoid deadlocks. We trust that the + * kernel threads are well-behaved, and therefore will + * not do anything destructive to this region of the map + * while we have it unlocked. We cannot trust user threads + * to do the same. + * + * HACK HACK HACK HACK + */ + if (vm_map_pmap(map) == kernel_pmap) { + vm_map_unlock(map); /* trust me ... 
*/ + } + else { + lock_set_recursive(&map->lock); + lock_write_to_read(&map->lock); + } + + entry = temp_entry; + while (entry != &map->header && entry->start < end) { + if (entry->wired_count == 1) { + vm_fault_wire(map, entry->start, entry->end); + } + entry = entry->next; + } + + if (vm_map_pmap(map) == kernel_pmap) { + vm_map_lock(map); + } + else { + lock_clear_recursive(&map->lock); + } + } + + vm_map_unlock(map); + + return(KERN_SUCCESS); +} + +/* + * vm_map_entry_unwire: [ internal use only ] + * + * Make the region specified by this entry pageable. + * + * The map in question should be locked. + * [This is the reason for this routine's existence.] + */ +void vm_map_entry_unwire(map, entry) + vm_map_t map; + register vm_map_entry_t entry; +{ + vm_fault_unwire(map, entry->start, entry->end); + entry->wired_count = 0; +} + +/* + * vm_map_entry_delete: [ internal use only ] + * + * Deallocate the given entry from the target map. + */ +void vm_map_entry_delete(map, entry) + register vm_map_t map; + register vm_map_entry_t entry; +{ + if (entry->wired_count != 0) + vm_map_entry_unwire(map, entry); + + vm_map_entry_unlink(map, entry); + map->size -= entry->end - entry->start; + + if (entry->is_a_map || entry->is_sub_map) + vm_map_deallocate(entry->object.share_map); + else + vm_object_deallocate(entry->object.vm_object); + + vm_map_entry_dispose(map, entry); +} + +/* + * vm_map_delete: [ internal use only ] + * + * Deallocates the given address range from the target + * map. + * + * When called with a sharing map, removes pages from + * that region from all physical maps. + */ +vm_map_delete(map, start, end) + register vm_map_t map; + vm_offset_t start; + register vm_offset_t end; +{ + register vm_map_entry_t entry; + vm_map_entry_t first_entry; + + /* + * Find the start of the region, and clip it + */ + + if (!vm_map_lookup_entry(map, start, &first_entry)) + entry = first_entry->next; + else { + entry = first_entry; + vm_map_clip_start(map, entry, start); + + /* + * Fix the lookup hint now, rather than each + * time though the loop. + */ + + SAVE_HINT(map, entry->prev); + } + + /* + * Save the free space hint + */ + + if (map->first_free->start >= start) + map->first_free = entry->prev; + + /* + * Step through all entries in this region + */ + + while ((entry != &map->header) && (entry->start < end)) { + vm_map_entry_t next; + register vm_offset_t s, e; + register vm_object_t object; + + vm_map_clip_end(map, entry, end); + + next = entry->next; + s = entry->start; + e = entry->end; + + /* + * Unwire before removing addresses from the pmap; + * otherwise, unwiring will put the entries back in + * the pmap. + */ + + object = entry->object.vm_object; + if (entry->wired_count != 0) + vm_map_entry_unwire(map, entry); + + /* + * If this is a sharing map, we must remove + * *all* references to this data, since we can't + * find all of the physical maps which are sharing + * it. + */ + + if (object == kernel_object || object == kmem_object) + vm_object_page_remove(object, entry->offset, + entry->offset + (e - s)); + else if (!map->is_main_map) + vm_object_pmap_remove(object, + entry->offset, + entry->offset + (e - s)); + else + pmap_remove(map->pmap, s, e); + + /* + * Delete the entry (which may delete the object) + * only after removing all pmap entries pointing + * to its pages. (Otherwise, its page frames may + * be reallocated, and any modify bits will be + * set in the wrong object!) 
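vm_map_delete's loop encodes a required ordering for teardown: unwire first (so unwiring cannot reinstall mappings), remove the translations from the physical map, and only then free the entry, so page frames are never recycled while stale hardware mappings still reference them. A simplified sketch of that ordering (hypothetical names; clipping is omitted and hw_unmap merely stands in for pmap_remove):

#include <stdlib.h>

struct region {
	struct region *next;
	unsigned long start, end;
	int wired;
};

/* Hypothetical stand-in for the pmap layer; the real code calls pmap_remove(). */
static void hw_unmap(unsigned long start, unsigned long end)
{
	(void)start; (void)end;		/* would invalidate translations here */
}

static void
delete_range(struct region **headp, unsigned long start, unsigned long end)
{
	struct region **rp = headp, *r;

	while ((r = *rp) != (struct region *)0 && r->start < end) {
		if (r->end <= start) {		/* entirely below the range */
			rp = &r->next;
			continue;
		}
		if (r->wired)
			r->wired = 0;		/* unwire before touching the pmap */
		hw_unmap(r->start, r->end);	/* drop hardware mappings first ... */
		*rp = r->next;			/* ... unlink the entry ... */
		free(r);			/* ... and only then recycle it */
	}
}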
+ */ + + vm_map_entry_delete(map, entry); + entry = next; + } + return(KERN_SUCCESS); +} + +/* + * vm_map_remove: + * + * Remove the given address range from the target map. + * This is the exported form of vm_map_delete. + */ +vm_map_remove(map, start, end) + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; +{ + register int result; + + vm_map_lock(map); + VM_MAP_RANGE_CHECK(map, start, end); + result = vm_map_delete(map, start, end); + vm_map_unlock(map); + + return(result); +} + +/* + * vm_map_check_protection: + * + * Assert that the target map allows the specified + * privilege on the entire address region given. + * The entire region must be allocated. + */ +boolean_t vm_map_check_protection(map, start, end, protection) + register vm_map_t map; + register vm_offset_t start; + register vm_offset_t end; + register vm_prot_t protection; +{ + register vm_map_entry_t entry; + vm_map_entry_t tmp_entry; + + if (!vm_map_lookup_entry(map, start, &tmp_entry)) { + return(FALSE); + } + + entry = tmp_entry; + + while (start < end) { + if (entry == &map->header) { + return(FALSE); + } + + /* + * No holes allowed! + */ + + if (start < entry->start) { + return(FALSE); + } + + /* + * Check protection associated with entry. + */ + + if ((entry->protection & protection) != protection) { + return(FALSE); + } + + /* go to next entry */ + + start = entry->end; + entry = entry->next; + } + return(TRUE); +} + +/* + * vm_map_copy_entry: + * + * Copies the contents of the source entry to the destination + * entry. The entries *must* be aligned properly. + */ +void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) + vm_map_t src_map, dst_map; + register vm_map_entry_t src_entry, dst_entry; +{ + vm_object_t temp_object; + + if (src_entry->is_sub_map || dst_entry->is_sub_map) + return; + + if (dst_entry->object.vm_object != VM_OBJECT_NULL && + !dst_entry->object.vm_object->internal) + printf("vm_map_copy_entry: copying over permanent data!\n"); + + /* + * If our destination map was wired down, + * unwire it now. + */ + + if (dst_entry->wired_count != 0) + vm_map_entry_unwire(dst_map, dst_entry); + + /* + * If we're dealing with a sharing map, we + * must remove the destination pages from + * all maps (since we cannot know which maps + * this sharing map belongs in). + */ + + if (dst_map->is_main_map) + pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end); + else + vm_object_pmap_remove(dst_entry->object.vm_object, + dst_entry->offset, + dst_entry->offset + + (dst_entry->end - dst_entry->start)); + + if (src_entry->wired_count == 0) { + + boolean_t src_needs_copy; + + /* + * If the source entry is marked needs_copy, + * it is already write-protected. + */ + if (!src_entry->needs_copy) { + + boolean_t su; + + /* + * If the source entry has only one mapping, + * we can just protect the virtual address + * range. + */ + if (!(su = src_map->is_main_map)) { + simple_lock(&src_map->ref_lock); + su = (src_map->ref_count == 1); + simple_unlock(&src_map->ref_lock); + } + + if (su) { + pmap_protect(src_map->pmap, + src_entry->start, + src_entry->end, + src_entry->protection & ~VM_PROT_WRITE); + } + else { + vm_object_pmap_copy(src_entry->object.vm_object, + src_entry->offset, + src_entry->offset + (src_entry->end + -src_entry->start)); + } + } + + /* + * Make a copy of the object. 
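vm_map_check_protection is the standard walk for asserting a property over a whole range: fail on any hole (the next entry starts beyond where coverage ended) or on any entry missing the required bits. The same check over an array, as a sketch with hypothetical names:

#include <stddef.h>

struct span { unsigned long start, end; unsigned prot; };	/* sorted, non-overlapping */

/* 1 if every byte of [start, end) is mapped and allows 'prot', else 0. */
static int
check_protection(const struct span *s, size_t n, unsigned long start,
    unsigned long end, unsigned prot)
{
	size_t i;

	for (i = 0; i < n && start < end; i++) {
		if (s[i].end <= start)
			continue;		/* below the range of interest */
		if (start < s[i].start)
			return (0);		/* hole: nothing maps [start, s[i].start) */
		if ((s[i].prot & prot) != prot)
			return (0);		/* an entry lacks a required permission */
		start = s[i].end;		/* advance past this entry */
	}
	return (start >= end);
}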
+ */ + temp_object = dst_entry->object.vm_object; + vm_object_copy(src_entry->object.vm_object, + src_entry->offset, + (vm_size_t)(src_entry->end - + src_entry->start), + &dst_entry->object.vm_object, + &dst_entry->offset, + &src_needs_copy); + /* + * If we didn't get a copy-object now, mark the + * source map entry so that a shadow will be created + * to hold its changed pages. + */ + if (src_needs_copy) + src_entry->needs_copy = TRUE; + + /* + * The destination always needs to have a shadow + * created. + */ + dst_entry->needs_copy = TRUE; + + /* + * Mark the entries copy-on-write, so that write-enabling + * the entry won't make copy-on-write pages writable. + */ + src_entry->copy_on_write = TRUE; + dst_entry->copy_on_write = TRUE; + /* + * Get rid of the old object. + */ + vm_object_deallocate(temp_object); + + pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, + dst_entry->end - dst_entry->start, src_entry->start); + } + else { + /* + * Of course, wired down pages can't be set copy-on-write. + * Cause wired pages to be copied into the new + * map by simulating faults (the new pages are + * pageable) + */ + vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry); + } +} + +/* + * vm_map_copy: + * + * Perform a virtual memory copy from the source + * address map/range to the destination map/range. + * + * If src_destroy or dst_alloc is requested, + * the source and destination regions should be + * disjoint, not only in the top-level map, but + * in the sharing maps as well. [The best way + * to guarantee this is to use a new intermediate + * map to make copies. This also reduces map + * fragmentation.] + */ +vm_map_copy(dst_map, src_map, + dst_addr, len, src_addr, + dst_alloc, src_destroy) + vm_map_t dst_map; + vm_map_t src_map; + vm_offset_t dst_addr; + vm_size_t len; + vm_offset_t src_addr; + boolean_t dst_alloc; + boolean_t src_destroy; +{ + register + vm_map_entry_t src_entry; + register + vm_map_entry_t dst_entry; + vm_map_entry_t tmp_entry; + vm_offset_t src_start; + vm_offset_t src_end; + vm_offset_t dst_start; + vm_offset_t dst_end; + vm_offset_t src_clip; + vm_offset_t dst_clip; + int result; + boolean_t old_src_destroy; + + /* + * XXX While we figure out why src_destroy screws up, + * we'll do it by explicitly vm_map_delete'ing at the end. + */ + + old_src_destroy = src_destroy; + src_destroy = FALSE; + + /* + * Compute start and end of region in both maps + */ + + src_start = src_addr; + src_end = src_start + len; + dst_start = dst_addr; + dst_end = dst_start + len; + + /* + * Check that the region can exist in both source + * and destination. + */ + + if ((dst_end < dst_start) || (src_end < src_start)) + return(KERN_NO_SPACE); + + /* + * Lock the maps in question -- we avoid deadlock + * by ordering lock acquisition by map value + */ + + if (src_map == dst_map) { + vm_map_lock(src_map); + } + else if ((int) src_map < (int) dst_map) { + vm_map_lock(src_map); + vm_map_lock(dst_map); + } else { + vm_map_lock(dst_map); + vm_map_lock(src_map); + } + + result = KERN_SUCCESS; + + /* + * Check protections... source must be completely readable and + * destination must be completely writable. [Note that if we're + * allocating the destination region, we don't have to worry + * about protection, but instead about whether the region + * exists.] 
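Taking the two map locks in a fixed global order, chosen by comparing the map addresses, is what lets vm_map_copy avoid deadlock when two threads copy between the same pair of maps in opposite directions; note the special case when source and destination are the same map. A small POSIX-threads sketch of the same rule (hypothetical names):

#include <pthread.h>
#include <stdint.h>

/* Acquire two map locks in a fixed global order to avoid deadlock. */
static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);			/* same map: lock it once */
	} else if ((uintptr_t)a < (uintptr_t)b) {	/* the original compares the */
		pthread_mutex_lock(a);			/* map pointers cast to int */
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	if (a != b)
		pthread_mutex_unlock(b);
}

Only the acquisition order matters for deadlock avoidance; the release order is immaterial.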
+ */ + + if (src_map->is_main_map && dst_map->is_main_map) { + if (!vm_map_check_protection(src_map, src_start, src_end, + VM_PROT_READ)) { + result = KERN_PROTECTION_FAILURE; + goto Return; + } + + if (dst_alloc) { + /* XXX Consider making this a vm_map_find instead */ + if ((result = vm_map_insert(dst_map, VM_OBJECT_NULL, + (vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS) + goto Return; + } + else if (!vm_map_check_protection(dst_map, dst_start, dst_end, + VM_PROT_WRITE)) { + result = KERN_PROTECTION_FAILURE; + goto Return; + } + } + + /* + * Find the start entries and clip. + * + * Note that checking protection asserts that the + * lookup cannot fail. + * + * Also note that we wait to do the second lookup + * until we have done the first clip, as the clip + * may affect which entry we get! + */ + + (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry); + src_entry = tmp_entry; + vm_map_clip_start(src_map, src_entry, src_start); + + (void) vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry); + dst_entry = tmp_entry; + vm_map_clip_start(dst_map, dst_entry, dst_start); + + /* + * If both source and destination entries are the same, + * retry the first lookup, as it may have changed. + */ + + if (src_entry == dst_entry) { + (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry); + src_entry = tmp_entry; + } + + /* + * If source and destination entries are still the same, + * a null copy is being performed. + */ + + if (src_entry == dst_entry) + goto Return; + + /* + * Go through entries until we get to the end of the + * region. + */ + + while (src_start < src_end) { + /* + * Clip the entries to the endpoint of the entire region. + */ + + vm_map_clip_end(src_map, src_entry, src_end); + vm_map_clip_end(dst_map, dst_entry, dst_end); + + /* + * Clip each entry to the endpoint of the other entry. + */ + + src_clip = src_entry->start + (dst_entry->end - dst_entry->start); + vm_map_clip_end(src_map, src_entry, src_clip); + + dst_clip = dst_entry->start + (src_entry->end - src_entry->start); + vm_map_clip_end(dst_map, dst_entry, dst_clip); + + /* + * Both entries now match in size and relative endpoints. + * + * If both entries refer to a VM object, we can + * deal with them now. + */ + + if (!src_entry->is_a_map && !dst_entry->is_a_map) { + vm_map_copy_entry(src_map, dst_map, src_entry, + dst_entry); + } + else { + register vm_map_t new_dst_map; + vm_offset_t new_dst_start; + vm_size_t new_size; + vm_map_t new_src_map; + vm_offset_t new_src_start; + + /* + * We have to follow at least one sharing map. + */ + + new_size = (dst_entry->end - dst_entry->start); + + if (src_entry->is_a_map) { + new_src_map = src_entry->object.share_map; + new_src_start = src_entry->offset; + } + else { + new_src_map = src_map; + new_src_start = src_entry->start; + lock_set_recursive(&src_map->lock); + } + + if (dst_entry->is_a_map) { + vm_offset_t new_dst_end; + + new_dst_map = dst_entry->object.share_map; + new_dst_start = dst_entry->offset; + + /* + * Since the destination sharing entries + * will be merely deallocated, we can + * do that now, and replace the region + * with a null object. [This prevents + * splitting the source map to match + * the form of the destination map.] + * Note that we can only do so if the + * source and destination do not overlap. 
+ */ + + new_dst_end = new_dst_start + new_size; + + if (new_dst_map != new_src_map) { + vm_map_lock(new_dst_map); + (void) vm_map_delete(new_dst_map, + new_dst_start, + new_dst_end); + (void) vm_map_insert(new_dst_map, + VM_OBJECT_NULL, + (vm_offset_t) 0, + new_dst_start, + new_dst_end); + vm_map_unlock(new_dst_map); + } + } + else { + new_dst_map = dst_map; + new_dst_start = dst_entry->start; + lock_set_recursive(&dst_map->lock); + } + + /* + * Recursively copy the sharing map. + */ + + (void) vm_map_copy(new_dst_map, new_src_map, + new_dst_start, new_size, new_src_start, + FALSE, FALSE); + + if (dst_map == new_dst_map) + lock_clear_recursive(&dst_map->lock); + if (src_map == new_src_map) + lock_clear_recursive(&src_map->lock); + } + + /* + * Update variables for next pass through the loop. + */ + + src_start = src_entry->end; + src_entry = src_entry->next; + dst_start = dst_entry->end; + dst_entry = dst_entry->next; + + /* + * If the source is to be destroyed, here is the + * place to do it. + */ + + if (src_destroy && src_map->is_main_map && + dst_map->is_main_map) + vm_map_entry_delete(src_map, src_entry->prev); + } + + /* + * Update the physical maps as appropriate + */ + + if (src_map->is_main_map && dst_map->is_main_map) { + if (src_destroy) + pmap_remove(src_map->pmap, src_addr, src_addr + len); + } + + /* + * Unlock the maps + */ + + Return: ; + + if (old_src_destroy) + vm_map_delete(src_map, src_addr, src_addr + len); + + vm_map_unlock(src_map); + if (src_map != dst_map) + vm_map_unlock(dst_map); + + return(result); +} + +/* + * vm_map_fork: + * + * Create and return a new map based on the old + * map, according to the inheritance values on the + * regions in that map. + * + * The source map must not be locked. + */ +vm_map_t vm_map_fork(old_map) + vm_map_t old_map; +{ + vm_map_t new_map; + vm_map_entry_t old_entry; + vm_map_entry_t new_entry; + pmap_t new_pmap; + + vm_map_lock(old_map); + + new_pmap = pmap_create((vm_size_t) 0); + new_map = vm_map_create(new_pmap, + old_map->min_offset, + old_map->max_offset, + old_map->entries_pageable); + + old_entry = old_map->header.next; + + while (old_entry != &old_map->header) { + if (old_entry->is_sub_map) + panic("vm_map_fork: encountered a submap"); + + switch (old_entry->inheritance) { + case VM_INHERIT_NONE: + break; + + case VM_INHERIT_SHARE: + /* + * If we don't already have a sharing map: + */ + + if (!old_entry->is_a_map) { + vm_map_t new_share_map; + vm_map_entry_t new_share_entry; + + /* + * Create a new sharing map + */ + + new_share_map = vm_map_create(PMAP_NULL, + old_entry->start, + old_entry->end, + TRUE); + new_share_map->is_main_map = FALSE; + + /* + * Create the only sharing entry from the + * old task map entry. + */ + + new_share_entry = + vm_map_entry_create(new_share_map); + *new_share_entry = *old_entry; + + /* + * Insert the entry into the new sharing + * map + */ + + vm_map_entry_link(new_share_map, + new_share_map->header.prev, + new_share_entry); + + /* + * Fix up the task map entry to refer + * to the sharing map now. + */ + + old_entry->is_a_map = TRUE; + old_entry->object.share_map = new_share_map; + old_entry->offset = old_entry->start; + } + + /* + * Clone the entry, referencing the sharing map. + */ + + new_entry = vm_map_entry_create(new_map); + *new_entry = *old_entry; + vm_map_reference(new_entry->object.share_map); + + /* + * Insert the entry into the new map -- we + * know we're inserting at the end of the new + * map. 
+ */ + + vm_map_entry_link(new_map, new_map->header.prev, + new_entry); + + /* + * Update the physical map + */ + + pmap_copy(new_map->pmap, old_map->pmap, + new_entry->start, + (old_entry->end - old_entry->start), + old_entry->start); + break; + + case VM_INHERIT_COPY: + /* + * Clone the entry and link into the map. + */ + + new_entry = vm_map_entry_create(new_map); + *new_entry = *old_entry; + new_entry->wired_count = 0; + new_entry->object.vm_object = VM_OBJECT_NULL; + new_entry->is_a_map = FALSE; + vm_map_entry_link(new_map, new_map->header.prev, + new_entry); + if (old_entry->is_a_map) { + int check; + + check = vm_map_copy(new_map, + old_entry->object.share_map, + new_entry->start, + (vm_size_t)(new_entry->end - + new_entry->start), + old_entry->offset, + FALSE, FALSE); + if (check != KERN_SUCCESS) + printf("vm_map_fork: copy in share_map region failed\n"); + } + else { + vm_map_copy_entry(old_map, new_map, old_entry, + new_entry); + } + break; + } + old_entry = old_entry->next; + } + + new_map->size = old_map->size; + vm_map_unlock(old_map); + + return(new_map); +} + +/* + * vm_map_lookup: + * + * Finds the VM object, offset, and + * protection for a given virtual address in the + * specified map, assuming a page fault of the + * type specified. + * + * Leaves the map in question locked for read; return + * values are guaranteed until a vm_map_lookup_done + * call is performed. Note that the map argument + * is in/out; the returned map must be used in + * the call to vm_map_lookup_done. + * + * A handle (out_entry) is returned for use in + * vm_map_lookup_done, to make that fast. + * + * If a lookup is requested with "write protection" + * specified, the map may be changed to perform virtual + * copying operations, although the data referenced will + * remain the same. + */ +vm_map_lookup(var_map, vaddr, fault_type, out_entry, + object, offset, out_prot, wired, single_use) + vm_map_t *var_map; /* IN/OUT */ + register vm_offset_t vaddr; + register vm_prot_t fault_type; + + vm_map_entry_t *out_entry; /* OUT */ + vm_object_t *object; /* OUT */ + vm_offset_t *offset; /* OUT */ + vm_prot_t *out_prot; /* OUT */ + boolean_t *wired; /* OUT */ + boolean_t *single_use; /* OUT */ +{ + vm_map_t share_map; + vm_offset_t share_offset; + register vm_map_entry_t entry; + register vm_map_t map = *var_map; + register vm_prot_t prot; + register boolean_t su; + + RetryLookup: ; + + /* + * Lookup the faulting address. + */ + + vm_map_lock_read(map); + +#define RETURN(why) \ + { \ + vm_map_unlock_read(map); \ + return(why); \ + } + + /* + * If the map has an interesting hint, try it before calling + * full blown lookup routine. + */ + + simple_lock(&map->hint_lock); + entry = map->hint; + simple_unlock(&map->hint_lock); + + *out_entry = entry; + + if ((entry == &map->header) || + (vaddr < entry->start) || (vaddr >= entry->end)) { + vm_map_entry_t tmp_entry; + + /* + * Entry was either not a valid hint, or the vaddr + * was not contained in the entry, so do a full lookup. + */ + if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) + RETURN(KERN_INVALID_ADDRESS); + + entry = tmp_entry; + *out_entry = entry; + } + + /* + * Handle submaps. + */ + + if (entry->is_sub_map) { + vm_map_t old_map = map; + + *var_map = map = entry->object.sub_map; + vm_map_unlock_read(old_map); + goto RetryLookup; + } + + /* + * Check whether this task is allowed to have + * this page. 
+ */ + + prot = entry->protection; + if ((fault_type & (prot)) != fault_type) + RETURN(KERN_PROTECTION_FAILURE); + + /* + * If this page is not pageable, we have to get + * it for all possible accesses. + */ + + if (*wired = (entry->wired_count != 0)) + prot = fault_type = entry->protection; + + /* + * If we don't already have a VM object, track + * it down. + */ + + if (su = !entry->is_a_map) { + share_map = map; + share_offset = vaddr; + } + else { + vm_map_entry_t share_entry; + + /* + * Compute the sharing map, and offset into it. + */ + + share_map = entry->object.share_map; + share_offset = (vaddr - entry->start) + entry->offset; + + /* + * Look for the backing store object and offset + */ + + vm_map_lock_read(share_map); + + if (!vm_map_lookup_entry(share_map, share_offset, + &share_entry)) { + vm_map_unlock_read(share_map); + RETURN(KERN_INVALID_ADDRESS); + } + entry = share_entry; + } + + /* + * If the entry was copy-on-write, we either ... + */ + + if (entry->needs_copy) { + /* + * If we want to write the page, we may as well + * handle that now since we've got the sharing + * map locked. + * + * If we don't need to write the page, we just + * demote the permissions allowed. + */ + + if (fault_type & VM_PROT_WRITE) { + /* + * Make a new object, and place it in the + * object chain. Note that no new references + * have appeared -- one just moved from the + * share map to the new object. + */ + + if (lock_read_to_write(&share_map->lock)) { + if (share_map != map) + vm_map_unlock_read(map); + goto RetryLookup; + } + + vm_object_shadow( + &entry->object.vm_object, + &entry->offset, + (vm_size_t) (entry->end - entry->start)); + + entry->needs_copy = FALSE; + + lock_write_to_read(&share_map->lock); + } + else { + /* + * We're attempting to read a copy-on-write + * page -- don't allow writes. + */ + + prot &= (~VM_PROT_WRITE); + } + } + + /* + * Create an object if necessary. + */ + if (entry->object.vm_object == VM_OBJECT_NULL) { + + if (lock_read_to_write(&share_map->lock)) { + if (share_map != map) + vm_map_unlock_read(map); + goto RetryLookup; + } + + entry->object.vm_object = vm_object_allocate( + (vm_size_t)(entry->end - entry->start)); + entry->offset = 0; + lock_write_to_read(&share_map->lock); + } + + /* + * Return the object/offset from this entry. If the entry + * was copy-on-write or empty, it has been fixed up. + */ + + *offset = (share_offset - entry->start) + entry->offset; + *object = entry->object.vm_object; + + /* + * Return whether this is the only map sharing this data. + */ + + if (!su) { + simple_lock(&share_map->ref_lock); + su = (share_map->ref_count == 1); + simple_unlock(&share_map->ref_lock); + } + + *out_prot = prot; + *single_use = su; + + return(KERN_SUCCESS); + +#undef RETURN +} + +/* + * vm_map_lookup_done: + * + * Releases locks acquired by a vm_map_lookup + * (according to the handle returned by that lookup). + */ + +void vm_map_lookup_done(map, entry) + register vm_map_t map; + vm_map_entry_t entry; +{ + /* + * If this entry references a map, unlock it first. + */ + + if (entry->is_a_map) + vm_map_unlock_read(entry->object.share_map); + + /* + * Unlock the main-level map + */ + + vm_map_unlock_read(map); +} + +/* + * Routine: vm_map_simplify + * Purpose: + * Attempt to simplify the map representation in + * the vicinity of the given starting address. 
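vm_map_lookup holds only a read lock on the fast path and attempts lock_read_to_write when it must mutate an entry (to install a shadow or a fresh object); if the upgrade fails it backs out and retries from RetryLookup so everything is revalidated. POSIX rwlocks have no upgrade, so a rough analogue is drop-and-reacquire with a generation check; all names below are hypothetical:

#include <pthread.h>

struct table {
	pthread_rwlock_t lock;
	unsigned long generation;	/* bumped on every write-locked change */
	/* ... protected data ... */
};

/* Do a read-mostly lookup, falling back to the write lock when needed. */
static void
lookup_and_maybe_fix(struct table *t, int (*needs_fix)(struct table *),
    void (*fix)(struct table *))
{
	for (;;) {
		unsigned long gen;

		pthread_rwlock_rdlock(&t->lock);
		if (!needs_fix(t)) {
			pthread_rwlock_unlock(&t->lock);
			return;				/* fast path: read lock sufficed */
		}
		gen = t->generation;
		pthread_rwlock_unlock(&t->lock);	/* "upgrade" = drop and retake */

		pthread_rwlock_wrlock(&t->lock);
		if (t->generation != gen) {		/* state changed while unlocked */
			pthread_rwlock_unlock(&t->lock);
			continue;			/* retry, like RetryLookup */
		}
		fix(t);
		t->generation++;
		pthread_rwlock_unlock(&t->lock);
		return;
	}
}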
+ * Note: + * This routine is intended primarily to keep the + * kernel maps more compact -- they generally don't + * benefit from the "expand a map entry" technology + * at allocation time because the adjacent entry + * is often wired down. + */ +void vm_map_simplify(map, start) + vm_map_t map; + vm_offset_t start; +{ + vm_map_entry_t this_entry; + vm_map_entry_t prev_entry; + + vm_map_lock(map); + if ( + (vm_map_lookup_entry(map, start, &this_entry)) && + ((prev_entry = this_entry->prev) != &map->header) && + + (prev_entry->end == start) && + (map->is_main_map) && + + (prev_entry->is_a_map == FALSE) && + (prev_entry->is_sub_map == FALSE) && + + (this_entry->is_a_map == FALSE) && + (this_entry->is_sub_map == FALSE) && + + (prev_entry->inheritance == this_entry->inheritance) && + (prev_entry->protection == this_entry->protection) && + (prev_entry->max_protection == this_entry->max_protection) && + (prev_entry->wired_count == this_entry->wired_count) && + + (prev_entry->copy_on_write == this_entry->copy_on_write) && + (prev_entry->needs_copy == this_entry->needs_copy) && + + (prev_entry->object.vm_object == this_entry->object.vm_object) && + ((prev_entry->offset + (prev_entry->end - prev_entry->start)) + == this_entry->offset) + ) { + if (map->first_free == this_entry) + map->first_free = prev_entry; + + SAVE_HINT(map, prev_entry); + vm_map_entry_unlink(map, this_entry); + prev_entry->end = this_entry->end; + vm_object_deallocate(this_entry->object.vm_object); + vm_map_entry_dispose(map, this_entry); + } + vm_map_unlock(map); +} + +/* + * vm_map_print: [ debug ] + */ +void vm_map_print(map, full) + register vm_map_t map; + boolean_t full; +{ + register vm_map_entry_t entry; + extern int indent; + + iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n", + (map->is_main_map ? "Task" : "Share"), + (int) map, (int) (map->pmap), map->ref_count, map->nentries, + map->timestamp); + + if (!full && indent) + return; + + indent += 2; + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + iprintf("map entry 0x%x: start=0x%x, end=0x%x, ", + (int) entry, (int) entry->start, (int) entry->end); + if (map->is_main_map) { + static char *inheritance_name[4] = + { "share", "copy", "none", "donate_copy"}; + printf("prot=%x/%x/%s, ", + entry->protection, + entry->max_protection, + inheritance_name[entry->inheritance]); + if (entry->wired_count != 0) + printf("wired, "); + } + + if (entry->is_a_map || entry->is_sub_map) { + printf("share=0x%x, offset=0x%x\n", + (int) entry->object.share_map, + (int) entry->offset); + if ((entry->prev == &map->header) || + (!entry->prev->is_a_map) || + (entry->prev->object.share_map != + entry->object.share_map)) { + indent += 2; + vm_map_print(entry->object.share_map, full); + indent -= 2; + } + + } + else { + printf("object=0x%x, offset=0x%x", + (int) entry->object.vm_object, + (int) entry->offset); + if (entry->copy_on_write) + printf(", copy (%s)", + entry->needs_copy ? 
"needed" : "done"); + printf("\n"); + + if ((entry->prev == &map->header) || + (entry->prev->is_a_map) || + (entry->prev->object.vm_object != + entry->object.vm_object)) { + indent += 2; + vm_object_print(entry->object.vm_object, full); + indent -= 2; + } + } + } + indent -= 2; +} diff --git a/usr/src/sys/vm/vm_map.h b/usr/src/sys/vm/vm_map.h new file mode 100644 index 0000000000..5e552b266e --- /dev/null +++ b/usr/src/sys/vm/vm_map.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_map.h 7.1 (Berkeley) %G% + */ + +/* + * Virtual memory map module definitions. + */ + +#ifndef _VM_MAP_ +#define _VM_MAP_ + +#ifdef KERNEL +#include "types.h" +#include "lock.h" +#include "../vm/pmap.h" +#include "../vm/vm_prot.h" +#include "../vm/vm_inherit.h" +#include "../vm/vm_object.h" +#else +#include +#include +#include +#include +#include +#include +#endif + +/* + * Types defined: + * + * vm_map_t the high-level address map data structure. + * vm_map_entry_t an entry in an address map. + * vm_map_version_t a timestamp of a map, for use with vm_map_lookup + */ + +/* + * Objects which live in maps may be either VM objects, or + * another map (called a "sharing map") which denotes read-write + * sharing with other maps. + */ + +union vm_map_object { + struct vm_object *vm_object; /* object object */ + struct vm_map *share_map; /* share map */ + struct vm_map *sub_map; /* belongs to another map */ +}; + +typedef union vm_map_object vm_map_object_t; + +/* + * Address map entries consist of start and end addresses, + * a VM object (or sharing map) and offset into that object, + * and user-exported inheritance and protection information. + * Also included is control information for virtual copy operations. + */ +struct vm_map_entry { + struct vm_map_entry *prev; /* previous entry */ + struct vm_map_entry *next; /* next entry */ + vm_offset_t start; /* start address */ + vm_offset_t end; /* end address */ + union vm_map_object object; /* object I point to */ + vm_offset_t offset; /* offset into object */ + boolean_t is_a_map; /* Is "object" a map? */ + boolean_t is_sub_map; /* Is "object" a submap? */ + /* Only in sharing maps: */ + boolean_t copy_on_write; /* is data copy-on-write */ + boolean_t needs_copy; /* does object need to be copied */ + /* Only in task maps: */ + vm_prot_t protection; /* protection code */ + vm_prot_t max_protection; /* maximum protection */ + vm_inherit_t inheritance; /* inheritance */ + int wired_count; /* can be paged if = 0 */ +}; + +typedef struct vm_map_entry *vm_map_entry_t; + +#define VM_MAP_ENTRY_NULL ((vm_map_entry_t) 0) + +/* + * Maps are doubly-linked lists of map entries, kept sorted + * by address. A single hint is provided to start + * searches again from the last successful search, + * insertion, or removal. + */ +struct vm_map { + lock_data_t lock; /* Lock for map data */ + struct vm_map_entry header; /* List of entries */ + int nentries; /* Number of entries */ + pmap_t pmap; /* Physical map */ + vm_size_t size; /* virtual size */ + boolean_t is_main_map; /* Am I a main map? 
*/ + int ref_count; /* Reference count */ + simple_lock_data_t ref_lock; /* Lock for ref_count field */ + vm_map_entry_t hint; /* hint for quick lookups */ + simple_lock_data_t hint_lock; /* lock for hint storage */ + vm_map_entry_t first_free; /* First free space hint */ + boolean_t entries_pageable; /* map entries pageable?? */ + unsigned int timestamp; /* Version number */ +#define min_offset header.start +#define max_offset header.end +}; + +typedef struct vm_map *vm_map_t; + +#define VM_MAP_NULL ((vm_map_t) 0) + +/* + * Map versions are used to validate a previous lookup attempt. + * + * Since lookup operations may involve both a main map and + * a sharing map, it is necessary to have a timestamp from each. + * [If the main map timestamp has changed, the share_map and + * associated timestamp are no longer valid; the map version + * does not include a reference for the imbedded share_map.] + */ +typedef struct { + int main_timestamp; + vm_map_t share_map; + int share_timestamp; +} vm_map_version_t; + +/* + * Macros: vm_map_lock, etc. + * Function: + * Perform locking on the data portion of a map. + */ + +#define vm_map_lock(map) { lock_write(&(map)->lock); (map)->timestamp++; } +#define vm_map_unlock(map) lock_write_done(&(map)->lock) +#define vm_map_lock_read(map) lock_read(&(map)->lock) +#define vm_map_unlock_read(map) lock_read_done(&(map)->lock) + +/* + * Exported procedures that operate on vm_map_t. + */ + +void vm_map_init(); +vm_map_t vm_map_create(); +void vm_map_deallocate(); +void vm_map_reference(); +int vm_map_find(); +int vm_map_remove(); +int vm_map_lookup(); +void vm_map_lookup_done(); +int vm_map_protect(); +int vm_map_inherit(); +int vm_map_copy(); +vm_map_t vm_map_fork(); +void vm_map_print(); +void vm_map_copy_entry(); +boolean_t vm_map_verify(); +void vm_map_verify_done(); + +/* + * Functions implemented as macros + */ +#define vm_map_min(map) ((map)->min_offset) +#define vm_map_max(map) ((map)->max_offset) +#define vm_map_pmap(map) ((map)->pmap) + +/* XXX: number of kernel maps and entries to statically allocate */ +#define MAX_KMAP 10 +#define MAX_KMAPENT 500 + +#endif _VM_MAP_ diff --git a/usr/src/sys/vm/vm_object.c b/usr/src/sys/vm/vm_object.c new file mode 100644 index 0000000000..3a49d4253e --- /dev/null +++ b/usr/src/sys/vm/vm_object.c @@ -0,0 +1,1406 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_object.c 7.1 (Berkeley) %G% + */ + +/* + * Virtual memory object module. + */ + +#include "param.h" +#include "malloc.h" +#include "../vm/vm_param.h" +#include "lock.h" +#include "../vm/vm_page.h" +#include "../vm/vm_map.h" +#include "../vm/vm_object.h" + +/* + * Virtual memory objects maintain the actual data + * associated with allocated virtual memory. A given + * page of memory exists within exactly one object. + * + * An object is only deallocated when all "references" + * are given up. Only one "reference" to a given + * region of an object should be writeable. 
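The vm_map_version_t in vm_map.h above works because vm_map_lock bumps the map timestamp on every write lock: a caller can save the timestamps observed during a lookup and later tell whether the cached result may have been invalidated. A sketch of that save/validate pair with hypothetical names:

struct map_version { unsigned int main_ts, share_ts; };

struct vmap { unsigned int timestamp; /* ... */ };

/* Record the versions at lookup time ... */
static void
version_save(struct map_version *v, const struct vmap *main_map,
    const struct vmap *share_map)
{
	v->main_ts = main_map->timestamp;
	v->share_ts = share_map ? share_map->timestamp : 0;
}

/* ... and later check whether the cached lookup result may still be used. */
static int
version_valid(const struct map_version *v, const struct vmap *main_map,
    const struct vmap *share_map)
{
	if (v->main_ts != main_map->timestamp)
		return (0);		/* main map changed: everything is stale */
	if (share_map && v->share_ts != share_map->timestamp)
		return (0);		/* the sharing map changed underneath us */
	return (1);
}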
+ * + * Associated with each object is a list of all resident + * memory pages belonging to that object; this list is + * maintained by the "vm_page" module, and locked by the object's + * lock. + * + * Each object also records a "pager" routine which is + * used to retrieve (and store) pages to the proper backing + * storage. In addition, objects may be backed by other + * objects from which they were virtual-copied. + * + * The only items within the object structure which are + * modified after time of creation are: + * reference count locked by object's lock + * pager routine locked by object's lock + * + */ + +struct vm_object kernel_object_store; +struct vm_object kmem_object_store; + +#define VM_OBJECT_HASH_COUNT 157 + +int vm_cache_max = 100; /* can patch if necessary */ +queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT]; + +long object_collapses = 0; +long object_bypasses = 0; + +/* + * vm_object_init: + * + * Initialize the VM objects module. + */ +void vm_object_init() +{ + register int i; + + queue_init(&vm_object_cached_list); + queue_init(&vm_object_list); + vm_object_count = 0; + simple_lock_init(&vm_cache_lock); + simple_lock_init(&vm_object_list_lock); + + for (i = 0; i < VM_OBJECT_HASH_COUNT; i++) + queue_init(&vm_object_hashtable[i]); + + kernel_object = &kernel_object_store; + _vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, + kernel_object); + + kmem_object = &kmem_object_store; + _vm_object_allocate(VM_KMEM_SIZE + VM_MBUF_SIZE, kmem_object); +} + +/* + * vm_object_allocate: + * + * Returns a new object with the given size. + */ + +vm_object_t vm_object_allocate(size) + vm_size_t size; +{ + register vm_object_t result; + + result = (vm_object_t) + malloc((u_long)sizeof *result, M_VMOBJ, M_WAITOK); + + _vm_object_allocate(size, result); + + return(result); +} + +_vm_object_allocate(size, object) + vm_size_t size; + register vm_object_t object; +{ + queue_init(&object->memq); + vm_object_lock_init(object); + object->ref_count = 1; + object->resident_page_count = 0; + object->size = size; + object->can_persist = FALSE; + object->paging_in_progress = 0; + object->copy = VM_OBJECT_NULL; + + /* + * Object starts out read-write, with no pager. + */ + + object->pager = vm_pager_null; + object->pager_ready = FALSE; + object->internal = TRUE; /* vm_allocate_with_pager will reset */ + object->paging_offset = 0; + object->shadow = VM_OBJECT_NULL; + object->shadow_offset = (vm_offset_t) 0; + + simple_lock(&vm_object_list_lock); + queue_enter(&vm_object_list, object, vm_object_t, object_list); + vm_object_count++; + simple_unlock(&vm_object_list_lock); +} + +/* + * vm_object_reference: + * + * Gets another reference to the given object. + */ +void vm_object_reference(object) + register vm_object_t object; +{ + if (object == VM_OBJECT_NULL) + return; + + vm_object_lock(object); + object->ref_count++; + vm_object_unlock(object); +} + +/* + * vm_object_deallocate: + * + * Release a reference to the specified object, + * gained either through a vm_object_allocate + * or a vm_object_reference call. When all references + * are gone, storage associated with this object + * may be relinquished. + * + * No object may be locked. + */ +void vm_object_deallocate(object) + register vm_object_t object; +{ + vm_object_t temp; + + while (object != VM_OBJECT_NULL) { + + /* + * The cache holds a reference (uncounted) to + * the object; we must lock it before removing + * the object. 
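+		 *
+		 *	(The cache lock is taken before the object lock,
+		 *	the same ordering used by vm_object_lookup below.)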
+ */ + + vm_object_cache_lock(); + + /* + * Lose the reference + */ + vm_object_lock(object); + if (--(object->ref_count) != 0) { + + /* + * If there are still references, then + * we are done. + */ + vm_object_unlock(object); + vm_object_cache_unlock(); + return; + } + + /* + * See if this object can persist. If so, enter + * it in the cache, then deactivate all of its + * pages. + */ + + if (object->can_persist) { + + queue_enter(&vm_object_cached_list, object, + vm_object_t, cached_list); + vm_object_cached++; + vm_object_cache_unlock(); + + vm_object_deactivate_pages(object); + vm_object_unlock(object); + + vm_object_cache_trim(); + return; + } + + /* + * Make sure no one can look us up now. + */ + vm_object_remove(object->pager); + vm_object_cache_unlock(); + + temp = object->shadow; + vm_object_terminate(object); + /* unlocks and deallocates object */ + object = temp; + } +} + + +/* + * vm_object_terminate actually destroys the specified object, freeing + * up all previously used resources. + * + * The object must be locked. + */ +void vm_object_terminate(object) + register vm_object_t object; +{ + register vm_page_t p; + vm_object_t shadow_object; + + /* + * Detach the object from its shadow if we are the shadow's + * copy. + */ + if ((shadow_object = object->shadow) != VM_OBJECT_NULL) { + vm_object_lock(shadow_object); + if (shadow_object->copy == object) + shadow_object->copy = VM_OBJECT_NULL; +#if 0 + else if (shadow_object->copy != VM_OBJECT_NULL) + panic("vm_object_terminate: copy/shadow inconsistency"); +#endif + vm_object_unlock(shadow_object); + } + + /* + * Wait until the pageout daemon is through + * with the object. + */ + + while (object->paging_in_progress != 0) { + vm_object_sleep(object, object, FALSE); + vm_object_lock(object); + } + + + /* + * While the paging system is locked, + * pull the object's pages off the active + * and inactive queues. This keeps the + * pageout daemon from playing with them + * during vm_pager_deallocate. + * + * We can't free the pages yet, because the + * object's pager may have to write them out + * before deallocating the paging space. + */ + + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + VM_PAGE_CHECK(p); + + vm_page_lock_queues(); + if (p->active) { + queue_remove(&vm_page_queue_active, p, vm_page_t, + pageq); + p->active = FALSE; + vm_page_active_count--; + } + + if (p->inactive) { + queue_remove(&vm_page_queue_inactive, p, vm_page_t, + pageq); + p->inactive = FALSE; + vm_page_inactive_count--; + } + vm_page_unlock_queues(); + p = (vm_page_t) queue_next(&p->listq); + } + + vm_object_unlock(object); + + if (object->paging_in_progress != 0) + panic("vm_object_deallocate: pageout in progress"); + + /* + * Clean and free the pages, as appropriate. + * All references to the object are gone, + * so we don't need to lock it. + */ + + if (!object->internal) { + vm_object_lock(object); + vm_object_page_clean(object, 0, 0); + vm_object_unlock(object); + } + while (!queue_empty(&object->memq)) { + p = (vm_page_t) queue_first(&object->memq); + + VM_PAGE_CHECK(p); + + vm_page_lock_queues(); + vm_page_free(p); + vm_page_unlock_queues(); + } + + /* + * Let the pager know object is dead. + */ + + if (object->pager != vm_pager_null) + vm_pager_deallocate(object->pager); + + + simple_lock(&vm_object_list_lock); + queue_remove(&vm_object_list, object, vm_object_t, object_list); + vm_object_count--; + simple_unlock(&vm_object_list_lock); + + /* + * Free the space for the object. 
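+	 *	(It was already removed from vm_object_list above,
+	 *	so releasing its storage is all that remains.)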
+ */ + + free((caddr_t)object, M_VMOBJ); +} + +/* + * vm_object_page_clean + * + * Clean all dirty pages in the specified range of object. + * Leaves page on whatever queue it is currently on. + * + * Odd semantics: if start == end, we clean everything. + * + * The object must be locked. + */ +vm_object_page_clean(object, start, end) + register vm_object_t object; + register vm_offset_t start; + register vm_offset_t end; +{ + register vm_page_t p; + + if (object->pager == vm_pager_null) + return; + +again: + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + if (start == end || + p->offset >= start && p->offset < end) { + if (p->clean && pmap_is_modified(VM_PAGE_TO_PHYS(p))) + p->clean = FALSE; + pmap_remove_all(VM_PAGE_TO_PHYS(p)); + if (!p->clean) { + p->busy = TRUE; + object->paging_in_progress++; + vm_object_unlock(object); + (void) vm_pager_put(object->pager, p, TRUE); + vm_object_lock(object); + object->paging_in_progress--; + p->busy = FALSE; + PAGE_WAKEUP(p); + goto again; + } + } + p = (vm_page_t) queue_next(&p->listq); + } +} + +/* + * vm_object_deactivate_pages + * + * Deactivate all pages in the specified object. (Keep its pages + * in memory even though it is no longer referenced.) + * + * The object must be locked. + */ +vm_object_deactivate_pages(object) + register vm_object_t object; +{ + register vm_page_t p, next; + + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + next = (vm_page_t) queue_next(&p->listq); + vm_page_lock_queues(); + vm_page_deactivate(p); + vm_page_unlock_queues(); + p = next; + } +} + +/* + * Trim the object cache to size. + */ +vm_object_cache_trim() +{ + register vm_object_t object; + + vm_object_cache_lock(); + while (vm_object_cached > vm_cache_max) { + object = (vm_object_t) queue_first(&vm_object_cached_list); + vm_object_cache_unlock(); + + if (object != vm_object_lookup(object->pager)) + panic("vm_object_deactivate: I'm sooo confused."); + + pager_cache(object, FALSE); + + vm_object_cache_lock(); + } + vm_object_cache_unlock(); +} + + +/* + * vm_object_shutdown() + * + * Shut down the object system. Unfortunately, while we + * may be trying to do this, init is happily waiting for + * processes to exit, and therefore will be causing some objects + * to be deallocated. To handle this, we gain a fake reference + * to all objects we release paging areas for. This will prevent + * a duplicate deallocation. This routine is probably full of + * race conditions! + */ + +void vm_object_shutdown() +{ + register vm_object_t object; + + /* + * Clean up the object cache *before* we screw up the reference + * counts on all of the objects. + */ + + vm_object_cache_clear(); + + printf("free paging spaces: "); + + /* + * First we gain a reference to each object so that + * no one else will deallocate them. + */ + + simple_lock(&vm_object_list_lock); + object = (vm_object_t) queue_first(&vm_object_list); + while (!queue_end(&vm_object_list, (queue_entry_t) object)) { + vm_object_reference(object); + object = (vm_object_t) queue_next(&object->object_list); + } + simple_unlock(&vm_object_list_lock); + + /* + * Now we deallocate all the paging areas. We don't need + * to lock anything because we've reduced to a single + * processor while shutting down. This also assumes that + * no new objects are being created. 
+ */ + + object = (vm_object_t) queue_first(&vm_object_list); + while (!queue_end(&vm_object_list, (queue_entry_t) object)) { + if (object->pager != vm_pager_null) + vm_pager_deallocate(object->pager); + object = (vm_object_t) queue_next(&object->object_list); + printf("."); + } + printf("done.\n"); +} + +/* + * vm_object_pmap_copy: + * + * Makes all physical pages in the specified + * object range copy-on-write. No writeable + * references to these pages should remain. + * + * The object must *not* be locked. + */ +void vm_object_pmap_copy(object, start, end) + register vm_object_t object; + register vm_offset_t start; + register vm_offset_t end; +{ + register vm_page_t p; + + if (object == VM_OBJECT_NULL) + return; + + vm_object_lock(object); + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + if ((start <= p->offset) && (p->offset < end)) { + if (!p->copy_on_write) { + pmap_copy_on_write(VM_PAGE_TO_PHYS(p)); + p->copy_on_write = TRUE; + } + } + p = (vm_page_t) queue_next(&p->listq); + } + vm_object_unlock(object); +} + +/* + * vm_object_pmap_remove: + * + * Removes all physical pages in the specified + * object range from all physical maps. + * + * The object must *not* be locked. + */ +void vm_object_pmap_remove(object, start, end) + register vm_object_t object; + register vm_offset_t start; + register vm_offset_t end; +{ + register vm_page_t p; + + if (object == VM_OBJECT_NULL) + return; + + vm_object_lock(object); + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + if ((start <= p->offset) && (p->offset < end)) { + pmap_remove_all(VM_PAGE_TO_PHYS(p)); + } + p = (vm_page_t) queue_next(&p->listq); + } + vm_object_unlock(object); +} + +/* + * vm_object_copy: + * + * Create a new object which is a copy of an existing + * object, and mark all of the pages in the existing + * object 'copy-on-write'. The new object has one reference. + * Returns the new object. + * + * May defer the copy until later if the object is not backed + * up by a non-default pager. + */ +void vm_object_copy(src_object, src_offset, size, + dst_object, dst_offset, src_needs_copy) + register vm_object_t src_object; + vm_offset_t src_offset; + vm_size_t size; + vm_object_t *dst_object; /* OUT */ + vm_offset_t *dst_offset; /* OUT */ + boolean_t *src_needs_copy; /* OUT */ +{ + register vm_object_t new_copy; + register vm_object_t old_copy; + vm_offset_t new_start, new_end; + + register vm_page_t p; + + if (src_object == VM_OBJECT_NULL) { + /* + * Nothing to copy + */ + *dst_object = VM_OBJECT_NULL; + *dst_offset = 0; + *src_needs_copy = FALSE; + return; + } + + /* + * If the object's pager is null_pager or the + * default pager, we don't have to make a copy + * of it. Instead, we set the needs copy flag and + * make a shadow later. + */ + + vm_object_lock(src_object); + if (src_object->pager == vm_pager_null || + src_object->internal) { + + /* + * Make another reference to the object + */ + src_object->ref_count++; + + /* + * Mark all of the pages copy-on-write. 
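+		 *	(A page so marked must be copied before it can
+		 *	be modified; see the copy_on_write bit in
+		 *	vm_page.h.)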
+ */ + for (p = (vm_page_t) queue_first(&src_object->memq); + !queue_end(&src_object->memq, (queue_entry_t)p); + p = (vm_page_t) queue_next(&p->listq)) { + if (src_offset <= p->offset && + p->offset < src_offset + size) + p->copy_on_write = TRUE; + } + vm_object_unlock(src_object); + + *dst_object = src_object; + *dst_offset = src_offset; + + /* + * Must make a shadow when write is desired + */ + *src_needs_copy = TRUE; + return; + } + + /* + * Try to collapse the object before copying it. + */ + vm_object_collapse(src_object); + + /* + * If the object has a pager, the pager wants to + * see all of the changes. We need a copy-object + * for the changed pages. + * + * If there is a copy-object, and it is empty, + * no changes have been made to the object since the + * copy-object was made. We can use the same copy- + * object. + */ + + Retry1: + old_copy = src_object->copy; + if (old_copy != VM_OBJECT_NULL) { + /* + * Try to get the locks (out of order) + */ + if (!vm_object_lock_try(old_copy)) { + vm_object_unlock(src_object); + + /* should spin a bit here... */ + vm_object_lock(src_object); + goto Retry1; + } + + if (old_copy->resident_page_count == 0 && + old_copy->pager == vm_pager_null) { + /* + * Return another reference to + * the existing copy-object. + */ + old_copy->ref_count++; + vm_object_unlock(old_copy); + vm_object_unlock(src_object); + *dst_object = old_copy; + *dst_offset = src_offset; + *src_needs_copy = FALSE; + return; + } + vm_object_unlock(old_copy); + } + vm_object_unlock(src_object); + + /* + * If the object has a pager, the pager wants + * to see all of the changes. We must make + * a copy-object and put the changed pages there. + * + * The copy-object is always made large enough to + * completely shadow the original object, since + * it may have several users who want to shadow + * the original object at different points. + */ + + new_copy = vm_object_allocate(src_object->size); + + Retry2: + vm_object_lock(src_object); + /* + * Copy object may have changed while we were unlocked + */ + old_copy = src_object->copy; + if (old_copy != VM_OBJECT_NULL) { + /* + * Try to get the locks (out of order) + */ + if (!vm_object_lock_try(old_copy)) { + vm_object_unlock(src_object); + goto Retry2; + } + + /* + * Consistency check + */ + if (old_copy->shadow != src_object || + old_copy->shadow_offset != (vm_offset_t) 0) + panic("vm_object_copy: copy/shadow inconsistency"); + + /* + * Make the old copy-object shadow the new one. + * It will receive no more pages from the original + * object. + */ + + src_object->ref_count--; /* remove ref. from old_copy */ + old_copy->shadow = new_copy; + new_copy->ref_count++; /* locking not needed - we + have the only pointer */ + vm_object_unlock(old_copy); /* done with old_copy */ + } + + new_start = (vm_offset_t) 0; /* always shadow original at 0 */ + new_end = (vm_offset_t) new_copy->size; /* for the whole object */ + + /* + * Point the new copy at the existing object. + */ + + new_copy->shadow = src_object; + new_copy->shadow_offset = new_start; + src_object->ref_count++; + src_object->copy = new_copy; + + /* + * Mark all the affected pages of the existing object + * copy-on-write. 
+ */ + p = (vm_page_t) queue_first(&src_object->memq); + while (!queue_end(&src_object->memq, (queue_entry_t) p)) { + if ((new_start <= p->offset) && (p->offset < new_end)) { + p->copy_on_write = TRUE; + } + p = (vm_page_t) queue_next(&p->listq); + } + + vm_object_unlock(src_object); + + *dst_object = new_copy; + *dst_offset = src_offset - new_start; + *src_needs_copy = FALSE; +} + +/* + * vm_object_shadow: + * + * Create a new object which is backed by the + * specified existing object range. The source + * object reference is deallocated. + * + * The new object and offset into that object + * are returned in the source parameters. + */ + +void vm_object_shadow(object, offset, length) + vm_object_t *object; /* IN/OUT */ + vm_offset_t *offset; /* IN/OUT */ + vm_size_t length; +{ + register vm_object_t source; + register vm_object_t result; + + source = *object; + + /* + * Allocate a new object with the given length + */ + + if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL) + panic("vm_object_shadow: no object for shadowing"); + + /* + * The new object shadows the source object, adding + * a reference to it. Our caller changes his reference + * to point to the new object, removing a reference to + * the source object. Net result: no change of reference + * count. + */ + result->shadow = source; + + /* + * Store the offset into the source object, + * and fix up the offset into the new object. + */ + + result->shadow_offset = *offset; + + /* + * Return the new things + */ + + *offset = 0; + *object = result; +} + +/* + * Set the specified object's pager to the specified pager. + */ + +void vm_object_setpager(object, pager, paging_offset, + read_only) + vm_object_t object; + vm_pager_t pager; + vm_offset_t paging_offset; + boolean_t read_only; +{ +#ifdef lint + read_only++; /* No longer used */ +#endif lint + + vm_object_lock(object); /* XXX ? */ + object->pager = pager; + object->paging_offset = paging_offset; + vm_object_unlock(object); /* XXX ? */ +} + +/* + * vm_object_hash hashes the pager/id pair. + */ + +#define vm_object_hash(pager) \ + (((unsigned)pager)%VM_OBJECT_HASH_COUNT) + +/* + * vm_object_lookup looks in the object cache for an object with the + * specified pager and paging id. + */ + +vm_object_t vm_object_lookup(pager) + vm_pager_t pager; +{ + register queue_t bucket; + register vm_object_hash_entry_t entry; + vm_object_t object; + + bucket = &vm_object_hashtable[vm_object_hash(pager)]; + + vm_object_cache_lock(); + + entry = (vm_object_hash_entry_t) queue_first(bucket); + while (!queue_end(bucket, (queue_entry_t) entry)) { + object = entry->object; + if (object->pager == pager) { + vm_object_lock(object); + if (object->ref_count == 0) { + queue_remove(&vm_object_cached_list, object, + vm_object_t, cached_list); + vm_object_cached--; + } + object->ref_count++; + vm_object_unlock(object); + vm_object_cache_unlock(); + return(object); + } + entry = (vm_object_hash_entry_t) queue_next(&entry->hash_links); + } + + vm_object_cache_unlock(); + return(VM_OBJECT_NULL); +} + +/* + * vm_object_enter enters the specified object/pager/id into + * the hash table. + */ + +void vm_object_enter(object, pager) + vm_object_t object; + vm_pager_t pager; +{ + register queue_t bucket; + register vm_object_hash_entry_t entry; + + /* + * We don't cache null objects, and we can't cache + * objects with the null pager. 
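+	 *	(The pager is the hash key here -- see vm_object_hash()
+	 *	and vm_object_lookup().)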
+ */ + + if (object == VM_OBJECT_NULL) + return; + if (pager == vm_pager_null) + return; + + bucket = &vm_object_hashtable[vm_object_hash(pager)]; + entry = (vm_object_hash_entry_t) + malloc((u_long)sizeof *entry, M_VMOBJHASH, M_WAITOK); + entry->object = object; + object->can_persist = TRUE; + + vm_object_cache_lock(); + queue_enter(bucket, entry, vm_object_hash_entry_t, hash_links); + vm_object_cache_unlock(); +} + +/* + * vm_object_remove: + * + * Remove the pager from the hash table. + * Note: This assumes that the object cache + * is locked. XXX this should be fixed + * by reorganizing vm_object_deallocate. + */ +vm_object_remove(pager) + register vm_pager_t pager; +{ + register queue_t bucket; + register vm_object_hash_entry_t entry; + register vm_object_t object; + + bucket = &vm_object_hashtable[vm_object_hash(pager)]; + + entry = (vm_object_hash_entry_t) queue_first(bucket); + while (!queue_end(bucket, (queue_entry_t) entry)) { + object = entry->object; + if (object->pager == pager) { + queue_remove(bucket, entry, vm_object_hash_entry_t, + hash_links); + free((caddr_t)entry, M_VMOBJHASH); + break; + } + entry = (vm_object_hash_entry_t) queue_next(&entry->hash_links); + } +} + +/* + * vm_object_cache_clear removes all objects from the cache. + * + */ + +void vm_object_cache_clear() +{ + register vm_object_t object; + + /* + * Remove each object in the cache by scanning down the + * list of cached objects. + */ + vm_object_cache_lock(); + while (!queue_empty(&vm_object_cached_list)) { + object = (vm_object_t) queue_first(&vm_object_cached_list); + vm_object_cache_unlock(); + + /* + * Note: it is important that we use vm_object_lookup + * to gain a reference, and not vm_object_reference, because + * the logic for removing an object from the cache lies in + * lookup. + */ + if (object != vm_object_lookup(object->pager)) + panic("vm_object_cache_clear: I'm sooo confused."); + pager_cache(object, FALSE); + + vm_object_cache_lock(); + } + vm_object_cache_unlock(); +} + +boolean_t vm_object_collapse_allowed = TRUE; +/* + * vm_object_collapse: + * + * Collapse an object with the object backing it. + * Pages in the backing object are moved into the + * parent, and the backing object is deallocated. + * + * Requires that the object be locked and the page + * queues be unlocked. + * + */ +void vm_object_collapse(object) + register vm_object_t object; + +{ + register vm_object_t backing_object; + register vm_offset_t backing_offset; + register vm_size_t size; + register vm_offset_t new_offset; + register vm_page_t p, pp; + + if (!vm_object_collapse_allowed) + return; + + while (TRUE) { + /* + * Verify that the conditions are right for collapse: + * + * The object exists and no pages in it are currently + * being paged out (or have ever been paged out). + */ + if (object == VM_OBJECT_NULL || + object->paging_in_progress != 0 || + object->pager != vm_pager_null) + return; + + /* + * There is a backing object, and + */ + + if ((backing_object = object->shadow) == VM_OBJECT_NULL) + return; + + vm_object_lock(backing_object); + /* + * ... + * The backing object is not read_only, + * and no pages in the backing object are + * currently being paged out. + * The backing object is internal. + */ + + if (!backing_object->internal || + backing_object->paging_in_progress != 0) { + vm_object_unlock(backing_object); + return; + } + + /* + * The backing object can't be a copy-object: + * the shadow_offset for the copy-object must stay + * as 0. 
Furthermore (for the 'we have all the + * pages' case), if we bypass backing_object and + * just shadow the next object in the chain, old + * pages from that object would then have to be copied + * BOTH into the (former) backing_object and into the + * parent object. + */ + if (backing_object->shadow != VM_OBJECT_NULL && + backing_object->shadow->copy != VM_OBJECT_NULL) { + vm_object_unlock(backing_object); + return; + } + + /* + * We know that we can either collapse the backing + * object (if the parent is the only reference to + * it) or (perhaps) remove the parent's reference + * to it. + */ + + backing_offset = object->shadow_offset; + size = object->size; + + /* + * If there is exactly one reference to the backing + * object, we can collapse it into the parent. + */ + + if (backing_object->ref_count == 1) { + + /* + * We can collapse the backing object. + * + * Move all in-memory pages from backing_object + * to the parent. Pages that have been paged out + * will be overwritten by any of the parent's + * pages that shadow them. + */ + + while (!queue_empty(&backing_object->memq)) { + + p = (vm_page_t) + queue_first(&backing_object->memq); + + new_offset = (p->offset - backing_offset); + + /* + * If the parent has a page here, or if + * this page falls outside the parent, + * dispose of it. + * + * Otherwise, move it as planned. + */ + + if (p->offset < backing_offset || + new_offset >= size) { + vm_page_lock_queues(); + vm_page_free(p); + vm_page_unlock_queues(); + } else { + pp = vm_page_lookup(object, new_offset); + if (pp != VM_PAGE_NULL && !pp->fake) { + vm_page_lock_queues(); + vm_page_free(p); + vm_page_unlock_queues(); + } + else { + if (pp) { + /* may be someone waiting for it */ + PAGE_WAKEUP(pp); + vm_page_lock_queues(); + vm_page_free(pp); + vm_page_unlock_queues(); + } + vm_page_rename(p, object, new_offset); + } + } + } + + /* + * Move the pager from backing_object to object. + * + * XXX We're only using part of the paging space + * for keeps now... we ought to discard the + * unused portion. + */ + + object->pager = backing_object->pager; + object->paging_offset += backing_offset; + + backing_object->pager = vm_pager_null; + + /* + * Object now shadows whatever backing_object did. + * Note that the reference to backing_object->shadow + * moves from within backing_object to within object. + */ + + object->shadow = backing_object->shadow; + object->shadow_offset += backing_object->shadow_offset; + if (object->shadow != VM_OBJECT_NULL && + object->shadow->copy != VM_OBJECT_NULL) { + panic("vm_object_collapse: we collapsed a copy-object!"); + } + /* + * Discard backing_object. + * + * Since the backing object has no pages, no + * pager left, and no object references within it, + * all that is necessary is to dispose of it. + */ + + vm_object_unlock(backing_object); + + simple_lock(&vm_object_list_lock); + queue_remove(&vm_object_list, backing_object, + vm_object_t, object_list); + vm_object_count--; + simple_unlock(&vm_object_list_lock); + + free((caddr_t)backing_object, M_VMOBJ); + + object_collapses++; + } + else { + /* + * If all of the pages in the backing object are + * shadowed by the parent object, the parent + * object no longer has to shadow the backing + * object; it can shadow the next one in the + * chain. + * + * The backing object must not be paged out - we'd + * have to check all of the paged-out pages, as + * well. 
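+			 *	(Hence the test just below: a non-null
+			 *	pager on the backing object defeats the
+			 *	bypass.)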
+ */ + + if (backing_object->pager != vm_pager_null) { + vm_object_unlock(backing_object); + return; + } + + /* + * Should have a check for a 'small' number + * of pages here. + */ + + p = (vm_page_t) queue_first(&backing_object->memq); + while (!queue_end(&backing_object->memq, + (queue_entry_t) p)) { + + new_offset = (p->offset - backing_offset); + + /* + * If the parent has a page here, or if + * this page falls outside the parent, + * keep going. + * + * Otherwise, the backing_object must be + * left in the chain. + */ + + if (p->offset >= backing_offset && + new_offset <= size && + ((pp = vm_page_lookup(object, new_offset)) + == VM_PAGE_NULL || + pp->fake)) { + /* + * Page still needed. + * Can't go any further. + */ + vm_object_unlock(backing_object); + return; + } + p = (vm_page_t) queue_next(&p->listq); + } + + /* + * Make the parent shadow the next object + * in the chain. Deallocating backing_object + * will not remove it, since its reference + * count is at least 2. + */ + + vm_object_reference(object->shadow = backing_object->shadow); + object->shadow_offset += backing_object->shadow_offset; + + /* Drop the reference count on backing_object. + * Since its ref_count was at least 2, it + * will not vanish; so we don't need to call + * vm_object_deallocate. + */ + backing_object->ref_count--; + vm_object_unlock(backing_object); + + object_bypasses ++; + + } + + /* + * Try again with this object's new backing object. + */ + } +} + +/* + * vm_object_page_remove: [internal] + * + * Removes all physical pages in the specified + * object range from the object's list of pages. + * + * The object must be locked. + */ +void vm_object_page_remove(object, start, end) + register vm_object_t object; + register vm_offset_t start; + register vm_offset_t end; +{ + register vm_page_t p, next; + + if (object == VM_OBJECT_NULL) + return; + + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + next = (vm_page_t) queue_next(&p->listq); + if ((start <= p->offset) && (p->offset < end)) { + pmap_remove_all(VM_PAGE_TO_PHYS(p)); + vm_page_lock_queues(); + vm_page_free(p); + vm_page_unlock_queues(); + } + p = next; + } +} + +/* + * Routine: vm_object_coalesce + * Function: Coalesces two objects backing up adjoining + * regions of memory into a single object. + * + * returns TRUE if objects were combined. + * + * NOTE: Only works at the moment if the second object is NULL - + * if it's not, which object do we lock first? + * + * Parameters: + * prev_object First object to coalesce + * prev_offset Offset into prev_object + * next_object Second object into coalesce + * next_offset Offset into next_object + * + * prev_size Size of reference to prev_object + * next_size Size of reference to next_object + * + * Conditions: + * The object must *not* be locked. + */ +boolean_t vm_object_coalesce(prev_object, next_object, + prev_offset, next_offset, + prev_size, next_size) + + register vm_object_t prev_object; + vm_object_t next_object; + vm_offset_t prev_offset, next_offset; + vm_size_t prev_size, next_size; +{ + vm_size_t newsize; + +#ifdef lint + next_offset++; +#endif lint + + if (next_object != VM_OBJECT_NULL) { + return(FALSE); + } + + if (prev_object == VM_OBJECT_NULL) { + return(TRUE); + } + + vm_object_lock(prev_object); + + /* + * Try to collapse the object first + */ + vm_object_collapse(prev_object); + + /* + * Can't coalesce if: + * . more than one reference + * . paged out + * . shadows another object + * . 
has a copy elsewhere + * (any of which mean that the pages not mapped to + * prev_entry may be in use anyway) + */ + + if (prev_object->ref_count > 1 || + prev_object->pager != vm_pager_null || + prev_object->shadow != VM_OBJECT_NULL || + prev_object->copy != VM_OBJECT_NULL) { + vm_object_unlock(prev_object); + return(FALSE); + } + + /* + * Remove any pages that may still be in the object from + * a previous deallocation. + */ + + vm_object_page_remove(prev_object, + prev_offset + prev_size, + prev_offset + prev_size + next_size); + + /* + * Extend the object if necessary. + */ + newsize = prev_offset + prev_size + next_size; + if (newsize > prev_object->size) + prev_object->size = newsize; + + vm_object_unlock(prev_object); + return(TRUE); +} + +/* + * vm_object_print: [ debug ] + */ +void vm_object_print(object, full) + vm_object_t object; + boolean_t full; +{ + register vm_page_t p; + extern indent; + + register int count; + + if (object == VM_OBJECT_NULL) + return; + + iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ", + (int) object, (int) object->size, + object->resident_page_count, object->ref_count); + printf("pager=0x%x+0x%x, shadow=(0x%x)+0x%x\n", + (int) object->pager, (int) object->paging_offset, + (int) object->shadow, (int) object->shadow_offset); + printf("cache: next=0x%x, prev=0x%x\n", + object->cached_list.next, object->cached_list.prev); + + if (!full) + return; + + indent += 2; + count = 0; + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + if (count == 0) + iprintf("memory:="); + else if (count == 6) { + printf("\n"); + iprintf(" ..."); + count = 0; + } else + printf(","); + count++; + + printf("(off=0x%x,page=0x%x)", p->offset, VM_PAGE_TO_PHYS(p)); + p = (vm_page_t) queue_next(&p->listq); + } + if (count != 0) + printf("\n"); + indent -= 2; +} diff --git a/usr/src/sys/vm/vm_object.h b/usr/src/sys/vm/vm_object.h new file mode 100644 index 0000000000..938fe6d81c --- /dev/null +++ b/usr/src/sys/vm/vm_object.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_object.h 7.1 (Berkeley) %G% + */ + +/* + * Virtual memory object module definitions. + */ + +#ifndef _VM_OBJECT_ +#define _VM_OBJECT_ + +#ifdef KERNEL +#include "types.h" +#include "lock.h" +#include "queue.h" +#include "../vm/vm_pager.h" +#else +#include +#include +#include +#include +#endif + +/* + * Types defined: + * + * vm_object_t Virtual memory object. + */ + +struct vm_object { + queue_chain_t memq; /* Resident memory */ + queue_chain_t object_list; /* list of all objects */ + simple_lock_data_t Lock; /* Synchronization */ + int LockHolder; + int ref_count; /* How many refs?? */ + vm_size_t size; /* Object size */ + int resident_page_count; + /* number of resident pages */ + struct vm_object *copy; /* Object that holds copies of + my changed pages */ + vm_pager_t pager; /* Where to get data */ + boolean_t pager_ready; /* Have pager fields been filled? 
*/ + vm_offset_t paging_offset; /* Offset into paging space */ + struct vm_object *shadow; /* My shadow */ + vm_offset_t shadow_offset; /* Offset in shadow */ + unsigned int + paging_in_progress:16, + /* Paging (in or out) - don't + collapse or destroy */ + /* boolean_t */ can_persist:1, /* allow to persist */ + /* boolean_t */ internal:1; /* internally created object */ + queue_chain_t cached_list; /* for persistence */ +}; + +typedef struct vm_object *vm_object_t; + +struct vm_object_hash_entry { + queue_chain_t hash_links; /* hash chain links */ + vm_object_t object; /* object we represent */ +}; + +typedef struct vm_object_hash_entry *vm_object_hash_entry_t; + +#ifdef KERNEL +queue_head_t vm_object_cached_list; /* list of objects persisting */ +int vm_object_cached; /* size of cached list */ +simple_lock_data_t vm_cache_lock; /* lock for object cache */ + +queue_head_t vm_object_list; /* list of allocated objects */ +long vm_object_count; /* count of all objects */ +simple_lock_data_t vm_object_list_lock; + /* lock for object list and count */ + +vm_object_t kernel_object; /* the single kernel object */ +vm_object_t kmem_object; + +#define vm_object_cache_lock() simple_lock(&vm_cache_lock) +#define vm_object_cache_unlock() simple_unlock(&vm_cache_lock) +#endif KERNEL + +#define VM_OBJECT_NULL ((vm_object_t) 0) + +/* + * Declare procedures that operate on VM objects. + */ + +void vm_object_init (); +void vm_object_terminate(); +vm_object_t vm_object_allocate(); +void vm_object_reference(); +void vm_object_deallocate(); +void vm_object_pmap_copy(); +void vm_object_pmap_remove(); +void vm_object_page_remove(); +void vm_object_shadow(); +void vm_object_copy(); +void vm_object_collapse(); +vm_object_t vm_object_lookup(); +void vm_object_enter(); +void vm_object_setpager(); +#define vm_object_cache(pager) pager_cache(vm_object_lookup(pager),TRUE) +#define vm_object_uncache(pager) pager_cache(vm_object_lookup(pager),FALSE) + +void vm_object_cache_clear(); +void vm_object_print(); + +#if VM_OBJECT_DEBUG +#define vm_object_lock_init(object) { simple_lock_init(&(object)->Lock); (object)->LockHolder = 0; } +#define vm_object_lock(object) { simple_lock(&(object)->Lock); (object)->LockHolder = (int) current_thread(); } +#define vm_object_unlock(object) { (object)->LockHolder = 0; simple_unlock(&(object)->Lock); } +#define vm_object_lock_try(object) (simple_lock_try(&(object)->Lock) ? ( ((object)->LockHolder = (int) current_thread()) , TRUE) : FALSE) +#define vm_object_sleep(event, object, interruptible) \ + { (object)->LockHolder = 0; thread_sleep((event), &(object)->Lock, (interruptible)); } +#else VM_OBJECT_DEBUG +#define vm_object_lock_init(object) simple_lock_init(&(object)->Lock) +#define vm_object_lock(object) simple_lock(&(object)->Lock) +#define vm_object_unlock(object) simple_unlock(&(object)->Lock) +#define vm_object_lock_try(object) simple_lock_try(&(object)->Lock) +#define vm_object_sleep(event, object, interruptible) \ + thread_sleep((event), &(object)->Lock, (interruptible)) +#endif VM_OBJECT_DEBUG + +#endif _VM_OBJECT_ diff --git a/usr/src/sys/vm/vm_page.c b/usr/src/sys/vm/vm_page.c new file mode 100644 index 0000000000..0bae906989 --- /dev/null +++ b/usr/src/sys/vm/vm_page.c @@ -0,0 +1,698 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_page.c 7.1 (Berkeley) %G% + */ + +/* + * Resident memory management module. + */ + +#include "types.h" +#include "../vm/vm_param.h" +#include "../vm/vm_map.h" +#include "../vm/vm_page.h" +#include "../vm/vm_prot.h" +#include "../vm/vm_statistics.h" +#include "../vm/vm_pageout.h" +#include "../vm/pmap.h" + +/* + * Associated with page of user-allocatable memory is a + * page structure. + */ + +queue_head_t *vm_page_buckets; /* Array of buckets */ +int vm_page_bucket_count = 0; /* How big is array? */ +int vm_page_hash_mask; /* Mask for hash function */ +simple_lock_data_t bucket_lock; /* lock for all buckets XXX */ + +vm_size_t page_size = 4096; +vm_size_t page_mask = 4095; +int page_shift = 12; + +queue_head_t vm_page_queue_free; +queue_head_t vm_page_queue_active; +queue_head_t vm_page_queue_inactive; +simple_lock_data_t vm_page_queue_lock; +simple_lock_data_t vm_page_queue_free_lock; + +vm_page_t vm_page_array; +long first_page; +long last_page; +vm_offset_t first_phys_addr; +vm_offset_t last_phys_addr; + +int vm_page_free_count; +int vm_page_active_count; +int vm_page_inactive_count; +int vm_page_wire_count; +int vm_page_laundry_count; + +int vm_page_free_target = 0; +int vm_page_free_min = 0; +int vm_page_inactive_target = 0; +int vm_page_free_reserved = 0; + +/* + * vm_set_page_size: + * + * Sets the page size, perhaps based upon the memory + * size. Must be called before any use of page-size + * dependent functions. + * + * Sets page_shift and page_mask from page_size. + */ +void vm_set_page_size() +{ + page_mask = page_size - 1; + + if ((page_mask & page_size) != 0) + panic("vm_set_page_size: page size not a power of two"); + + for (page_shift = 0; ; page_shift++) + if ((1 << page_shift) == page_size) + break; +} + + +/* + * vm_page_startup: + * + * Initializes the resident memory module. + * + * Allocates memory for the page cells, and + * for the object/offset-to-page hash table headers. + * Each page cell is initialized and placed on the free list. + */ +vm_offset_t vm_page_startup(start, end, vaddr) + register vm_offset_t start; + vm_offset_t end; + register vm_offset_t vaddr; +{ + register vm_offset_t mapped; + register vm_page_t m; + register queue_t bucket; + vm_size_t npages; + register vm_offset_t new_start; + int i; + vm_offset_t pa; + + extern vm_offset_t kentry_data; + extern vm_size_t kentry_data_size; + + + /* + * Initialize the locks + */ + + simple_lock_init(&vm_page_queue_free_lock); + simple_lock_init(&vm_page_queue_lock); + + /* + * Initialize the queue headers for the free queue, + * the active queue and the inactive queue. + */ + + queue_init(&vm_page_queue_free); + queue_init(&vm_page_queue_active); + queue_init(&vm_page_queue_inactive); + + /* + * Allocate (and initialize) the hash table buckets. + * + * The number of buckets MUST BE a power of 2, and + * the actual value is the next power of 2 greater + * than the number of physical pages in the system. + * + * Note: + * This computation can be tweaked if desired. 
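+	 *
+	 *	For example, with the 4096-byte pages declared above,
+	 *	managing 8 megabytes of memory gives atop(end - start)
+	 *	== 2048, and the doubling loop below settles on
+	 *	vm_page_bucket_count == 2048.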
+ */ + + vm_page_buckets = (queue_t) vaddr; + bucket = vm_page_buckets; + if (vm_page_bucket_count == 0) { + vm_page_bucket_count = 1; + while (vm_page_bucket_count < atop(end - start)) + vm_page_bucket_count <<= 1; + } + + vm_page_hash_mask = vm_page_bucket_count - 1; + + /* + * Validate these addresses. + */ + + new_start = round_page(((queue_t)start) + vm_page_bucket_count); + mapped = vaddr; + vaddr = pmap_map(mapped, start, new_start, + VM_PROT_READ|VM_PROT_WRITE); + start = new_start; + blkclr((caddr_t) mapped, vaddr - mapped); + mapped = vaddr; + + for (i = vm_page_bucket_count; i--;) { + queue_init(bucket); + bucket++; + } + + simple_lock_init(&bucket_lock); + + /* + * round (or truncate) the addresses to our page size. + */ + + end = trunc_page(end); + + /* + * Pre-allocate maps and map entries that cannot be dynamically + * allocated via malloc(). The maps include the kernel_map and + * kmem_map which must be initialized before malloc() will + * work (obviously). Also could include pager maps which would + * be allocated before kmeminit. + * + * Allow some kernel map entries... this should be plenty + * since people shouldn't be cluttering up the kernel + * map (they should use their own maps). + */ + + kentry_data_size = MAX_KMAP * sizeof(struct vm_map) + + MAX_KMAPENT * sizeof(struct vm_map_entry); + kentry_data_size = round_page(kentry_data_size); + kentry_data = (vm_offset_t) vaddr; + vaddr += kentry_data_size; + + /* + * Validate these zone addresses. + */ + + new_start = start + (vaddr - mapped); + pmap_map(mapped, start, new_start, VM_PROT_READ|VM_PROT_WRITE); + blkclr((caddr_t) mapped, (vaddr - mapped)); + mapped = vaddr; + start = new_start; + + /* + * Compute the number of pages of memory that will be + * available for use (taking into account the overhead + * of a page structure per page). + */ + + vm_page_free_count = npages = + (end - start)/(PAGE_SIZE + sizeof(struct vm_page)); + + /* + * Initialize the mem entry structures now, and + * put them in the free queue. + */ + + m = vm_page_array = (vm_page_t) vaddr; + first_page = start; + first_page += npages*sizeof(struct vm_page); + first_page = atop(round_page(first_page)); + last_page = first_page + npages - 1; + + first_phys_addr = ptoa(first_page); + last_phys_addr = ptoa(last_page) + PAGE_MASK; + + /* + * Validate these addresses. + */ + + new_start = start + (round_page(m + npages) - mapped); + mapped = pmap_map(mapped, start, new_start, + VM_PROT_READ|VM_PROT_WRITE); + start = new_start; + + /* + * Clear all of the page structures + */ + blkclr((caddr_t)m, npages * sizeof(*m)); + + pa = first_phys_addr; + while (npages--) { + m->copy_on_write = FALSE; + m->wanted = FALSE; + m->inactive = FALSE; + m->active = FALSE; + m->busy = FALSE; + m->object = VM_OBJECT_NULL; + m->phys_addr = pa; + queue_enter(&vm_page_queue_free, m, vm_page_t, pageq); + m++; + pa += PAGE_SIZE; + } + + /* + * Initialize vm_pages_needed lock here - don't wait for pageout + * daemon XXX + */ + simple_lock_init(&vm_pages_needed_lock); + + return(mapped); +} + +/* + * vm_page_hash: + * + * Distributes the object/offset key pair among hash buckets. + * + * NOTE: This macro depends on vm_page_bucket_count being a power of 2. + */ +#define vm_page_hash(object, offset) \ + (((unsigned)object+(unsigned)atop(offset))&vm_page_hash_mask) + +/* + * vm_page_insert: [ internal use only ] + * + * Inserts the given mem entry into the object/object-page + * table and object list. + * + * The object and page must be locked. 
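+ *	(The object/offset hash bucket is additionally guarded by
+ *	bucket_lock, taken at splimp in the body below.)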
+ */ + +void vm_page_insert(mem, object, offset) + register vm_page_t mem; + register vm_object_t object; + register vm_offset_t offset; +{ + register queue_t bucket; + int spl; + + VM_PAGE_CHECK(mem); + + if (mem->tabled) + panic("vm_page_insert: already inserted"); + + /* + * Record the object/offset pair in this page + */ + + mem->object = object; + mem->offset = offset; + + /* + * Insert it into the object_object/offset hash table + */ + + bucket = &vm_page_buckets[vm_page_hash(object, offset)]; + spl = splimp(); + simple_lock(&bucket_lock); + queue_enter(bucket, mem, vm_page_t, hashq); + simple_unlock(&bucket_lock); + (void) splx(spl); + + /* + * Now link into the object's list of backed pages. + */ + + queue_enter(&object->memq, mem, vm_page_t, listq); + mem->tabled = TRUE; + + /* + * And show that the object has one more resident + * page. + */ + + object->resident_page_count++; +} + +/* + * vm_page_remove: [ internal use only ] + * + * Removes the given mem entry from the object/offset-page + * table and the object page list. + * + * The object and page must be locked. + */ + +void vm_page_remove(mem) + register vm_page_t mem; +{ + register queue_t bucket; + int spl; + + VM_PAGE_CHECK(mem); + + if (!mem->tabled) + return; + + /* + * Remove from the object_object/offset hash table + */ + + bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)]; + spl = splimp(); + simple_lock(&bucket_lock); + queue_remove(bucket, mem, vm_page_t, hashq); + simple_unlock(&bucket_lock); + (void) splx(spl); + + /* + * Now remove from the object's list of backed pages. + */ + + queue_remove(&mem->object->memq, mem, vm_page_t, listq); + + /* + * And show that the object has one fewer resident + * page. + */ + + mem->object->resident_page_count--; + + mem->tabled = FALSE; +} + +/* + * vm_page_lookup: + * + * Returns the page associated with the object/offset + * pair specified; if none is found, VM_PAGE_NULL is returned. + * + * The object must be locked. No side effects. + */ + +vm_page_t vm_page_lookup(object, offset) + register vm_object_t object; + register vm_offset_t offset; +{ + register vm_page_t mem; + register queue_t bucket; + int spl; + + /* + * Search the hash table for this object/offset pair + */ + + bucket = &vm_page_buckets[vm_page_hash(object, offset)]; + + spl = splimp(); + simple_lock(&bucket_lock); + mem = (vm_page_t) queue_first(bucket); + while (!queue_end(bucket, (queue_entry_t) mem)) { + VM_PAGE_CHECK(mem); + if ((mem->object == object) && (mem->offset == offset)) { + simple_unlock(&bucket_lock); + splx(spl); + return(mem); + } + mem = (vm_page_t) queue_next(&mem->hashq); + } + + simple_unlock(&bucket_lock); + splx(spl); + return(VM_PAGE_NULL); +} + +/* + * vm_page_rename: + * + * Move the given memory entry from its + * current object to the specified target object/offset. + * + * The object must be locked. 
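+ *	(vm_object_collapse, for instance, uses this to move a
+ *	backing object's resident pages into the parent object at
+ *	their translated offsets.)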
+ */ +void vm_page_rename(mem, new_object, new_offset) + register vm_page_t mem; + register vm_object_t new_object; + vm_offset_t new_offset; +{ + if (mem->object == new_object) + return; + + vm_page_lock_queues(); /* keep page from moving out from + under pageout daemon */ + vm_page_remove(mem); + vm_page_insert(mem, new_object, new_offset); + vm_page_unlock_queues(); +} + +void vm_page_init(mem, object, offset) + vm_page_t mem; + vm_object_t object; + vm_offset_t offset; +{ +#define vm_page_init(mem, object, offset) {\ + (mem)->busy = TRUE; \ + (mem)->tabled = FALSE; \ + vm_page_insert((mem), (object), (offset)); \ + (mem)->absent = FALSE; \ + (mem)->fictitious = FALSE; \ + (mem)->page_lock = VM_PROT_NONE; \ + (mem)->unlock_request = VM_PROT_NONE; \ + (mem)->laundry = FALSE; \ + (mem)->active = FALSE; \ + (mem)->inactive = FALSE; \ + (mem)->wire_count = 0; \ + (mem)->clean = TRUE; \ + (mem)->copy_on_write = FALSE; \ + (mem)->fake = TRUE; \ + } + + vm_page_init(mem, object, offset); +} + +/* + * vm_page_alloc: + * + * Allocate and return a memory cell associated + * with this VM object/offset pair. + * + * Object must be locked. + */ +vm_page_t vm_page_alloc(object, offset) + vm_object_t object; + vm_offset_t offset; +{ + register vm_page_t mem; + int spl; + + spl = splimp(); /* XXX */ + simple_lock(&vm_page_queue_free_lock); + if (queue_empty(&vm_page_queue_free)) { + simple_unlock(&vm_page_queue_free_lock); + splx(spl); + return(VM_PAGE_NULL); + } + + queue_remove_first(&vm_page_queue_free, mem, vm_page_t, pageq); + + vm_page_free_count--; + simple_unlock(&vm_page_queue_free_lock); + splx(spl); + + vm_page_init(mem, object, offset); + + /* + * Decide if we should poke the pageout daemon. + * We do this if the free count is less than the low + * water mark, or if the free count is less than the high + * water mark (but above the low water mark) and the inactive + * count is less than its target. + * + * We don't have the counts locked ... if they change a little, + * it doesn't really matter. + */ + + if ((vm_page_free_count < vm_page_free_min) || + ((vm_page_free_count < vm_page_free_target) && + (vm_page_inactive_count < vm_page_inactive_target))) + thread_wakeup(&vm_pages_needed); + return(mem); +} + +/* + * vm_page_free: + * + * Returns the given page to the free list, + * disassociating it with any VM object. + * + * Object and page must be locked prior to entry. + */ +void vm_page_free(mem) + register vm_page_t mem; +{ + vm_page_remove(mem); + if (mem->active) { + queue_remove(&vm_page_queue_active, mem, vm_page_t, pageq); + mem->active = FALSE; + vm_page_active_count--; + } + + if (mem->inactive) { + queue_remove(&vm_page_queue_inactive, mem, vm_page_t, pageq); + mem->inactive = FALSE; + vm_page_inactive_count--; + } + + if (!mem->fictitious) { + int spl; + + spl = splimp(); + simple_lock(&vm_page_queue_free_lock); + queue_enter(&vm_page_queue_free, mem, vm_page_t, pageq); + + vm_page_free_count++; + simple_unlock(&vm_page_queue_free_lock); + splx(spl); + } +} + +/* + * vm_page_wire: + * + * Mark this page as wired down by yet + * another map, removing it from paging queues + * as necessary. + * + * The page queues must be locked. 
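+ *	(A wired page is on neither the active nor the inactive
+ *	queue, so the pageout daemon never examines it; the unwire
+ *	that drops wire_count back to zero returns it to the
+ *	active queue.)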
+ */ +void vm_page_wire(mem) + register vm_page_t mem; +{ + VM_PAGE_CHECK(mem); + + if (mem->wire_count == 0) { + if (mem->active) { + queue_remove(&vm_page_queue_active, mem, vm_page_t, + pageq); + vm_page_active_count--; + mem->active = FALSE; + } + if (mem->inactive) { + queue_remove(&vm_page_queue_inactive, mem, vm_page_t, + pageq); + vm_page_inactive_count--; + mem->inactive = FALSE; + } + vm_page_wire_count++; + } + mem->wire_count++; +} + +/* + * vm_page_unwire: + * + * Release one wiring of this page, potentially + * enabling it to be paged again. + * + * The page queues must be locked. + */ +void vm_page_unwire(mem) + register vm_page_t mem; +{ + VM_PAGE_CHECK(mem); + + mem->wire_count--; + if (mem->wire_count == 0) { + queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq); + vm_page_active_count++; + mem->active = TRUE; + vm_page_wire_count--; + } +} + +/* + * vm_page_deactivate: + * + * Returns the given page to the inactive list, + * indicating that no physical maps have access + * to this page. [Used by the physical mapping system.] + * + * The page queues must be locked. + */ +void vm_page_deactivate(m) + register vm_page_t m; +{ + VM_PAGE_CHECK(m); + + /* + * Only move active pages -- ignore locked or already + * inactive ones. + */ + + if (m->active) { + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); + queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq); + m->active = FALSE; + m->inactive = TRUE; + vm_page_active_count--; + vm_page_inactive_count++; + if (pmap_is_modified(VM_PAGE_TO_PHYS(m))) + m->clean = FALSE; + m->laundry = !m->clean; + } +} + +/* + * vm_page_activate: + * + * Put the specified page on the active list (if appropriate). + * + * The page queues must be locked. + */ + +void vm_page_activate(m) + register vm_page_t m; +{ + VM_PAGE_CHECK(m); + + if (m->inactive) { + queue_remove(&vm_page_queue_inactive, m, vm_page_t, + pageq); + vm_page_inactive_count--; + m->inactive = FALSE; + } + if (m->wire_count == 0) { + if (m->active) + panic("vm_page_activate: already active"); + + queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); + m->active = TRUE; + vm_page_active_count++; + } +} + +/* + * vm_page_zero_fill: + * + * Zero-fill the specified page. + * Written as a standard pagein routine, to + * be used by the zero-fill object. + */ + +boolean_t vm_page_zero_fill(m) + vm_page_t m; +{ + VM_PAGE_CHECK(m); + + pmap_zero_page(VM_PAGE_TO_PHYS(m)); + return(TRUE); +} + +/* + * vm_page_copy: + * + * Copy one page to another + */ + +void vm_page_copy(src_m, dest_m) + vm_page_t src_m; + vm_page_t dest_m; +{ + VM_PAGE_CHECK(src_m); + VM_PAGE_CHECK(dest_m); + + pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m)); +} diff --git a/usr/src/sys/vm/vm_page.h b/usr/src/sys/vm/vm_page.h new file mode 100644 index 0000000000..dcfcd7c596 --- /dev/null +++ b/usr/src/sys/vm/vm_page.h @@ -0,0 +1,219 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_page.h 7.1 (Berkeley) %G% + */ + +/* + * Resident memory system definitions. 
+ */ + +#ifndef _VM_PAGE_ +#define _VM_PAGE_ + +#include "../vm/vm_param.h" +#include "../vm/vm_object.h" +#include "../vm/vm_prot.h" +#include "lock.h" + +/* + * Management of resident (logical) pages. + * + * A small structure is kept for each resident + * page, indexed by page number. Each structure + * is an element of several lists: + * + * A hash table bucket used to quickly + * perform object/offset lookups + * + * A list of all pages for a given object, + * so they can be quickly deactivated at + * time of deallocation. + * + * An ordered list of pages due for pageout. + * + * In addition, the structure contains the object + * and offset to which this page belongs (for pageout), + * and sundry status bits. + * + * Fields in this structure are locked either by the lock on the + * object that the page belongs to (O) or by the lock on the page + * queues (P). + */ + +struct vm_page { + queue_chain_t pageq; /* queue info for FIFO + * queue or free list (P) */ + queue_chain_t hashq; /* hash table links (O)*/ + queue_chain_t listq; /* all pages in same object (O)*/ + + vm_object_t object; /* which object am I in (O,P)*/ + vm_offset_t offset; /* offset into that object (O,P) */ + + unsigned int wire_count:16, /* how many wired down maps use me? + (P) */ + /* boolean_t */ inactive:1, /* page is in inactive list (P) */ + active:1, /* page is in active list (P) */ + laundry:1, /* page is being cleaned now (P)*/ +#ifdef DEBUG + pagerowned:1, /* async paging op in progress */ + ptpage:1, /* is a user page table page */ +#endif + :0; /* (force to 'long' boundary) */ +#ifdef ns32000 + int pad; /* extra space for ns32000 bit ops */ +#endif ns32000 + boolean_t clean; /* page has not been modified */ + unsigned int + /* boolean_t */ busy:1, /* page is in transit (O) */ + wanted:1, /* someone is waiting for page (O) */ + tabled:1, /* page is in VP table (O) */ + copy_on_write:1,/* page must be copied before being + changed (O) */ + fictitious:1, /* physical page doesn't exist (O) */ + absent:1, /* virtual page doesn't exist (O) */ + fake:1, /* page is a placeholder for page-in + (O) */ + :0; + + vm_offset_t phys_addr; /* physical address of page */ + vm_prot_t page_lock; /* Uses prohibited by data manager */ + vm_prot_t unlock_request; /* Outstanding unlock request */ +}; + +typedef struct vm_page *vm_page_t; + +#define VM_PAGE_NULL ((vm_page_t) 0) + +#if VM_PAGE_DEBUG +#define VM_PAGE_CHECK(mem) { \ + if ( (((unsigned int) mem) < ((unsigned int) &vm_page_array[0])) || \ + (((unsigned int) mem) > ((unsigned int) &vm_page_array[last_page-first_page])) || \ + (mem->active && mem->inactive) \ + ) panic("vm_page_check: not valid!"); \ + } +#else VM_PAGE_DEBUG +#define VM_PAGE_CHECK(mem) +#endif VM_PAGE_DEBUG + +#ifdef KERNEL +/* + * Each pageable resident page falls into one of three lists: + * + * free + * Available for allocation now. + * inactive + * Not referenced in any map, but still has an + * object/offset-page mapping, and may be dirty. + * This is the list of pages that should be + * paged out next. + * active + * A list of pages which have been placed in + * at least one physical map. This list is + * ordered, in LRU-like fashion. + */ + +extern +queue_head_t vm_page_queue_free; /* memory free queue */ +extern +queue_head_t vm_page_queue_active; /* active memory queue */ +extern +queue_head_t vm_page_queue_inactive; /* inactive memory queue */ + +extern +vm_page_t vm_page_array; /* First resident page in table */ +extern +long first_page; /* first physical page number */ + /* ... 
represented in vm_page_array */ +extern +long last_page; /* last physical page number */ + /* ... represented in vm_page_array */ + /* [INCLUSIVE] */ +extern +vm_offset_t first_phys_addr; /* physical address for first_page */ +extern +vm_offset_t last_phys_addr; /* physical address for last_page */ + +extern +int vm_page_free_count; /* How many pages are free? */ +extern +int vm_page_active_count; /* How many pages are active? */ +extern +int vm_page_inactive_count; /* How many pages are inactive? */ +extern +int vm_page_wire_count; /* How many pages are wired? */ +extern +int vm_page_free_target; /* How many do we want free? */ +extern +int vm_page_free_min; /* When to wakeup pageout */ +extern +int vm_page_inactive_target;/* How many do we want inactive? */ +extern +int vm_page_free_reserved; /* How many pages reserved to do pageout */ +extern +int vm_page_laundry_count; /* How many pages being laundered? */ + +#define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) + +#define IS_VM_PHYSADDR(pa) \ + ((pa) >= first_phys_addr && (pa) <= last_phys_addr) + +#define PHYS_TO_VM_PAGE(pa) \ + (&vm_page_array[atop(pa) - first_page ]) + +extern +simple_lock_data_t vm_page_queue_lock; /* lock on active and inactive + page queues */ +extern +simple_lock_data_t vm_page_queue_free_lock; + /* lock on free page queue */ +vm_offset_t vm_page_startup(); +vm_page_t vm_page_lookup(); +vm_page_t vm_page_alloc(); +void vm_page_init(); +void vm_page_free(); +void vm_page_activate(); +void vm_page_deactivate(); +void vm_page_rename(); +void vm_page_replace(); + +boolean_t vm_page_zero_fill(); +void vm_page_copy(); + +void vm_page_wire(); +void vm_page_unwire(); + +void vm_set_page_size(); + +/* + * Functions implemented as macros + */ + +#define PAGE_ASSERT_WAIT(m, interruptible) { \ + (m)->wanted = TRUE; \ + assert_wait((int) (m), (interruptible)); \ + } + +#define PAGE_WAKEUP(m) { \ + (m)->busy = FALSE; \ + if ((m)->wanted) { \ + (m)->wanted = FALSE; \ + thread_wakeup((int) (m)); \ + } \ + } + +#define vm_page_lock_queues() simple_lock(&vm_page_queue_lock) +#define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock) + +#define vm_page_set_modified(m) { (m)->clean = FALSE; } +#endif KERNEL +#endif _VM_PAGE_ diff --git a/usr/src/sys/vm/vm_pageout.c b/usr/src/sys/vm/vm_pageout.c new file mode 100644 index 0000000000..4e6301f094 --- /dev/null +++ b/usr/src/sys/vm/vm_pageout.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_pageout.c 7.1 (Berkeley) %G% + */ + +/* + * The proverbial page-out daemon. + */ + +#include "types.h" +#include "../vm/vm_page.h" +#include "../vm/pmap.h" +#include "../vm/vm_object.h" +#include "../vm/vm_pageout.h" +#include "../vm/vm_statistics.h" +#include "../vm/vm_param.h" + +int vm_pages_needed; /* Event on which pageout daemon sleeps */ +int vm_pageout_free_min = 0; /* Stop pageout to wait for pagers at this free level */ + +int vm_page_free_min_sanity = 40; + +/* + * vm_pageout_scan does the dirty work for the pageout daemon. 
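+ *
+ * In outline (an informal summary of the code below, not a separate
+ * specification): while the free page count remains below
+ * vm_page_free_target, each page on the inactive queue is examined:
+ *
+ *	if it is clean and has been referenced, it is reactivated;
+ *	if it is clean and unreferenced, it is freed;
+ *	if it is dirty and still in the laundry, a pageout is started.
+ *
+ * Afterwards, pages are moved from the active list to the inactive
+ * list to cover any remaining inactive-page shortage.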
+ */ +vm_pageout_scan() +{ + register vm_page_t m; + register int page_shortage; + register int s; + register int pages_freed; + int free; + + /* + * Only continue when we want more pages to be "free" + */ + + s = splimp(); + simple_lock(&vm_page_queue_free_lock); + free = vm_page_free_count; + simple_unlock(&vm_page_queue_free_lock); + splx(s); + + if (free < vm_page_free_target) { + swapout_threads(); + + /* + * Be sure the pmap system is updated so + * we can scan the inactive queue. + */ + + pmap_update(); + } + + /* + * Acquire the resident page system lock, + * as we may be changing what's resident quite a bit. + */ + vm_page_lock_queues(); + + /* + * Start scanning the inactive queue for pages we can free. + * We keep scanning until we have enough free pages or + * we have scanned through the entire queue. If we + * encounter dirty pages, we start cleaning them. + */ + + pages_freed = 0; + m = (vm_page_t) queue_first(&vm_page_queue_inactive); + while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m)) { + vm_page_t next; + + s = splimp(); + simple_lock(&vm_page_queue_free_lock); + free = vm_page_free_count; + simple_unlock(&vm_page_queue_free_lock); + splx(s); + + if (free >= vm_page_free_target) + break; + + if (m->clean) { + next = (vm_page_t) queue_next(&m->pageq); + if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + vm_page_activate(m); + vm_stat.reactivations++; + } + else { + register vm_object_t object; + object = m->object; + if (!vm_object_lock_try(object)) { + /* + * Can't lock object - + * skip page. + */ + m = next; + continue; + } + pmap_remove_all(VM_PAGE_TO_PHYS(m)); + vm_page_free(m); /* will dequeue */ + pages_freed++; + vm_object_unlock(object); + } + m = next; + } + else { + /* + * If a page is dirty, then it is either + * being washed (but not yet cleaned) + * or it is still in the laundry. If it is + * still in the laundry, then we start the + * cleaning operation. + */ + + if (m->laundry) { + /* + * Clean the page and remove it from the + * laundry. + * + * We set the busy bit to cause + * potential page faults on this page to + * block. + * + * And we set pageout-in-progress to keep + * the object from disappearing during + * pageout. This guarantees that the + * page won't move from the inactive + * queue. (However, any other page on + * the inactive queue may move!) + */ + + register vm_object_t object; + register vm_pager_t pager; + int pageout_status; + + object = m->object; + if (!vm_object_lock_try(object)) { + /* + * Skip page if we can't lock + * its object + */ + m = (vm_page_t) queue_next(&m->pageq); + continue; + } + + pmap_remove_all(VM_PAGE_TO_PHYS(m)); + m->busy = TRUE; + vm_stat.pageouts++; + + /* + * Try to collapse the object before + * making a pager for it. We must + * unlock the page queues first. + */ + vm_page_unlock_queues(); + + vm_object_collapse(object); + + object->paging_in_progress++; + vm_object_unlock(object); + + /* + * Do a wakeup here in case the following + * operations block. + */ + thread_wakeup((int) &vm_page_free_count); + + /* + * If there is no pager for the page, + * use the default pager. If there's + * no place to put the page at the + * moment, leave it in the laundry and + * hope that there will be paging space + * later. + */ + + if ((pager = object->pager) == vm_pager_null) { + pager = vm_pager_allocate(PG_DFLT, + (caddr_t)0, + object->size, + VM_PROT_ALL); + if (pager != vm_pager_null) { + vm_object_setpager(object, + pager, 0, FALSE); + } + } + pageout_status = pager ? 
+ vm_pager_put(pager, m, FALSE) : + VM_PAGER_FAIL; + vm_object_lock(object); + vm_page_lock_queues(); + next = (vm_page_t) queue_next(&m->pageq); + + switch (pageout_status) { + case VM_PAGER_OK: + case VM_PAGER_PEND: + m->laundry = FALSE; + break; + case VM_PAGER_BAD: + /* + * Page outside of range of object. + * Right now we essentially lose the + * changes by pretending it worked. + * XXX dubious, what should we do? + */ + m->laundry = FALSE; + m->clean = TRUE; + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + break; + case VM_PAGER_FAIL: + /* + * If page couldn't be paged out, then + * reactivate the page so it doesn't + * clog the inactive list. (We will + * try paging out it again later). + */ + vm_page_activate(m); + break; + } + + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + m->busy = FALSE; + PAGE_WAKEUP(m); + + /* + * If the operation is still going, leave the + * paging in progress indicator set so that we + * don't attempt an object collapse. + */ + if (pageout_status != VM_PAGER_PEND) + object->paging_in_progress--; + thread_wakeup((int) object); + vm_object_unlock(object); + m = next; + } + else + m = (vm_page_t) queue_next(&m->pageq); + } + } + + /* + * Compute the page shortage. If we are still very low on memory + * be sure that we will move a minimal amount of pages from active + * to inactive. + */ + + page_shortage = vm_page_inactive_target - vm_page_inactive_count; + page_shortage -= vm_page_free_count; + + if ((page_shortage <= 0) && (pages_freed == 0)) + page_shortage = 1; + + while (page_shortage > 0) { + /* + * Move some more pages from active to inactive. + */ + + if (queue_empty(&vm_page_queue_active)) { + break; + } + m = (vm_page_t) queue_first(&vm_page_queue_active); + vm_page_deactivate(m); + page_shortage--; + } + + vm_page_unlock_queues(); +} + +/* + * vm_pageout is the high level pageout daemon. + */ + +void vm_pageout() +{ + (void) spl0(); + + /* + * Initialize some paging parameters. + */ + + if (vm_page_free_min == 0) { + vm_page_free_min = vm_page_free_count / 20; + if (vm_page_free_min < 3) + vm_page_free_min = 3; + + if (vm_page_free_min > vm_page_free_min_sanity) + vm_page_free_min = vm_page_free_min_sanity; + } + + if (vm_page_free_reserved == 0) { + if ((vm_page_free_reserved = vm_page_free_min / 2) < 10) + vm_page_free_reserved = 10; + } + if (vm_pageout_free_min == 0) { + if ((vm_pageout_free_min = vm_page_free_reserved / 2) > 10) + vm_pageout_free_min = 10; + } + + if (vm_page_free_target == 0) + vm_page_free_target = (vm_page_free_min * 4) / 3; + + if (vm_page_inactive_target == 0) + vm_page_inactive_target = vm_page_free_min * 2; + + if (vm_page_free_target <= vm_page_free_min) + vm_page_free_target = vm_page_free_min + 1; + + if (vm_page_inactive_target <= vm_page_free_target) + vm_page_inactive_target = vm_page_free_target + 1; + + /* + * The pageout daemon is never done, so loop + * forever. + */ + + simple_lock(&vm_pages_needed_lock); + while (TRUE) { + thread_sleep((int) &vm_pages_needed, &vm_pages_needed_lock, + FALSE); + vm_pageout_scan(); + vm_pager_sync(); + simple_lock(&vm_pages_needed_lock); + thread_wakeup((int) &vm_page_free_count); + } +} diff --git a/usr/src/sys/vm/vm_pageout.h b/usr/src/sys/vm/vm_pageout.h new file mode 100644 index 0000000000..d3acd166b4 --- /dev/null +++ b/usr/src/sys/vm/vm_pageout.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 1986, Avadis Tevanian, Jr. + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_pageout.h 7.1 (Berkeley) %G% + */ + +/* + * Header file for pageout daemon. + */ + +#include "lock.h" + +/* + * Exported data structures. + */ + +extern int vm_pages_needed; /* should be some "event" structure */ +simple_lock_data_t vm_pages_needed_lock; + + +/* + * Exported routines. + */ + +/* + * Signal pageout-daemon and wait for it. + */ + +#define VM_WAIT { \ + simple_lock(&vm_pages_needed_lock); \ + thread_wakeup((int)&vm_pages_needed); \ + thread_sleep((int)&vm_page_free_count, \ + &vm_pages_needed_lock, FALSE); \ + } diff --git a/usr/src/sys/vm/vm_pager.c b/usr/src/sys/vm/vm_pager.c new file mode 100644 index 0000000000..12efff57a7 --- /dev/null +++ b/usr/src/sys/vm/vm_pager.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 1985, 1986 Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_pager.c 7.1 (Berkeley) %G% + */ + +/* + * Paging space routine stubs. Emulates a matchmaker-like interface + * for builtin pagers. + */ + +#include "param.h" +#include "queue.h" +#include "malloc.h" + +#include "../vm/vm_param.h" +#include "../vm/vm_pager.h" +#include "../vm/vm_page.h" +#include "../vm/vm_prot.h" +#include "../vm/vm_map.h" +#include "../vm/vm_kern.h" + +#include "../vm/pmap.h" + +#include "swappager.h" +#if NSWAPPAGER > 0 +extern struct pagerops swappagerops; +#else +#define swappagerops PAGER_OPS_NULL +#endif +#include "vnodepager.h" +#if NVNODEPAGER > 0 +extern struct pagerops vnodepagerops; +#else +#define vnodepagerops PAGER_OPS_NULL +#endif +#include "devpager.h" +#if NDEVPAGER > 0 +extern struct pagerops devicepagerops; +#else +#define devicepagerops PAGER_OPS_NULL +#endif + +struct pagerops *pagertab[] = { + &swappagerops, /* PG_SWAP */ + &vnodepagerops, /* PG_VNODE */ + &devicepagerops, /* PG_DEV */ +}; +int npagers = sizeof (pagertab) / sizeof (pagertab[0]); + +struct pagerops *dfltpagerops = PAGER_OPS_NULL; /* default pager */ + +/* + * Kernel address space for mapping pages. + * Used by pagers where KVAs are needed for IO. + */ +#define PAGER_MAP_SIZE (256 * PAGE_SIZE) +vm_map_t pager_map; + +void +vm_pager_init() +{ + vm_offset_t whocares1, whocares2; + struct pagerops **pgops; + + /* + * Allocate a kernel submap for tracking get/put page mappings + */ + pager_map = kmem_suballoc(kernel_map, &whocares1, &whocares2, + PAGER_MAP_SIZE, FALSE); + /* + * Initialize known pagers + */ + for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++) + (*(*pgops)->pgo_init)(); + if (dfltpagerops == PAGER_OPS_NULL) + panic("no default pager"); +} + +/* + * Allocate an instance of a pager of the given type. + */ +vm_pager_t +vm_pager_allocate(type, handle, size, prot) + int type; + caddr_t handle; + vm_size_t size; + vm_prot_t prot; +{ + vm_pager_t pager; + struct pagerops *ops; + + ops = (type == PG_DFLT) ? 
dfltpagerops : pagertab[type]; + return((*ops->pgo_alloc)(handle, size, prot)); +} + +void +vm_pager_deallocate(pager) + vm_pager_t pager; +{ + if (pager == vm_pager_null) + panic("vm_pager_deallocate: null pager"); + + VM_PAGER_DEALLOC(pager); +} + +vm_pager_get(pager, m, sync) + vm_pager_t pager; + vm_page_t m; + boolean_t sync; +{ + extern boolean_t vm_page_zero_fill(); + + if (pager == vm_pager_null) + return(vm_page_zero_fill(m) ? VM_PAGER_OK : VM_PAGER_FAIL); + return(VM_PAGER_GET(pager, m, sync)); +} + +vm_pager_put(pager, m, sync) + vm_pager_t pager; + vm_page_t m; + boolean_t sync; +{ + if (pager == vm_pager_null) + panic("vm_pager_put: null pager"); + return(VM_PAGER_PUT(pager, m, sync)); +} + +boolean_t +vm_pager_has_page(pager, offset) + vm_pager_t pager; + vm_offset_t offset; +{ + if (pager == vm_pager_null) + panic("vm_pager_has_page"); + return(VM_PAGER_HASPAGE(pager, offset)); +} + +/* + * Called by pageout daemon before going back to sleep. + * Gives pagers a chance to clean up any completed async pageing operations. + */ +void +vm_pager_sync() +{ + struct pagerops **pgops; + + for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++) + (*(*pgops)->pgo_putpage)(VM_PAGER_NULL, VM_PAGE_NULL, FALSE); +} + +vm_offset_t +vm_pager_map_page(m) + vm_page_t m; +{ + vm_offset_t kva; + + kva = kmem_alloc_wait(pager_map, PAGE_SIZE); +#if 1 + /* + * XXX: cannot use pmap_enter as the mapping would be + * removed by a pmap_remove_all(). + */ + *(int *)kvtopte(kva) = VM_PAGE_TO_PHYS(m) | PG_CI | PG_V; + TBIS(kva); +#else + pmap_enter(vm_map_pmap(pager_map), kva, VM_PAGE_TO_PHYS(m), + VM_PROT_DEFAULT, TRUE); +#endif + return(kva); +} + +void +vm_pager_unmap_page(kva) + vm_offset_t kva; +{ +#if 1 + *(int *)kvtopte(kva) = PG_NV; + TBIS(kva); +#endif + kmem_free_wakeup(pager_map, kva, PAGE_SIZE); +} + +vm_pager_t +vm_pager_lookup(list, handle) + register queue_head_t *list; + caddr_t handle; +{ + register vm_pager_t pager; + + pager = (vm_pager_t) queue_first(list); + while (!queue_end(list, (queue_entry_t)pager)) { + if (pager->pg_handle == handle) + return(pager); + pager = (vm_pager_t) queue_next(&pager->pg_list); + } + return(VM_PAGER_NULL); +} + +/* + * This routine gains a reference to the object. + * Explicit deallocation is necessary. + */ +pager_cache(object, should_cache) + vm_object_t object; + boolean_t should_cache; +{ + if (object == VM_OBJECT_NULL) + return(KERN_INVALID_ARGUMENT); + + vm_object_cache_lock(); + vm_object_lock(object); + object->can_persist = should_cache; + vm_object_unlock(object); + vm_object_cache_unlock(); + + vm_object_deallocate(object); + + return(KERN_SUCCESS); +} diff --git a/usr/src/sys/vm/vm_param.h b/usr/src/sys/vm/vm_param.h new file mode 100644 index 0000000000..cea357d56a --- /dev/null +++ b/usr/src/sys/vm/vm_param.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_param.h 7.1 (Berkeley) %G% + */ + +/* + * Machine independent virtual memory parameters. 
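+ *
+ * As a worked example of the macros below (assuming, purely for
+ * illustration, a 4096-byte page size, so that page_shift is 12 and
+ * page_mask is 0xfff):
+ *
+ *	trunc_page(0x12345)	== 0x12000
+ *	round_page(0x12345)	== 0x13000
+ *	atop(0x13000)		== 0x13
+ *	ptoa(0x13)		== 0x13000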
+ */ + +#ifndef _VM_PARAM_ +#define _VM_PARAM_ + +#ifdef KERNEL +#include "machine/vmparam.h" +#else +#include +#endif + +/* + * This belongs in types.h, but breaks too many existing programs. + */ +typedef int boolean_t; +#define TRUE 1 +#define FALSE 0 + +/* + * The machine independent pages are refered to as PAGES. A page + * is some number of hardware pages, depending on the target machine. + */ + +/* + * All references to the size of a page should be done with PAGE_SIZE + * or PAGE_SHIFT. The fact they are variables is hidden here so that + * we can easily make them constant if we so desire. + */ + +#define PAGE_SIZE page_size /* size of page in addressible units */ +#define PAGE_SHIFT page_shift /* number of bits to shift for pages */ + +/* + * Return values from the VM routines. + */ +#define KERN_SUCCESS 0 +#define KERN_INVALID_ADDRESS 1 +#define KERN_PROTECTION_FAILURE 2 +#define KERN_NO_SPACE 3 +#define KERN_INVALID_ARGUMENT 4 +#define KERN_FAILURE 5 +#define KERN_RESOURCE_SHORTAGE 6 +#define KERN_NOT_RECEIVER 7 +#define KERN_NO_ACCESS 8 + +#ifdef ASSEMBLER +#else ASSEMBLER +/* + * Convert addresses to pages and vice versa. + * No rounding is used. + */ + +#ifdef KERNEL +#define atop(x) (((unsigned)(x)) >> page_shift) +#define ptoa(x) ((vm_offset_t)((x) << page_shift)) +#endif KERNEL + +/* + * Round off or truncate to the nearest page. These will work + * for either addresses or counts. (i.e. 1 byte rounds to 1 page + * bytes. + */ + +#ifdef KERNEL +#define round_page(x) ((vm_offset_t)((((vm_offset_t)(x)) + page_mask) & ~page_mask)) +#define trunc_page(x) ((vm_offset_t)(((vm_offset_t)(x)) & ~page_mask)) +#else KERNEL +#define round_page(x) ((((vm_offset_t)(x) + (vm_page_size - 1)) / vm_page_size) * vm_page_size) +#define trunc_page(x) ((((vm_offset_t)(x)) / vm_page_size) * vm_page_size) +#endif KERNEL + +#ifdef KERNEL +extern vm_size_t page_size; /* machine independent page size */ +extern vm_size_t page_mask; /* page_size - 1; mask for + offset within page */ +extern int page_shift; /* shift to use for page size */ + +extern vm_size_t mem_size; /* size of physical memory (bytes) */ +extern vm_offset_t first_addr; /* first physical page */ +extern vm_offset_t last_addr; /* last physical page */ +#endif KERNEL + +#endif ASSEMBLER + +#endif _VM_PARAM_ diff --git a/usr/src/sys/vm/vm_prot.h b/usr/src/sys/vm/vm_prot.h new file mode 100644 index 0000000000..84f368dacf --- /dev/null +++ b/usr/src/sys/vm/vm_prot.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_prot.h 7.1 (Berkeley) %G% + */ + +/* + * Virtual memory protection definitions. + */ + +#ifndef _VM_PROT_ +#define _VM_PROT_ + +/* + * Types defined: + * + * vm_prot_t VM protection values. 
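+ *
+ * The protection values below are bit masks that may be or'ed
+ * together; for example (illustrative only), a read-only mapping
+ * would be entered with VM_PROT_READ, while an unwritable text
+ * segment would use (VM_PROT_READ|VM_PROT_EXECUTE).  VM_PROT_DEFAULT
+ * and VM_PROT_ALL are simply the customary combinations of these
+ * bits.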
+ */ + +typedef int vm_prot_t; + +/* + * Protection values, defined as bits within the vm_prot_t type + */ + +#define VM_PROT_NONE ((vm_prot_t) 0x00) + +#define VM_PROT_READ ((vm_prot_t) 0x01) /* read permission */ +#define VM_PROT_WRITE ((vm_prot_t) 0x02) /* write permission */ +#define VM_PROT_EXECUTE ((vm_prot_t) 0x04) /* execute permission */ + +/* + * The default protection for newly-created virtual memory + */ + +#define VM_PROT_DEFAULT (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) + +/* + * The maximum privileges possible, for parameter checking. + */ + +#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) + +#endif _VM_PROT_ diff --git a/usr/src/sys/vm/vm_user.c b/usr/src/sys/vm/vm_user.c new file mode 100644 index 0000000000..9414f4e05f --- /dev/null +++ b/usr/src/sys/vm/vm_user.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_user.c 7.1 (Berkeley) %G% + */ + +/* + * User-exported virtual memory functions. + */ + +#include "param.h" +#include "systm.h" +#include "user.h" +#include "proc.h" + +#include "../vm/vm_param.h" +#include "../vm/vm_object.h" +#include "../vm/vm_map.h" +#include "../vm/vm_page.h" +#include "../vm/vm_statistics.h" + +#include "lock.h" /* XXX */ +simple_lock_data_t vm_alloc_lock; /* XXX */ + +#ifdef MACHVMCOMPAT +/* + * BSD style syscall interfaces to MACH calls + * All return MACH return values. + */ +/* ARGSUSED */ +svm_allocate(p, uap, retval) + struct proc *p; + struct args { + vm_map_t map; + vm_offset_t *addr; + vm_size_t size; + boolean_t anywhere; + } *uap; + int *retval; +{ + vm_offset_t addr; + int rv; + + uap->map = p->p_map; /* XXX */ + + if (copyin((caddr_t)uap->addr, (caddr_t)&addr, sizeof (addr))) + rv = KERN_INVALID_ARGUMENT; + else + rv = vm_allocate(uap->map, &addr, uap->size, uap->anywhere); + if (rv == KERN_SUCCESS) { + if (copyout((caddr_t)&addr, (caddr_t)uap->addr, sizeof(addr))) + rv = KERN_INVALID_ARGUMENT; + } + return((int)rv); +} + +/* ARGSUSED */ +svm_deallocate(p, uap, retval) + struct proc *p; + struct args { + vm_map_t map; + vm_offset_t addr; + vm_size_t size; + } *uap; + int *retval; +{ + int rv; + + uap->map = p->p_map; /* XXX */ + rv = vm_deallocate(uap->map, uap->addr, uap->size); + return((int)rv); +} + +/* ARGSUSED */ +svm_inherit(p, uap, retval) + struct proc *p; + struct args { + vm_map_t map; + vm_offset_t addr; + vm_size_t size; + vm_inherit_t inherit; + } *uap; + int *retval; +{ + int rv; + + uap->map = p->p_map; /* XXX */ + rv = vm_inherit(uap->map, uap->addr, uap->size, uap->inherit); + return((int)rv); +} + +/* ARGSUSED */ +svm_protect(p, uap, retval) + struct proc *p; + struct args { + vm_map_t map; + vm_offset_t addr; + vm_size_t size; + boolean_t setmax; + vm_prot_t prot; + } *uap; + int *retval; +{ + int rv; + + uap->map = p->p_map; /* XXX */ + rv = vm_protect(uap->map, uap->addr, uap->size, uap->setmax, uap->prot); + return((int)rv); +} +#endif + +/* + * vm_allocate allocates "zero fill" memory in the specfied + * map. 
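+ *
+ * A hypothetical caller (sketch only; no such caller appears in this
+ * patch) would use vm_allocate() and vm_deallocate() as a pair:
+ *
+ *	vm_offset_t addr = 0;
+ *
+ *	if (vm_allocate(map, &addr, (vm_size_t) 8192, TRUE) ==
+ *	    KERN_SUCCESS) {
+ *		... use the zero-filled range [addr, addr + 8192) ...
+ *		(void) vm_deallocate(map, addr, (vm_size_t) 8192);
+ *	}
+ *
+ * With anywhere == TRUE the map chooses the address; with FALSE the
+ * caller's *addr, truncated to a page boundary, is used.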
+ */ +vm_allocate(map, addr, size, anywhere) + register vm_map_t map; + register vm_offset_t *addr; + register vm_size_t size; + boolean_t anywhere; +{ + int result; + + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + if (size == 0) { + *addr = 0; + return(KERN_SUCCESS); + } + + if (anywhere) + *addr = vm_map_min(map); + else + *addr = trunc_page(*addr); + size = round_page(size); + + result = vm_map_find(map, VM_OBJECT_NULL, (vm_offset_t) 0, addr, + size, anywhere); + + return(result); +} + +/* + * vm_deallocate deallocates the specified range of addresses in the + * specified address map. + */ +vm_deallocate(map, start, size) + register vm_map_t map; + vm_offset_t start; + vm_size_t size; +{ + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + if (size == (vm_offset_t) 0) + return(KERN_SUCCESS); + + return(vm_map_remove(map, trunc_page(start), round_page(start+size))); +} + +/* + * vm_inherit sets the inheritence of the specified range in the + * specified map. + */ +vm_inherit(map, start, size, new_inheritance) + register vm_map_t map; + vm_offset_t start; + vm_size_t size; + vm_inherit_t new_inheritance; +{ + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + return(vm_map_inherit(map, trunc_page(start), round_page(start+size), new_inheritance)); +} + +/* + * vm_protect sets the protection of the specified range in the + * specified map. + */ + +vm_protect(map, start, size, set_maximum, new_protection) + register vm_map_t map; + vm_offset_t start; + vm_size_t size; + boolean_t set_maximum; + vm_prot_t new_protection; +{ + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + return(vm_map_protect(map, trunc_page(start), round_page(start+size), new_protection, set_maximum)); +} diff --git a/usr/src/sys/vm/vm_user.h b/usr/src/sys/vm/vm_user.h new file mode 100644 index 0000000000..9d528bd2bd --- /dev/null +++ b/usr/src/sys/vm/vm_user.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 1986, Avadis Tevanian, Jr., Michael Wayne Young + * Copyright (c) 1987 Carnegie-Mellon University + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * The CMU software License Agreement specifies the terms and conditions + * for use and redistribution. + * + * @(#)vm_user.h 7.1 (Berkeley) %G% + */ + +/* + * Kernel memory management definitions. + */ + +#ifndef _VM_USER_ +#define _VM_USER_ + +int vm_allocate(); +int vm_deallocate(); +int vm_inherit(); +int vm_protect(); +int vm_statistics(); + +#endif _VM_USER_ -- 2.20.1