* Copyright (c) 1991 Regents of the University of California.
* Copyright (c) 1994 John S. Dyson
* Copyright (c) 1994 David Greenman
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
* Carnegie Mellon requests users of this software to return to
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
* $Id: vm_pageout.c,v 1.19 1994/04/14 07:50:25 davidg Exp $
* The proverbial page-out daemon.
/*
 * Pageout-daemon globals and tunables.
 *
 * NOTE(review): this file appears to be a lossily-extracted copy of the
 * historical 4.4BSD/FreeBSD vm_pageout.c.  Many original lines (braces,
 * control statements, comment delimiters) are missing, and surviving
 * statements are split across line boundaries.  All original tokens are
 * preserved byte-for-byte below; only comments have been added.
 */
extern vm_map_t kmem_map
;
int vm_pages_needed
; /* Event on which pageout daemon sleeps */
int vm_pagescanner
; /* Event on which pagescanner sleeps */
int vm_pageout_free_min
= 0; /* Stop pageout to wait for pagers at this free level */
int vm_pageout_pages_needed
= 0; /* flag saying that the pageout daemon needs pages */
extern int vm_page_count
;
int vm_pageout_proc_limit
;
extern int swap_pager_full
;
extern int swap_pager_ready();
#define MAXSCAN 512 /* maximum number of pages to scan in active queue */
/* set the "clock" hands to be (MAXSCAN * 4096) Bytes */
#define LOWATER ((2048*1024)/NBPG)
/* max pages clustered into one pageout operation (sizes ms[]/pageout_status[]) */
#define VM_PAGEOUT_PAGE_COUNT 8
static vm_offset_t vm_space_needed
;
int vm_pageout_req_do_stats
;
/*
 * vm_pageout_clean(m, sync):
 *	Launder the inactive page "m": push it (and, via the ms[] array,
 *	up to VM_PAGEOUT_PAGE_COUNT virtually-consecutive clean-candidate
 *	pages of the same object) out through the object's pager,
 *	allocating a default (PG_DFLT) pager if the object has none.
 *	"sync" forces a synchronous pageout; pageout is also synchronous
 *	for kernel_object pages (see the sync || object == kernel_object
 *	tests below).
 *
 * NOTE(review): lines are missing from this extraction (function
 * prologue, braces, switch-case labels such as the VM_PAGER_* cases,
 * and comment delimiters).  The body is preserved byte-for-byte;
 * do not attempt to compile or restructure it as-is.
 */
vm_pageout_clean(m
, sync
)
* Clean the page and remove it from the
* We set the busy bit to cause
* potential page faults on this page to
* And we set pageout-in-progress to keep
* the object from disappearing during
* pageout. This guarantees that the
* page won't move from the inactive
* queue. (However, any other page on
* the inactive queue may move!)
register vm_object_t object
;
register vm_pager_t pager
;
int pageout_status
[VM_PAGEOUT_PAGE_COUNT
];
vm_page_t ms
[VM_PAGEOUT_PAGE_COUNT
];
vm_offset_t offset
= m
->offset
;
printf("pager: object missing\n");
* Try to collapse the object before
* making a pager for it. We must
* unlock the page queues first.
* We try to defer the creation of a pager
* until all shadows are not paging. This
* allows vm_object_collapse to work better and
* helps control swap space size.
vm_page_free_count
< vm_pageout_free_min
)
object
->shadow
->paging_in_progress
)
vm_object_collapse(object
);
if (!vm_page_lookup(object
, offset
))
if ((m
->flags
& PG_BUSY
) || (m
->hold_count
!= 0)) {
/*
 * Cluster gathering: probe the next VM_PAGEOUT_PAGE_COUNT-1 offsets of
 * the object for pages that are inactive, not clean, not busy, and
 * neither wired nor held -- candidates to launder together with m.
 */
if( pager
= object
->pager
) {
for(i
=1;i
<VM_PAGEOUT_PAGE_COUNT
;i
++) {
if( ms
[i
] = vm_page_lookup( object
, offset
+i
*NBPG
)) {
if( ((ms
[i
]->flags
& (PG_CLEAN
|PG_INACTIVE
|PG_BUSY
)) == PG_INACTIVE
)
&& (ms
[i
]->wire_count
== 0)
&& (ms
[i
]->hold_count
== 0))
/* write-protect each clustered page before handing it to the pager */
for(i
=0;i
<pageout_count
;i
++) {
pmap_page_protect(VM_PAGE_TO_PHYS(ms
[i
]), VM_PROT_READ
);
object
->paging_in_progress
+= pageout_count
;
vm_stat
.pageouts
+= pageout_count
;
pmap_page_protect(VM_PAGE_TO_PHYS(m
), VM_PROT_READ
);
object
->paging_in_progress
++;
/* object has no pager yet: create a default pager sized to the object */
pager
= vm_pager_allocate(PG_DFLT
, (caddr_t
)0,
object
->size
, VM_PROT_ALL
, 0);
vm_object_setpager(object
, pager
, 0, FALSE
);
* If there is no pager for the page,
* use the default pager. If there's
* no place to put the page at the
* moment, leave it in the laundry and
* hope that there will be paging space
if ((pager
&& pager
->pg_type
== PG_SWAP
) ||
vm_page_free_count
>= vm_pageout_free_min
) {
if( pageout_count
== 1) {
pageout_status
[0] = pager
?
((sync
|| (object
== kernel_object
)) ? TRUE
: FALSE
)) :
for(i
=0;i
<pageout_count
;i
++)
pageout_status
[i
] = VM_PAGER_FAIL
;
vm_pager_putmulti(pager
, ms
, pageout_count
,
((sync
|| (object
== kernel_object
)) ? TRUE
: FALSE
),
for(i
=0;i
<pageout_count
;i
++)
pageout_status
[i
] = VM_PAGER_FAIL
;
/*
 * Post-pageout bookkeeping: dispatch on each page's pager status.
 * NOTE(review): the switch's case labels (VM_PAGER_OK / _PEND / _BAD /
 * _FAIL, presumably) are among the lines lost in extraction.
 */
for(i
=0;i
<pageout_count
;i
++) {
switch (pageout_status
[i
]) {
ms
[i
]->flags
&= ~PG_LAUNDRY
;
ms
[i
]->flags
&= ~PG_LAUNDRY
;
* Page outside of range of object.
* Right now we essentially lose the
* changes by pretending it worked.
ms
[i
]->flags
&= ~PG_LAUNDRY
;
ms
[i
]->flags
|= PG_CLEAN
;
pmap_clear_modify(VM_PAGE_TO_PHYS(ms
[i
]));
* If page couldn't be paged out, then
* reactivate the page so it doesn't
* clog the inactive list. (We will
* try paging out it again later).
if (ms
[i
]->flags
& PG_INACTIVE
)
* If the operation is still going, leave
* the page busy to block all other accesses.
* Also, leave the paging in progress
* indicator set so that we don't attempt an
if (pageout_status
[i
] != VM_PAGER_PEND
) {
if (--object
->paging_in_progress
== 0)
wakeup((caddr_t
) object
);
if (pmap_is_referenced(VM_PAGE_TO_PHYS(ms
[i
]))) {
pmap_clear_reference(VM_PAGE_TO_PHYS(ms
[i
]));
if( ms
[i
]->flags
& PG_INACTIVE
)
/*
 * vm_pageout_object_deactivate_pages(map, object, count):
 *	Walk the object's resident-page queue (and recurse through its
 *	shadow chain, dividing the requested count by the shadow's
 *	ref_count when shared) deactivating active, unwired pages that
 *	are mapped in the given map's pmap.  Unreferenced pages have
 *	act_count declined and are deactivated when it reaches zero;
 *	otherwise pages are requeued to the tail to age their peers.
 *	The object and map must be locked by the caller (per the
 *	original comment below).
 *
 * NOTE(review): this extraction is missing the function prologue,
 * braces and several control-flow lines; bytes preserved as found.
 */
* vm_pageout_object_deactivate_pages
* deactivate enough pages to satisfy the inactive target
* requirements or if vm_page_proc_limit is set, then
* deactivate all of the pages in the object and its
* The object and map must be locked.
vm_pageout_object_deactivate_pages(map
, object
, count
)
register vm_page_t p
, next
;
if( object
->shadow
->ref_count
> 1)
scount
/= object
->shadow
->ref_count
;
dcount
+= vm_pageout_object_deactivate_pages(map
, object
->shadow
, scount
);
if (object
->paging_in_progress
)
* scan the objects entire memory queue
rcount
= object
->resident_page_count
;
p
= (vm_page_t
) queue_first(&object
->memq
);
while ((rcount
-- > 0) && !queue_end(&object
->memq
, (queue_entry_t
) p
) ) {
next
= (vm_page_t
) queue_next(&p
->listq
);
* if a page is active, not wired and is in the processes pmap,
* then deactivate the page.
if ((p
->flags
& (PG_ACTIVE
|PG_BUSY
)) == PG_ACTIVE
&&
pmap_page_exists(vm_map_pmap(map
), VM_PAGE_TO_PHYS(p
))) {
if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p
))) {
p
->act_count
-= min(p
->act_count
, ACT_DECLINE
);
* if the page act_count is zero -- then we deactivate
pmap_page_protect(VM_PAGE_TO_PHYS(p
),
* else if on the next go-around we will deactivate the page
* we need to place the page on the end of the queue to age
* the other pages in memory.
queue_remove(&vm_page_queue_active
, p
, vm_page_t
, pageq
);
queue_enter(&vm_page_queue_active
, p
, vm_page_t
, pageq
);
queue_remove(&object
->memq
, p
, vm_page_t
, listq
);
queue_enter(&object
->memq
, p
, vm_page_t
, listq
);
if (p
->flags
& PG_INACTIVE
) {
vm_page_inactive_count
> vm_page_inactive_target
) {
* Move the page to the bottom of the queue.
pmap_clear_reference(VM_PAGE_TO_PHYS(p
));
if (p
->act_count
< ACT_MAX
)
p
->act_count
+= ACT_ADVANCE
;
queue_remove(&object
->memq
, p
, vm_page_t
, listq
);
queue_enter(&object
->memq
, p
, vm_page_t
, listq
);
queue_remove(&vm_page_queue_active
, p
, vm_page_t
, pageq
);
queue_enter(&vm_page_queue_active
, p
, vm_page_t
, pageq
);
/*
 * vm_pageout_map_deactivate_pages(map, entry, count, freeer):
 *	Recursively walk a map's entries (descending into sub-maps and
 *	share maps) applying "freeer" -- a callback of type
 *	int (*)(vm_map_t, vm_object_t, int) -- to each entry's VM object,
 *	decrementing *count by the number of pages the callback handled.
 *	Takes the map lock with lock_try_read()/lock_read_done(); the
 *	early-return path when the lock is unavailable is among the
 *	lines lost in extraction.
 *
 * NOTE(review): function prologue, braces and some control-flow lines
 * are missing from this copy; tokens preserved byte-for-byte.
 */
* deactivate some number of pages in a map, try to do it fairly, but
* that is really hard to do.
vm_pageout_map_deactivate_pages(map
, entry
, count
, freeer
)
int (*freeer
)(vm_map_t
, vm_object_t
, int);
if (!lock_try_read(&map
->lock
)) {
while (tmpe
!= &map
->header
&& *count
> 0) {
vm_pageout_map_deactivate_pages(map
, tmpe
, count
, freeer
);
} else if (entry
->is_sub_map
|| entry
->is_a_map
) {
tmpm
= entry
->object
.share_map
;
tmpe
= tmpm
->header
.next
;
while (tmpe
!= &tmpm
->header
&& *count
> 0) {
vm_pageout_map_deactivate_pages(tmpm
, tmpe
, count
, freeer
);
} else if (obj
= entry
->object
.vm_object
) {
*count
-= (*freeer
)(map
, obj
, *count
);
lock_read_done(&map
->lock
);
/*
 * vm_pageout_scan():
 *	The pageout daemon's main scan, in three phases:
 *	  1. Walk allproc, skipping system/pagedaemon/exiting processes
 *	     and those not SRUN/SSLEEP, and deactivate pages of processes
 *	     exceeding their RLIMIT_RSS (or all pages of swapped-out
 *	     processes, whose limit is forced to zero) via
 *	     vm_pageout_map_deactivate_pages().
 *	  2. Scan the inactive queue toward desired_free, freeing clean
 *	     unreferenced pages and laundering dirty PG_LAUNDRY pages
 *	     through vm_pageout_clean() while maxlaunder permits.
 *	  3. Compute the remaining page_shortage and scan the active
 *	     queue, declining act_count on unreferenced pages and
 *	     deactivating those that reach zero; referenced pages get
 *	     act_count advanced (capped at ACT_MAX) and are requeued.
 *	Returns a force_wakeup value consumed by vm_pageout() (see the
 *	`force_wakeup = vm_pageout_scan()` call later in this file).
 *
 * NOTE(review): the function's opening line, braces, and many
 * control-flow lines are missing from this extraction; the fragments
 * below are preserved byte-for-byte.  The identification of this span
 * as vm_pageout_scan rests on the comment below and the printf at
 * "vm_pageout_scan: page not inactive?".
 */
* vm_pageout_scan does the dirty work for the pageout daemon.
int page_shortage
, maxscan
, maxlaunder
;
int pages_freed
, free
, nproc
;
* scan the processes for exceeding their rlimits or if process
* is swapped out -- deactivate pages
for (p
= allproc
; p
!= NULL
; p
= p
->p_nxt
) {
* if this is a system process or if we have already
* looked at this process, skip it.
if (p
->p_flag
& (SSYS
|SPAGEDAEMON
|SWEXIT
)) {
* if the process is in a non-running type state,
if (p
->p_stat
!= SRUN
&& p
->p_stat
!= SSLEEP
) {
limit
= min(p
->p_rlimit
[RLIMIT_RSS
].rlim_cur
,
p
->p_rlimit
[RLIMIT_RSS
].rlim_max
);
* let processes that are swapped out really be swapped out
* set the limit to nothing (will force a swap-out.)
if ((p
->p_flag
& SLOAD
) == 0)
size
= p
->p_vmspace
->vm_pmap
.pm_stats
.resident_count
* NBPG
;
overage
= (size
- limit
) / NBPG
;
vm_pageout_map_deactivate_pages(&p
->p_vmspace
->vm_map
,
(vm_map_entry_t
) 0, &overage
, vm_pageout_object_deactivate_pages
);
if (((vm_page_free_count
+ vm_page_inactive_count
) >=
(vm_page_inactive_target
+ vm_page_free_target
)) &&
(vm_page_free_count
>= vm_page_free_target
))
desired_free
= vm_page_free_target
;
* Start scanning the inactive queue for pages we can free.
* We keep scanning until we have enough free pages or
* we have scanned through the entire queue. If we
* encounter dirty pages, we start cleaning them.
maxlaunder
= (vm_page_free_target
- vm_page_free_count
);
maxscan
= vm_page_inactive_count
;
m
= (vm_page_t
) queue_first(&vm_page_queue_inactive
);
if (queue_end(&vm_page_queue_inactive
, (queue_entry_t
) m
)
|| (vm_page_free_count
>= desired_free
)) {
next
= (vm_page_t
) queue_next(&m
->pageq
);
if( (m
->flags
& PG_INACTIVE
) == 0) {
printf("vm_pageout_scan: page not inactive?");
if (m
->hold_count
!= 0) {
* dont mess with busy pages
if (m
->flags
& PG_BUSY
) {
* if page is clean and but the page has been referenced,
* then reactivate the page, but if we are very low on memory
* or the page has not been referenced, then we free it to the
if (m
->flags
& PG_CLEAN
) {
if ((vm_page_free_count
> vm_pageout_free_min
) /* XXX */
&& pmap_is_referenced(VM_PAGE_TO_PHYS(m
))) {
} else if (!m
->act_count
) {
pmap_page_protect(VM_PAGE_TO_PHYS(m
),
m
->act_count
-= min(m
->act_count
, ACT_DECLINE
);
} else if ((m
->flags
& PG_LAUNDRY
) && maxlaunder
> 0) {
if (pmap_is_referenced(VM_PAGE_TO_PHYS(m
))) {
pmap_clear_reference(VM_PAGE_TO_PHYS(m
));
* If a page is dirty, then it is either
* being washed (but not yet cleaned)
* or it is still in the laundry. If it is
* still in the laundry, then we start the
if (written
= vm_pageout_clean(m
,0)) {
* if the next page has been re-activated, start scanning again
if ((next
->flags
& PG_INACTIVE
) == 0)
} else if (pmap_is_referenced(VM_PAGE_TO_PHYS(m
))) {
pmap_clear_reference(VM_PAGE_TO_PHYS(m
));
* now check malloc area or swap processes out if we are in low
if (vm_page_free_count
< vm_page_free_min
) {
* swap out inactive processes
* Compute the page shortage. If we are still very low on memory
* be sure that we will move a minimal amount of pages from active
page_shortage
= vm_page_inactive_target
-
(vm_page_free_count
+ vm_page_inactive_count
);
if (page_shortage
<= 0) {
if( vm_page_free_count
< vm_page_free_min
) {
page_shortage
= vm_page_free_min
- vm_page_free_count
;
} else if(((vm_page_free_count
+ vm_page_inactive_count
) <
(vm_page_free_min
+ vm_page_inactive_target
))) {
m
= (vm_page_t
) queue_first(&vm_page_queue_active
);
maxscan
= vm_page_active_count
;
while (maxscan
-- && (page_shortage
> 0)) {
if (queue_end(&vm_page_queue_active
, (queue_entry_t
) m
)) {
next
= (vm_page_t
) queue_next(&m
->pageq
);
* Don't deactivate pages that are busy.
if ((m
->flags
& PG_BUSY
) || (m
->hold_count
!= 0)) {
if (pmap_is_referenced(VM_PAGE_TO_PHYS(m
))) {
pmap_clear_reference(VM_PAGE_TO_PHYS(m
));
if (m
->act_count
< ACT_MAX
)
m
->act_count
+= ACT_ADVANCE
;
queue_remove(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
queue_enter(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
queue_remove(&m
->object
->memq
, m
, vm_page_t
, listq
);
queue_enter(&m
->object
->memq
, m
, vm_page_t
, listq
);
m
->act_count
-= min(m
->act_count
, ACT_DECLINE
);
* if the page act_count is zero -- then we deactivate
* else if on the next go-around we will deactivate the page
* we need to place the page on the end of the queue to age
* the other pages in memory.
queue_remove(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
queue_enter(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
queue_remove(&m
->object
->memq
, m
, vm_page_t
, listq
);
queue_enter(&m
->object
->memq
, m
, vm_page_t
, listq
);
* if we have not freed any pages and we are desparate for memory
* then we keep trying until we get some (any) memory.
if( !force_wakeup
&& (swap_pager_full
|| !force_wakeup
||
(pages_freed
== 0 && (vm_page_free_count
< vm_page_free_min
)))){
vm_page_pagesfreed
+= pages_freed
;
/*
 * Page-usage statistics scanner.
 *	Scans up to min(vm_page_active_count, MAXSCAN) pages of the
 *	active queue, advancing act_count (capped at ACT_MAX) on
 *	referenced pages and declining it on unreferenced ones,
 *	requeueing pages to the queue tail as it goes.  The rescan
 *	interval "nextscan" is derived from the referenced/scanned
 *	ratio, clamped to [scantick (hz/20), hz], and the function
 *	sleeps on &vm_pagescanner ("scanw").
 *
 * NOTE(review): the function's name and prologue are missing from
 * this extraction -- presumably this is the body of the pagescanner
 * daemon that vm_pagescanner is the sleep event for; confirm against
 * the original revision (vm_pageout.c rev 1.19, 1994).  Fragments
 * preserved byte-for-byte.
 */
int maxscan
, pages_scanned
, pages_referenced
, nextscan
, scantick
= hz
/20;
maxscan
= min(vm_page_active_count
, MAXSCAN
);
* Gather statistics on page usage.
m
= (vm_page_t
) queue_first(&vm_page_queue_active
);
if (queue_end(&vm_page_queue_active
, (queue_entry_t
) m
)) {
next
= (vm_page_t
) queue_next(&m
->pageq
);
* Dont mess with pages that are busy.
if ((m
->flags
& PG_BUSY
) || (m
->hold_count
!= 0)) {
queue_remove(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
queue_enter(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
* Advance pages that have been referenced, decline pages that
if (pmap_is_referenced(VM_PAGE_TO_PHYS(m
))) {
pmap_clear_reference(VM_PAGE_TO_PHYS(m
));
if (m
->act_count
< ACT_MAX
)
m
->act_count
+= ACT_ADVANCE
;
queue_remove(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
queue_enter(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
queue_remove(&m
->object
->memq
, m
, vm_page_t
, listq
);
queue_enter(&m
->object
->memq
, m
, vm_page_t
, listq
);
m
->act_count
-= min(m
->act_count
, ACT_DECLINE
);
* if the page act_count is zero, and we are low on mem -- then we deactivate
(vm_page_free_count
+vm_page_inactive_count
< vm_page_free_target
+vm_page_inactive_target
)) {
* else if on the next go-around we will deactivate the page
* we need to place the page on the end of the queue to age
* the other pages in memory.
queue_remove(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
queue_enter(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
queue_remove(&m
->object
->memq
, m
, vm_page_t
, listq
);
queue_enter(&m
->object
->memq
, m
, vm_page_t
, listq
);
nextscan
= (pages_scanned
/ pages_referenced
) * scantick
;
nextscan
= max(nextscan
, scantick
);
nextscan
= min(nextscan
, hz
);
tsleep((caddr_t
) &vm_pagescanner
, PVM
, "scanw", nextscan
);
/*
 * vm_pageout():
 *	High-level pageout daemon entry point.  Initializes the paging
 *	parameters (vm_page_free_reserved, vm_page_free_min clamps,
 *	vm_page_free_target = 2*free_min + reserved,
 *	vm_page_inactive_target = free_count/12), primes the swap pager
 *	via swap_pager_alloc(0,0,0,0), then loops forever: sleep on
 *	&vm_pages_needed ("psleep"), run vm_pageout_scan() (whose
 *	result drives the force-wakeup hack described below), and wake
 *	waiters on &vm_page_free_count and kmem_map.
 *
 * NOTE(review): the function's opening line, braces and some
 * statements are missing from this extraction; fragments preserved
 * byte-for-byte.
 */
* vm_pageout is the high level pageout daemon.
extern npendingio
, swiopend
;
extern int vm_page_count
;
* Initialize some paging parameters.
vm_page_free_reserved
= 8;
if (vm_page_free_min
< 8)
if (vm_page_free_min
> 32)
vm_page_free_target
= 2*vm_page_free_min
+ vm_page_free_reserved
;
vm_page_inactive_target
= vm_page_free_count
/ 12;
vm_page_free_min
+= vm_page_free_reserved
;
(void) swap_pager_alloc(0, 0, 0, 0);
* The pageout daemon is never done, so loop
tsleep((caddr_t
) &vm_pages_needed
, PVM
, "psleep", 0);
* The force wakeup hack added to eliminate delays and potiential
* deadlock. It was possible for the page daemon to indefintely
* postpone waking up a process that it might be waiting for memory
* on. The putmulti stuff seems to have aggravated the situation.
force_wakeup
= vm_pageout_scan();
wakeup( (caddr_t
) &vm_page_free_count
);
wakeup((caddr_t
) kmem_map
);