/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.1 (Berkeley) 6/18/93
 */

/*
 * Mapped file (mmap) interface to VM
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_prot.h>
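
/*
 * Debugging knob referenced by the DEBUG printfs throughout this file.
 * Only the names mmapdebug, MDB_FOLLOW, MDB_SYNC and MDB_MAPIT are taken
 * from the code below; the particular flag values here are a plausible
 * reconstruction, not authoritative.
 */
#ifdef DEBUG
int mmapdebug = 0;
#define	MDB_FOLLOW	0x01
#define	MDB_SYNC	0x02
#define	MDB_MAPIT	0x04
#endif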
/* Not yet implemented */
/* Not yet implemented */
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
struct getpagesize_args {
	int	dummy;
};
/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
#ifdef COMPAT_43
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800
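
	/*
	 * Example of the translation done below (illustrative only): an
	 * old 4.3BSD-style request with prot 0x7 and flags
	 * OMAP_SHARED|OMAP_FIXED is handed to mmap() with
	 * nargs.prot = PROT_EXEC|PROT_WRITE|PROT_READ and
	 * nargs.flags = MAP_SHARED|MAP_FIXED.
	 */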
	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif /* COMPAT_43 */
int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
		       p->p_pid, uap->addr, uap->len, prot,
		       flags, uap->fd, (vm_offset_t)uap->pos);
#endif
	/*
	 * Address (if FIXED) must be page aligned.
	 * Size is implicitly rounded to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
	/*
	 * XXX if no hint provided for a non-fixed mapping place it after
	 * the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation.
		 * Obtain vnode and make sure it is of appropriate type.
		 */
		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * Ensure that file and memory protections are compatible.
		 * Note that we only worry about writability if mapping is
		 * shared; in this case, current and max prot are dictated
		 * by the open file.
		 * XXX use the vnode instead?  Problem is: what credentials
		 * do we use for determination?  What if proc does a setuid?
		 */
		maxprot = VM_PROT_EXECUTE;	/* ??? */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);
		if (flags & MAP_SHARED) {
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			else if (prot & PROT_WRITE)
				return (EACCES);
		} else
			maxprot |= VM_PROT_WRITE;
		handle = (caddr_t)vp;
	}
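	/*
	 * For example, a MAP_SHARED mapping of a file opened read-only
	 * ends up with a maxprot of VM_PROT_READ|VM_PROT_EXECUTE, so a
	 * later mprotect() asking for PROT_WRITE on that range will be
	 * refused with EACCES.
	 */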
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, (vm_offset_t)uap->pos);
	if (error == 0)
		*retval = (int)addr;
	return (error);
}
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr, objoff, oaddr;
	vm_size_t size, osize;
	vm_prot_t prot, mprot;
	vm_inherit_t inherit;
	vm_object_t object;
	boolean_t shared;
	int rv;

#ifdef DEBUG
	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
		printf("msync(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	if (((int)uap->addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
	addr = oaddr = (vm_offset_t)uap->addr;
	osize = (vm_size_t)uap->len;
	/*
	 * Region must be entirely contained in a single entry
	 */
	if (!vm_map_is_allocated(&p->p_vmspace->vm_map, addr, addr+osize,
	    TRUE))
		return (EINVAL);
	/*
	 * Determine the object associated with that entry
	 * (object is returned locked on KERN_SUCCESS)
	 */
	rv = vm_region(&p->p_vmspace->vm_map, &addr, &size, &prot, &mprot,
	    &inherit, &shared, &object, &objoff);
	if (rv != KERN_SUCCESS)
		return (EINVAL);
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: region: object %x addr %x size %d objoff %d\n",
		       object, addr, size, objoff);
#endif
	/*
	 * Do not msync non-vnode-backed objects.
	 */
	if ((object->flags & OBJ_INTERNAL) || object->pager == NULL ||
	    object->pager->pg_type != PG_VNODE) {
		vm_object_unlock(object);
		return (EINVAL);
	}
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: cleaning/flushing object range [%x-%x)\n",
		       objoff, objoff+osize);
#endif
	if (prot & VM_PROT_WRITE)
		vm_object_page_clean(object, objoff, objoff+osize, FALSE);
	/*
	 * (XXX)
	 * Bummer, gotta flush all cached pages to ensure
	 * consistency with the file system cache.
	 */
	vm_object_page_remove(object, objoff, objoff+osize);
	vm_object_unlock(object);

	return (0);
}
int
munmap(p, uap, retval)
	struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmap(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	if (size == 0)
		return (0);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
	if (!vm_map_is_allocated(&p->p_vmspace->vm_map, addr, addr + size,
	    FALSE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(&p->p_vmspace->vm_map, addr, addr+size);
	return (0);
}
void
munmapfd(fd)
	int fd;
{
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
#endif

	/*
	 * XXX -- should vm_deallocate any regions mapped to this file
	 */
	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mprotect(%d): addr %x len %x prot %d\n",
		       p->p_pid, uap->addr, uap->len, uap->prot);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
	size = (vm_size_t)uap->len;
	prot = uap->prot & VM_PROT_ALL;
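	/*
	 * Note: vm_map_protect() fails with KERN_PROTECTION_FAILURE when
	 * the request exceeds the maximum protection recorded for the
	 * mapping (see the maxprot handling in mmap() above); that case
	 * is reported to the caller as EACCES below.
	 */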
	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;
	extern int vm_page_max_wired;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	size = round_page((vm_size_t)uap->len);
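	/*
	 * Two limits apply to wiring: a system-wide cap on the total
	 * number of wired pages (vm_page_max_wired) and, where the pmap
	 * can report its wired page count, the per-process
	 * RLIMIT_MEMLOCK byte limit; otherwise wiring requires
	 * superuser privilege.
	 */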
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif
	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
#ifndef pmap_wired_count
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif
	size = round_page((vm_size_t)uap->len);
	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
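	/*
	 * Typical call, as made by mmap() above:
	 *
	 *	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size,
	 *	    prot, maxprot, flags, handle, (vm_offset_t)uap->pos);
	 *
	 * where handle is NULL for MAP_ANON or the vnode pointer cast to
	 * caddr_t for a file mapping.
	 */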
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		(void)vm_deallocate(map, *addr, size);
	}
	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existence of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
		    pager, (vm_offset_t)foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL,
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
		    pager, (vm_offset_t)foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was successful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which in this case does not matter.
		 */
		(void) pager_cache(object, FALSE);
		if (rv == KERN_SUCCESS)
			goto out;
	}
printf("vm_mmap: no object: vp %x, pager %x\n",
* Allows modifications to go out to the vnode.
if (flags
& MAP_SHARED
) {
rv
= vm_allocate_with_pager(map
, addr
, size
,
if (rv
!= KERN_SUCCESS
) {
vm_object_deallocate(object
);
* Don't cache the object. This is the easiest way
* of ensuring that data gets back to the filesystem
* because vnode_pager_deallocate() will fsync the
* vnode. pager_cache() will lose the extra ref.
if (prot
& VM_PROT_WRITE
)
pager_cache(object
, FALSE
);
vm_object_deallocate(object
);
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW, you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
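		/*
		 * Illustration: with MAP_COPY, a write to the file by
		 * another process after the mapping is established is
		 * never visible through the mapping; with MAP_PRIVATE it
		 * remains visible until this process itself writes to the
		 * page.
		 */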
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
			    addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
			    VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
			    TRUE, pager, (vm_offset_t)foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that, we need to guarantee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleaze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
			    FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
				    &tentry, &tobject, &toffset,
				    &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
	}
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
		       curproc->p_pid, *addr, size, pager);
#endif
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different from prot, we must set both explicitly.
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_inherit(map, *addr, size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
/*
 * Internal bastardized version of Mach's vm_region system call.
 * Given address and size it returns map attributes as well
 * as the (locked) object mapped at that location.
 */
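/*
 * Within this file it is used by msync() above to find the object
 * backing the region being synced.
 */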
int
vm_region(map, addr, size, prot, max_prot, inheritance, shared, object, objoff)
	vm_map_t map;
	vm_offset_t *addr;		/* IN/OUT */
	vm_size_t *size;		/* OUT */
	vm_prot_t *prot;		/* OUT */
	vm_prot_t *max_prot;		/* OUT */
	vm_inherit_t *inheritance;	/* OUT */
	boolean_t *shared;		/* OUT */
	vm_object_t *object;		/* OUT */
	vm_offset_t *objoff;		/* OUT */
{
	vm_map_entry_t tmp_entry;
	register vm_map_entry_t entry;
	register vm_offset_t tmp_offset;
	vm_offset_t start;

	if (map == NULL)
		return (KERN_INVALID_ARGUMENT);

	start = *addr;
	vm_map_lock_read(map);
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->next) == &map->header) {
			vm_map_unlock_read(map);
			return (KERN_NO_SPACE);
		} else {
			start = entry->start;
			*addr = start;
		}
	} else
		entry = tmp_entry;
	*prot = entry->protection;
	*max_prot = entry->max_protection;
	*inheritance = entry->inheritance;

	tmp_offset = entry->offset + (start - entry->start);
	*size = (entry->end - start);
	if (entry->is_a_map) {
		register vm_map_t share_map;
		vm_size_t share_size;

		share_map = entry->object.share_map;
		vm_map_lock_read(share_map);
		(void) vm_map_lookup_entry(share_map, tmp_offset, &tmp_entry);
		if ((share_size = (tmp_entry->end - tmp_offset)) < *size)
			*size = share_size;

		vm_object_lock(tmp_entry->object);
		*object = tmp_entry->object.vm_object;
		*objoff = tmp_entry->offset + (tmp_offset - tmp_entry->start);

		*shared = (share_map->ref_count != 1);
		vm_map_unlock_read(share_map);
	} else {
		vm_object_lock(entry->object);
		*object = entry->object.vm_object;
		*objoff = tmp_offset;

		*shared = FALSE;
	}

	vm_map_unlock_read(map);
	return (KERN_SUCCESS);
}
/*
 * Yet another bastard routine.
 */
int
vm_allocate_with_pager(map, addr, size, fitit, pager, poffset, internal)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	boolean_t fitit;
	vm_pager_t pager;
	vm_offset_t poffset;
	boolean_t internal;
{
	register vm_object_t object;
	register int result;
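	/*
	 * The internal flag distinguishes anonymous (internal) objects,
	 * which are marked OBJ_INTERNAL and kept off the object cache
	 * hash list, from pager-backed objects that are entered into the
	 * cache below.
	 */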
	if (map == NULL)
		return (KERN_INVALID_ARGUMENT);

	*addr = trunc_page(*addr);
	/*
	 * Lookup the pager/paging-space in the object cache.
	 * If it's not there, then create a new object and cache
	 * it.
	 */
	object = vm_object_lookup(pager);
	if (object == NULL) {
		object = vm_object_allocate(size);
		/*
		 * From Mike Hibler: "unnamed anonymous objects should never
		 * be on the hash list ... For now you can just change
		 * vm_allocate_with_pager to not do vm_object_enter if this
		 * is an internal object ..."
		 */
		if (!internal)
			vm_object_enter(object, pager);
	}
	if (internal)
		object->flags |= OBJ_INTERNAL;
	else
		object->flags &= ~OBJ_INTERNAL;
	result = vm_map_find(map, object, poffset, addr, size, fitit);
	if (result != KERN_SUCCESS)
		vm_object_deallocate(object);
	else if (pager != NULL)
		vm_object_setpager(object, pager, (vm_offset_t) 0, TRUE);
	return (result);
}
/*
 * XXX: this routine belongs in vm_map.c.
 *
 * Returns TRUE if the range [start - end) is allocated in either
 * a single entry (single_entry == TRUE) or multiple contiguous
 * entries (single_entry == FALSE).
 *
 * start and end should be page aligned.
 */
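/*
 * In this file, msync() uses it to insist that the region lie within
 * a single map entry, while munmap() only needs the range to be
 * contiguously allocated.
 */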
boolean_t
vm_map_is_allocated(map, start, end, single_entry)
	vm_map_t map;
	vm_offset_t start, end;
	boolean_t single_entry;
{
	vm_map_entry_t mapent;
	register vm_offset_t nend;

	vm_map_lock_read(map);
	/*
	 * Start address not in any entry
	 */
	if (!vm_map_lookup_entry(map, start, &mapent)) {
		vm_map_unlock_read(map);
		return (FALSE);
	}
	/*
	 * Find the maximum stretch of contiguously allocated space
	 */
	nend = mapent->end;
	if (!single_entry) {
		mapent = mapent->next;
		while (mapent != &map->header && mapent->start == nend) {
			nend = mapent->end;
			mapent = mapent->next;
		}
	}

	vm_map_unlock_read(map);
	return (end <= nend);
}