fix munmapfd's arguments; it takes a struct proc _then_ an int.
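
The whole change is a calling-convention fix: munmapfd() now takes the proc
pointer explicitly instead of reaching for curproc. A minimal user-land sketch
of the corrected argument order (the struct proc stub and main() here are
illustrative only, not kernel code):

    #include <stdio.h>

    struct proc { int p_pid; };	/* stub; the real one lives in <sys/proc.h> */

    /*
     * New signature: the struct proc comes first, then the descriptor.
     * (The real routine clears UF_MAPPED in p->p_fd->fd_ofileflags[fd].)
     */
    static void
    munmapfd(struct proc *p, int fd)
    {

    	printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
    }

    int
    main(void)
    {
    	struct proc p;

    	p.p_pid = 42;
    	munmapfd(&p, 3);	/* struct proc _then_ int */
    	return (0);
    }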
diff --git a/usr/src/sys/vm/vm_mmap.c b/usr/src/sys/vm/vm_mmap.c
index 69473da..9be0c84 100644
--- a/usr/src/sys/vm/vm_mmap.c
+++ b/usr/src/sys/vm/vm_mmap.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 1988 University of Utah.
- * Copyright (c) 1991 The Regents of the University of California.
- * All rights reserved.
+ * Copyright (c) 1991, 1993
+ *     The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
@@ -9,29 +9,30 @@
  *
  * %sccs.include.redist.c%
  *
- * from: Utah $Hdr: vm_mmap.c 1.3 90/01/21$
+ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
  *
- *     @(#)vm_mmap.c   7.5 (Berkeley) %G%
+ *     @(#)vm_mmap.c   8.9 (Berkeley) %G%
  */
 
 /*
  * Mapped file (mmap) interface to VM
  */
 
-#include "param.h"
-#include "systm.h"
-#include "filedesc.h"
-#include "proc.h"
-#include "vnode.h"
-#include "specdev.h"
-#include "file.h"
-#include "mman.h"
-#include "conf.h"
-
-#include "vm.h"
-#include "vm_pager.h"
-#include "vm_prot.h"
-#include "vm_statistics.h"
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/mman.h>
+#include <sys/conf.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <vm/vm.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_prot.h>
 
 #ifdef DEBUG
 int mmapdebug = 0;
@@ -40,23 +41,29 @@ int mmapdebug = 0;
 #define MDB_MAPIT      0x04
 #endif
 
+struct sbrk_args {
+       int     incr;
+};
 /* ARGSUSED */
-getpagesize(p, uap, retval)
+int
+sbrk(p, uap, retval)
        struct proc *p;
-       void *uap;
+       struct sbrk_args *uap;
        int *retval;
 {
 
-       *retval = NBPG * CLSIZE;
-       return (0);
+       /* Not yet implemented */
+       return (EOPNOTSUPP);
 }
 
+struct sstk_args {
+       int     incr;
+};
 /* ARGSUSED */
-sbrk(p, uap, retval)
+int
+sstk(p, uap, retval)
        struct proc *p;
-       struct args {
-               int     incr;
-       } *uap;
+       struct sstk_args *uap;
        int *retval;
 {
 
@@ -64,209 +71,301 @@ sbrk(p, uap, retval)
        return (EOPNOTSUPP);
 }
 
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct getpagesize_args {
+       int     dummy;
+};
 /* ARGSUSED */
-sstk(p, uap, retval)
+int
+ogetpagesize(p, uap, retval)
        struct proc *p;
-       struct args {
-               int     incr;
-       } *uap;
+       struct getpagesize_args *uap;
        int *retval;
 {
 
-       /* Not yet implemented */
-       return (EOPNOTSUPP);
+       *retval = PAGE_SIZE;
+       return (0);
 }
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+struct mmap_args {
+       caddr_t addr;
+       size_t  len;
+       int     prot;
+       int     flags;
+       int     fd;
+       long    pad;
+       off_t   pos;
+};
+
+#ifdef COMPAT_43
+struct ommap_args {
+       caddr_t addr;
+       int     len;
+       int     prot;
+       int     flags;
+       int     fd;
+       long    pos;
+};
+int
+ommap(p, uap, retval)
+       struct proc *p;
+       register struct ommap_args *uap;
+       int *retval;
+{
+       struct mmap_args nargs;
+       static const char cvtbsdprot[8] = {
+               0,
+               PROT_EXEC,
+               PROT_WRITE,
+               PROT_EXEC|PROT_WRITE,
+               PROT_READ,
+               PROT_EXEC|PROT_READ,
+               PROT_WRITE|PROT_READ,
+               PROT_EXEC|PROT_WRITE|PROT_READ,
+       };
+#define        OMAP_ANON       0x0002
+#define        OMAP_COPY       0x0020
+#define        OMAP_SHARED     0x0010
+#define        OMAP_FIXED      0x0100
+#define        OMAP_INHERIT    0x0800
+
+       nargs.addr = uap->addr;
+       nargs.len = uap->len;
+       nargs.prot = cvtbsdprot[uap->prot&0x7];
+       nargs.flags = 0;
+       if (uap->flags & OMAP_ANON)
+               nargs.flags |= MAP_ANON;
+       if (uap->flags & OMAP_COPY)
+               nargs.flags |= MAP_COPY;
+       if (uap->flags & OMAP_SHARED)
+               nargs.flags |= MAP_SHARED;
+       else
+               nargs.flags |= MAP_PRIVATE;
+       if (uap->flags & OMAP_FIXED)
+               nargs.flags |= MAP_FIXED;
+       if (uap->flags & OMAP_INHERIT)
+               nargs.flags |= MAP_INHERIT;
+       nargs.fd = uap->fd;
+       nargs.pos = uap->pos;
+       return (mmap(p, &nargs, retval));
+}
+#endif
 
-smmap(p, uap, retval)
+int
+mmap(p, uap, retval)
        struct proc *p;
-       register struct args {
-               caddr_t addr;
-               int     len;
-               int     prot;
-               int     flags;
-               int     fd;
-               off_t   pos;
-       } *uap;
+       register struct mmap_args *uap;
        int *retval;
 {
        register struct filedesc *fdp = p->p_fd;
        register struct file *fp;
        struct vnode *vp;
-       vm_offset_t addr;
+       vm_offset_t addr, pos;
        vm_size_t size;
-       vm_prot_t prot;
+       vm_prot_t prot, maxprot;
        caddr_t handle;
-       int mtype, error;
+       int flags, error;
 
+       prot = uap->prot & VM_PROT_ALL;
+       flags = uap->flags;
+       pos = uap->pos;
 #ifdef DEBUG
        if (mmapdebug & MDB_FOLLOW)
                printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
-                      p->p_pid, uap->addr, uap->len, uap->prot,
-                      uap->flags, uap->fd, uap->pos);
+                      p->p_pid, uap->addr, uap->len, prot,
+                      flags, uap->fd, pos);
 #endif
-       /*
-        * Make sure one of the sharing types is specified
-        */
-       mtype = uap->flags & MAP_TYPE;
-       switch (mtype) {
-       case MAP_FILE:
-       case MAP_ANON:
-               break;
-       default:
-               return(EINVAL);
-       }
        /*
         * Address (if FIXED) must be page aligned.
         * Size is implicitly rounded to a page boundary.
+        *
+        * XXX most (all?) vendors require that the file offset be
+        * page aligned as well.  However, we already have applications
+        * (e.g. nlist) that rely on unrestricted alignment.  Since we
+        * support it, let it happen.
         */
        addr = (vm_offset_t) uap->addr;
-       if ((uap->flags & MAP_FIXED) && (addr & page_mask) || uap->len < 0)
-               return(EINVAL);
+       if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
+#if 0
+           ((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
+#endif
+           (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
+               return (EINVAL);
        size = (vm_size_t) round_page(uap->len);
        /*
-        * XXX if no hint provided for a non-fixed mapping place it after
-        * the end of the largest possible heap.
+        * Check for illegal addresses.  Watch out for address wrap...
+        * Note that VM_*_ADDRESS are not constants due to casts (argh).
+        */
+       if (flags & MAP_FIXED) {
+               if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
+                       return (EINVAL);
+               if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
+                       return (EINVAL);
+               if (addr > addr + size)
+                       return (EINVAL);
+       }
+       /*
+        * XXX for non-fixed mappings where no hint is provided or
+        * the hint would fall in the potential heap space,
+        * place it after the end of the largest possible heap.
         *
         * There should really be a pmap call to determine a reasonable
         * location.
         */
-       if (addr == 0 && (uap->flags & MAP_FIXED) == 0)
+       else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
                addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
-       /*
-        * Mapping file or named anonymous, get fp for validation
-        */
-       if (mtype == MAP_FILE || uap->fd != -1) {
-               if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
-                   (fp = fdp->fd_ofiles[uap->fd]) == NULL)
-                       return(EBADF);
-       }
-       /*
-        * If we are mapping a file we need to check various
-        * file/vnode related things.
-        */
-       if (mtype == MAP_FILE) {
+       if (flags & MAP_ANON) {
                /*
-                * Obtain vnode and make sure it is of appropriate type
+                * Mapping blank space is trivial.
                 */
+               handle = NULL;
+               maxprot = VM_PROT_ALL;
+               pos = 0;
+       } else {
+               /*
+                * Mapping file, get fp for validation.
+                * Obtain vnode and make sure it is of appropriate type.
+                */
+               if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
+                   (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+                       return (EBADF);
                if (fp->f_type != DTYPE_VNODE)
-                       return(EINVAL);
+                       return (EINVAL);
                vp = (struct vnode *)fp->f_data;
                if (vp->v_type != VREG && vp->v_type != VCHR)
-                       return(EINVAL);
+                       return (EINVAL);
                /*
-                * Ensure that file protection and desired protection
-                * are compatible.  Note that we only worry about writability
-                * if mapping is shared.
+                * XXX hack to handle use of /dev/zero to map anon
+                * memory (ala SunOS).
                 */
                 */
-               if ((uap->prot & PROT_READ) && (fp->f_flag & FREAD) == 0 ||
-                   ((uap->flags & MAP_SHARED) &&
-                    (uap->prot & PROT_WRITE) && (fp->f_flag & FWRITE) == 0))
-                       return(EACCES);
-               handle = (caddr_t)vp;
-       } else if (uap->fd != -1)
-               handle = (caddr_t)fp;
-       else
-               handle = NULL;
-       /*
-        * Map protections to MACH style
-        */
-       prot = VM_PROT_NONE;
-       if (uap->prot & PROT_READ)
-               prot |= VM_PROT_READ;
-       if (uap->prot & PROT_WRITE)
-               prot |= VM_PROT_WRITE;
-       if (uap->prot & PROT_EXEC)
-               prot |= VM_PROT_EXECUTE;
-
-       error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot,
-                       uap->flags, handle, (vm_offset_t)uap->pos);
+               if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
+                       handle = NULL;
+                       maxprot = VM_PROT_ALL;
+                       flags |= MAP_ANON;
+               } else {
+                       /*
+                        * Ensure that file and memory protections are
+                        * compatible.  Note that we only worry about
+                        * writability if mapping is shared; in this case,
+                        * current and max prot are dictated by the open file.
+                        * XXX use the vnode instead?  Problem is: what
+                        * credentials do we use for determination?
+                        * What if proc does a setuid?
+                        */
+                       maxprot = VM_PROT_EXECUTE;      /* ??? */
+                       if (fp->f_flag & FREAD)
+                               maxprot |= VM_PROT_READ;
+                       else if (prot & PROT_READ)
+                               return (EACCES);
+                       if (flags & MAP_SHARED) {
+                               if (fp->f_flag & FWRITE)
+                                       maxprot |= VM_PROT_WRITE;
+                               else if (prot & PROT_WRITE)
+                                       return (EACCES);
+                       } else
+                               maxprot |= VM_PROT_WRITE;
+                       handle = (caddr_t)vp;
+               }
+       }
+       error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
+           flags, handle, pos);
        if (error == 0)
-               *retval = (int) addr;
-       return(error);
+               *retval = (int)addr;
+       return (error);
 }
 
+struct msync_args {
+       caddr_t addr;
+       int     len;
+};
+int
 msync(p, uap, retval)
        struct proc *p;
-       struct args {
-               caddr_t addr;
-               int     len;
-       } *uap;
+       struct msync_args *uap;
        int *retval;
 {
-       vm_offset_t addr, objoff, oaddr;
-       vm_size_t size, osize;
-       vm_prot_t prot, mprot;
-       vm_inherit_t inherit;
-       vm_object_t object;
-       boolean_t shared;
+       vm_offset_t addr;
+       vm_size_t size;
+       vm_map_t map;
        int rv;
+       boolean_t syncio, invalidate;
 
 #ifdef DEBUG
        if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
                printf("msync(%d): addr %x len %x\n",
                       p->p_pid, uap->addr, uap->len);
 #endif
-       if (((int)uap->addr & page_mask) || uap->len < 0)
-               return(EINVAL);
-       addr = oaddr = (vm_offset_t)uap->addr;
-       osize = (vm_size_t)uap->len;
-       /*
-        * Region must be entirely contained in a single entry
-        */
-       if (!vm_map_is_allocated(&p->p_vmspace->vm_map, addr, addr+osize,
-           TRUE))
-               return(EINVAL);
+       if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
+               return (EINVAL);
+       map = &p->p_vmspace->vm_map;
+       addr = (vm_offset_t)uap->addr;
+       size = (vm_size_t)uap->len;
        /*
-        * Determine the object associated with that entry
-        * (object is returned locked on KERN_SUCCESS)
+        * XXX Gak!  If size is zero we are supposed to sync "all modified
+        * pages with the region containing addr".  Unfortunately, we
+        * don't really keep track of individual mmaps so we approximate
+        * by flushing the range of the map entry containing addr.
+        * This can be incorrect if the region splits or is coalesced
+        * with a neighbor.
         */
         */
-       rv = vm_region(&p->p_vmspace->vm_map, &addr, &size, &prot, &mprot,
-                      &inherit, &shared, &object, &objoff);
-       if (rv != KERN_SUCCESS)
-               return(EINVAL);
+       if (size == 0) {
+               vm_map_entry_t entry;
+
+               vm_map_lock_read(map);
+               rv = vm_map_lookup_entry(map, addr, &entry);
+               vm_map_unlock_read(map);
+               if (!rv)
+                       return (EINVAL);
+               addr = entry->start;
+               size = entry->end - entry->start;
+       }
 #ifdef DEBUG
        if (mmapdebug & MDB_SYNC)
-               printf("msync: region: object %x addr %x size %d objoff %d\n",
-                      object, addr, size, objoff);
+               printf("msync: cleaning/flushing address range [%x-%x)\n",
+                      addr, addr+size);
 #endif
        /*
-        * Do not msync non-vnoded backed objects.
+        * Could pass this in as a third flag argument to implement
+        * Sun's MS_ASYNC.
         */
-       if (object->internal || object->pager == NULL ||
-           object->pager->pg_type != PG_VNODE) {
-               vm_object_unlock(object);
-               return(EINVAL);
-       }
-       objoff += oaddr - addr;
-       if (osize == 0)
-               osize = size;
-#ifdef DEBUG
-       if (mmapdebug & MDB_SYNC)
-               printf("msync: cleaning/flushing object range [%x-%x)\n",
-                      objoff, objoff+osize);
-#endif
-       if (prot & VM_PROT_WRITE)
-               vm_object_page_clean(object, objoff, objoff+osize);
+       syncio = TRUE;
        /*
-        * (XXX)
-        * Bummer, gotta flush all cached pages to ensure
-        * consistency with the file system cache.
+        * XXX bummer, gotta flush all cached pages to ensure
+        * consistency with the file system cache.  Otherwise, we could
+        * pass this in to implement Sun's MS_INVALIDATE.
         */
-       vm_object_page_remove(object, objoff, objoff+osize);
-       vm_object_unlock(object);
-       return(0);
+       invalidate = TRUE;
+       /*
+        * Clean the pages and interpret the return value.
+        */
+       rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
+       switch (rv) {
+       case KERN_SUCCESS:
+               break;
+       case KERN_INVALID_ADDRESS:
+               return (EINVAL);        /* Sun returns ENOMEM? */
+       case KERN_FAILURE:
+               return (EIO);
+       default:
+               return (EINVAL);
+       }
+       return (0);
 }
 
+struct munmap_args {
+       caddr_t addr;
+       int     len;
+};
+int
 munmap(p, uap, retval)
        register struct proc *p;
-       register struct args {
-               caddr_t addr;
-               int     len;
-       } *uap;
+       register struct munmap_args *uap;
        int *retval;
 {
        vm_offset_t addr;
        vm_size_t size;
+       vm_map_t map;
 
 #ifdef DEBUG
        if (mmapdebug & MDB_FOLLOW)
@@ -275,39 +374,60 @@ munmap(p, uap, retval)
 #endif
 
        addr = (vm_offset_t) uap->addr;
-       if ((addr & page_mask) || uap->len < 0)
+       if ((addr & PAGE_MASK) || uap->len < 0)
                return(EINVAL);
        size = (vm_size_t) round_page(uap->len);
        if (size == 0)
                return(0);
-       if (!vm_map_is_allocated(&p->p_vmspace->vm_map, addr, addr+size,
-           FALSE))
+       /*
+        * Check for illegal addresses.  Watch out for address wrap...
+        * Note that VM_*_ADDRESS are not constants due to casts (argh).
+        */
+       if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
+               return (EINVAL);
+       if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
+               return (EINVAL);
+       if (addr > addr + size)
+               return (EINVAL);
+       map = &p->p_vmspace->vm_map;
+       /*
+        * Make sure entire range is allocated.
+        * XXX this seemed overly restrictive, so we relaxed it.
+        */
+#if 0
+       if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
                return(EINVAL);
+#endif
        /* returns nothing but KERN_SUCCESS anyway */
-       (void) vm_map_remove(&p->p_vmspace->vm_map, addr, addr+size);
+       (void) vm_map_remove(map, addr, addr+size);
        return(0);
 }
 
-munmapfd(fd)
+void
+munmapfd(p, fd)
+       struct proc *p;
+       int fd;
 {
 #ifdef DEBUG
        if (mmapdebug & MDB_FOLLOW)
-               printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
+               printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
 #endif
 
        /*
-        * XXX -- should vm_deallocate any regions mapped to this file
+        * XXX should vm_deallocate any regions mapped to this file
         */
-       curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
+       p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
 }
 
+struct mprotect_args {
+       caddr_t addr;
+       int     len;
+       int     prot;
+};
+int
 mprotect(p, uap, retval)
        struct proc *p;
-       struct args {
-               caddr_t addr;
-               int     len;
-               int     prot;
-       } *uap;
+       struct mprotect_args *uap;
        int *retval;
 {
        vm_offset_t addr;
@@ -320,20 +440,11 @@ mprotect(p, uap, retval)
                       p->p_pid, uap->addr, uap->len, uap->prot);
 #endif
 
-       addr = (vm_offset_t) uap->addr;
-       if ((addr & page_mask) || uap->len < 0)
+       addr = (vm_offset_t)uap->addr;
+       if ((addr & PAGE_MASK) || uap->len < 0)
                return(EINVAL);
-       size = (vm_size_t) uap->len;
-       /*
-        * Map protections
-        */
-       prot = VM_PROT_NONE;
-       if (uap->prot & PROT_READ)
-               prot |= VM_PROT_READ;
-       if (uap->prot & PROT_WRITE)
-               prot |= VM_PROT_WRITE;
-       if (uap->prot & PROT_EXEC)
-               prot |= VM_PROT_EXECUTE;
+       size = (vm_size_t)uap->len;
+       prot = uap->prot & VM_PROT_ALL;
 
        switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
     FALSE)) {
@@ -345,14 +456,16 @@ mprotect(p, uap, retval)
        return (EINVAL);
 }
 
+struct madvise_args {
+       caddr_t addr;
+       int     len;
+       int     behav;
+};
 /* ARGSUSED */
+int
 madvise(p, uap, retval)
        struct proc *p;
-       struct args {
-               caddr_t addr;
-               int     len;
-               int     behav;
-       } *uap;
+       struct madvise_args *uap;
        int *retval;
 {
 
@@ -360,14 +473,16 @@ madvise(p, uap, retval)
        return (EOPNOTSUPP);
 }
 
+struct mincore_args {
+       caddr_t addr;
+       int     len;
+       char    *vec;
+};
 /* ARGSUSED */
+int
 mincore(p, uap, retval)
        struct proc *p;
-       struct args {
-               caddr_t addr;
-               int     len;
-               char    *vec;
-       } *uap;
+       struct mincore_args *uap;
        int *retval;
 {
 
@@ -375,18 +490,88 @@ mincore(p, uap, retval)
        return (EOPNOTSUPP);
 }
 
+struct mlock_args {
+       caddr_t addr;
+       size_t  len;
+};
+int
+mlock(p, uap, retval)
+       struct proc *p;
+       struct mlock_args *uap;
+       int *retval;
+{
+       vm_offset_t addr;
+       vm_size_t size;
+       int error;
+       extern int vm_page_max_wired;
+
+#ifdef DEBUG
+       if (mmapdebug & MDB_FOLLOW)
+               printf("mlock(%d): addr %x len %x\n",
+                      p->p_pid, uap->addr, uap->len);
+#endif
+       addr = (vm_offset_t)uap->addr;
+       if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
+               return (EINVAL);
+       size = round_page((vm_size_t)uap->len);
+       if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
+               return (EAGAIN);
+#ifdef pmap_wired_count
+       if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
+           p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
+               return (EAGAIN);
+#else
+       if (error = suser(p->p_ucred, &p->p_acflag))
+               return (error);
+#endif
+
+       error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
+       return (error == KERN_SUCCESS ? 0 : ENOMEM);
+}
+
+struct munlock_args {
+       caddr_t addr;
+       size_t  len;
+};
+int
+munlock(p, uap, retval)
+       struct proc *p;
+       struct munlock_args *uap;
+       int *retval;
+{
+       vm_offset_t addr;
+       vm_size_t size;
+       int error;
+
+#ifdef DEBUG
+       if (mmapdebug & MDB_FOLLOW)
+               printf("munlock(%d): addr %x len %x\n",
+                      p->p_pid, uap->addr, uap->len);
+#endif
+       addr = (vm_offset_t)uap->addr;
+       if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
+               return (EINVAL);
+#ifndef pmap_wired_count
+       if (error = suser(p->p_ucred, &p->p_acflag))
+               return (error);
+#endif
+       size = round_page((vm_size_t)uap->len);
+
+       error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
+       return (error == KERN_SUCCESS ? 0 : ENOMEM);
+}
+
 /*
  * Internal version of mmap.
  * Currently used by mmap, exec, and sys5 shared memory.
- * Handle is:
- *     MAP_FILE: a vnode pointer
- *     MAP_ANON: NULL or a file pointer
+ * Handle is either a vnode pointer or NULL for MAP_ANON.
  */
-vm_mmap(map, addr, size, prot, flags, handle, foff)
+int
+vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
        register vm_map_t map;
        register vm_offset_t *addr;
        register vm_size_t size;
-       vm_prot_t prot;
+       vm_prot_t prot, maxprot;
        register int flags;
        caddr_t handle;         /* XXX should be vp */
        vm_offset_t foff;
@@ -394,7 +579,7 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
        register vm_pager_t pager;
        boolean_t fitit;
        vm_object_t object;
-       struct vnode *vp;
+       struct vnode *vp = NULL;
        int type;
        int rv = KERN_SUCCESS;
 
@@ -406,7 +591,7 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
                *addr = round_page(*addr);
        } else {
                fitit = FALSE;
-               (void) vm_deallocate(map, *addr, size);
+               (void)vm_deallocate(map, *addr, size);
        }
 
        /*
@@ -414,7 +599,7 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
         * gain a reference to ensure continued existance of the object.
         * (XXX the exception is to appease the pageout daemon)
         */
-       if ((flags & MAP_TYPE) == MAP_ANON)
+       if (flags & MAP_ANON)
                type = PG_DFLT;
        else {
                vp = (struct vnode *)handle;
@@ -424,7 +609,7 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
                } else
                        type = PG_VNODE;
        }
-       pager = vm_pager_allocate(type, handle, size, prot);
+       pager = vm_pager_allocate(type, handle, size, prot, foff);
        if (pager == NULL)
                return (type == PG_DEVICE ? EINVAL : ENOMEM);
        /*
@@ -436,9 +621,9 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
        /*
         * Anonymous memory.
         */
-       if ((flags & MAP_TYPE) == MAP_ANON) {
+       if (flags & MAP_ANON) {
                rv = vm_allocate_with_pager(map, addr, size, fitit,
-                                           pager, (vm_offset_t)foff, TRUE);
+                                           pager, foff, TRUE);
                if (rv != KERN_SUCCESS) {
                        if (handle == NULL)
                                vm_pager_deallocate(pager);
@@ -449,6 +634,9 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
                /*
                 * Don't cache anonymous objects.
                 * Loses the reference gained by vm_pager_allocate.
+                * Note that object will be NULL when handle == NULL,
+                * this is ok since vm_allocate_with_pager has made
+                * sure that these objects are uncached.
                 */
                (void) pager_cache(object, FALSE);
 #ifdef DEBUG
@@ -458,12 +646,12 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
 #endif
        }
        /*
-        * Must be type MAP_FILE.
+        * Must be a mapped file.
         * Distinguish between character special and regular files.
         */
        else if (vp->v_type == VCHR) {
                rv = vm_allocate_with_pager(map, addr, size, fitit,
-                                           pager, (vm_offset_t)foff, FALSE);
+                                           pager, foff, FALSE);
                /*
                 * Uncache the object and lose the reference gained
                 * by vm_pager_allocate().  If the call to
@@ -493,7 +681,7 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
                if (flags & MAP_SHARED) {
                        rv = vm_allocate_with_pager(map, addr, size,
                                                    fitit, pager,
-                                                   (vm_offset_t)foff, FALSE);
+                                                   foff, FALSE);
                        if (rv != KERN_SUCCESS) {
                                vm_object_deallocate(object);
                                goto out;
@@ -532,7 +720,7 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
                        off = VM_MIN_ADDRESS;
                        rv = vm_allocate_with_pager(tmap, &off, size,
                                                    TRUE, pager,
-                                                   (vm_offset_t)foff, FALSE);
+                                                   foff, FALSE);
                        if (rv != KERN_SUCCESS) {
                                vm_object_deallocate(object);
                                vm_map_deallocate(tmap);
@@ -549,10 +737,10 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
                         * internal temporarily.
                         */
                        if ((flags & MAP_COPY) == 0)
-                               object->internal = TRUE;
+                               object->flags |= OBJ_INTERNAL;
                        rv = vm_map_copy(map, tmap, *addr, size, off,
                                         FALSE, FALSE);
-                       object->internal = FALSE;
+                       object->flags &= ~OBJ_INTERNAL;
                        /*
                         * (XXX)
                         * My oh my, this only gets worse...
@@ -579,8 +767,7 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
                         * sharing map is involved.  So we cheat and write
                         * protect everything ourselves.
                         */
-                       vm_object_pmap_copy(object, (vm_offset_t)foff,
-                                           (vm_offset_t)foff+size);
+                       vm_object_pmap_copy(object, foff, foff + size);
                        vm_object_deallocate(object);
                        vm_map_deallocate(tmap);
                        if (rv != KERN_SUCCESS)
@@ -594,24 +781,22 @@ vm_mmap(map, addr, size, prot, flags, handle, foff)
        }
        /*
         * Correct protection (default is VM_PROT_ALL).
-        * Note that we set the maximum protection.  This may not be
-        * entirely correct.  Maybe the maximum protection should be based
-        * on the object permissions where it makes sense (e.g. a vnode).
-        *
-        * Changed my mind: leave max prot at VM_PROT_ALL.
+        * If maxprot is different than prot, we must set both explicitly.
         */
-       if (prot != VM_PROT_ALL) {
+       rv = KERN_SUCCESS;
+       if (maxprot != VM_PROT_ALL)
+               rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
+       if (rv == KERN_SUCCESS && prot != maxprot)
                rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
-               if (rv != KERN_SUCCESS) {
-                       (void) vm_deallocate(map, *addr, size);
-                       goto out;
-               }
+       if (rv != KERN_SUCCESS) {
+               (void) vm_deallocate(map, *addr, size);
+               goto out;
        }
        /*
         * Shared memory is also shared with children.
         */
        if (flags & MAP_SHARED) {
-               rv = vm_inherit(map, *addr, size, VM_INHERIT_SHARE);
+               rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
                if (rv != KERN_SUCCESS) {
                        (void) vm_deallocate(map, *addr, size);
                        goto out;
@@ -634,166 +819,3 @@ out:
                return (EINVAL);
        }
 }
-
-/*
- * Internal bastardized version of MACHs vm_region system call.
- * Given address and size it returns map attributes as well
- * as the (locked) object mapped at that location. 
- */
-vm_region(map, addr, size, prot, max_prot, inheritance, shared, object, objoff)
-       vm_map_t        map;
-       vm_offset_t     *addr;          /* IN/OUT */
-       vm_size_t       *size;          /* OUT */
-       vm_prot_t       *prot;          /* OUT */
-       vm_prot_t       *max_prot;      /* OUT */
-       vm_inherit_t    *inheritance;   /* OUT */
-       boolean_t       *shared;        /* OUT */
-       vm_object_t     *object;        /* OUT */
-       vm_offset_t     *objoff;        /* OUT */
-{
-       vm_map_entry_t  tmp_entry;
-       register
-       vm_map_entry_t  entry;
-       register
-       vm_offset_t     tmp_offset;
-       vm_offset_t     start;
-
-       if (map == NULL)
-               return(KERN_INVALID_ARGUMENT);
-       
-       start = *addr;
-
-       vm_map_lock_read(map);
-       if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
-               if ((entry = tmp_entry->next) == &map->header) {
-                       vm_map_unlock_read(map);
-                       return(KERN_NO_SPACE);
-               }
-               start = entry->start;
-               *addr = start;
-       } else
-               entry = tmp_entry;
-
-       *prot = entry->protection;
-       *max_prot = entry->max_protection;
-       *inheritance = entry->inheritance;
-
-       tmp_offset = entry->offset + (start - entry->start);
-       *size = (entry->end - start);
-
-       if (entry->is_a_map) {
-               register vm_map_t share_map;
-               vm_size_t share_size;
-
-               share_map = entry->object.share_map;
-
-               vm_map_lock_read(share_map);
-               (void) vm_map_lookup_entry(share_map, tmp_offset, &tmp_entry);
-
-               if ((share_size = (tmp_entry->end - tmp_offset)) < *size)
-                       *size = share_size;
-
-               vm_object_lock(tmp_entry->object);
-               *object = tmp_entry->object.vm_object;
-               *objoff = tmp_entry->offset + (tmp_offset - tmp_entry->start);
-
-               *shared = (share_map->ref_count != 1);
-               vm_map_unlock_read(share_map);
-       } else {
-               vm_object_lock(entry->object);
-               *object = entry->object.vm_object;
-               *objoff = tmp_offset;
-
-               *shared = FALSE;
-       }
-
-       vm_map_unlock_read(map);
-
-       return(KERN_SUCCESS);
-}
-
-/*
- * Yet another bastard routine.
- */
-vm_allocate_with_pager(map, addr, size, fitit, pager, poffset, internal)
-       register vm_map_t       map;
-       register vm_offset_t    *addr;
-       register vm_size_t      size;
-       boolean_t               fitit;
-       vm_pager_t              pager;
-       vm_offset_t             poffset;
-       boolean_t               internal;
-{
-       register vm_object_t    object;
-       register int            result;
-
-       if (map == NULL)
-               return(KERN_INVALID_ARGUMENT);
-
-       *addr = trunc_page(*addr);
-       size = round_page(size);
-
-       /*
-        *      Lookup the pager/paging-space in the object cache.
-        *      If it's not there, then create a new object and cache
-        *      it.
-        */
-       object = vm_object_lookup(pager);
-       vm_stat.lookups++;
-       if (object == NULL) {
-               object = vm_object_allocate(size);
-               vm_object_enter(object, pager);
-       } else
-               vm_stat.hits++;
-       object->internal = internal;
-
-       result = vm_map_find(map, object, poffset, addr, size, fitit);
-       if (result != KERN_SUCCESS)
-               vm_object_deallocate(object);
-       else if (pager != NULL)
-               vm_object_setpager(object, pager, (vm_offset_t) 0, TRUE);
-       return(result);
-}
-
-/*
- * XXX: this routine belongs in vm_map.c.
- *
- * Returns TRUE if the range [start - end) is allocated in either
- * a single entry (single_entry == TRUE) or multiple contiguous
- * entries (single_entry == FALSE).
- *
- * start and end should be page aligned.
- */
-boolean_t
-vm_map_is_allocated(map, start, end, single_entry)
-       vm_map_t map;
-       vm_offset_t start, end;
-       boolean_t single_entry;
-{
-       vm_map_entry_t mapent;
-       register vm_offset_t nend;
-
-       vm_map_lock_read(map);
-
-       /*
-        * Start address not in any entry
-        */
-       if (!vm_map_lookup_entry(map, start, &mapent)) {
-               vm_map_unlock_read(map);
-               return (FALSE);
-       }
-       /*
-        * Find the maximum stretch of contiguously allocated space
-        */
-       nend = mapent->end;
-       if (!single_entry) {
-               mapent = mapent->next;
-               while (mapent != &map->header && mapent->start == nend) {
-                       nend = mapent->end;
-                       mapent = mapent->next;
-               }
-       }
-
-       vm_map_unlock_read(map);
-       return (end <= nend);
-}
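
Postscript on the COMPAT_43 ommap() shim added above: the old 4.3BSD protection
value is only three bits wide, and ommap() simply indexes cvtbsdprot[] with it
to produce the new PROT_* bits. A stand-alone sketch of just that table lookup
(the PROT_* values are restated here and assumed to match <sys/mman.h>):

    #include <stdio.h>

    #define PROT_READ	0x01	/* assumed to match <sys/mman.h> */
    #define PROT_WRITE	0x02
    #define PROT_EXEC	0x04

    /* Same table as in ommap(): old prot bits in, new PROT_* bits out. */
    static const char cvtbsdprot[8] = {
    	0,
    	PROT_EXEC,
    	PROT_WRITE,
    	PROT_EXEC|PROT_WRITE,
    	PROT_READ,
    	PROT_EXEC|PROT_READ,
    	PROT_WRITE|PROT_READ,
    	PROT_EXEC|PROT_WRITE|PROT_READ,
    };

    int
    main(void)
    {
    	int oprot;

    	for (oprot = 0; oprot < 8; oprot++)
    		printf("old prot %d -> new prot %#x\n",
    		    oprot, cvtbsdprot[oprot & 0x7]);
    	return (0);
    }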