/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * %sccs.include.redist.c%
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.6 (Berkeley) %G%
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_prot.h>

#ifdef DEBUG
int mmapdebug = 0;
#define	MDB_FOLLOW	0x01
#define	MDB_SYNC	0x02
#define	MDB_MAPIT	0x04
#endif

struct sbrk_args {
	int	incr;
};
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct sstk_args {
	int	incr;
};
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}
73
dba7fe12 74#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
abf092ba
KM
75struct getpagesize_args {
76 int dummy;
77};
78/* ARGSUSED */
79int
7c3e64db 80ogetpagesize(p, uap, retval)
abf092ba
KM
81 struct proc *p;
82 struct getpagesize_args *uap;
83 int *retval;
84{
85
86 *retval = PAGE_SIZE;
87 return (0);
88}
dba7fe12 89#endif /* COMPAT_43 || COMPAT_SUNOS */

struct mmap_args {
	caddr_t	addr;
	size_t	len;
	int	prot;
	int	flags;
	int	fd;
	long	pad;
	off_t	pos;
};

#ifdef COMPAT_43
struct ommap_args {
	caddr_t	addr;
	int	len;
	int	prot;
	int	flags;
	int	fd;
	long	pos;
};
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot&0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif
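
/*
 * Conversion example (editor's sketch, not part of the original
 * source): the old 4.3BSD protection value indexes cvtbsdprot[]
 * directly, with bit 0 = execute, bit 1 = write, bit 2 = read.
 * So an old-style prot of 6 (write|read bits set) and no OMAP_SHARED
 * flag is forwarded to the new mmap() as roughly:
 *
 *	nargs.prot  = cvtbsdprot[6];	(== PROT_WRITE|PROT_READ)
 *	nargs.flags = MAP_PRIVATE;
 */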

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr, pos;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mmap(%d): addr %x len %x prot %x flg %x fd %d pos %x\n",
		       p->p_pid, uap->addr, uap->len, prot,
		       flags, uap->fd, pos);
#endif
	/*
	 * Address (if FIXED) must be page aligned.
	 * Size is implicitly rounded to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    ((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);
	}
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	} else {
		/*
		 * Mapping file, get fp for validation.
		 * Obtain vnode and make sure it is of appropriate type.
		 */
		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon
		 * memory (ala SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?
			 * What if proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t)vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);
	if (error == 0)
		*retval = (int)addr;
	return (error);
}
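
/*
 * Usage sketch (editor's addition, not part of the original source):
 * the two styles of anonymous mapping accepted above are equivalent
 * from userland.  MAP_ANON requires fd == -1, while a SunOS-style
 * /dev/zero mapping is rewritten into MAP_ANON by the VCHR check:
 *
 *	p = mmap(0, len, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
 *
 *	fd = open("/dev/zero", O_RDWR);
 *	p = mmap(0, len, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
 */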

struct msync_args {
	caddr_t	addr;
	int	len;
};
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;
	int rv;
	boolean_t syncio, invalidate;

#ifdef DEBUG
	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
		printf("msync(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	addr = (vm_offset_t)uap->addr;
	size = (vm_size_t)uap->len;
	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: cleaning/flushing address range [%x-%x)\n",
		       addr, addr+size);
#endif
	/*
	 * Could pass this in as a third flag argument to implement
	 * Sun's MS_ASYNC.
	 */
	syncio = TRUE;
	/*
	 * XXX bummer, gotta flush all cached pages to ensure
	 * consistency with the file system cache.  Otherwise, we could
	 * pass this in to implement Sun's MS_INVALIDATE.
	 */
	invalidate = TRUE;
	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}
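
/*
 * Worked example (editor's sketch, not part of the original source):
 * with the len == 0 approximation above, a sequence such as
 *
 *	buf = mmap(0, 3*PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
 *	buf[0] = 1;
 *	msync(buf, 0);
 *
 * flushes whatever map entry contains buf, which may be larger or
 * smaller than the original three-page mapping if neighboring entries
 * have been coalesced or the region has been split.
 */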

struct munmap_args {
	caddr_t	addr;
	int	len;
};
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmap(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return(EINVAL);
	size = (vm_size_t) round_page(uap->len);
	if (size == 0)
		return(0);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return(EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr+size);
	return(0);
}
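
/*
 * Arithmetic note (editor's sketch, not part of the original source):
 * with a 4096-byte page, PAGE_MASK is 0xfff, so the checks above work
 * out as, for example:
 *
 *	round_page(5000) == 8192	(len rounds up to two pages)
 *	(0x1234 & PAGE_MASK) != 0	(unaligned addr => EINVAL)
 *	addr > addr + size		(overflow check on the unsigned sum)
 */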

void
munmapfd(fd)
	int fd;
{
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
#endif

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

struct mprotect_args {
	caddr_t	addr;
	int	len;
	int	prot;
};
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mprotect(%d): addr %x len %x prot %d\n",
		       p->p_pid, uap->addr, uap->len, uap->prot);
#endif

	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return(EINVAL);
	size = (vm_size_t)uap->len;
	prot = uap->prot & VM_PROT_ALL;

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
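
/*
 * Usage note (editor's sketch, not part of the original source):
 * because mmap() above records a maxprot for file mappings, a later
 * mprotect() can only narrow or restore rights up to that maximum.
 * For a file opened read-only and mapped MAP_SHARED:
 *
 *	mprotect(p, len, PROT_READ);		succeeds
 *	mprotect(p, len, PROT_READ|PROT_WRITE);	fails with EACCES
 *
 * since vm_map_protect() returns KERN_PROTECTION_FAILURE when the
 * requested protection exceeds the entry's max_protection.
 */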

struct madvise_args {
	caddr_t	addr;
	int	len;
	int	behav;
};
/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mincore_args {
	caddr_t	addr;
	int	len;
	char	*vec;
};
/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mlock_args {
	caddr_t	addr;
	size_t	len;
};
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;
	extern int vm_page_max_wired;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	size = round_page((vm_size_t)uap->len);
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
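
/*
 * Accounting sketch (editor's addition, not part of the original
 * source): the two limits above are checked in different units.  The
 * global check counts pages (atop() converts bytes to pages), while
 * the per-process check compares bytes (ptoa() converts the pmap's
 * wired page count back to bytes):
 *
 *	atop(size) + cnt.v_wire_count        vs.  vm_page_max_wired  [pages]
 *	size + ptoa(pmap_wired_count(pmap))  vs.  rlim_cur           [bytes]
 */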

struct munlock_args {
	caddr_t	addr;
	size_t	len;
};
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
#ifndef pmap_wired_count
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif
	size = round_page((vm_size_t)uap->len);

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		(void)vm_deallocate(map, *addr, size);
	}

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existence of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
		    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL,
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
		    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was successful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			       vp, pager);
#endif
		/*
		 * Map it directly.
		 * Allows modifications to go out to the vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
			    fitit, pager, foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way
			 * of ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);
		}
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW, you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
			    addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
			    VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
			    TRUE, pager, foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that, we need to guarantee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleaze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
			    FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
				    &tentry, &tobject, &toffset,
				    &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different than prot, we must set both explicitly.
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
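
/*
 * Worked example (editor's sketch, not part of the original source):
 * for a MAP_SHARED mapping of a file opened read-only with
 * prot == PROT_READ, mmap() computes
 * maxprot = VM_PROT_EXECUTE|VM_PROT_READ, so the protection fixup at
 * the end of vm_mmap() issues two vm_map_protect() calls:
 *
 *	vm_map_protect(map, a, a+sz, VM_PROT_EXECUTE|VM_PROT_READ, TRUE);
 *	vm_map_protect(map, a, a+sz, VM_PROT_READ, FALSE);
 *
 * first clamping max_protection, then narrowing the current protection.
 */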