massive overhaul of msync, get rid of hack support routines.
usr/src/sys/vm/vm_mmap.c

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * %sccs.include.redist.c%
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) %G%
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_prot.h>

#ifdef DEBUG
int mmapdebug = 0;
#define	MDB_FOLLOW	0x01
#define	MDB_SYNC	0x02
#define	MDB_MAPIT	0x04
#endif

struct sbrk_args {
	int	incr;
};
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct sstk_args {
	int	incr;
};
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
struct getpagesize_args {
	int	dummy;
};
/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

struct mmap_args {
	caddr_t	addr;
	size_t	len;
	int	prot;
	int	flags;
	int	fd;
	long	pad;
	off_t	pos;
};

#ifdef COMPAT_43
struct ommap_args {
	caddr_t	addr;
	int	len;
	int	prot;
	int	flags;
	int	fd;
	long	pos;
};
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
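	/*
	 * cvtbsdprot[] is indexed by the low three bits of the old
	 * 4.3BSD-style prot value (1 = exec, 2 = write, 4 = read)
	 * and yields the equivalent new PROT_* mask; e.g. an old
	 * prot of 5 (exec|read) converts to PROT_EXEC|PROT_READ.
	 */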
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot&0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
		       p->p_pid, uap->addr, uap->len, prot,
		       flags, uap->fd, (vm_offset_t)uap->pos);
#endif
	/*
	 * Address (if FIXED) must be page aligned.
	 * Size is implicitly rounded to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
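	/*
	 * Example: with 4K pages a len of 0x1001 rounds to a size of
	 * 0x2000, so the mapping covers two full pages even though
	 * the caller asked for one byte more than a page.
	 */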
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);
	}
	/*
	 * XXX if no hint provided for a non-fixed mapping place it after
	 * the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation.
		 * Obtain vnode and make sure it is of appropriate type.
		 */
		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon
		 * memory (ala SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?
			 * What if proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t)vp;
		}
	}
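	/*
	 * Net effect of the above: a shared mapping can never be more
	 * permissive than the underlying open file (e.g. a descriptor
	 * opened read-only yields maxprot of EXEC|READ, so a PROT_WRITE
	 * request draws EACCES), while a private mapping always allows
	 * write since changes are never pushed back to the file.
	 */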
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, (vm_offset_t)uap->pos);
	if (error == 0)
		*retval = (int)addr;
	return (error);
}
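
/*
 * Illustrative use from user level:
 *
 *	fd = open("/some/file", O_RDONLY);
 *	addr = mmap((caddr_t)0, len, PROT_READ, MAP_PRIVATE, fd, (off_t)0);
 *
 * With a zero hint and no MAP_FIXED the free-space search starts
 * just past the largest possible heap (vm_daddr + MAXDSIZ).
 */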

struct msync_args {
	caddr_t	addr;
	int	len;
};
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;
	int rv;
	boolean_t syncio, invalidate;

#ifdef DEBUG
	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
		printf("msync(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	addr = (vm_offset_t)uap->addr;
	size = (vm_size_t)uap->len;
	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}
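	/*
	 * Example: for a mapping established by a single mmap() at
	 * address A covering three pages, msync(A, 0) flushes the
	 * whole range [A, A+3*PAGE_SIZE), i.e. the entire map entry,
	 * even if only one of the pages was dirtied.
	 */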
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: cleaning/flushing address range [%x-%x)\n",
		       addr, addr+size);
#endif
	/*
	 * Could pass this in as a third flag argument to implement
	 * Sun's MS_ASYNC.
	 */
	syncio = TRUE;
	/*
	 * XXX bummer, gotta flush all cached pages to ensure
	 * consistency with the file system cache.  Otherwise, we could
	 * pass this in to implement Sun's MS_INVALIDATE.
	 */
	invalidate = TRUE;
	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}

struct munmap_args {
	caddr_t	addr;
	int	len;
};
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmap(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return(EINVAL);
	size = (vm_size_t) round_page(uap->len);
	if (size == 0)
		return(0);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return(EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr+size);
	return(0);
}

void
munmapfd(fd)
	int fd;
{
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
#endif

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

struct mprotect_args {
	caddr_t	addr;
	int	len;
	int	prot;
};
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mprotect(%d): addr %x len %x prot %d\n",
		       p->p_pid, uap->addr, uap->len, uap->prot);
#endif

	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return(EINVAL);
	size = (vm_size_t)uap->len;
	prot = uap->prot & VM_PROT_ALL;

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

struct madvise_args {
	caddr_t	addr;
	int	len;
	int	behav;
};
/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mincore_args {
	caddr_t	addr;
	int	len;
	char	*vec;
};
/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mlock_args {
	caddr_t	addr;
	size_t	len;
};
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;
	extern int vm_page_max_wired;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	size = round_page((vm_size_t)uap->len);
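	/*
	 * Two limits apply: the global wired-page total
	 * (vm_page_max_wired) and, where pmap_wired_count exists,
	 * the per-process RLIMIT_MEMLOCK.  Example: with 4K pages,
	 * locking 1MB wires 256 pages against both limits.
	 */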
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

struct munlock_args {
	caddr_t	addr;
	size_t	len;
};
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
#ifndef pmap_wired_count
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif
	size = round_page((vm_size_t)uap->len);

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		(void)vm_deallocate(map, *addr, size);
	}

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existence of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
		    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL,
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
		    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was successful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			       vp, pager);
#endif
		/*
		 * Map it directly.
		 * Allows modifications to go out to the vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
			    fitit, pager, foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way
			 * of ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);
		}
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW, you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
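		/*
		 * Concretely: if another process write()s to the file
		 * after the mapping is established, a MAP_PRIVATE
		 * mapping sees the new data in pages it has not itself
		 * dirtied, whereas a MAP_COPY mapping keeps its
		 * snapshot of the file as of mmap() time.
		 */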
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
			    addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
			    VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
			    TRUE, pager, foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that, we need to guarantee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleaze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
			    FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
				    &tentry, &tobject, &toffset,
				    &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different than prot, we must set both explicitly.
	 */
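	/*
	 * For example, a MAP_SHARED mapping of a descriptor opened
	 * read-only arrives here with maxprot = EXEC|READ and prot =
	 * READ: the maximum protection is lowered first, then the
	 * current protection is set within it.
	 */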
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}