Commit | Line | Data |
---|---|---|
619edcce KM |
1 | /* |
2 | * Copyright (c) 1988 University of Utah. | |
4a8de8a3 KB |
3 | * Copyright (c) 1991, 1993 |
4 | * The Regents of the University of California. All rights reserved. | |
619edcce KM |
5 | * |
6 | * This code is derived from software contributed to Berkeley by | |
7 | * the Systems Programming Group of the University of Utah Computer | |
8 | * Science Department. | |
9 | * | |
10 | * %sccs.include.redist.c% | |
11 | * | |
dbc00f04 | 12 | * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ |
619edcce | 13 | * |
d97d2118 | 14 | * @(#)vm_mmap.c 8.4 (Berkeley) %G% |
619edcce KM |
15 | */ |
16 | ||
17 | /* | |
18 | * Mapped file (mmap) interface to VM | |
19 | */ | |
20 | ||
3266719e KB |
21 | #include <sys/param.h> |
22 | #include <sys/systm.h> | |
23 | #include <sys/filedesc.h> | |
2a32d5bc | 24 | #include <sys/resourcevar.h> |
3266719e KB |
25 | #include <sys/proc.h> |
26 | #include <sys/vnode.h> | |
3266719e KB |
27 | #include <sys/file.h> |
28 | #include <sys/mman.h> | |
29 | #include <sys/conf.h> | |
619edcce | 30 | |
ba19ac11 JSP |
31 | #include <miscfs/specfs/specdev.h> |
32 | ||
3266719e KB |
33 | #include <vm/vm.h> |
34 | #include <vm/vm_pager.h> | |
35 | #include <vm/vm_prot.h> | |
619edcce KM |
36 | |
37 | #ifdef DEBUG | |
38 | int mmapdebug = 0; | |
39 | #define MDB_FOLLOW 0x01 | |
40 | #define MDB_SYNC 0x02 | |
41 | #define MDB_MAPIT 0x04 | |
42 | #endif | |
43 | ||
dd89ed8a CT |
/*
 * Argument block handed to sbrk().
 */
struct sbrk_args {
	int	incr;		/* never examined by the stub below */
};

/*
 * sbrk system call.
 *
 * Placeholder only: none of the arguments are looked at and the call
 * always fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{
	/* Not yet implemented */
	return EOPNOTSUPP;
}
58 | ||
dd89ed8a CT |
/*
 * Argument block handed to sstk().
 */
struct sstk_args {
	int	incr;		/* never examined by the stub below */
};

/*
 * sstk system call.
 *
 * Placeholder only: none of the arguments are looked at and the call
 * always fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{
	/* Not yet implemented */
	return EOPNOTSUPP;
}
73 | ||
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Argument block for ogetpagesize(); the call takes no real arguments.
 */
struct getpagesize_args {
	int	dummy;
};

/*
 * Compatibility getpagesize: report PAGE_SIZE through *retval.
 * Always succeeds (returns 0); p and uap are not used.
 */
/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{
	int *const result = retval;

	*result = PAGE_SIZE;
	return 0;
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
abf092ba | 90 | |
d74ab96d KB |
/*
 * Argument block for mmap().  ommap() also builds one of these on its
 * stack in order to call mmap() directly.
 */
struct mmap_args {
	caddr_t	addr;	/* requested address; must be page aligned if MAP_FIXED */
	size_t	len;	/* length in bytes; mmap() rounds it up to whole pages */
	int	prot;	/* protection bits; mmap() masks them with VM_PROT_ALL */
	int	flags;	/* MAP_* flags */
	int	fd;	/* descriptor to map; mmap() requires -1 with MAP_ANON */
	long	pad;	/* not referenced in this file; presumably aligns pos -- TODO confirm */
	off_t	pos;	/* file offset, handed to vm_mmap() as foff */
};
100 | ||
#ifdef COMPAT_43
/*
 * Argument block for ommap(), the mmap entry point that is compiled
 * only under COMPAT_43.
 */
struct ommap_args {
	caddr_t	addr;
	int	len;
	int	prot;
	int	flags;
	int	fd;
	long	pos;
};

/*
 * Old-style mmap.
 *
 * Converts the old protection encoding and OMAP_* flag bits into the
 * PROT_* / MAP_* values understood by mmap(), copies the remaining
 * arguments across unchanged and returns whatever mmap() returns.
 * When OMAP_SHARED is not set the mapping is requested MAP_PRIVATE.
 */
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	/* Indexed by the low three bits of the old prot argument. */
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
	struct mmap_args nargs;
	int oldflags, newflags;
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	/* Translate the flag word one bit at a time. */
	oldflags = uap->flags;
	newflags = (oldflags & OMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;
	if (oldflags & OMAP_ANON)
		newflags |= MAP_ANON;
	if (oldflags & OMAP_COPY)
		newflags |= MAP_COPY;
	if (oldflags & OMAP_FIXED)
		newflags |= MAP_FIXED;
	if (oldflags & OMAP_INHERIT)
		newflags |= MAP_INHERIT;

	/* Build the new-style argument block (pad is left unset). */
	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = newflags;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;

	return mmap(p, &nargs, retval);
}
#endif
154 | ||
155 | int | |
d74ab96d | 156 | mmap(p, uap, retval) |
eaf887ea KM |
157 | struct proc *p; |
158 | register struct mmap_args *uap; | |
159 | int *retval; | |
619edcce | 160 | { |
cc3c05c4 KM |
161 | register struct filedesc *fdp = p->p_fd; |
162 | register struct file *fp; | |
619edcce KM |
163 | struct vnode *vp; |
164 | vm_offset_t addr; | |
165 | vm_size_t size; | |
f06c50cb | 166 | vm_prot_t prot, maxprot; |
619edcce | 167 | caddr_t handle; |
be5cfeb2 | 168 | int flags, error; |
619edcce | 169 | |
263fc7c5 | 170 | prot = uap->prot & VM_PROT_ALL; |
be5cfeb2 | 171 | flags = uap->flags; |
619edcce KM |
172 | #ifdef DEBUG |
173 | if (mmapdebug & MDB_FOLLOW) | |
174 | printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n", | |
263fc7c5 | 175 | p->p_pid, uap->addr, uap->len, prot, |
c0b1c663 | 176 | flags, uap->fd, (vm_offset_t)uap->pos); |
619edcce | 177 | #endif |
619edcce | 178 | /* |
04107e61 MH |
179 | * Address (if FIXED) must be page aligned. |
180 | * Size is implicitly rounded to a page boundary. | |
619edcce | 181 | */ |
04107e61 | 182 | addr = (vm_offset_t) uap->addr; |
263fc7c5 KM |
183 | if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) || |
184 | (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1)) | |
eaf887ea | 185 | return (EINVAL); |
04107e61 | 186 | size = (vm_size_t) round_page(uap->len); |
dda79633 KM |
187 | /* |
188 | * Check for illegal addresses. Watch out for address wrap... | |
189 | * Note that VM_*_ADDRESS are not constants due to casts (argh). | |
190 | */ | |
be5cfeb2 | 191 | if (flags & MAP_FIXED) { |
dda79633 KM |
192 | if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS) |
193 | return (EINVAL); | |
194 | if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) | |
195 | return (EINVAL); | |
196 | if (addr > addr + size) | |
197 | return (EINVAL); | |
198 | } | |
04107e61 MH |
199 | /* |
200 | * XXX if no hint provided for a non-fixed mapping place it after | |
201 | * the end of the largest possible heap. | |
202 | * | |
203 | * There should really be a pmap call to determine a reasonable | |
204 | * location. | |
205 | */ | |
be5cfeb2 | 206 | if (addr == 0 && (flags & MAP_FIXED) == 0) |
04107e61 | 207 | addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ); |
f06c50cb | 208 | if (flags & MAP_ANON) { |
263fc7c5 KM |
209 | /* |
210 | * Mapping blank space is trivial. | |
211 | */ | |
eaf887ea | 212 | handle = NULL; |
f06c50cb MH |
213 | maxprot = VM_PROT_ALL; |
214 | } else { | |
619edcce | 215 | /* |
eaf887ea | 216 | * Mapping file, get fp for validation. |
263fc7c5 | 217 | * Obtain vnode and make sure it is of appropriate type. |
619edcce | 218 | */ |
eaf887ea KM |
219 | if (((unsigned)uap->fd) >= fdp->fd_nfiles || |
220 | (fp = fdp->fd_ofiles[uap->fd]) == NULL) | |
263fc7c5 | 221 | return (EBADF); |
619edcce | 222 | if (fp->f_type != DTYPE_VNODE) |
263fc7c5 | 223 | return (EINVAL); |
619edcce KM |
224 | vp = (struct vnode *)fp->f_data; |
225 | if (vp->v_type != VREG && vp->v_type != VCHR) | |
263fc7c5 | 226 | return (EINVAL); |
f06c50cb | 227 | /* |
4d8cfd32 MH |
228 | * XXX hack to handle use of /dev/zero to map anon |
229 | * memory (ala SunOS). | |
f06c50cb | 230 | */ |
4d8cfd32 MH |
231 | if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) { |
232 | handle = NULL; | |
233 | maxprot = VM_PROT_ALL; | |
234 | flags |= MAP_ANON; | |
235 | } else { | |
236 | /* | |
237 | * Ensure that file and memory protections are | |
238 | * compatible. Note that we only worry about | |
239 | * writability if mapping is shared; in this case, | |
240 | * current and max prot are dictated by the open file. | |
241 | * XXX use the vnode instead? Problem is: what | |
242 | * credentials do we use for determination? | |
243 | * What if proc does a setuid? | |
244 | */ | |
245 | maxprot = VM_PROT_EXECUTE; /* ??? */ | |
246 | if (fp->f_flag & FREAD) | |
247 | maxprot |= VM_PROT_READ; | |
248 | else if (prot & PROT_READ) | |
263fc7c5 | 249 | return (EACCES); |
4d8cfd32 MH |
250 | if (flags & MAP_SHARED) { |
251 | if (fp->f_flag & FWRITE) | |
252 | maxprot |= VM_PROT_WRITE; | |
253 | else if (prot & PROT_WRITE) | |
254 | return (EACCES); | |
255 | } else | |
256 | maxprot |= VM_PROT_WRITE; | |
257 | handle = (caddr_t)vp; | |
258 | } | |
eaf887ea | 259 | } |
f06c50cb | 260 | error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, |
263fc7c5 | 261 | flags, handle, (vm_offset_t)uap->pos); |
619edcce | 262 | if (error == 0) |
263fc7c5 KM |
263 | *retval = (int)addr; |
264 | return (error); | |
619edcce KM |
265 | } |
266 | ||
dd89ed8a CT |
267 | struct msync_args { |
268 | caddr_t addr; | |
269 | int len; | |
270 | }; | |
3266719e | 271 | int |
619edcce KM |
272 | msync(p, uap, retval) |
273 | struct proc *p; | |
dd89ed8a | 274 | struct msync_args *uap; |
619edcce KM |
275 | int *retval; |
276 | { | |
d97d2118 MH |
277 | vm_offset_t addr; |
278 | vm_size_t size; | |
279 | vm_map_t map; | |
619edcce | 280 | int rv; |
d97d2118 | 281 | boolean_t syncio, invalidate; |
619edcce KM |
282 | |
283 | #ifdef DEBUG | |
284 | if (mmapdebug & (MDB_FOLLOW|MDB_SYNC)) | |
285 | printf("msync(%d): addr %x len %x\n", | |
286 | p->p_pid, uap->addr, uap->len); | |
287 | #endif | |
d97d2118 MH |
288 | if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr) |
289 | return (EINVAL); | |
290 | map = &p->p_vmspace->vm_map; | |
291 | addr = (vm_offset_t)uap->addr; | |
292 | size = (vm_size_t)uap->len; | |
619edcce | 293 | /* |
d97d2118 MH |
294 | * XXX Gak! If size is zero we are supposed to sync "all modified |
295 | * pages with the region containing addr". Unfortunately, we | |
296 | * don't really keep track of individual mmaps so we approximate | |
297 | * by flushing the range of the map entry containing addr. | |
298 | * This can be incorrect if the region splits or is coalesced | |
299 | * with a neighbor. | |
619edcce | 300 | */ |
d97d2118 MH |
301 | if (size == 0) { |
302 | vm_map_entry_t entry; | |
303 | ||
304 | vm_map_lock_read(map); | |
305 | rv = vm_map_lookup_entry(map, addr, &entry); | |
306 | vm_map_unlock_read(map); | |
307 | if (rv) | |
308 | return (EINVAL); | |
309 | addr = entry->start; | |
310 | size = entry->end - entry->start; | |
311 | } | |
619edcce KM |
312 | #ifdef DEBUG |
313 | if (mmapdebug & MDB_SYNC) | |
d97d2118 MH |
314 | printf("msync: cleaning/flushing address range [%x-%x)\n", |
315 | addr, addr+size); | |
619edcce KM |
316 | #endif |
317 | /* | |
d97d2118 MH |
318 | * Could pass this in as a third flag argument to implement |
319 | * Sun's MS_ASYNC. | |
619edcce | 320 | */ |
d97d2118 | 321 | syncio = TRUE; |
619edcce | 322 | /* |
d97d2118 MH |
323 | * XXX bummer, gotta flush all cached pages to ensure |
324 | * consistency with the file system cache. Otherwise, we could | |
325 | * pass this in to implement Sun's MS_INVALIDATE. | |
619edcce | 326 | */ |
d97d2118 MH |
327 | invalidate = TRUE; |
328 | /* | |
329 | * Clean the pages and interpret the return value. | |
330 | */ | |
331 | rv = vm_map_clean(map, addr, addr+size, syncio, invalidate); | |
332 | switch (rv) { | |
333 | case KERN_SUCCESS: | |
334 | break; | |
335 | case KERN_INVALID_ADDRESS: | |
336 | return (EINVAL); /* Sun returns ENOMEM? */ | |
337 | case KERN_FAILURE: | |
338 | return (EIO); | |
339 | default: | |
340 | return (EINVAL); | |
341 | } | |
342 | return (0); | |
619edcce KM |
343 | } |
344 | ||
dd89ed8a CT |
345 | struct munmap_args { |
346 | caddr_t addr; | |
347 | int len; | |
348 | }; | |
3266719e | 349 | int |
619edcce KM |
350 | munmap(p, uap, retval) |
351 | register struct proc *p; | |
dd89ed8a | 352 | register struct munmap_args *uap; |
619edcce KM |
353 | int *retval; |
354 | { | |
355 | vm_offset_t addr; | |
356 | vm_size_t size; | |
d97d2118 | 357 | vm_map_t map; |
619edcce KM |
358 | |
359 | #ifdef DEBUG | |
360 | if (mmapdebug & MDB_FOLLOW) | |
361 | printf("munmap(%d): addr %x len %x\n", | |
362 | p->p_pid, uap->addr, uap->len); | |
363 | #endif | |
364 | ||
365 | addr = (vm_offset_t) uap->addr; | |
08cd4915 | 366 | if ((addr & PAGE_MASK) || uap->len < 0) |
619edcce | 367 | return(EINVAL); |
04107e61 | 368 | size = (vm_size_t) round_page(uap->len); |
619edcce KM |
369 | if (size == 0) |
370 | return(0); | |
dda79633 KM |
371 | /* |
372 | * Check for illegal addresses. Watch out for address wrap... | |
373 | * Note that VM_*_ADDRESS are not constants due to casts (argh). | |
374 | */ | |
375 | if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS) | |
376 | return (EINVAL); | |
377 | if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) | |
378 | return (EINVAL); | |
379 | if (addr > addr + size) | |
380 | return (EINVAL); | |
d97d2118 MH |
381 | map = &p->p_vmspace->vm_map; |
382 | /* | |
383 | * Make sure entire range is allocated. | |
384 | */ | |
385 | if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) | |
619edcce KM |
386 | return(EINVAL); |
387 | /* returns nothing but KERN_SUCCESS anyway */ | |
d97d2118 | 388 | (void) vm_map_remove(map, addr, addr+size); |
619edcce KM |
389 | return(0); |
390 | } | |
391 | ||
3266719e | 392 | void |
619edcce | 393 | munmapfd(fd) |
67f54264 | 394 | int fd; |
619edcce KM |
395 | { |
396 | #ifdef DEBUG | |
397 | if (mmapdebug & MDB_FOLLOW) | |
451a445a | 398 | printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd); |
619edcce KM |
399 | #endif |
400 | ||
401 | /* | |
d97d2118 | 402 | * XXX should vm_deallocate any regions mapped to this file |
619edcce | 403 | */ |
451a445a | 404 | curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED; |
619edcce KM |
405 | } |
406 | ||
dd89ed8a CT |
407 | struct mprotect_args { |
408 | caddr_t addr; | |
409 | int len; | |
410 | int prot; | |
411 | }; | |
3266719e | 412 | int |
619edcce KM |
413 | mprotect(p, uap, retval) |
414 | struct proc *p; | |
dd89ed8a | 415 | struct mprotect_args *uap; |
619edcce KM |
416 | int *retval; |
417 | { | |
418 | vm_offset_t addr; | |
419 | vm_size_t size; | |
420 | register vm_prot_t prot; | |
421 | ||
422 | #ifdef DEBUG | |
423 | if (mmapdebug & MDB_FOLLOW) | |
424 | printf("mprotect(%d): addr %x len %x prot %d\n", | |
425 | p->p_pid, uap->addr, uap->len, uap->prot); | |
426 | #endif | |
427 | ||
dd89ed8a | 428 | addr = (vm_offset_t)uap->addr; |
08cd4915 | 429 | if ((addr & PAGE_MASK) || uap->len < 0) |
619edcce | 430 | return(EINVAL); |
dd89ed8a | 431 | size = (vm_size_t)uap->len; |
f06c50cb | 432 | prot = uap->prot & VM_PROT_ALL; |
619edcce | 433 | |
451a445a MK |
434 | switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot, |
435 | FALSE)) { | |
619edcce KM |
436 | case KERN_SUCCESS: |
437 | return (0); | |
438 | case KERN_PROTECTION_FAILURE: | |
439 | return (EACCES); | |
440 | } | |
441 | return (EINVAL); | |
442 | } | |
443 | ||
dd89ed8a CT |
444 | struct madvise_args { |
445 | caddr_t addr; | |
446 | int len; | |
447 | int behav; | |
448 | }; | |
619edcce | 449 | /* ARGSUSED */ |
3266719e | 450 | int |
619edcce KM |
451 | madvise(p, uap, retval) |
452 | struct proc *p; | |
dd89ed8a | 453 | struct madvise_args *uap; |
619edcce KM |
454 | int *retval; |
455 | { | |
456 | ||
457 | /* Not yet implemented */ | |
458 | return (EOPNOTSUPP); | |
459 | } | |
460 | ||
dd89ed8a CT |
461 | struct mincore_args { |
462 | caddr_t addr; | |
463 | int len; | |
464 | char *vec; | |
465 | }; | |
619edcce | 466 | /* ARGSUSED */ |
3266719e | 467 | int |
619edcce KM |
468 | mincore(p, uap, retval) |
469 | struct proc *p; | |
dd89ed8a | 470 | struct mincore_args *uap; |
619edcce KM |
471 | int *retval; |
472 | { | |
473 | ||
474 | /* Not yet implemented */ | |
475 | return (EOPNOTSUPP); | |
476 | } | |
477 | ||
2a32d5bc MH |
478 | struct mlock_args { |
479 | caddr_t addr; | |
d4f27e41 | 480 | size_t len; |
2a32d5bc MH |
481 | }; |
482 | int | |
483 | mlock(p, uap, retval) | |
484 | struct proc *p; | |
485 | struct mlock_args *uap; | |
486 | int *retval; | |
487 | { | |
488 | vm_offset_t addr; | |
489 | vm_size_t size; | |
490 | int error; | |
491 | extern int vm_page_max_wired; | |
492 | ||
493 | #ifdef DEBUG | |
494 | if (mmapdebug & MDB_FOLLOW) | |
495 | printf("mlock(%d): addr %x len %x\n", | |
496 | p->p_pid, uap->addr, uap->len); | |
497 | #endif | |
498 | addr = (vm_offset_t)uap->addr; | |
72012dca | 499 | if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr) |
2a32d5bc MH |
500 | return (EINVAL); |
501 | size = round_page((vm_size_t)uap->len); | |
502 | if (atop(size) + cnt.v_wire_count > vm_page_max_wired) | |
28ed9ed6 | 503 | return (EAGAIN); |
2a32d5bc MH |
504 | #ifdef pmap_wired_count |
505 | if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) > | |
506 | p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) | |
28ed9ed6 | 507 | return (EAGAIN); |
2a32d5bc MH |
508 | #else |
509 | if (error = suser(p->p_ucred, &p->p_acflag)) | |
510 | return (error); | |
511 | #endif | |
512 | ||
513 | error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE); | |
514 | return (error == KERN_SUCCESS ? 0 : ENOMEM); | |
515 | } | |
516 | ||
517 | struct munlock_args { | |
518 | caddr_t addr; | |
d4f27e41 | 519 | size_t len; |
2a32d5bc MH |
520 | }; |
521 | int | |
522 | munlock(p, uap, retval) | |
523 | struct proc *p; | |
524 | struct munlock_args *uap; | |
525 | int *retval; | |
526 | { | |
527 | vm_offset_t addr; | |
528 | vm_size_t size; | |
529 | int error; | |
530 | ||
531 | #ifdef DEBUG | |
532 | if (mmapdebug & MDB_FOLLOW) | |
533 | printf("munlock(%d): addr %x len %x\n", | |
534 | p->p_pid, uap->addr, uap->len); | |
535 | #endif | |
536 | addr = (vm_offset_t)uap->addr; | |
72012dca | 537 | if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr) |
2a32d5bc MH |
538 | return (EINVAL); |
539 | #ifndef pmap_wired_count | |
540 | if (error = suser(p->p_ucred, &p->p_acflag)) | |
541 | return (error); | |
542 | #endif | |
543 | size = round_page((vm_size_t)uap->len); | |
544 | ||
545 | error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE); | |
546 | return (error == KERN_SUCCESS ? 0 : ENOMEM); | |
547 | } | |
548 | ||
619edcce KM |
/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 *
 * Arguments:
 *	map	address map to install the mapping in
 *	addr	in: requested address (used as given with MAP_FIXED,
 *		otherwise rounded up to a page and used as a starting
 *		point); out: address of the mapping
 *	size	length of the mapping; 0 returns success immediately
 *	prot	current protection to give the mapping
 *	maxprot	maximum protection to give the mapping
 *	flags	MAP_* flags (MAP_FIXED, MAP_ANON, MAP_SHARED, MAP_COPY
 *		are examined here)
 *	handle	vnode pointer, or NULL for MAP_ANON
 *	foff	offset passed to the pager and the object mapping
 *
 * Returns 0 on success or an errno value: EINVAL/ENOMEM if no pager
 * can be allocated, otherwise the Mach status is translated at "out"
 * (KERN_INVALID_ADDRESS and KERN_NO_SPACE -> ENOMEM,
 * KERN_PROTECTION_FAILURE -> EACCES, anything else -> EINVAL).
 *
 * Note that with MAP_FIXED whatever was previously mapped in
 * [*addr, *addr + size) is deallocated up front, before any of the
 * steps below has had a chance to fail.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	/*
	 * fitit tells the allocation routines whether they may pick the
	 * address themselves (non-fixed) or must use *addr exactly.
	 */
	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		(void)vm_deallocate(map, *addr, size);
	}

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existence of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			/* device pager is keyed by device number, not vnode */
			type = PG_DEVICE;
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL,
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was successful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			       vp, pager);
#endif
		/*
		 * Map it directly.
		 * Allows modifications to go out to the vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
						    fitit, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way
			 * of ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);
		}
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW, you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
					 addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Map the file into a scratch map first, then
			 * copy from the scratch map into the target.
			 *
			 * NOTE(review): if either of the next two steps
			 * fails, the range obtained from vm_map_find()
			 * above is not released from "map" on the way
			 * out -- confirm whether that is intended.
			 */
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
					     VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
						    TRUE, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that we need to guarantee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleaze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
					 FALSE, FALSE);
			/* rv is only examined after the cleanup below */
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				/*
				 * This tmap shadows the scratch map
				 * declared in the enclosing block; it
				 * starts out as "map" and vm_map_lookup()
				 * may update it through the pointer.
				 */
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
					      &tentry, &tobject, &toffset,
					      &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			/* back in the outer scope: this is the scratch map */
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different than prot, we must set both explicitly.
	 * The maximum is set first (TRUE), then the current value (FALSE).
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		/* undo the mapping we just created */
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	/* Translate the Mach status into an errno value. */
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}