Commit | Line | Data |
---|---|---|
619edcce KM |
1 | /* |
2 | * Copyright (c) 1988 University of Utah. | |
4a8de8a3 KB |
3 | * Copyright (c) 1991, 1993 |
4 | * The Regents of the University of California. All rights reserved. | |
619edcce KM |
5 | * |
6 | * This code is derived from software contributed to Berkeley by | |
7 | * the Systems Programming Group of the University of Utah Computer | |
8 | * Science Department. | |
9 | * | |
10 | * %sccs.include.redist.c% | |
11 | * | |
dbc00f04 | 12 | * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ |
619edcce | 13 | * |
9f6c8c1f | 14 | * @(#)vm_mmap.c 8.6 (Berkeley) %G% |
619edcce KM |
15 | */ |
16 | ||
17 | /* | |
18 | * Mapped file (mmap) interface to VM | |
19 | */ | |
20 | ||
3266719e KB |
21 | #include <sys/param.h> |
22 | #include <sys/systm.h> | |
23 | #include <sys/filedesc.h> | |
2a32d5bc | 24 | #include <sys/resourcevar.h> |
3266719e KB |
25 | #include <sys/proc.h> |
26 | #include <sys/vnode.h> | |
3266719e KB |
27 | #include <sys/file.h> |
28 | #include <sys/mman.h> | |
29 | #include <sys/conf.h> | |
619edcce | 30 | |
ba19ac11 JSP |
31 | #include <miscfs/specfs/specdev.h> |
32 | ||
3266719e KB |
33 | #include <vm/vm.h> |
34 | #include <vm/vm_pager.h> | |
35 | #include <vm/vm_prot.h> | |
619edcce KM |
36 | |
37 | #ifdef DEBUG | |
38 | int mmapdebug = 0; | |
39 | #define MDB_FOLLOW 0x01 | |
40 | #define MDB_SYNC 0x02 | |
41 | #define MDB_MAPIT 0x04 | |
42 | #endif | |
43 | ||
dd89ed8a CT |
struct sbrk_args {
	int	incr;		/* number of bytes to grow the data segment */
};
/* ARGSUSED */
/*
 * sbrk system call.
 * Not yet implemented at this level; always fails with EOPNOTSUPP.
 */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}
58 | ||
dd89ed8a CT |
struct sstk_args {
	int	incr;		/* number of bytes to grow the stack */
};
/* ARGSUSED */
/*
 * sstk (set stack) system call.
 * Not yet implemented at this level; always fails with EOPNOTSUPP.
 */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}
73 | ||
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
struct getpagesize_args {
	int	dummy;		/* no real arguments */
};
/* ARGSUSED */
/*
 * Old getpagesize system call: return the VM page size.
 * Kept for 4.3BSD / SunOS binary compatibility.
 */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
abf092ba | 90 | |
d74ab96d KB |
struct mmap_args {
	caddr_t	addr;		/* placement hint, or exact address if MAP_FIXED */
	size_t	len;		/* length of mapping in bytes */
	int	prot;		/* desired protection (PROT_*) */
	int	flags;		/* mapping flags (MAP_*) */
	int	fd;		/* file to map; must be -1 for MAP_ANON */
	long	pad;		/* unused padding before the 64-bit offset */
	off_t	pos;		/* byte offset within the file */
};
100 | ||
#ifdef COMPAT_43
struct ommap_args {
	caddr_t	addr;
	int	len;
	int	prot;
	int	flags;
	int	fd;
	long	pos;
};
/*
 * Old (4.3BSD-era) mmap system call: translate the old argument and
 * flag encoding into a struct mmap_args and hand off to mmap().
 */
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	/*
	 * Old protection encoding indexed by the low three prot bits
	 * (exec = 1, write = 2, read = 4), mapped to PROT_* values.
	 */
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
/* Old-style flag values. */
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot&0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;	/* not shared implies private */
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif
154 | ||
/*
 * mmap system call: validate the user's arguments, derive the maximum
 * protection from the open file (or VM_PROT_ALL for anonymous memory),
 * and let vm_mmap() do the actual work.
 *
 * On success, *retval holds the chosen mapping address.
 * Returns EINVAL for malformed arguments, EBADF for a bad descriptor,
 * EACCES when the requested protection exceeds the file's open mode,
 * or whatever vm_mmap() returns.
 */
int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr, pos;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
		       p->p_pid, uap->addr, uap->len, prot,
		       flags, uap->fd, pos);
#endif
	/*
	 * Address (if FIXED) must be page aligned.
	 * Size is implicitly rounded to a page boundary.
	 * File offset must be page aligned unless mapping is anonymous,
	 * and MAP_ANON requires fd == -1.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    ((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);
	}
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	} else {
		/*
		 * Mapping file, get fp for validation.
		 * Obtain vnode and make sure it is of appropriate type.
		 */
		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon
		 * memory (ala SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?
			 * What if proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				/* private writes go to a copy, always OK */
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t)vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);
	if (error == 0)
		*retval = (int)addr;
	return (error);
}
270 | ||
dd89ed8a CT |
271 | struct msync_args { |
272 | caddr_t addr; | |
273 | int len; | |
274 | }; | |
3266719e | 275 | int |
619edcce KM |
276 | msync(p, uap, retval) |
277 | struct proc *p; | |
dd89ed8a | 278 | struct msync_args *uap; |
619edcce KM |
279 | int *retval; |
280 | { | |
d97d2118 MH |
281 | vm_offset_t addr; |
282 | vm_size_t size; | |
283 | vm_map_t map; | |
619edcce | 284 | int rv; |
d97d2118 | 285 | boolean_t syncio, invalidate; |
619edcce KM |
286 | |
287 | #ifdef DEBUG | |
288 | if (mmapdebug & (MDB_FOLLOW|MDB_SYNC)) | |
289 | printf("msync(%d): addr %x len %x\n", | |
290 | p->p_pid, uap->addr, uap->len); | |
291 | #endif | |
d97d2118 MH |
292 | if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr) |
293 | return (EINVAL); | |
294 | map = &p->p_vmspace->vm_map; | |
295 | addr = (vm_offset_t)uap->addr; | |
296 | size = (vm_size_t)uap->len; | |
619edcce | 297 | /* |
d97d2118 MH |
298 | * XXX Gak! If size is zero we are supposed to sync "all modified |
299 | * pages with the region containing addr". Unfortunately, we | |
300 | * don't really keep track of individual mmaps so we approximate | |
301 | * by flushing the range of the map entry containing addr. | |
302 | * This can be incorrect if the region splits or is coalesced | |
303 | * with a neighbor. | |
619edcce | 304 | */ |
d97d2118 MH |
305 | if (size == 0) { |
306 | vm_map_entry_t entry; | |
307 | ||
308 | vm_map_lock_read(map); | |
309 | rv = vm_map_lookup_entry(map, addr, &entry); | |
310 | vm_map_unlock_read(map); | |
311 | if (rv) | |
312 | return (EINVAL); | |
313 | addr = entry->start; | |
314 | size = entry->end - entry->start; | |
315 | } | |
619edcce KM |
316 | #ifdef DEBUG |
317 | if (mmapdebug & MDB_SYNC) | |
d97d2118 MH |
318 | printf("msync: cleaning/flushing address range [%x-%x)\n", |
319 | addr, addr+size); | |
619edcce KM |
320 | #endif |
321 | /* | |
d97d2118 MH |
322 | * Could pass this in as a third flag argument to implement |
323 | * Sun's MS_ASYNC. | |
619edcce | 324 | */ |
d97d2118 | 325 | syncio = TRUE; |
619edcce | 326 | /* |
d97d2118 MH |
327 | * XXX bummer, gotta flush all cached pages to ensure |
328 | * consistency with the file system cache. Otherwise, we could | |
329 | * pass this in to implement Sun's MS_INVALIDATE. | |
619edcce | 330 | */ |
d97d2118 MH |
331 | invalidate = TRUE; |
332 | /* | |
333 | * Clean the pages and interpret the return value. | |
334 | */ | |
335 | rv = vm_map_clean(map, addr, addr+size, syncio, invalidate); | |
336 | switch (rv) { | |
337 | case KERN_SUCCESS: | |
338 | break; | |
339 | case KERN_INVALID_ADDRESS: | |
340 | return (EINVAL); /* Sun returns ENOMEM? */ | |
341 | case KERN_FAILURE: | |
342 | return (EIO); | |
343 | default: | |
344 | return (EINVAL); | |
345 | } | |
346 | return (0); | |
619edcce KM |
347 | } |
348 | ||
dd89ed8a CT |
349 | struct munmap_args { |
350 | caddr_t addr; | |
351 | int len; | |
352 | }; | |
3266719e | 353 | int |
619edcce KM |
354 | munmap(p, uap, retval) |
355 | register struct proc *p; | |
dd89ed8a | 356 | register struct munmap_args *uap; |
619edcce KM |
357 | int *retval; |
358 | { | |
359 | vm_offset_t addr; | |
360 | vm_size_t size; | |
d97d2118 | 361 | vm_map_t map; |
619edcce KM |
362 | |
363 | #ifdef DEBUG | |
364 | if (mmapdebug & MDB_FOLLOW) | |
365 | printf("munmap(%d): addr %x len %x\n", | |
366 | p->p_pid, uap->addr, uap->len); | |
367 | #endif | |
368 | ||
369 | addr = (vm_offset_t) uap->addr; | |
08cd4915 | 370 | if ((addr & PAGE_MASK) || uap->len < 0) |
619edcce | 371 | return(EINVAL); |
04107e61 | 372 | size = (vm_size_t) round_page(uap->len); |
619edcce KM |
373 | if (size == 0) |
374 | return(0); | |
dda79633 KM |
375 | /* |
376 | * Check for illegal addresses. Watch out for address wrap... | |
377 | * Note that VM_*_ADDRESS are not constants due to casts (argh). | |
378 | */ | |
379 | if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS) | |
380 | return (EINVAL); | |
381 | if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) | |
382 | return (EINVAL); | |
383 | if (addr > addr + size) | |
384 | return (EINVAL); | |
d97d2118 MH |
385 | map = &p->p_vmspace->vm_map; |
386 | /* | |
387 | * Make sure entire range is allocated. | |
388 | */ | |
389 | if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) | |
619edcce KM |
390 | return(EINVAL); |
391 | /* returns nothing but KERN_SUCCESS anyway */ | |
d97d2118 | 392 | (void) vm_map_remove(map, addr, addr+size); |
619edcce KM |
393 | return(0); |
394 | } | |
395 | ||
/*
 * Clear the mapped-file mark on a descriptor of the current process.
 * NOTE(review): presumably invoked when a mapped descriptor is closed
 * — confirm against callers.  Only the UF_MAPPED flag is cleared; the
 * mappings themselves are left in place (see XXX below).
 */
void
munmapfd(fd)
	int fd;
{
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
#endif

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}
410 | ||
dd89ed8a CT |
411 | struct mprotect_args { |
412 | caddr_t addr; | |
413 | int len; | |
414 | int prot; | |
415 | }; | |
3266719e | 416 | int |
619edcce KM |
417 | mprotect(p, uap, retval) |
418 | struct proc *p; | |
dd89ed8a | 419 | struct mprotect_args *uap; |
619edcce KM |
420 | int *retval; |
421 | { | |
422 | vm_offset_t addr; | |
423 | vm_size_t size; | |
424 | register vm_prot_t prot; | |
425 | ||
426 | #ifdef DEBUG | |
427 | if (mmapdebug & MDB_FOLLOW) | |
428 | printf("mprotect(%d): addr %x len %x prot %d\n", | |
429 | p->p_pid, uap->addr, uap->len, uap->prot); | |
430 | #endif | |
431 | ||
dd89ed8a | 432 | addr = (vm_offset_t)uap->addr; |
08cd4915 | 433 | if ((addr & PAGE_MASK) || uap->len < 0) |
619edcce | 434 | return(EINVAL); |
dd89ed8a | 435 | size = (vm_size_t)uap->len; |
f06c50cb | 436 | prot = uap->prot & VM_PROT_ALL; |
619edcce | 437 | |
451a445a MK |
438 | switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot, |
439 | FALSE)) { | |
619edcce KM |
440 | case KERN_SUCCESS: |
441 | return (0); | |
442 | case KERN_PROTECTION_FAILURE: | |
443 | return (EACCES); | |
444 | } | |
445 | return (EINVAL); | |
446 | } | |
447 | ||
dd89ed8a CT |
struct madvise_args {
	caddr_t	addr;		/* start of range */
	int	len;		/* length in bytes */
	int	behav;		/* advice (MADV_*) */
};
/* ARGSUSED */
/*
 * madvise system call.
 * Not yet implemented; always fails with EOPNOTSUPP.
 */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}
464 | ||
dd89ed8a CT |
struct mincore_args {
	caddr_t	addr;		/* start of range */
	int	len;		/* length in bytes */
	char	*vec;		/* per-page residency result vector */
};
/* ARGSUSED */
/*
 * mincore system call.
 * Not yet implemented; always fails with EOPNOTSUPP.
 */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}
481 | ||
2a32d5bc MH |
struct mlock_args {
	caddr_t	addr;		/* start of range, must be page aligned */
	size_t	len;		/* length in bytes */
};
/*
 * mlock system call: wire the given address range into physical memory.
 * Returns 0 on success, EINVAL for a bad range, EAGAIN when a wiring
 * limit would be exceeded, or ENOMEM if the wiring itself fails.
 */
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;
	extern int vm_page_max_wired;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	/* Address must be page aligned and the range must not wrap. */
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	size = round_page((vm_size_t)uap->len);
	/* Enforce the system-wide limit on wired pages. */
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	/* Enforce the per-process locked-memory resource limit. */
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	/* No way to count wired pages here; restrict to the superuser. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
520 | ||
struct munlock_args {
	caddr_t	addr;		/* start of range, must be page aligned */
	size_t	len;		/* length in bytes */
};
/*
 * munlock system call: unwire the given address range so its pages may
 * be paged out again.  Returns 0 on success, EINVAL for a bad range,
 * or ENOMEM if the unwiring fails.
 */
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	/* Address must be page aligned and the range must not wrap. */
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
#ifndef pmap_wired_count
	/* Symmetric with mlock: without wired counts, superuser only. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif
	size = round_page((vm_size_t)uap->len);

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
552 | ||
619edcce KM |
/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 *
 * map:		target address map
 * addr:	in/out — placement hint on entry, chosen address on return
 * size:	length of mapping in bytes
 * prot:	initial protection; maxprot: maximum allowed protection
 * flags:	MAP_* flags
 * foff:	offset within the backing object
 *
 * Returns 0 or an errno value translated from the VM return code.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		/* Fixed mapping: evict anything already at the target. */
		(void)vm_deallocate(map, *addr, size);
	}

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existance of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL,
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was sucessful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			       vp, pager);
#endif
		/*
		 * Map it directly.
		 * Allows modifications to go out to the vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
						    fitit, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way
			 * of ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);
		}
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW, you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
					 addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Build a temporary map holding the file mapping,
			 * then copy it into the target so the target gets
			 * the copy-on-write semantics of vm_map_copy.
			 */
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
					     VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
						    TRUE, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that we need to guarentee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleeze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
					 FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
					      &tentry, &tobject, &toffset,
					      &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different than prot, we must set both explicitly.
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		/* Undo the mapping on failure. */
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	/* Translate VM kernel return codes to errno values. */
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}