/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * %sccs.include.redist.c%
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.1 (Berkeley) %G%
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_prot.h>

#ifdef DEBUG
int mmapdebug = 0;
#define MDB_FOLLOW	0x01
#define MDB_SYNC	0x02
#define MDB_MAPIT	0x04
#endif

struct sbrk_args {
	int incr;
};
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct sstk_args {
	int incr;
};
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

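/*
 * Argument structure for the current mmap call.  The pad field is
 * presumably there so that the 64-bit pos (off_t) stays aligned on
 * a 64-bit boundary in the argument list.
 */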
struct mmap_args {
	caddr_t addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
struct getpagesize_args {
	int dummy;
};
/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifdef COMPAT_43
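/*
 * mmap compatibility stub for old (COMPAT_43) binaries: translate the
 * old protection bits and OMAP_* flags into their current equivalents
 * and hand the request to smmap() below.  Note that the old interface
 * implied MAP_PRIVATE whenever OMAP_SHARED was not set.
 */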
struct osmmap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
int
osmmap(p, uap, retval)
	struct proc *p;
	register struct osmmap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot&0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (smmap(p, &nargs, retval));
}
#endif

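/*
 * The real mmap system call: validate the arguments (alignment,
 * length, fd where one is required), derive the maximum protection
 * from how the file was opened, and let vm_mmap() below do the
 * actual work.
 */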
int
smmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
		       p->p_pid, uap->addr, uap->len, prot,
		       flags, uap->fd, (vm_offset_t)uap->pos);
#endif
	/*
	 * Address (if FIXED) must be page aligned.
	 * Size is implicitly rounded to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);
	}
	/*
	 * XXX: if no hint is provided for a non-fixed mapping, place it
	 * after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation.
		 * Obtain vnode and make sure it is of appropriate type.
		 */
		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * Ensure that file and memory protections are compatible.
		 * Note that we only worry about writability if mapping is
		 * shared; in this case, current and max prot are dictated
		 * by the open file.
		 * XXX use the vnode instead?  Problem is: what credentials
		 * do we use for determination?  What if proc does a setuid?
		 */
		maxprot = VM_PROT_EXECUTE;	/* ??? */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);
		if (flags & MAP_SHARED) {
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			else if (prot & PROT_WRITE)
				return (EACCES);
		} else
			maxprot |= VM_PROT_WRITE;
		handle = (caddr_t)vp;
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, (vm_offset_t)uap->pos);
	if (error == 0)
		*retval = (int)addr;
	return (error);
}

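/*
 * Write back (and, per the XXX below, also invalidate) any cached
 * pages of the region so they stay consistent with the backing vnode.
 * The region must lie within a single map entry and be backed by a
 * vnode pager.
 */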
struct msync_args {
	caddr_t addr;
	int len;
};
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr, objoff, oaddr;
	vm_size_t size, osize;
	vm_prot_t prot, mprot;
	vm_inherit_t inherit;
	vm_object_t object;
	boolean_t shared;
	int rv;

#ifdef DEBUG
	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
		printf("msync(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	if (((int)uap->addr & PAGE_MASK) || uap->len < 0)
		return(EINVAL);
	addr = oaddr = (vm_offset_t)uap->addr;
	osize = (vm_size_t)uap->len;
	/*
	 * Region must be entirely contained in a single entry
	 */
	if (!vm_map_is_allocated(&p->p_vmspace->vm_map, addr, addr+osize,
	    TRUE))
		return(EINVAL);
	/*
	 * Determine the object associated with that entry
	 * (object is returned locked on KERN_SUCCESS)
	 */
	rv = vm_region(&p->p_vmspace->vm_map, &addr, &size, &prot, &mprot,
		       &inherit, &shared, &object, &objoff);
	if (rv != KERN_SUCCESS)
		return(EINVAL);
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: region: object %x addr %x size %d objoff %d\n",
		       object, addr, size, objoff);
#endif
	/*
	 * Do not msync non-vnode-backed objects.
	 */
	if ((object->flags & OBJ_INTERNAL) || object->pager == NULL ||
	    object->pager->pg_type != PG_VNODE) {
		vm_object_unlock(object);
		return(EINVAL);
	}
	objoff += oaddr - addr;
	if (osize == 0)
		osize = size;
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: cleaning/flushing object range [%x-%x)\n",
		       objoff, objoff+osize);
#endif
	if (prot & VM_PROT_WRITE)
		vm_object_page_clean(object, objoff, objoff+osize, FALSE);
	/*
	 * (XXX)
	 * Bummer, gotta flush all cached pages to ensure
	 * consistency with the file system cache.
	 */
	vm_object_page_remove(object, objoff, objoff+osize);
	vm_object_unlock(object);
	return(0);
}

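/*
 * Remove the given address range from the process address space,
 * after checking it for alignment, wrap-around and legality.
 */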
struct munmap_args {
	caddr_t addr;
	int len;
};
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmap(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return(EINVAL);
	size = (vm_size_t) round_page(uap->len);
	if (size == 0)
		return(0);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	if (!vm_map_is_allocated(&p->p_vmspace->vm_map, addr, addr + size,
	    FALSE))
		return(EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(&p->p_vmspace->vm_map, addr, addr+size);
	return(0);
}

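/*
 * Presumably invoked when a mapped file descriptor is going away;
 * for now it only clears the descriptor's UF_MAPPED flag (see the
 * XXX below).
 */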
void
munmapfd(fd)
	int fd;
{
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
#endif

	/*
	 * XXX -- should vm_deallocate any regions mapped to this file
	 */
	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

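/*
 * Change the protection of the given (page-aligned) address range.
 * This is a thin wrapper around vm_map_protect() that maps its
 * KERN_* results onto errno values.
 */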
struct mprotect_args {
	caddr_t addr;
	int len;
	int prot;
};
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mprotect(%d): addr %x len %x prot %d\n",
		       p->p_pid, uap->addr, uap->len, uap->prot);
#endif

	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return(EINVAL);
	size = (vm_size_t)uap->len;
	prot = uap->prot & VM_PROT_ALL;

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

struct madvise_args {
	caddr_t addr;
	int len;
	int behav;
};
/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mincore_args {
	caddr_t addr;
	int len;
	char *vec;
};
/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

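/*
 * Wire the indicated pages into memory, subject to the system-wide
 * wired-page limit and, where the pmap can count wired pages, the
 * per-process RLIMIT_MEMLOCK limit (otherwise superuser only).
 */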
struct mlock_args {
	caddr_t addr;
	size_t len;
};
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;
	extern int vm_page_max_wired;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
	size = round_page((vm_size_t)uap->len);
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

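/*
 * Unwire the indicated pages.  Without pmap_wired_count this is a
 * superuser-only operation, mirroring mlock() above.
 */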
struct munlock_args {
	caddr_t addr;
	size_t len;
};
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
#ifndef pmap_wired_count
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif
	size = round_page((vm_size_t)uap->len);

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
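/*
 * For example, the mmap system call (smmap above) ends up here as:
 *
 *	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
 *	    flags, handle, (vm_offset_t)uap->pos);
 *
 * where handle is the backing vnode (cast to caddr_t) for a file
 * mapping, or NULL for anonymous memory.
 */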
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		(void)vm_deallocate(map, *addr, size);
	}

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existence of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL,
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was successful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			       vp, pager);
#endif
		/*
		 * Map it directly.
		 * Allows modifications to go out to the vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
						    fitit, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way
			 * of ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);
		}
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW, you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
					 addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
					     VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
						    TRUE, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that, we need to guarantee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleaze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
					 FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
					      &tentry, &tobject, &toffset,
					      &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different from prot, we must set both explicitly.
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_inherit(map, *addr, size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

/*
 * Internal bastardized version of MACH's vm_region system call.
 * Given address and size it returns map attributes as well
 * as the (locked) object mapped at that location.
 */
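/*
 * Within this file, msync() above is the only caller.  The object is
 * returned locked; the caller is responsible for unlocking it.
 */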
int
vm_region(map, addr, size, prot, max_prot, inheritance, shared, object, objoff)
	vm_map_t map;
	vm_offset_t *addr;		/* IN/OUT */
	vm_size_t *size;		/* OUT */
	vm_prot_t *prot;		/* OUT */
	vm_prot_t *max_prot;		/* OUT */
	vm_inherit_t *inheritance;	/* OUT */
	boolean_t *shared;		/* OUT */
	vm_object_t *object;		/* OUT */
	vm_offset_t *objoff;		/* OUT */
{
	vm_map_entry_t tmp_entry;
	register
	vm_map_entry_t entry;
	register
	vm_offset_t tmp_offset;
	vm_offset_t start;

	if (map == NULL)
		return(KERN_INVALID_ARGUMENT);

	start = *addr;

	vm_map_lock_read(map);
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->next) == &map->header) {
			vm_map_unlock_read(map);
			return(KERN_NO_SPACE);
		}
		start = entry->start;
		*addr = start;
	} else
		entry = tmp_entry;

	*prot = entry->protection;
	*max_prot = entry->max_protection;
	*inheritance = entry->inheritance;

	tmp_offset = entry->offset + (start - entry->start);
	*size = (entry->end - start);

	if (entry->is_a_map) {
		register vm_map_t share_map;
		vm_size_t share_size;

		share_map = entry->object.share_map;

		vm_map_lock_read(share_map);
		(void) vm_map_lookup_entry(share_map, tmp_offset, &tmp_entry);

		if ((share_size = (tmp_entry->end - tmp_offset)) < *size)
			*size = share_size;

		vm_object_lock(tmp_entry->object);
		*object = tmp_entry->object.vm_object;
		*objoff = tmp_entry->offset + (tmp_offset - tmp_entry->start);

		*shared = (share_map->ref_count != 1);
		vm_map_unlock_read(share_map);
	} else {
		vm_object_lock(entry->object);
		*object = entry->object.vm_object;
		*objoff = tmp_offset;

		*shared = FALSE;
	}

	vm_map_unlock_read(map);

	return(KERN_SUCCESS);
}

/*
 * Yet another bastard routine.
 */
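/*
 * Allocate a region in the given map and arrange for it to be backed
 * by the given pager at offset poffset.  The backing object is found
 * in (or entered into) the object cache unless it is internal, per
 * the note from Mike Hibler below.
 */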
int
vm_allocate_with_pager(map, addr, size, fitit, pager, poffset, internal)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	boolean_t fitit;
	vm_pager_t pager;
	vm_offset_t poffset;
	boolean_t internal;
{
	register vm_object_t object;
	register int result;

	if (map == NULL)
		return(KERN_INVALID_ARGUMENT);

	*addr = trunc_page(*addr);
	size = round_page(size);

	/*
	 * Lookup the pager/paging-space in the object cache.
	 * If it's not there, then create a new object and cache
	 * it.
	 */
	object = vm_object_lookup(pager);
	cnt.v_lookups++;
	if (object == NULL) {
		object = vm_object_allocate(size);
		/*
		 * From Mike Hibler: "unnamed anonymous objects should never
		 * be on the hash list ... For now you can just change
		 * vm_allocate_with_pager to not do vm_object_enter if this
		 * is an internal object ..."
		 */
		if (!internal)
			vm_object_enter(object, pager);
	} else
		cnt.v_hits++;
	if (internal)
		object->flags |= OBJ_INTERNAL;
	else
		object->flags &= ~OBJ_INTERNAL;

	result = vm_map_find(map, object, poffset, addr, size, fitit);
	if (result != KERN_SUCCESS)
		vm_object_deallocate(object);
	else if (pager != NULL)
		vm_object_setpager(object, pager, (vm_offset_t) 0, TRUE);
	return(result);
}

/*
 * XXX: this routine belongs in vm_map.c.
 *
 * Returns TRUE if the range [start, end) is allocated in either
 * a single entry (single_entry == TRUE) or multiple contiguous
 * entries (single_entry == FALSE).
 *
 * start and end should be page aligned.
 */
boolean_t
vm_map_is_allocated(map, start, end, single_entry)
	vm_map_t map;
	vm_offset_t start, end;
	boolean_t single_entry;
{
	vm_map_entry_t mapent;
	register vm_offset_t nend;

	vm_map_lock_read(map);

	/*
	 * Start address not in any entry
	 */
	if (!vm_map_lookup_entry(map, start, &mapent)) {
		vm_map_unlock_read(map);
		return (FALSE);
	}
	/*
	 * Find the maximum stretch of contiguously allocated space
	 */
	nend = mapent->end;
	if (!single_entry) {
		mapent = mapent->next;
		while (mapent != &map->header && mapent->start == nend) {
			nend = mapent->end;
			mapent = mapent->next;
		}
	}

	vm_map_unlock_read(map);
	return (end <= nend);
}