175f072e 1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
0e24ad83 8 * %sccs.include.redist.c%
175f072e 9 *
fc8007a4 10 * @(#)vm_map.c 7.5 (Berkeley) %G%
11 *
12 *
13 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
14 * All rights reserved.
15 *
16 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
17 *
18 * Permission to use, copy, modify and distribute this software and
19 * its documentation is hereby granted, provided that both the copyright
20 * notice and this permission notice appear in all copies of the
21 * software, derivative works or modified versions, and any portions
22 * thereof, and that both notices appear in supporting documentation.
23 *
24 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
25 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
26 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
27 *
28 * Carnegie Mellon requests users of this software to return to
29 *
30 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
31 * School of Computer Science
32 * Carnegie Mellon University
33 * Pittsburgh PA 15213-3890
34 *
35 * any improvements or extensions that they make and grant Carnegie the
36 * rights to redistribute these changes.
37 */
38
39/*
40 * Virtual memory mapping module.
41 */
42
5d7b9ad3 43#include "param.h"
175f072e 44#include "malloc.h"
45#include "vm.h"
46#include "vm_page.h"
47#include "vm_object.h"
48
49/*
50 * Virtual memory maps provide for the mapping, protection,
51 * and sharing of virtual memory objects. In addition,
52 * this module provides for an efficient virtual copy of
53 * memory from one map to another.
54 *
55 * Synchronization is required prior to most operations.
56 *
57 * Maps consist of an ordered doubly-linked list of simple
58 * entries; a single hint is used to speed up lookups.
59 *
60 * In order to properly represent the sharing of virtual
61 * memory regions among maps, the map structure is bi-level.
62 * Top-level ("address") maps refer to regions of sharable
63 * virtual memory. These regions are implemented as
64 * ("sharing") maps, which then refer to the actual virtual
65 * memory objects. When two address maps "share" memory,
66 * their top-level maps both have references to the same
67 * sharing map. When memory is virtual-copied from one
68 * address map to another, the references in the sharing
69 * maps are actually copied -- no copying occurs at the
70 * virtual memory object level.
71 *
 72 * Since portions of maps are specified by start/end addresses,
73 * which may not align with existing map entries, all
74 * routines merely "clip" entries to these start/end values.
75 * [That is, an entry is split into two, bordering at a
76 * start or end value.] Note that these clippings may not
77 * always be necessary (as the two resulting entries are then
78 * not changed); however, the clipping is done for convenience.
79 * No attempt is currently made to "glue back together" two
80 * abutting entries.
81 *
82 * As mentioned above, virtual copy operations are performed
83 * by copying VM object references from one sharing map to
84 * another, and then marking both regions as copy-on-write.
85 * It is important to note that only one writeable reference
86 * to a VM object region exists in any map -- this means that
87 * shadow object creation can be delayed until a write operation
88 * occurs.
89 */
90
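/*
 * Illustration only (not compiled): the "clipping" described above can be
 * pictured with a simplified stand-alone structure.  Splitting a range at
 * an address yields two entries that together cover exactly the original
 * range; neither half is otherwise changed.  The structure and routine
 * below are invented for this sketch and are not part of the module.
 */
#if 0
struct range {
	vm_offset_t	start;		/* covers [start, end) */
	vm_offset_t	end;
};

static void
range_clip_start(r, front, addr)
	struct range *r, *front;
	vm_offset_t addr;
{
	/* assumes r->start < addr && addr < r->end */
	front->start = r->start;
	front->end = addr;
	r->start = addr;
}
#endif /* illustration */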
91/*
5d7b9ad3 92 * vm_map_startup:
93 *
94 * Initialize the vm_map module. Must be called before
95 * any other vm_map routines.
96 *
97 * Map and entry structures are allocated from the general
98 * purpose memory pool with some exceptions:
99 *
100 * - The kernel map and kmem submap are allocated statically.
101 * - Kernel map entries are allocated out of a static pool.
102 *
103 * These restrictions are necessary since malloc() uses the
104 * maps and requires map entries.
105 */
106
107vm_offset_t kentry_data;
108vm_size_t kentry_data_size;
109vm_map_entry_t kentry_free;
110vm_map_t kmap_free;
111
5d7b9ad3 112void vm_map_startup()
113{
114 register int i;
115 register vm_map_entry_t mep;
116 vm_map_t mp;
117
118 /*
119 * Static map structures for allocation before initialization of
120 * kernel map or kmem map. vm_map_create knows how to deal with them.
121 */
122 kmap_free = mp = (vm_map_t) kentry_data;
123 i = MAX_KMAP;
124 while (--i > 0) {
125 mp->header.next = (vm_map_entry_t) (mp + 1);
126 mp++;
127 }
5d7b9ad3 128 mp++->header.next = NULL;
129
130 /*
131 * Form a free list of statically allocated kernel map entries
132 * with the rest.
133 */
134 kentry_free = mep = (vm_map_entry_t) mp;
135 i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep;
136 while (--i > 0) {
137 mep->next = mep + 1;
138 mep++;
139 }
140 mep->next = NULL;
141}
142
143/*
144 * Allocate a vmspace structure, including a vm_map and pmap,
145 * and initialize those structures. The refcnt is set to 1.
146 * The remaining fields must be initialized by the caller.
147 */
148struct vmspace *
149vmspace_alloc(min, max, pageable)
150 vm_offset_t min, max;
151 int pageable;
152{
153 register struct vmspace *vm;
154
155 MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK);
156 bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm);
157 vm_map_init(&vm->vm_map, min, max, pageable);
158 pmap_pinit(&vm->vm_pmap);
159 vm->vm_map.pmap = &vm->vm_pmap; /* XXX */
160 vm->vm_refcnt = 1;
161 return (vm);
162}
163
164void
165vmspace_free(vm)
166 register struct vmspace *vm;
167{
168
169 if (--vm->vm_refcnt == 0) {
170 /*
171 * Lock the map, to wait out all other references to it.
172 * Delete all of the mappings and pages they hold,
173 * then call the pmap module to reclaim anything left.
174 */
175 vm_map_lock(&vm->vm_map);
176 (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
177 vm->vm_map.max_offset);
178 pmap_release(&vm->vm_pmap);
179 FREE(vm, M_VMMAP);
180 }
181}
182
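/*
 * Illustration only (not compiled): a typical caller pairs the two
 * routines above.  VM_MIN_ADDRESS and VM_MAXUSER_ADDRESS are assumed to
 * be the usual machine-dependent bounds from vm_param.h; the surrounding
 * context is invented for this sketch.
 */
#if 0
	struct vmspace *vm;

	vm = vmspace_alloc(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS, TRUE);
	/* ... install vm as a process address space and use it ... */
	vmspace_free(vm);		/* drops the reference set up above */
#endif /* illustration */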
183/*
184 * vm_map_create:
185 *
186 * Creates and returns a new empty VM map with
187 * the given physical map structure, and having
188 * the given lower and upper address bounds.
189 */
190vm_map_t vm_map_create(pmap, min, max, pageable)
191 pmap_t pmap;
192 vm_offset_t min, max;
193 boolean_t pageable;
194{
195 register vm_map_t result;
196 extern vm_map_t kernel_map, kmem_map;
197
5d7b9ad3 198 if (kmem_map == NULL) {
199 result = kmap_free;
200 kmap_free = (vm_map_t) result->header.next;
201 if (result == NULL)
202 panic("vm_map_create: out of maps");
203 } else
204 MALLOC(result, vm_map_t, sizeof(struct vm_map),
205 M_VMMAP, M_WAITOK);
206
5d7b9ad3 207 vm_map_init(result, min, max, pageable);
175f072e 208 result->pmap = pmap;
209 return(result);
210}
211
212/*
213 * Initialize an existing vm_map structure
214 * such as that in the vmspace structure.
215 * The pmap is set elsewhere.
216 */
217void
218vm_map_init(map, min, max, pageable)
219 register struct vm_map *map;
220 vm_offset_t min, max;
221 boolean_t pageable;
222{
223 map->header.next = map->header.prev = &map->header;
224 map->nentries = 0;
225 map->size = 0;
226 map->ref_count = 1;
227 map->is_main_map = TRUE;
228 map->min_offset = min;
229 map->max_offset = max;
230 map->entries_pageable = pageable;
231 map->first_free = &map->header;
232 map->hint = &map->header;
233 map->timestamp = 0;
234 lock_init(&map->lock, TRUE);
235 simple_lock_init(&map->ref_lock);
236 simple_lock_init(&map->hint_lock);
237}
238
239/*
240 * vm_map_entry_create: [ internal use only ]
241 *
242 * Allocates a VM map entry for insertion.
243 * No entry fields are filled in. This routine is
244 */
245vm_map_entry_t vm_map_entry_create(map)
246 vm_map_t map;
247{
248 vm_map_entry_t entry;
249 extern vm_map_t kernel_map, kmem_map, mb_map;
250
251 if (map == kernel_map || map == kmem_map || map == mb_map) {
252 if (entry = kentry_free)
253 kentry_free = kentry_free->next;
254 } else
255 MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry),
256 M_VMMAPENT, M_WAITOK);
5d7b9ad3 257 if (entry == NULL)
258 panic("vm_map_entry_create: out of map entries");
259
260 return(entry);
261}
262
263/*
264 * vm_map_entry_dispose: [ internal use only ]
265 *
266 * Inverse of vm_map_entry_create.
267 */
268void vm_map_entry_dispose(map, entry)
269 vm_map_t map;
270 vm_map_entry_t entry;
271{
272 extern vm_map_t kernel_map, kmem_map, mb_map;
273
274 if (map == kernel_map || map == kmem_map || map == mb_map) {
275 entry->next = kentry_free;
276 kentry_free = entry;
277 } else
278 FREE(entry, M_VMMAPENT);
279}
280
281/*
282 * vm_map_entry_{un,}link:
283 *
284 * Insert/remove entries from maps.
285 */
286#define vm_map_entry_link(map, after_where, entry) \
287 { \
288 (map)->nentries++; \
289 (entry)->prev = (after_where); \
290 (entry)->next = (after_where)->next; \
291 (entry)->prev->next = (entry); \
292 (entry)->next->prev = (entry); \
293 }
294#define vm_map_entry_unlink(map, entry) \
295 { \
296 (map)->nentries--; \
297 (entry)->next->prev = (entry)->prev; \
298 (entry)->prev->next = (entry)->next; \
299 }
300
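/*
 * The entry list is circular and doubly linked, with &map->header acting
 * as a sentinel.  A whole-map traversal therefore follows the pattern
 * sketched below (not compiled; it is the idiom used by the routines
 * later in this file).
 */
#if 0
	register vm_map_entry_t entry;

	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		/* operate on [entry->start, entry->end) */
	}
#endif /* illustration */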
301/*
302 * vm_map_reference:
303 *
304 * Creates another valid reference to the given map.
305 *
306 */
307void vm_map_reference(map)
308 register vm_map_t map;
309{
5d7b9ad3 310 if (map == NULL)
311 return;
312
313 simple_lock(&map->ref_lock);
314 map->ref_count++;
315 simple_unlock(&map->ref_lock);
316}
317
318/*
319 * vm_map_deallocate:
320 *
321 * Removes a reference from the specified map,
322 * destroying it if no references remain.
323 * The map should not be locked.
324 */
325void vm_map_deallocate(map)
326 register vm_map_t map;
327{
328 register int c;
329
5d7b9ad3 330 if (map == NULL)
331 return;
332
333 simple_lock(&map->ref_lock);
334 c = --map->ref_count;
335 simple_unlock(&map->ref_lock);
336
337 if (c > 0) {
338 return;
339 }
340
341 /*
342 * Lock the map, to wait out all other references
343 * to it.
344 */
345
346 vm_map_lock(map);
347
348 (void) vm_map_delete(map, map->min_offset, map->max_offset);
349
350 pmap_destroy(map->pmap);
351
352 FREE(map, M_VMMAP);
353}
354
355/*
356 * vm_map_insert: [ internal use only ]
357 *
358 * Inserts the given whole VM object into the target
359 * map at the specified address range. The object's
360 * size should match that of the address range.
361 *
362 * Requires that the map be locked, and leaves it so.
363 */
364vm_map_insert(map, object, offset, start, end)
365 vm_map_t map;
366 vm_object_t object;
367 vm_offset_t offset;
368 vm_offset_t start;
369 vm_offset_t end;
370{
371 register vm_map_entry_t new_entry;
372 register vm_map_entry_t prev_entry;
373 vm_map_entry_t temp_entry;
374
375 /*
376 * Check that the start and end points are not bogus.
377 */
378
379 if ((start < map->min_offset) || (end > map->max_offset) ||
380 (start >= end))
381 return(KERN_INVALID_ADDRESS);
382
383 /*
384 * Find the entry prior to the proposed
385 * starting address; if it's part of an
386 * existing entry, this range is bogus.
387 */
388
389 if (vm_map_lookup_entry(map, start, &temp_entry))
390 return(KERN_NO_SPACE);
391
392 prev_entry = temp_entry;
393
394 /*
395 * Assert that the next entry doesn't overlap the
396 * end point.
397 */
398
399 if ((prev_entry->next != &map->header) &&
400 (prev_entry->next->start < end))
401 return(KERN_NO_SPACE);
402
403 /*
404 * See if we can avoid creating a new entry by
405 * extending one of our neighbors.
406 */
407
5d7b9ad3 408 if (object == NULL) {
409 if ((prev_entry != &map->header) &&
410 (prev_entry->end == start) &&
411 (map->is_main_map) &&
412 (prev_entry->is_a_map == FALSE) &&
413 (prev_entry->is_sub_map == FALSE) &&
414 (prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
415 (prev_entry->protection == VM_PROT_DEFAULT) &&
416 (prev_entry->max_protection == VM_PROT_DEFAULT) &&
417 (prev_entry->wired_count == 0)) {
418
419 if (vm_object_coalesce(prev_entry->object.vm_object,
5d7b9ad3 420 NULL,
421 prev_entry->offset,
422 (vm_offset_t) 0,
423 (vm_size_t)(prev_entry->end
424 - prev_entry->start),
425 (vm_size_t)(end - prev_entry->end))) {
426 /*
427 * Coalesced the two objects - can extend
428 * the previous map entry to include the
429 * new range.
430 */
431 map->size += (end - prev_entry->end);
432 prev_entry->end = end;
433 return(KERN_SUCCESS);
434 }
435 }
436 }
437
438 /*
439 * Create a new entry
440 */
441
442 new_entry = vm_map_entry_create(map);
443 new_entry->start = start;
444 new_entry->end = end;
445
446 new_entry->is_a_map = FALSE;
447 new_entry->is_sub_map = FALSE;
448 new_entry->object.vm_object = object;
449 new_entry->offset = offset;
450
451 new_entry->copy_on_write = FALSE;
452 new_entry->needs_copy = FALSE;
453
454 if (map->is_main_map) {
455 new_entry->inheritance = VM_INHERIT_DEFAULT;
456 new_entry->protection = VM_PROT_DEFAULT;
457 new_entry->max_protection = VM_PROT_DEFAULT;
458 new_entry->wired_count = 0;
459 }
460
461 /*
462 * Insert the new entry into the list
463 */
464
465 vm_map_entry_link(map, prev_entry, new_entry);
466 map->size += new_entry->end - new_entry->start;
467
468 /*
469 * Update the free space hint
470 */
471
472 if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start))
473 map->first_free = new_entry;
474
475 return(KERN_SUCCESS);
476}
477
478/*
479 * SAVE_HINT:
480 *
481 * Saves the specified entry as the hint for
482 * future lookups. Performs necessary interlocks.
483 */
484#define SAVE_HINT(map,value) \
485 simple_lock(&(map)->hint_lock); \
486 (map)->hint = (value); \
487 simple_unlock(&(map)->hint_lock);
488
489/*
490 * vm_map_lookup_entry: [ internal use only ]
491 *
492 * Finds the map entry containing (or
493 * immediately preceding) the specified address
494 * in the given map; the entry is returned
495 * in the "entry" parameter. The boolean
496 * result indicates whether the address is
497 * actually contained in the map.
498 */
499boolean_t vm_map_lookup_entry(map, address, entry)
500 register vm_map_t map;
501 register vm_offset_t address;
502 vm_map_entry_t *entry; /* OUT */
503{
504 register vm_map_entry_t cur;
505 register vm_map_entry_t last;
506
507 /*
508 * Start looking either from the head of the
509 * list, or from the hint.
510 */
511
512 simple_lock(&map->hint_lock);
513 cur = map->hint;
514 simple_unlock(&map->hint_lock);
515
516 if (cur == &map->header)
517 cur = cur->next;
518
519 if (address >= cur->start) {
520 /*
521 * Go from hint to end of list.
522 *
523 * But first, make a quick check to see if
524 * we are already looking at the entry we
525 * want (which is usually the case).
526 * Note also that we don't need to save the hint
527 * here... it is the same hint (unless we are
528 * at the header, in which case the hint didn't
529 * buy us anything anyway).
530 */
531 last = &map->header;
532 if ((cur != last) && (cur->end > address)) {
533 *entry = cur;
534 return(TRUE);
535 }
536 }
537 else {
538 /*
539 * Go from start to hint, *inclusively*
540 */
541 last = cur->next;
542 cur = map->header.next;
543 }
544
545 /*
546 * Search linearly
547 */
548
549 while (cur != last) {
550 if (cur->end > address) {
551 if (address >= cur->start) {
552 /*
553 * Save this lookup for future
554 * hints, and return
555 */
556
557 *entry = cur;
558 SAVE_HINT(map, cur);
559 return(TRUE);
560 }
561 break;
562 }
563 cur = cur->next;
564 }
565 *entry = cur->prev;
566 SAVE_HINT(map, *entry);
567 return(FALSE);
568}
569
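/*
 * Illustration only (not compiled): callers that operate on a half-open
 * range usually combine the lookup with clipping, as sketched here; this
 * is the idiom used by vm_map_protect, vm_map_inherit and vm_map_delete
 * below.
 */
#if 0
	vm_map_entry_t entry;

	if (vm_map_lookup_entry(map, start, &entry))
		vm_map_clip_start(map, entry, start);	/* start is inside an entry */
	else
		entry = entry->next;			/* start is in a hole */
	/* entry is now the first entry at or beyond start */
#endif /* illustration */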
570/*
571 * Find sufficient space for `length' bytes in the given map, starting at
572 * `start'. The map must be locked. Returns 0 on success, 1 on no space.
573 */
574int
575vm_map_findspace(map, start, length, addr)
576 register vm_map_t map;
577 register vm_offset_t start;
578 vm_size_t length;
579 vm_offset_t *addr;
580{
581 register vm_map_entry_t entry, next;
582 register vm_offset_t end;
583
584 if (start < map->min_offset)
585 start = map->min_offset;
586 if (start > map->max_offset)
587 return (1);
588
589 /*
590 * Look for the first possible address; if there's already
591 * something at this address, we have to start after it.
592 */
593 if (start == map->min_offset) {
594 if ((entry = map->first_free) != &map->header)
595 start = entry->end;
596 } else {
597 vm_map_entry_t tmp;
598 if (vm_map_lookup_entry(map, start, &tmp))
599 start = tmp->end;
600 entry = tmp;
601 }
602
603 /*
604 * Look through the rest of the map, trying to fit a new region in
605 * the gap between existing regions, or after the very last region.
606 */
607 for (;; start = (entry = next)->end) {
608 /*
609 * Find the end of the proposed new region. Be sure we didn't
610 * go beyond the end of the map, or wrap around the address;
611 * if so, we lose. Otherwise, if this is the last entry, or
612 * if the proposed new region fits before the next entry, we
613 * win.
614 */
615 end = start + length;
616 if (end > map->max_offset || end < start)
617 return (1);
618 next = entry->next;
619 if (next == &map->header || next->start >= end)
620 break;
621 }
622 SAVE_HINT(map, entry);
623 *addr = start;
624 return (0);
625}
626
627/*
628 * vm_map_find finds an unallocated region in the target address
629 * map with the given length. The search is defined to be
630 * first-fit from the specified address; the region found is
631 * returned in the same parameter.
632 *
633 */
634vm_map_find(map, object, offset, addr, length, find_space)
635 vm_map_t map;
636 vm_object_t object;
637 vm_offset_t offset;
638 vm_offset_t *addr; /* IN/OUT */
639 vm_size_t length;
640 boolean_t find_space;
641{
175f072e 642 register vm_offset_t start;
643 int result;
644
645 start = *addr;
175f072e 646 vm_map_lock(map);
175f072e 647 if (find_space) {
fc8007a4 648 if (vm_map_findspace(map, start, length, addr)) {
649 vm_map_unlock(map);
650 return (KERN_NO_SPACE);
651 }
fc8007a4 652 start = *addr;
175f072e 653 }
175f072e 654 result = vm_map_insert(map, object, offset, start, start + length);
175f072e 655 vm_map_unlock(map);
fc8007a4 656 return (result);
657}
658
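/*
 * Illustration only (not compiled): a typical allocation through
 * vm_map_find.  The map, the length `len' and the error handling are
 * assumptions made for this sketch; passing a NULL object leaves the
 * region unbacked until it is first touched.
 */
#if 0
	vm_offset_t addr;
	vm_size_t size;

	addr = 0;
	size = round_page(len);
	if (vm_map_find(map, NULL, (vm_offset_t) 0, &addr, size, TRUE) !=
	    KERN_SUCCESS)
		return (0);		/* no space */
	/* [addr, addr + size) is now reserved in map */
#endif /* illustration */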
659/*
660 * vm_map_simplify_entry: [ internal use only ]
661 *
662 * Simplify the given map entry by:
663 * removing extra sharing maps
664 * [XXX maybe later] merging with a neighbor
665 */
666void vm_map_simplify_entry(map, entry)
667 vm_map_t map;
668 vm_map_entry_t entry;
669{
670#ifdef lint
671 map++;
 672#endif /* lint */
673
674 /*
675 * If this entry corresponds to a sharing map, then
676 * see if we can remove the level of indirection.
677 * If it's not a sharing map, then it points to
678 * a VM object, so see if we can merge with either
679 * of our neighbors.
680 */
681
682 if (entry->is_sub_map)
683 return;
684 if (entry->is_a_map) {
685#if 0
686 vm_map_t my_share_map;
687 int count;
688
689 my_share_map = entry->object.share_map;
690 simple_lock(&my_share_map->ref_lock);
691 count = my_share_map->ref_count;
692 simple_unlock(&my_share_map->ref_lock);
693
694 if (count == 1) {
695 /* Can move the region from
696 * entry->start to entry->end (+ entry->offset)
697 * in my_share_map into place of entry.
698 * Later.
699 */
700 }
 701#endif /* 0 */
702 }
703 else {
704 /*
705 * Try to merge with our neighbors.
706 *
707 * Conditions for merge are:
708 *
709 * 1. entries are adjacent.
710 * 2. both entries point to objects
711 * with null pagers.
712 *
713 * If a merge is possible, we replace the two
714 * entries with a single entry, then merge
715 * the two objects into a single object.
716 *
717 * Now, all that is left to do is write the
718 * code!
719 */
720 }
721}
722
723/*
724 * vm_map_clip_start: [ internal use only ]
725 *
726 * Asserts that the given entry begins at or after
727 * the specified address; if necessary,
728 * it splits the entry into two.
729 */
730#define vm_map_clip_start(map, entry, startaddr) \
731{ \
732 if (startaddr > entry->start) \
733 _vm_map_clip_start(map, entry, startaddr); \
734}
735
736/*
737 * This routine is called only when it is known that
738 * the entry must be split.
739 */
740void _vm_map_clip_start(map, entry, start)
741 register vm_map_t map;
742 register vm_map_entry_t entry;
743 register vm_offset_t start;
744{
745 register vm_map_entry_t new_entry;
746
747 /*
748 * See if we can simplify this entry first
749 */
750
751 vm_map_simplify_entry(map, entry);
752
753 /*
754 * Split off the front portion --
755 * note that we must insert the new
756 * entry BEFORE this one, so that
757 * this entry has the specified starting
758 * address.
759 */
760
761 new_entry = vm_map_entry_create(map);
762 *new_entry = *entry;
763
764 new_entry->end = start;
765 entry->offset += (start - entry->start);
766 entry->start = start;
767
768 vm_map_entry_link(map, entry->prev, new_entry);
769
770 if (entry->is_a_map || entry->is_sub_map)
771 vm_map_reference(new_entry->object.share_map);
772 else
773 vm_object_reference(new_entry->object.vm_object);
774}
775
776/*
777 * vm_map_clip_end: [ internal use only ]
778 *
779 * Asserts that the given entry ends at or before
780 * the specified address; if necessary,
781 * it splits the entry into two.
782 */
783
784void _vm_map_clip_end();
785#define vm_map_clip_end(map, entry, endaddr) \
786{ \
787 if (endaddr < entry->end) \
788 _vm_map_clip_end(map, entry, endaddr); \
789}
790
791/*
792 * This routine is called only when it is known that
793 * the entry must be split.
794 */
795void _vm_map_clip_end(map, entry, end)
796 register vm_map_t map;
797 register vm_map_entry_t entry;
798 register vm_offset_t end;
799{
800 register vm_map_entry_t new_entry;
801
802 /*
803 * Create a new entry and insert it
804 * AFTER the specified entry
805 */
806
807 new_entry = vm_map_entry_create(map);
808 *new_entry = *entry;
809
810 new_entry->start = entry->end = end;
811 new_entry->offset += (end - entry->start);
812
813 vm_map_entry_link(map, entry, new_entry);
814
815 if (entry->is_a_map || entry->is_sub_map)
816 vm_map_reference(new_entry->object.share_map);
817 else
818 vm_object_reference(new_entry->object.vm_object);
819}
820
821/*
822 * VM_MAP_RANGE_CHECK: [ internal use only ]
823 *
824 * Asserts that the starting and ending region
825 * addresses fall within the valid range of the map.
826 */
827#define VM_MAP_RANGE_CHECK(map, start, end) \
828 { \
829 if (start < vm_map_min(map)) \
830 start = vm_map_min(map); \
831 if (end > vm_map_max(map)) \
832 end = vm_map_max(map); \
833 if (start > end) \
834 start = end; \
835 }
836
837/*
838 * vm_map_submap: [ kernel use only ]
839 *
840 * Mark the given range as handled by a subordinate map.
841 *
842 * This range must have been created with vm_map_find,
843 * and no other operations may have been performed on this
844 * range prior to calling vm_map_submap.
845 *
846 * Only a limited number of operations can be performed
 847 * within this range after calling vm_map_submap:
848 * vm_fault
849 * [Don't try vm_map_copy!]
850 *
851 * To remove a submapping, one must first remove the
852 * range from the superior map, and then destroy the
853 * submap (if desired). [Better yet, don't try it.]
854 */
855vm_map_submap(map, start, end, submap)
856 register vm_map_t map;
857 register vm_offset_t start;
858 register vm_offset_t end;
859 vm_map_t submap;
860{
861 vm_map_entry_t entry;
862 register int result = KERN_INVALID_ARGUMENT;
863
864 vm_map_lock(map);
865
866 VM_MAP_RANGE_CHECK(map, start, end);
867
868 if (vm_map_lookup_entry(map, start, &entry)) {
869 vm_map_clip_start(map, entry, start);
870 }
871 else
872 entry = entry->next;
873
874 vm_map_clip_end(map, entry, end);
875
876 if ((entry->start == start) && (entry->end == end) &&
877 (!entry->is_a_map) &&
5d7b9ad3 878 (entry->object.vm_object == NULL) &&
879 (!entry->copy_on_write)) {
880 entry->is_a_map = FALSE;
881 entry->is_sub_map = TRUE;
882 vm_map_reference(entry->object.sub_map = submap);
883 result = KERN_SUCCESS;
884 }
885 vm_map_unlock(map);
886
887 return(result);
888}
889
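/*
 * Illustration only (not compiled): the calling sequence described above,
 * roughly as a kmem_suballoc-style routine would issue it.  parent_map
 * and size are assumptions for this sketch, not copied from that code.
 */
#if 0
	vm_offset_t min, max;
	vm_map_t submap;

	min = 0;
	if (vm_map_find(parent_map, NULL, (vm_offset_t) 0, &min, size, TRUE)
	    != KERN_SUCCESS)
		panic("suballoc: no space in parent map");
	max = min + size;
	submap = vm_map_create(vm_map_pmap(parent_map), min, max, TRUE);
	if (vm_map_submap(parent_map, min, max, submap) != KERN_SUCCESS)
		panic("suballoc: vm_map_submap failed");
#endif /* illustration */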
890/*
891 * vm_map_protect:
892 *
893 * Sets the protection of the specified address
894 * region in the target map. If "set_max" is
895 * specified, the maximum protection is to be set;
896 * otherwise, only the current protection is affected.
897 */
898vm_map_protect(map, start, end, new_prot, set_max)
899 register vm_map_t map;
900 register vm_offset_t start;
901 register vm_offset_t end;
902 register vm_prot_t new_prot;
903 register boolean_t set_max;
904{
905 register vm_map_entry_t current;
906 vm_map_entry_t entry;
907
908 vm_map_lock(map);
909
910 VM_MAP_RANGE_CHECK(map, start, end);
911
912 if (vm_map_lookup_entry(map, start, &entry)) {
913 vm_map_clip_start(map, entry, start);
914 }
915 else
916 entry = entry->next;
917
918 /*
919 * Make a first pass to check for protection
920 * violations.
921 */
922
923 current = entry;
924 while ((current != &map->header) && (current->start < end)) {
 925		if (current->is_sub_map) {
			vm_map_unlock(map);
 926			return(KERN_INVALID_ARGUMENT);
		}
927 if ((new_prot & current->max_protection) != new_prot) {
928 vm_map_unlock(map);
929 return(KERN_PROTECTION_FAILURE);
930 }
931
932 current = current->next;
933 }
934
935 /*
936 * Go back and fix up protections.
937 * [Note that clipping is not necessary the second time.]
938 */
939
940 current = entry;
941
942 while ((current != &map->header) && (current->start < end)) {
943 vm_prot_t old_prot;
944
945 vm_map_clip_end(map, current, end);
946
947 old_prot = current->protection;
948 if (set_max)
949 current->protection =
950 (current->max_protection = new_prot) &
951 old_prot;
952 else
953 current->protection = new_prot;
954
955 /*
956 * Update physical map if necessary.
957 * Worry about copy-on-write here -- CHECK THIS XXX
958 */
959
960 if (current->protection != old_prot) {
961
962#define MASK(entry) ((entry)->copy_on_write ? ~VM_PROT_WRITE : \
963 VM_PROT_ALL)
964#define max(a,b) ((a) > (b) ? (a) : (b))
965
966 if (current->is_a_map) {
967 vm_map_entry_t share_entry;
968 vm_offset_t share_end;
969
970 vm_map_lock(current->object.share_map);
971 (void) vm_map_lookup_entry(
972 current->object.share_map,
973 current->offset,
974 &share_entry);
975 share_end = current->offset +
976 (current->end - current->start);
977 while ((share_entry !=
978 &current->object.share_map->header) &&
979 (share_entry->start < share_end)) {
980
981 pmap_protect(map->pmap,
982 (max(share_entry->start,
983 current->offset) -
984 current->offset +
985 current->start),
986 min(share_entry->end,
987 share_end) -
988 current->offset +
989 current->start,
990 current->protection &
991 MASK(share_entry));
992
993 share_entry = share_entry->next;
994 }
995 vm_map_unlock(current->object.share_map);
996 }
997 else
998 pmap_protect(map->pmap, current->start,
999 current->end,
1000 current->protection & MASK(entry));
1001#undef max
1002#undef MASK
1003 }
1004 current = current->next;
1005 }
1006
1007 vm_map_unlock(map);
1008 return(KERN_SUCCESS);
1009}
1010
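/*
 * Illustration only (not compiled): write-protecting a page-aligned range
 * while leaving the maximum protection alone.  The failure branch fires
 * when some entry's max_protection disallows the request; map, start and
 * end are assumptions for this sketch.
 */
#if 0
	if (vm_map_protect(map, start, end, VM_PROT_READ, FALSE) !=
	    KERN_SUCCESS)
		printf("could not write-protect the range\n");
#endif /* illustration */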
1011/*
1012 * vm_map_inherit:
1013 *
1014 * Sets the inheritance of the specified address
1015 * range in the target map. Inheritance
1016 * affects how the map will be shared with
1017 * child maps at the time of vm_map_fork.
1018 */
1019vm_map_inherit(map, start, end, new_inheritance)
1020 register vm_map_t map;
1021 register vm_offset_t start;
1022 register vm_offset_t end;
1023 register vm_inherit_t new_inheritance;
1024{
1025 register vm_map_entry_t entry;
1026 vm_map_entry_t temp_entry;
1027
1028 switch (new_inheritance) {
1029 case VM_INHERIT_NONE:
1030 case VM_INHERIT_COPY:
1031 case VM_INHERIT_SHARE:
1032 break;
1033 default:
1034 return(KERN_INVALID_ARGUMENT);
1035 }
1036
1037 vm_map_lock(map);
1038
1039 VM_MAP_RANGE_CHECK(map, start, end);
1040
1041 if (vm_map_lookup_entry(map, start, &temp_entry)) {
1042 entry = temp_entry;
1043 vm_map_clip_start(map, entry, start);
1044 }
1045 else
1046 entry = temp_entry->next;
1047
1048 while ((entry != &map->header) && (entry->start < end)) {
1049 vm_map_clip_end(map, entry, end);
1050
1051 entry->inheritance = new_inheritance;
1052
1053 entry = entry->next;
1054 }
1055
1056 vm_map_unlock(map);
1057 return(KERN_SUCCESS);
1058}
1059
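/*
 * Illustration only (not compiled): marking one region to be shared with
 * children created by vmspace_fork (below), and another to be left out of
 * them entirely.  The ranges are assumptions for this sketch.
 */
#if 0
	(void) vm_map_inherit(map, shared_start, shared_end, VM_INHERIT_SHARE);
	(void) vm_map_inherit(map, private_start, private_end, VM_INHERIT_NONE);
#endif /* illustration */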
1060/*
1061 * vm_map_pageable:
1062 *
1063 * Sets the pageability of the specified address
1064 * range in the target map. Regions specified
1065 * as not pageable require locked-down physical
1066 * memory and physical page maps.
1067 *
1068 * The map must not be locked, but a reference
1069 * must remain to the map throughout the call.
1070 */
1071vm_map_pageable(map, start, end, new_pageable)
1072 register vm_map_t map;
1073 register vm_offset_t start;
1074 register vm_offset_t end;
1075 register boolean_t new_pageable;
1076{
1077 register vm_map_entry_t entry;
1078 vm_map_entry_t temp_entry;
1079
1080 vm_map_lock(map);
1081
1082 VM_MAP_RANGE_CHECK(map, start, end);
1083
1084 /*
1085 * Only one pageability change may take place at one
1086 * time, since vm_fault assumes it will be called
1087 * only once for each wiring/unwiring. Therefore, we
1088 * have to make sure we're actually changing the pageability
1089 * for the entire region. We do so before making any changes.
1090 */
1091
1092 if (vm_map_lookup_entry(map, start, &temp_entry)) {
1093 entry = temp_entry;
1094 vm_map_clip_start(map, entry, start);
1095 }
1096 else
1097 entry = temp_entry->next;
1098 temp_entry = entry;
1099
1100 /*
1101 * Actions are rather different for wiring and unwiring,
1102 * so we have two separate cases.
1103 */
1104
1105 if (new_pageable) {
1106
1107 /*
1108 * Unwiring. First ensure that the range to be
1109 * unwired is really wired down.
1110 */
1111 while ((entry != &map->header) && (entry->start < end)) {
1112
1113 if (entry->wired_count == 0) {
1114 vm_map_unlock(map);
1115 return(KERN_INVALID_ARGUMENT);
1116 }
1117 entry = entry->next;
1118 }
1119
1120 /*
1121 * Now decrement the wiring count for each region.
1122 * If a region becomes completely unwired,
1123 * unwire its physical pages and mappings.
1124 */
1125 lock_set_recursive(&map->lock);
1126
1127 entry = temp_entry;
1128 while ((entry != &map->header) && (entry->start < end)) {
1129 vm_map_clip_end(map, entry, end);
1130
1131 entry->wired_count--;
1132 if (entry->wired_count == 0)
1133 vm_fault_unwire(map, entry->start, entry->end);
1134
1135 entry = entry->next;
1136 }
1137 lock_clear_recursive(&map->lock);
1138 }
1139
1140 else {
1141 /*
1142 * Wiring. We must do this in two passes:
1143 *
1144 * 1. Holding the write lock, we increment the
1145 * wiring count. For any area that is not already
1146 * wired, we create any shadow objects that need
1147 * to be created.
1148 *
1149 * 2. We downgrade to a read lock, and call
1150 * vm_fault_wire to fault in the pages for any
1151 * newly wired area (wired_count is 1).
1152 *
1153 * Downgrading to a read lock for vm_fault_wire avoids
1154 * a possible deadlock with another thread that may have
1155 * faulted on one of the pages to be wired (it would mark
1156 * the page busy, blocking us, then in turn block on the
1157 * map lock that we hold). Because of problems in the
1158 * recursive lock package, we cannot upgrade to a write
1159 * lock in vm_map_lookup. Thus, any actions that require
1160 * the write lock must be done beforehand. Because we
1161 * keep the read lock on the map, the copy-on-write status
1162 * of the entries we modify here cannot change.
1163 */
1164
1165 /*
1166 * Pass 1.
1167 */
1168 entry = temp_entry;
1169 while ((entry != &map->header) && (entry->start < end)) {
1170 vm_map_clip_end(map, entry, end);
1171
1172 entry->wired_count++;
1173 if (entry->wired_count == 1) {
1174
1175 /*
1176 * Perform actions of vm_map_lookup that need
1177 * the write lock on the map: create a shadow
1178 * object for a copy-on-write region, or an
1179 * object for a zero-fill region.
1180 *
1181 * We don't have to do this for entries that
1182 * point to sharing maps, because we won't hold
1183 * the lock on the sharing map.
1184 */
1185 if (!entry->is_a_map) {
1186 if (entry->needs_copy &&
1187 ((entry->protection & VM_PROT_WRITE) != 0)) {
1188
1189 vm_object_shadow(&entry->object.vm_object,
1190 &entry->offset,
1191 (vm_size_t)(entry->end
1192 - entry->start));
1193 entry->needs_copy = FALSE;
1194 }
5d7b9ad3 1195 else if (entry->object.vm_object == NULL) {
1196 entry->object.vm_object =
1197 vm_object_allocate((vm_size_t)(entry->end
1198 - entry->start));
1199 entry->offset = (vm_offset_t)0;
1200 }
1201 }
1202 }
1203
1204 entry = entry->next;
1205 }
1206
1207 /*
1208 * Pass 2.
1209 */
1210
1211 /*
1212 * HACK HACK HACK HACK
1213 *
1214 * If we are wiring in the kernel map or a submap of it,
1215 * unlock the map to avoid deadlocks. We trust that the
1216 * kernel threads are well-behaved, and therefore will
1217 * not do anything destructive to this region of the map
1218 * while we have it unlocked. We cannot trust user threads
1219 * to do the same.
1220 *
1221 * HACK HACK HACK HACK
1222 */
1223 if (vm_map_pmap(map) == kernel_pmap) {
1224 vm_map_unlock(map); /* trust me ... */
1225 }
1226 else {
1227 lock_set_recursive(&map->lock);
1228 lock_write_to_read(&map->lock);
1229 }
1230
1231 entry = temp_entry;
1232 while (entry != &map->header && entry->start < end) {
1233 if (entry->wired_count == 1) {
1234 vm_fault_wire(map, entry->start, entry->end);
1235 }
1236 entry = entry->next;
1237 }
1238
1239 if (vm_map_pmap(map) == kernel_pmap) {
1240 vm_map_lock(map);
1241 }
1242 else {
1243 lock_clear_recursive(&map->lock);
1244 }
1245 }
1246
1247 vm_map_unlock(map);
1248
1249 return(KERN_SUCCESS);
1250}
1251
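/*
 * Illustration only (not compiled): wiring and unwiring are symmetric
 * calls on the same page-aligned range.  A caller keeping a user buffer
 * resident during I/O would do roughly the following; addr, len and map
 * are assumptions for this sketch, and trunc_page/round_page are the
 * usual rounding macros.
 */
#if 0
	vm_offset_t start, end;

	start = trunc_page(addr);
	end = round_page(addr + len);
	(void) vm_map_pageable(map, start, end, FALSE);	/* wire */
	/* ... perform the I/O ... */
	(void) vm_map_pageable(map, start, end, TRUE);	/* unwire */
#endif /* illustration */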
1252/*
1253 * vm_map_entry_unwire: [ internal use only ]
1254 *
1255 * Make the region specified by this entry pageable.
1256 *
1257 * The map in question should be locked.
1258 * [This is the reason for this routine's existence.]
1259 */
1260void vm_map_entry_unwire(map, entry)
1261 vm_map_t map;
1262 register vm_map_entry_t entry;
1263{
1264 vm_fault_unwire(map, entry->start, entry->end);
1265 entry->wired_count = 0;
1266}
1267
1268/*
1269 * vm_map_entry_delete: [ internal use only ]
1270 *
1271 * Deallocate the given entry from the target map.
1272 */
1273void vm_map_entry_delete(map, entry)
1274 register vm_map_t map;
1275 register vm_map_entry_t entry;
1276{
1277 if (entry->wired_count != 0)
1278 vm_map_entry_unwire(map, entry);
1279
1280 vm_map_entry_unlink(map, entry);
1281 map->size -= entry->end - entry->start;
1282
1283 if (entry->is_a_map || entry->is_sub_map)
1284 vm_map_deallocate(entry->object.share_map);
1285 else
1286 vm_object_deallocate(entry->object.vm_object);
1287
1288 vm_map_entry_dispose(map, entry);
1289}
1290
1291/*
1292 * vm_map_delete: [ internal use only ]
1293 *
1294 * Deallocates the given address range from the target
1295 * map.
1296 *
1297 * When called with a sharing map, removes pages from
1298 * that region from all physical maps.
1299 */
1300vm_map_delete(map, start, end)
1301 register vm_map_t map;
1302 vm_offset_t start;
1303 register vm_offset_t end;
1304{
1305 register vm_map_entry_t entry;
1306 vm_map_entry_t first_entry;
1307
1308 /*
1309 * Find the start of the region, and clip it
1310 */
1311
1312 if (!vm_map_lookup_entry(map, start, &first_entry))
1313 entry = first_entry->next;
1314 else {
1315 entry = first_entry;
1316 vm_map_clip_start(map, entry, start);
1317
1318 /*
1319 * Fix the lookup hint now, rather than each
 1320 * time through the loop.
1321 */
1322
1323 SAVE_HINT(map, entry->prev);
1324 }
1325
1326 /*
1327 * Save the free space hint
1328 */
1329
1330 if (map->first_free->start >= start)
1331 map->first_free = entry->prev;
1332
1333 /*
1334 * Step through all entries in this region
1335 */
1336
1337 while ((entry != &map->header) && (entry->start < end)) {
1338 vm_map_entry_t next;
1339 register vm_offset_t s, e;
1340 register vm_object_t object;
1341
1342 vm_map_clip_end(map, entry, end);
1343
1344 next = entry->next;
1345 s = entry->start;
1346 e = entry->end;
1347
1348 /*
1349 * Unwire before removing addresses from the pmap;
1350 * otherwise, unwiring will put the entries back in
1351 * the pmap.
1352 */
1353
1354 object = entry->object.vm_object;
1355 if (entry->wired_count != 0)
1356 vm_map_entry_unwire(map, entry);
1357
1358 /*
1359 * If this is a sharing map, we must remove
1360 * *all* references to this data, since we can't
1361 * find all of the physical maps which are sharing
1362 * it.
1363 */
1364
1365 if (object == kernel_object || object == kmem_object)
1366 vm_object_page_remove(object, entry->offset,
1367 entry->offset + (e - s));
1368 else if (!map->is_main_map)
1369 vm_object_pmap_remove(object,
1370 entry->offset,
1371 entry->offset + (e - s));
1372 else
1373 pmap_remove(map->pmap, s, e);
1374
1375 /*
1376 * Delete the entry (which may delete the object)
1377 * only after removing all pmap entries pointing
1378 * to its pages. (Otherwise, its page frames may
1379 * be reallocated, and any modify bits will be
1380 * set in the wrong object!)
1381 */
1382
1383 vm_map_entry_delete(map, entry);
1384 entry = next;
1385 }
1386 return(KERN_SUCCESS);
1387}
1388
1389/*
1390 * vm_map_remove:
1391 *
1392 * Remove the given address range from the target map.
1393 * This is the exported form of vm_map_delete.
1394 */
1395vm_map_remove(map, start, end)
1396 register vm_map_t map;
1397 register vm_offset_t start;
1398 register vm_offset_t end;
1399{
1400 register int result;
1401
1402 vm_map_lock(map);
1403 VM_MAP_RANGE_CHECK(map, start, end);
1404 result = vm_map_delete(map, start, end);
1405 vm_map_unlock(map);
1406
1407 return(result);
1408}
1409
1410/*
1411 * vm_map_check_protection:
1412 *
1413 * Assert that the target map allows the specified
1414 * privilege on the entire address region given.
1415 * The entire region must be allocated.
1416 */
1417boolean_t vm_map_check_protection(map, start, end, protection)
1418 register vm_map_t map;
1419 register vm_offset_t start;
1420 register vm_offset_t end;
1421 register vm_prot_t protection;
1422{
1423 register vm_map_entry_t entry;
1424 vm_map_entry_t tmp_entry;
1425
1426 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1427 return(FALSE);
1428 }
1429
1430 entry = tmp_entry;
1431
1432 while (start < end) {
1433 if (entry == &map->header) {
1434 return(FALSE);
1435 }
1436
1437 /*
1438 * No holes allowed!
1439 */
1440
1441 if (start < entry->start) {
1442 return(FALSE);
1443 }
1444
1445 /*
1446 * Check protection associated with entry.
1447 */
1448
1449 if ((entry->protection & protection) != protection) {
1450 return(FALSE);
1451 }
1452
1453 /* go to next entry */
1454
1455 start = entry->end;
1456 entry = entry->next;
1457 }
1458 return(TRUE);
1459}
1460
1461/*
1462 * vm_map_copy_entry:
1463 *
1464 * Copies the contents of the source entry to the destination
1465 * entry. The entries *must* be aligned properly.
1466 */
1467void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
1468 vm_map_t src_map, dst_map;
1469 register vm_map_entry_t src_entry, dst_entry;
1470{
1471 vm_object_t temp_object;
1472
1473 if (src_entry->is_sub_map || dst_entry->is_sub_map)
1474 return;
1475
5d7b9ad3 1476 if (dst_entry->object.vm_object != NULL &&
224765a4 1477 (dst_entry->object.vm_object->flags & OBJ_INTERNAL) == 0)
1478 printf("vm_map_copy_entry: copying over permanent data!\n");
1479
1480 /*
1481 * If our destination map was wired down,
1482 * unwire it now.
1483 */
1484
1485 if (dst_entry->wired_count != 0)
1486 vm_map_entry_unwire(dst_map, dst_entry);
1487
1488 /*
1489 * If we're dealing with a sharing map, we
1490 * must remove the destination pages from
1491 * all maps (since we cannot know which maps
1492 * this sharing map belongs in).
1493 */
1494
1495 if (dst_map->is_main_map)
1496 pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end);
1497 else
1498 vm_object_pmap_remove(dst_entry->object.vm_object,
1499 dst_entry->offset,
1500 dst_entry->offset +
1501 (dst_entry->end - dst_entry->start));
1502
1503 if (src_entry->wired_count == 0) {
1504
1505 boolean_t src_needs_copy;
1506
1507 /*
1508 * If the source entry is marked needs_copy,
1509 * it is already write-protected.
1510 */
1511 if (!src_entry->needs_copy) {
1512
1513 boolean_t su;
1514
1515 /*
1516 * If the source entry has only one mapping,
1517 * we can just protect the virtual address
1518 * range.
1519 */
1520 if (!(su = src_map->is_main_map)) {
1521 simple_lock(&src_map->ref_lock);
1522 su = (src_map->ref_count == 1);
1523 simple_unlock(&src_map->ref_lock);
1524 }
1525
1526 if (su) {
1527 pmap_protect(src_map->pmap,
1528 src_entry->start,
1529 src_entry->end,
1530 src_entry->protection & ~VM_PROT_WRITE);
1531 }
1532 else {
1533 vm_object_pmap_copy(src_entry->object.vm_object,
1534 src_entry->offset,
1535 src_entry->offset + (src_entry->end
1536 -src_entry->start));
1537 }
1538 }
1539
1540 /*
1541 * Make a copy of the object.
1542 */
1543 temp_object = dst_entry->object.vm_object;
1544 vm_object_copy(src_entry->object.vm_object,
1545 src_entry->offset,
1546 (vm_size_t)(src_entry->end -
1547 src_entry->start),
1548 &dst_entry->object.vm_object,
1549 &dst_entry->offset,
1550 &src_needs_copy);
1551 /*
1552 * If we didn't get a copy-object now, mark the
1553 * source map entry so that a shadow will be created
1554 * to hold its changed pages.
1555 */
1556 if (src_needs_copy)
1557 src_entry->needs_copy = TRUE;
1558
1559 /*
1560 * The destination always needs to have a shadow
1561 * created.
1562 */
1563 dst_entry->needs_copy = TRUE;
1564
1565 /*
1566 * Mark the entries copy-on-write, so that write-enabling
1567 * the entry won't make copy-on-write pages writable.
1568 */
1569 src_entry->copy_on_write = TRUE;
1570 dst_entry->copy_on_write = TRUE;
1571 /*
1572 * Get rid of the old object.
1573 */
1574 vm_object_deallocate(temp_object);
1575
1576 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
1577 dst_entry->end - dst_entry->start, src_entry->start);
1578 }
1579 else {
1580 /*
1581 * Of course, wired down pages can't be set copy-on-write.
1582 * Cause wired pages to be copied into the new
1583 * map by simulating faults (the new pages are
1584 * pageable)
1585 */
1586 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
1587 }
1588}
1589
1590/*
1591 * vm_map_copy:
1592 *
1593 * Perform a virtual memory copy from the source
1594 * address map/range to the destination map/range.
1595 *
1596 * If src_destroy or dst_alloc is requested,
1597 * the source and destination regions should be
1598 * disjoint, not only in the top-level map, but
1599 * in the sharing maps as well. [The best way
1600 * to guarantee this is to use a new intermediate
1601 * map to make copies. This also reduces map
1602 * fragmentation.]
1603 */
1604vm_map_copy(dst_map, src_map,
1605 dst_addr, len, src_addr,
1606 dst_alloc, src_destroy)
1607 vm_map_t dst_map;
1608 vm_map_t src_map;
1609 vm_offset_t dst_addr;
1610 vm_size_t len;
1611 vm_offset_t src_addr;
1612 boolean_t dst_alloc;
1613 boolean_t src_destroy;
1614{
1615 register
1616 vm_map_entry_t src_entry;
1617 register
1618 vm_map_entry_t dst_entry;
1619 vm_map_entry_t tmp_entry;
1620 vm_offset_t src_start;
1621 vm_offset_t src_end;
1622 vm_offset_t dst_start;
1623 vm_offset_t dst_end;
1624 vm_offset_t src_clip;
1625 vm_offset_t dst_clip;
1626 int result;
1627 boolean_t old_src_destroy;
1628
1629 /*
1630 * XXX While we figure out why src_destroy screws up,
1631 * we'll do it by explicitly vm_map_delete'ing at the end.
1632 */
1633
1634 old_src_destroy = src_destroy;
1635 src_destroy = FALSE;
1636
1637 /*
1638 * Compute start and end of region in both maps
1639 */
1640
1641 src_start = src_addr;
1642 src_end = src_start + len;
1643 dst_start = dst_addr;
1644 dst_end = dst_start + len;
1645
1646 /*
1647 * Check that the region can exist in both source
1648 * and destination.
1649 */
1650
1651 if ((dst_end < dst_start) || (src_end < src_start))
1652 return(KERN_NO_SPACE);
1653
1654 /*
1655 * Lock the maps in question -- we avoid deadlock
1656 * by ordering lock acquisition by map value
1657 */
1658
1659 if (src_map == dst_map) {
1660 vm_map_lock(src_map);
1661 }
1662 else if ((int) src_map < (int) dst_map) {
1663 vm_map_lock(src_map);
1664 vm_map_lock(dst_map);
1665 } else {
1666 vm_map_lock(dst_map);
1667 vm_map_lock(src_map);
1668 }
1669
1670 result = KERN_SUCCESS;
1671
1672 /*
1673 * Check protections... source must be completely readable and
1674 * destination must be completely writable. [Note that if we're
1675 * allocating the destination region, we don't have to worry
1676 * about protection, but instead about whether the region
1677 * exists.]
1678 */
1679
1680 if (src_map->is_main_map && dst_map->is_main_map) {
1681 if (!vm_map_check_protection(src_map, src_start, src_end,
1682 VM_PROT_READ)) {
1683 result = KERN_PROTECTION_FAILURE;
1684 goto Return;
1685 }
1686
1687 if (dst_alloc) {
1688 /* XXX Consider making this a vm_map_find instead */
5d7b9ad3 1689 if ((result = vm_map_insert(dst_map, NULL,
1690 (vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS)
1691 goto Return;
1692 }
1693 else if (!vm_map_check_protection(dst_map, dst_start, dst_end,
1694 VM_PROT_WRITE)) {
1695 result = KERN_PROTECTION_FAILURE;
1696 goto Return;
1697 }
1698 }
1699
1700 /*
1701 * Find the start entries and clip.
1702 *
1703 * Note that checking protection asserts that the
1704 * lookup cannot fail.
1705 *
1706 * Also note that we wait to do the second lookup
1707 * until we have done the first clip, as the clip
1708 * may affect which entry we get!
1709 */
1710
1711 (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
1712 src_entry = tmp_entry;
1713 vm_map_clip_start(src_map, src_entry, src_start);
1714
1715 (void) vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry);
1716 dst_entry = tmp_entry;
1717 vm_map_clip_start(dst_map, dst_entry, dst_start);
1718
1719 /*
1720 * If both source and destination entries are the same,
1721 * retry the first lookup, as it may have changed.
1722 */
1723
1724 if (src_entry == dst_entry) {
1725 (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
1726 src_entry = tmp_entry;
1727 }
1728
1729 /*
1730 * If source and destination entries are still the same,
1731 * a null copy is being performed.
1732 */
1733
1734 if (src_entry == dst_entry)
1735 goto Return;
1736
1737 /*
1738 * Go through entries until we get to the end of the
1739 * region.
1740 */
1741
1742 while (src_start < src_end) {
1743 /*
1744 * Clip the entries to the endpoint of the entire region.
1745 */
1746
1747 vm_map_clip_end(src_map, src_entry, src_end);
1748 vm_map_clip_end(dst_map, dst_entry, dst_end);
1749
1750 /*
1751 * Clip each entry to the endpoint of the other entry.
1752 */
1753
1754 src_clip = src_entry->start + (dst_entry->end - dst_entry->start);
1755 vm_map_clip_end(src_map, src_entry, src_clip);
1756
1757 dst_clip = dst_entry->start + (src_entry->end - src_entry->start);
1758 vm_map_clip_end(dst_map, dst_entry, dst_clip);
1759
1760 /*
1761 * Both entries now match in size and relative endpoints.
1762 *
1763 * If both entries refer to a VM object, we can
1764 * deal with them now.
1765 */
1766
1767 if (!src_entry->is_a_map && !dst_entry->is_a_map) {
1768 vm_map_copy_entry(src_map, dst_map, src_entry,
1769 dst_entry);
1770 }
1771 else {
1772 register vm_map_t new_dst_map;
1773 vm_offset_t new_dst_start;
1774 vm_size_t new_size;
1775 vm_map_t new_src_map;
1776 vm_offset_t new_src_start;
1777
1778 /*
1779 * We have to follow at least one sharing map.
1780 */
1781
1782 new_size = (dst_entry->end - dst_entry->start);
1783
1784 if (src_entry->is_a_map) {
1785 new_src_map = src_entry->object.share_map;
1786 new_src_start = src_entry->offset;
1787 }
1788 else {
1789 new_src_map = src_map;
1790 new_src_start = src_entry->start;
1791 lock_set_recursive(&src_map->lock);
1792 }
1793
1794 if (dst_entry->is_a_map) {
1795 vm_offset_t new_dst_end;
1796
1797 new_dst_map = dst_entry->object.share_map;
1798 new_dst_start = dst_entry->offset;
1799
1800 /*
1801 * Since the destination sharing entries
1802 * will be merely deallocated, we can
1803 * do that now, and replace the region
1804 * with a null object. [This prevents
1805 * splitting the source map to match
1806 * the form of the destination map.]
1807 * Note that we can only do so if the
1808 * source and destination do not overlap.
1809 */
1810
1811 new_dst_end = new_dst_start + new_size;
1812
1813 if (new_dst_map != new_src_map) {
1814 vm_map_lock(new_dst_map);
1815 (void) vm_map_delete(new_dst_map,
1816 new_dst_start,
1817 new_dst_end);
1818 (void) vm_map_insert(new_dst_map,
5d7b9ad3 1819 NULL,
1820 (vm_offset_t) 0,
1821 new_dst_start,
1822 new_dst_end);
1823 vm_map_unlock(new_dst_map);
1824 }
1825 }
1826 else {
1827 new_dst_map = dst_map;
1828 new_dst_start = dst_entry->start;
1829 lock_set_recursive(&dst_map->lock);
1830 }
1831
1832 /*
1833 * Recursively copy the sharing map.
1834 */
1835
1836 (void) vm_map_copy(new_dst_map, new_src_map,
1837 new_dst_start, new_size, new_src_start,
1838 FALSE, FALSE);
1839
1840 if (dst_map == new_dst_map)
1841 lock_clear_recursive(&dst_map->lock);
1842 if (src_map == new_src_map)
1843 lock_clear_recursive(&src_map->lock);
1844 }
1845
1846 /*
1847 * Update variables for next pass through the loop.
1848 */
1849
1850 src_start = src_entry->end;
1851 src_entry = src_entry->next;
1852 dst_start = dst_entry->end;
1853 dst_entry = dst_entry->next;
1854
1855 /*
1856 * If the source is to be destroyed, here is the
1857 * place to do it.
1858 */
1859
1860 if (src_destroy && src_map->is_main_map &&
1861 dst_map->is_main_map)
1862 vm_map_entry_delete(src_map, src_entry->prev);
1863 }
1864
1865 /*
1866 * Update the physical maps as appropriate
1867 */
1868
1869 if (src_map->is_main_map && dst_map->is_main_map) {
1870 if (src_destroy)
1871 pmap_remove(src_map->pmap, src_addr, src_addr + len);
1872 }
1873
1874 /*
1875 * Unlock the maps
1876 */
1877
1878 Return: ;
1879
1880 if (old_src_destroy)
1881 vm_map_delete(src_map, src_addr, src_addr + len);
1882
1883 vm_map_unlock(src_map);
1884 if (src_map != dst_map)
1885 vm_map_unlock(dst_map);
1886
1887 return(result);
1888}
1889
1890/*
1891 * vmspace_fork:
1892 * Create a new process vmspace structure and vm_map
1893 * based on those of an existing process. The new map
1894 * is based on the old map, according to the inheritance
1895 * values on the regions in that map.
175f072e 1896 *
5d7b9ad3 1897 * The source map must not be locked.
175f072e 1898 */
1899struct vmspace *
1900vmspace_fork(vm1)
1901 register struct vmspace *vm1;
175f072e 1902{
1903 register struct vmspace *vm2;
1904 vm_map_t old_map = &vm1->vm_map;
1905 vm_map_t new_map;
1906 vm_map_entry_t old_entry;
1907 vm_map_entry_t new_entry;
1908 pmap_t new_pmap;
1909
1910 vm_map_lock(old_map);
1911
1912 vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
1913 old_map->entries_pageable);
1914 bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
1915 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
1916 new_pmap = &vm2->vm_pmap; /* XXX */
1917 new_map = &vm2->vm_map; /* XXX */
1918
1919 old_entry = old_map->header.next;
1920
1921 while (old_entry != &old_map->header) {
1922 if (old_entry->is_sub_map)
1923 panic("vm_map_fork: encountered a submap");
1924
1925 switch (old_entry->inheritance) {
1926 case VM_INHERIT_NONE:
1927 break;
1928
1929 case VM_INHERIT_SHARE:
1930 /*
1931 * If we don't already have a sharing map:
1932 */
1933
1934 if (!old_entry->is_a_map) {
1935 vm_map_t new_share_map;
1936 vm_map_entry_t new_share_entry;
1937
1938 /*
1939 * Create a new sharing map
1940 */
1941
5d7b9ad3 1942 new_share_map = vm_map_create(NULL,
1943 old_entry->start,
1944 old_entry->end,
1945 TRUE);
1946 new_share_map->is_main_map = FALSE;
1947
1948 /*
1949 * Create the only sharing entry from the
1950 * old task map entry.
1951 */
1952
1953 new_share_entry =
1954 vm_map_entry_create(new_share_map);
1955 *new_share_entry = *old_entry;
1956
1957 /*
1958 * Insert the entry into the new sharing
1959 * map
1960 */
1961
1962 vm_map_entry_link(new_share_map,
1963 new_share_map->header.prev,
1964 new_share_entry);
1965
1966 /*
1967 * Fix up the task map entry to refer
1968 * to the sharing map now.
1969 */
1970
1971 old_entry->is_a_map = TRUE;
1972 old_entry->object.share_map = new_share_map;
1973 old_entry->offset = old_entry->start;
1974 }
1975
1976 /*
1977 * Clone the entry, referencing the sharing map.
1978 */
1979
1980 new_entry = vm_map_entry_create(new_map);
1981 *new_entry = *old_entry;
1982 vm_map_reference(new_entry->object.share_map);
1983
1984 /*
1985 * Insert the entry into the new map -- we
1986 * know we're inserting at the end of the new
1987 * map.
1988 */
1989
1990 vm_map_entry_link(new_map, new_map->header.prev,
1991 new_entry);
1992
1993 /*
1994 * Update the physical map
1995 */
1996
1997 pmap_copy(new_map->pmap, old_map->pmap,
1998 new_entry->start,
1999 (old_entry->end - old_entry->start),
2000 old_entry->start);
2001 break;
2002
2003 case VM_INHERIT_COPY:
2004 /*
2005 * Clone the entry and link into the map.
2006 */
2007
2008 new_entry = vm_map_entry_create(new_map);
2009 *new_entry = *old_entry;
2010 new_entry->wired_count = 0;
5d7b9ad3 2011 new_entry->object.vm_object = NULL;
2012 new_entry->is_a_map = FALSE;
2013 vm_map_entry_link(new_map, new_map->header.prev,
2014 new_entry);
2015 if (old_entry->is_a_map) {
2016 int check;
2017
2018 check = vm_map_copy(new_map,
2019 old_entry->object.share_map,
2020 new_entry->start,
2021 (vm_size_t)(new_entry->end -
2022 new_entry->start),
2023 old_entry->offset,
2024 FALSE, FALSE);
2025 if (check != KERN_SUCCESS)
2026 printf("vm_map_fork: copy in share_map region failed\n");
2027 }
2028 else {
2029 vm_map_copy_entry(old_map, new_map, old_entry,
2030 new_entry);
2031 }
2032 break;
2033 }
2034 old_entry = old_entry->next;
2035 }
2036
2037 new_map->size = old_map->size;
2038 vm_map_unlock(old_map);
2039
5d7b9ad3 2040 return(vm2);
2041}
2042
2043/*
2044 * vm_map_lookup:
2045 *
2046 * Finds the VM object, offset, and
2047 * protection for a given virtual address in the
2048 * specified map, assuming a page fault of the
2049 * type specified.
2050 *
2051 * Leaves the map in question locked for read; return
2052 * values are guaranteed until a vm_map_lookup_done
2053 * call is performed. Note that the map argument
2054 * is in/out; the returned map must be used in
2055 * the call to vm_map_lookup_done.
2056 *
2057 * A handle (out_entry) is returned for use in
2058 * vm_map_lookup_done, to make that fast.
2059 *
2060 * If a lookup is requested with "write protection"
2061 * specified, the map may be changed to perform virtual
2062 * copying operations, although the data referenced will
2063 * remain the same.
2064 */
2065vm_map_lookup(var_map, vaddr, fault_type, out_entry,
2066 object, offset, out_prot, wired, single_use)
2067 vm_map_t *var_map; /* IN/OUT */
2068 register vm_offset_t vaddr;
2069 register vm_prot_t fault_type;
2070
2071 vm_map_entry_t *out_entry; /* OUT */
2072 vm_object_t *object; /* OUT */
2073 vm_offset_t *offset; /* OUT */
2074 vm_prot_t *out_prot; /* OUT */
2075 boolean_t *wired; /* OUT */
2076 boolean_t *single_use; /* OUT */
2077{
2078 vm_map_t share_map;
2079 vm_offset_t share_offset;
2080 register vm_map_entry_t entry;
2081 register vm_map_t map = *var_map;
2082 register vm_prot_t prot;
2083 register boolean_t su;
2084
2085 RetryLookup: ;
2086
2087 /*
2088 * Lookup the faulting address.
2089 */
2090
2091 vm_map_lock_read(map);
2092
2093#define RETURN(why) \
2094 { \
2095 vm_map_unlock_read(map); \
2096 return(why); \
2097 }
2098
2099 /*
 2100 * If the map has an interesting hint, try it before calling the
 2101 * full-blown lookup routine.
2102 */
2103
2104 simple_lock(&map->hint_lock);
2105 entry = map->hint;
2106 simple_unlock(&map->hint_lock);
2107
2108 *out_entry = entry;
2109
2110 if ((entry == &map->header) ||
2111 (vaddr < entry->start) || (vaddr >= entry->end)) {
2112 vm_map_entry_t tmp_entry;
2113
2114 /*
2115 * Entry was either not a valid hint, or the vaddr
2116 * was not contained in the entry, so do a full lookup.
2117 */
2118 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2119 RETURN(KERN_INVALID_ADDRESS);
2120
2121 entry = tmp_entry;
2122 *out_entry = entry;
2123 }
2124
2125 /*
2126 * Handle submaps.
2127 */
2128
2129 if (entry->is_sub_map) {
2130 vm_map_t old_map = map;
2131
2132 *var_map = map = entry->object.sub_map;
2133 vm_map_unlock_read(old_map);
2134 goto RetryLookup;
2135 }
2136
2137 /*
2138 * Check whether this task is allowed to have
2139 * this page.
2140 */
2141
2142 prot = entry->protection;
2143 if ((fault_type & (prot)) != fault_type)
2144 RETURN(KERN_PROTECTION_FAILURE);
2145
2146 /*
2147 * If this page is not pageable, we have to get
2148 * it for all possible accesses.
2149 */
2150
2151 if (*wired = (entry->wired_count != 0))
2152 prot = fault_type = entry->protection;
2153
2154 /*
2155 * If we don't already have a VM object, track
2156 * it down.
2157 */
2158
2159 if (su = !entry->is_a_map) {
2160 share_map = map;
2161 share_offset = vaddr;
2162 }
2163 else {
2164 vm_map_entry_t share_entry;
2165
2166 /*
2167 * Compute the sharing map, and offset into it.
2168 */
2169
2170 share_map = entry->object.share_map;
2171 share_offset = (vaddr - entry->start) + entry->offset;
2172
2173 /*
2174 * Look for the backing store object and offset
2175 */
2176
2177 vm_map_lock_read(share_map);
2178
2179 if (!vm_map_lookup_entry(share_map, share_offset,
2180 &share_entry)) {
2181 vm_map_unlock_read(share_map);
2182 RETURN(KERN_INVALID_ADDRESS);
2183 }
2184 entry = share_entry;
2185 }
2186
2187 /*
2188 * If the entry was copy-on-write, we either resolve the copy now
 * (write fault) or demote the permitted access (read fault); see below.
2189 */
2190
2191 if (entry->needs_copy) {
2192 /*
2193 * If we want to write the page, we may as well
2194 * handle that now since we've got the sharing
2195 * map locked.
2196 *
2197 * If we don't need to write the page, we just
2198 * demote the permissions allowed.
2199 */
2200
2201 if (fault_type & VM_PROT_WRITE) {
2202 /*
2203 * Make a new object, and place it in the
2204 * object chain. Note that no new references
2205 * have appeared -- one just moved from the
2206 * share map to the new object.
2207 */
2208
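			/*
			 * Added note: lock_read_to_write() returns nonzero
			 * when the read lock cannot be upgraded atomically;
			 * the read lock has been released in that case, so
			 * the entire lookup is restarted from RetryLookup.
			 */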
2209 if (lock_read_to_write(&share_map->lock)) {
2210 if (share_map != map)
2211 vm_map_unlock_read(map);
2212 goto RetryLookup;
2213 }
2214
2215 vm_object_shadow(
2216 &entry->object.vm_object,
2217 &entry->offset,
2218 (vm_size_t) (entry->end - entry->start));
2219
2220 entry->needs_copy = FALSE;
2221
2222 lock_write_to_read(&share_map->lock);
2223 }
2224 else {
2225 /*
2226 * We're attempting to read a copy-on-write
2227 * page -- don't allow writes.
2228 */
2229
2230 prot &= (~VM_PROT_WRITE);
2231 }
2232 }
2233
2234 /*
2235 * Create an object if necessary.
2236 */
5d7b9ad3 2237 if (entry->object.vm_object == NULL) {
175f072e KM 2238
2239 if (lock_read_to_write(&share_map->lock)) {
2240 if (share_map != map)
2241 vm_map_unlock_read(map);
2242 goto RetryLookup;
2243 }
2244
2245 entry->object.vm_object = vm_object_allocate(
2246 (vm_size_t)(entry->end - entry->start));
2247 entry->offset = 0;
2248 lock_write_to_read(&share_map->lock);
2249 }
2250
2251 /*
2252 * Return the object/offset from this entry. If the entry
2253 * was copy-on-write or empty, it has been fixed up.
2254 */
2255
2256 *offset = (share_offset - entry->start) + entry->offset;
2257 *object = entry->object.vm_object;
2258
2259 /*
2260 * Return whether this is the only map sharing this data.
2261 */
2262
2263 if (!su) {
2264 simple_lock(&share_map->ref_lock);
2265 su = (share_map->ref_count == 1);
2266 simple_unlock(&share_map->ref_lock);
2267 }
2268
2269 *out_prot = prot;
2270 *single_use = su;
2271
2272 return(KERN_SUCCESS);
2273
2274#undef RETURN
2275}
2276
2277/*
2278 * vm_map_lookup_done:
2279 *
2280 * Releases locks acquired by a vm_map_lookup
2281 * (according to the handle returned by that lookup).
2282 */
2283
2284void vm_map_lookup_done(map, entry)
2285 register vm_map_t map;
2286 vm_map_entry_t entry;
2287{
2288 /*
2289 * If this entry references a map, unlock it first.
2290 */
2291
2292 if (entry->is_a_map)
2293 vm_map_unlock_read(entry->object.share_map);
2294
2295 /*
2296 * Unlock the main-level map
2297 */
2298
2299 vm_map_unlock_read(map);
2300}
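
/*
 * Illustrative sketch only (not part of the original file): how a caller
 * such as a fault handler is expected to pair vm_map_lookup() with
 * vm_map_lookup_done().  The function name and the way the result is
 * consumed are hypothetical; only the lookup/done protocol itself is
 * taken from the comments above.
 */
#ifdef notdef
static int example_fault_lookup(map, vaddr, fault_type)
	vm_map_t map;
	vm_offset_t vaddr;
	vm_prot_t fault_type;
{
	vm_map_entry_t entry;
	vm_object_t object;
	vm_offset_t offset;
	vm_prot_t prot;
	boolean_t wired;
	boolean_t single_use;
	int result;

	/*
	 * The map argument is in/out: the lookup may descend into a
	 * submap and hand back that map instead, and the returned map
	 * is the one that must be passed to vm_map_lookup_done().
	 */
	result = vm_map_lookup(&map, vaddr, fault_type, &entry,
			&object, &offset, &prot, &wired, &single_use);
	if (result != KERN_SUCCESS)
		return(result);

	/*
	 * ... use object/offset here; they are guaranteed only while
	 * the map remains read-locked ...
	 */

	vm_map_lookup_done(map, entry);
	return(KERN_SUCCESS);
}
#endif /* notdef */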
2301
2302/*
2303 * Routine: vm_map_simplify
2304 * Purpose:
2305 * Attempt to simplify the map representation in
2306 * the vicinity of the given starting address.
2307 * Note:
2308 * This routine is intended primarily to keep the
2309 * kernel maps more compact -- they generally don't
2310 * benefit from the "expand a map entry" technology
2311 * at allocation time because the adjacent entry
2312 * is often wired down.
2313 */
2314void vm_map_simplify(map, start)
2315 vm_map_t map;
2316 vm_offset_t start;
2317{
2318 vm_map_entry_t this_entry;
2319 vm_map_entry_t prev_entry;
2320
2321 vm_map_lock(map);
2322 if (
2323 (vm_map_lookup_entry(map, start, &this_entry)) &&
2324 ((prev_entry = this_entry->prev) != &map->header) &&
2325
2326 (prev_entry->end == start) &&
2327 (map->is_main_map) &&
2328
2329 (prev_entry->is_a_map == FALSE) &&
2330 (prev_entry->is_sub_map == FALSE) &&
2331
2332 (this_entry->is_a_map == FALSE) &&
2333 (this_entry->is_sub_map == FALSE) &&
2334
2335 (prev_entry->inheritance == this_entry->inheritance) &&
2336 (prev_entry->protection == this_entry->protection) &&
2337 (prev_entry->max_protection == this_entry->max_protection) &&
2338 (prev_entry->wired_count == this_entry->wired_count) &&
2339
2340 (prev_entry->copy_on_write == this_entry->copy_on_write) &&
2341 (prev_entry->needs_copy == this_entry->needs_copy) &&
2342
2343 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
2344 ((prev_entry->offset + (prev_entry->end - prev_entry->start))
2345 == this_entry->offset)
2346 ) {
2347 if (map->first_free == this_entry)
2348 map->first_free = prev_entry;
2349
2350 SAVE_HINT(map, prev_entry);
2351 vm_map_entry_unlink(map, this_entry);
2352 prev_entry->end = this_entry->end;
2353 vm_object_deallocate(this_entry->object.vm_object);
2354 vm_map_entry_dispose(map, this_entry);
2355 }
2356 vm_map_unlock(map);
2357}
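
/*
 * Illustrative sketch only (not part of the original file): the effect
 * of vm_map_simplify().  The addresses, object and protections below are
 * made up; the point is that two adjacent entries merge only when every
 * attribute tested above matches and their object offsets abut.
 *
 *	before:	[0x1000, 0x2000)  obj=A  offset=0x0000  prot=rw  wired=0
 *		[0x2000, 0x3000)  obj=A  offset=0x1000  prot=rw  wired=0
 *
 *		vm_map_simplify(map, 0x2000);
 *
 *	after:	[0x1000, 0x3000)  obj=A  offset=0x0000  prot=rw  wired=0
 *
 * The second entry's reference to object A is released through
 * vm_object_deallocate() and the entry itself is disposed of.
 */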
2358
2359/*
2360 * vm_map_print: [ debug ]
2361 */
2362void vm_map_print(map, full)
2363 register vm_map_t map;
2364 boolean_t full;
2365{
2366 register vm_map_entry_t entry;
2367 extern int indent;
2368
2369 iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n",
2370 (map->is_main_map ? "Task" : "Share"),
2371 (int) map, (int) (map->pmap), map->ref_count, map->nentries,
2372 map->timestamp);
2373
2374 if (!full && indent)
2375 return;
2376
2377 indent += 2;
2378 for (entry = map->header.next; entry != &map->header;
2379 entry = entry->next) {
2380 iprintf("map entry 0x%x: start=0x%x, end=0x%x, ",
2381 (int) entry, (int) entry->start, (int) entry->end);
2382 if (map->is_main_map) {
2383 static char *inheritance_name[4] =
2384 { "share", "copy", "none", "donate_copy"};
2385 printf("prot=%x/%x/%s, ",
2386 entry->protection,
2387 entry->max_protection,
2388 inheritance_name[entry->inheritance]);
2389 if (entry->wired_count != 0)
2390 printf("wired, ");
2391 }
2392
2393 if (entry->is_a_map || entry->is_sub_map) {
2394 printf("share=0x%x, offset=0x%x\n",
2395 (int) entry->object.share_map,
2396 (int) entry->offset);
2397 if ((entry->prev == &map->header) ||
2398 (!entry->prev->is_a_map) ||
2399 (entry->prev->object.share_map !=
2400 entry->object.share_map)) {
2401 indent += 2;
2402 vm_map_print(entry->object.share_map, full);
2403 indent -= 2;
2404 }
2405
2406 }
2407 else {
2408 printf("object=0x%x, offset=0x%x",
2409 (int) entry->object.vm_object,
2410 (int) entry->offset);
2411 if (entry->copy_on_write)
2412 printf(", copy (%s)",
2413 entry->needs_copy ? "needed" : "done");
2414 printf("\n");
2415
2416 if ((entry->prev == &map->header) ||
2417 (entry->prev->is_a_map) ||
2418 (entry->prev->object.vm_object !=
2419 entry->object.vm_object)) {
2420 indent += 2;
2421 vm_object_print(entry->object.vm_object, full);
2422 indent -= 2;
2423 }
2424 }
2425 }
2426 indent -= 2;
2427}