/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_kern.c	7.4 (Berkeley) %G%
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Kernel memory management.
 */

#include "param.h"

#include "vm.h"
#include "vm_page.h"
#include "vm_pageout.h"
#include "vm_kern.h"

/*
 * kmem_alloc_pageable:
 *
 * Allocate pageable memory in the kernel's address map.
 * The map must be "kernel_map" (see the disabled sanity check below).
 */
vm_offset_t kmem_alloc_pageable(map, size)
        vm_map_t map;
        register vm_size_t size;
{
        vm_offset_t addr;
        register int result;

#if 0
        if (map != kernel_map)
                panic("kmem_alloc_pageable: not called with kernel_map");
#endif

        size = round_page(size);

        addr = vm_map_min(map);
        result = vm_map_find(map, NULL, (vm_offset_t) 0,
                        &addr, size, TRUE);
        if (result != KERN_SUCCESS) {
                return(0);
        }

        return(addr);
}
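
/*
 * Usage sketch (illustrative, not part of the original source; "nbytes"
 * is a hypothetical caller-supplied byte count):
 *
 *      vm_offset_t va;
 *
 *      va = kmem_alloc_pageable(kernel_map, (vm_size_t) nbytes);
 *      if (va == 0)
 *              ... no space left in the map; caller must recover ...
 *
 * The memory is pageable: it is backed by pages only as it is touched.
 */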

/*
 * kmem_alloc:
 *
 * Allocate wired-down memory in the kernel's address map
 * or a submap.
 */
vm_offset_t kmem_alloc(map, size)
        register vm_map_t map;
        register vm_size_t size;
{
        vm_offset_t addr;
        register int result;
        register vm_offset_t offset;
        extern vm_object_t kernel_object;
        vm_offset_t i;

        size = round_page(size);

        /*
         * Use the kernel object for wired-down kernel pages.
         * Assume that no region of the kernel object is
         * referenced more than once.
         */

        addr = vm_map_min(map);
        result = vm_map_find(map, NULL, (vm_offset_t) 0,
                        &addr, size, TRUE);
        if (result != KERN_SUCCESS) {
                return(0);
        }

        /*
         * Since we didn't know where the new region would
         * start, we couldn't supply the correct offset into
         * the kernel object.  Re-allocate that address
         * region with the correct offset.
         */

        offset = addr - VM_MIN_KERNEL_ADDRESS;
        vm_object_reference(kernel_object);

        vm_map_lock(map);
        vm_map_delete(map, addr, addr + size);
        vm_map_insert(map, kernel_object, offset, addr, addr + size);
        vm_map_unlock(map);

        /*
         * Guarantee that there are pages already in this object
         * before calling vm_map_pageable.  This is to prevent the
         * following scenario:
         *
         *      1) Threads have swapped out, so that there is a
         *         pager for the kernel_object.
         *      2) The kmsg zone is empty, and so we are kmem_alloc'ing
         *         a new page for it.
         *      3) vm_map_pageable calls vm_fault; there is no page,
         *         but there is a pager, so we call
         *         pager_data_request.  But the kmsg zone is empty,
         *         so we must kmem_alloc.
         *      4) goto 1
         *      5) Even if the kmsg zone is not empty: when we get
         *         the data back from the pager, it will be (very
         *         stale) non-zero data.  kmem_alloc is defined to
         *         return zero-filled memory.
         *
         * We're intentionally not activating the pages we allocate
         * to prevent a race with page-out.  vm_map_pageable will wire
         * the pages.
         */

        vm_object_lock(kernel_object);
        for (i = 0; i < size; i += PAGE_SIZE) {
                vm_page_t mem;

                while ((mem = vm_page_alloc(kernel_object, offset + i))
                                == NULL) {
                        vm_object_unlock(kernel_object);
                        VM_WAIT;
                        vm_object_lock(kernel_object);
                }
                vm_page_zero_fill(mem);
                mem->busy = FALSE;
        }
        vm_object_unlock(kernel_object);

        /*
         * And finally, mark the data as non-pageable.
         */

        (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);

        /*
         * Try to coalesce the map.
         */

        vm_map_simplify(map, addr);

        return(addr);
}

/*
 * kmem_free:
 *
 * Release a region of kernel virtual memory allocated
 * with kmem_alloc, and return the physical pages
 * associated with that region.
 */
void kmem_free(map, addr, size)
        vm_map_t map;
        register vm_offset_t addr;
        vm_size_t size;
{
        (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}
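
/*
 * Usage sketch (illustrative only): kmem_alloc and kmem_free are used
 * in matched pairs; the two-page size here is arbitrary.
 *
 *      vm_offset_t buf;
 *
 *      buf = kmem_alloc(kernel_map, 2 * PAGE_SIZE);
 *      if (buf == 0)
 *              ... allocation failed; caller-specific recovery ...
 *      ... use the zero-filled, wired memory at buf ...
 *      kmem_free(kernel_map, buf, 2 * PAGE_SIZE);
 */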

/*
 * kmem_suballoc:
 *
 * Allocates a map to manage a subrange
 * of the kernel virtual address space.
 *
 * Arguments are as follows:
 *
 *      parent          Map to take range from
 *      min, max        Returned endpoints of map
 *      size            Size of range to find
 *      pageable        Can the region be paged
 */
vm_map_t kmem_suballoc(parent, min, max, size, pageable)
        register vm_map_t parent;
        vm_offset_t *min, *max;
        register vm_size_t size;
        boolean_t pageable;
{
        register int ret;
        vm_map_t result;

        size = round_page(size);

        *min = (vm_offset_t) vm_map_min(parent);
        ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
                        min, size, TRUE);
        if (ret != KERN_SUCCESS) {
                printf("kmem_suballoc: bad status return of %d.\n", ret);
                panic("kmem_suballoc");
        }
        *max = *min + size;
        pmap_reference(vm_map_pmap(parent));
        result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
        if (result == NULL)
                panic("kmem_suballoc: cannot create submap");
        if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
                panic("kmem_suballoc: unable to change range to submap");
        return(result);
}
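
/*
 * Usage sketch (illustrative; "foo_map", "foo_min", "foo_max" and the
 * size are hypothetical): a subsystem carves out its own submap at
 * boot, in the same style as kmem_map and mb_map, and satisfies later
 * allocations from that submap instead of kernel_map.
 *
 *      vm_map_t foo_map;
 *      vm_offset_t foo_min, foo_max;
 *
 *      foo_map = kmem_suballoc(kernel_map, &foo_min, &foo_max,
 *                      16 * PAGE_SIZE, FALSE);
 */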

/*
 * vm_move:
 *
 * Move memory from source to destination map, possibly deallocating
 * the source map reference to the memory.
 *
 * Parameters are as follows:
 *
 *      src_map         Source address map
 *      src_addr        Address within source map
 *      dst_map         Destination address map
 *      num_bytes       Amount of data (in bytes) to copy/move
 *      src_dealloc     Should source be removed after copy?
 *
 * Assumes the src and dst maps are not already locked.
 *
 * Returns new destination address or 0 (if a failure occurs).
 */
vm_offset_t vm_move(src_map, src_addr, dst_map, num_bytes, src_dealloc)
        vm_map_t src_map;
        register vm_offset_t src_addr;
        register vm_map_t dst_map;
        vm_offset_t num_bytes;
        boolean_t src_dealloc;
{
        register vm_offset_t src_start;         /* Beginning of region */
        register vm_size_t src_size;            /* Size of rounded region */
        vm_offset_t dst_start;                  /* Destination address */
        register int result;

        /*
         * Page-align the source region.
         */

        src_start = trunc_page(src_addr);
        src_size = round_page(src_addr + num_bytes) - src_start;

        /*
         * If there's no destination, we can be at most deallocating
         * the source range.
         */
        if (dst_map == NULL) {
                if (src_dealloc &&
                    vm_deallocate(src_map, src_start, src_size)
                                != KERN_SUCCESS) {
                        printf("vm_move: deallocate of source");
                        printf(" failed, dealloc_only clause\n");
                }
                return(0);
        }

        /*
         * Allocate a place to put the copy.
         */

        dst_start = (vm_offset_t) 0;
        if ((result = vm_allocate(dst_map, &dst_start, src_size, TRUE))
                        == KERN_SUCCESS) {
                /*
                 * Perform the copy, asking for deallocation if desired.
                 */
                result = vm_map_copy(dst_map, src_map, dst_start, src_size,
                                src_start, FALSE, src_dealloc);
        }

        /*
         * Return the destination address corresponding to
         * the source address given (rather than the front
         * of the newly-allocated page).
         */

        if (result == KERN_SUCCESS)
                return(dst_start + (src_addr - src_start));
        return(0);
}
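
/*
 * Usage sketch (illustrative; "user_map", "uaddr" and "len" are
 * hypothetical): moving an unaligned buffer into the kernel map.
 * Note that the value returned corresponds to src_addr itself, not
 * to the front of the newly allocated, page-aligned region.
 *
 *      vm_offset_t kva;
 *
 *      kva = vm_move(user_map, uaddr, kernel_map, len, TRUE);
 *      if (kva == 0)
 *              ... destination allocation or copy failed ...
 */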

/*
 * kmem_malloc:
 *
 * Allocate wired-down memory in the kernel's address map for the higher
 * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 * kmem_alloc() because we may need to allocate memory at interrupt
 * level where we cannot block (canwait == FALSE).
 *
 * This routine has its own private kernel submap (kmem_map) and object
 * (kmem_object).  This, combined with the fact that only malloc uses
 * this routine, ensures that we will never block in map or object waits.
 *
 * Note that this still only works in a uni-processor environment and
 * when called at splhigh().
 *
 * We don't worry about expanding the map (adding entries) since entries
 * for wired maps are statically allocated.
 */
vm_offset_t
kmem_malloc(map, size, canwait)
        register vm_map_t map;
        register vm_size_t size;
        boolean_t canwait;
{
        register vm_offset_t offset, i;
        vm_map_entry_t entry;
        vm_offset_t addr;
        vm_page_t m;
        extern vm_object_t kmem_object;

        if (map != kmem_map && map != mb_map)
                panic("kmem_malloc: map != {kmem,mb}_map");

        size = round_page(size);
        addr = vm_map_min(map);

        if (vm_map_find(map, NULL, (vm_offset_t) 0,
                        &addr, size, TRUE) != KERN_SUCCESS) {
                if (canwait)
                        panic("kmem_malloc: kmem_map too small");
                return(0);
        }

        /*
         * Since we didn't know where the new region would start,
         * we couldn't supply the correct offset into the kmem object.
         * Re-allocate that address region with the correct offset.
         */
        offset = addr - vm_map_min(kmem_map);
        vm_object_reference(kmem_object);

        vm_map_lock(map);
        vm_map_delete(map, addr, addr + size);
        vm_map_insert(map, kmem_object, offset, addr, addr + size);

        /*
         * If we can wait, just mark the range as wired
         * (will fault pages as necessary).
         */
        if (canwait) {
                vm_map_unlock(map);
                (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
                                FALSE);
                vm_map_simplify(map, addr);
                return(addr);
        }

        /*
         * If we cannot wait then we must allocate all memory up front,
         * pulling it off the active queue to prevent pageout.
         */
        vm_object_lock(kmem_object);
        for (i = 0; i < size; i += PAGE_SIZE) {
                m = vm_page_alloc(kmem_object, offset + i);

                /*
                 * Ran out of space: free everything up and return.
                 * We don't need to lock the page queues here, as we
                 * know that the pages we got aren't on any queues.
                 */
                if (m == NULL) {
                        while (i != 0) {
                                i -= PAGE_SIZE;
                                m = vm_page_lookup(kmem_object, offset + i);
                                vm_page_free(m);
                        }
                        vm_object_unlock(kmem_object);
                        vm_map_delete(map, addr, addr + size);
                        vm_map_unlock(map);
                        return(0);
                }
#if 0
                vm_page_zero_fill(m);
#endif
                m->busy = FALSE;
        }
        vm_object_unlock(kmem_object);

        /*
         * Mark map entry as non-pageable.
         * Assert: vm_map_insert() will never be able to extend the previous
         * entry so there will be a new entry exactly corresponding to this
         * address range and it will have wired_count == 0.
         */
        if (!vm_map_lookup_entry(map, addr, &entry) ||
            entry->start != addr || entry->end != addr + size ||
            entry->wired_count)
                panic("kmem_malloc: entry not found or misaligned");
        entry->wired_count++;

        /*
         * Loop thru pages, entering them in the pmap.
         * (We cannot add them to the wired count without
         * wrapping the vm_page_queue_lock in splimp...)
         */
        for (i = 0; i < size; i += PAGE_SIZE) {
                vm_object_lock(kmem_object);
                m = vm_page_lookup(kmem_object, offset + i);
                vm_object_unlock(kmem_object);
                pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
                                VM_PROT_DEFAULT, TRUE);
        }
        vm_map_unlock(map);

        vm_map_simplify(map, addr);
        return(addr);
}
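
/*
 * Usage sketch (illustrative): roughly how the higher-level allocator
 * in kern/kern_malloc.c would be expected to call in; the "nowait"
 * flag handling is an assumption, not a quote of that file.
 *
 *      vm_offset_t va;
 *
 *      va = kmem_malloc(kmem_map, (vm_size_t) allocsize,
 *                      nowait ? FALSE : TRUE);
 *      if (va == 0)
 *              ... at interrupt level, fail the request rather
 *                  than block ...
 */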

/*
 * kmem_alloc_wait:
 *
 * Allocates pageable memory from a sub-map of the kernel.  If the submap
 * has no room, the caller sleeps waiting for more memory in the submap.
 */
vm_offset_t kmem_alloc_wait(map, size)
        vm_map_t map;
        vm_size_t size;
{
        vm_offset_t addr;
        int result;

        size = round_page(size);

        do {
                /*
                 * To make this work for more than one map,
                 * use the map's lock to lock out sleepers/wakers.
                 * Unfortunately, vm_map_find also grabs the map lock.
                 */
                vm_map_lock(map);
                lock_set_recursive(&map->lock);

                addr = vm_map_min(map);
                result = vm_map_find(map, NULL, (vm_offset_t) 0,
                                &addr, size, TRUE);

                lock_clear_recursive(&map->lock);
                if (result != KERN_SUCCESS) {
                        /*
                         * If the request can never be satisfied,
                         * fail now; otherwise sleep until someone
                         * frees memory in this map.
                         */
                        if ((vm_map_max(map) - vm_map_min(map)) < size) {
                                vm_map_unlock(map);
                                return(0);
                        }
                        assert_wait((int)map, TRUE);
                        vm_map_unlock(map);
                        thread_block();
                } else {
                        vm_map_unlock(map);
                }
        } while (result != KERN_SUCCESS);

        return(addr);
}

/*
 * kmem_free_wakeup:
 *
 * Returns memory to a submap of the kernel, and wakes up any threads
 * waiting for memory in that map.
 */
void kmem_free_wakeup(map, addr, size)
        vm_map_t map;
        vm_offset_t addr;
        vm_size_t size;
{
        vm_map_lock(map);
        (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
        thread_wakeup((int)map);
        vm_map_unlock(map);
}
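
/*
 * Usage sketch of the wait/wakeup pairing (illustrative; "arg_map" and
 * the sizes are hypothetical): thread A blocks in kmem_alloc_wait when
 * the submap is full, and thread B's kmem_free_wakeup unblocks it so
 * that A's retry of vm_map_find can succeed in the freed range.
 *
 *      Thread A:       addr = kmem_alloc_wait(arg_map, arg_size);
 *      Thread B:       kmem_free_wakeup(arg_map, old_addr, old_size);
 */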

/*
 * kmem_init:
 *
 * Initialize the kernel's virtual memory map, taking
 * into account all memory allocated up to this time.
 */
void kmem_init(start, end)
        vm_offset_t start;
        vm_offset_t end;
{
        vm_offset_t addr;
        extern vm_map_t kernel_map;

        addr = VM_MIN_KERNEL_ADDRESS;
        kernel_map = vm_map_create(pmap_kernel(), addr, end, FALSE);
        /*
         * Reserve the range [VM_MIN_KERNEL_ADDRESS, start) so that
         * memory handed out before the VM system was running is
         * never allocated again.
         */
        (void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0,
                        &addr, (start - VM_MIN_KERNEL_ADDRESS),
                        FALSE);
}
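
/*
 * Usage sketch (illustrative; "firstaddr" stands for whatever address
 * bootstrap allocation has reached): kmem_init is expected to run once
 * at system startup, before any of the allocators above are used, e.g.
 *
 *      kmem_init(firstaddr, VM_MAX_KERNEL_ADDRESS);
 */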