Commit | Line | Data |
---|---|---|
06c5935e WJ |
1 | /* |
2 | * Copyright (c) 1991 Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * The Mach Operating System project at Carnegie-Mellon University. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. All advertising materials mentioning features or use of this software | |
17 | * must display the following acknowledgement: | |
18 | * This product includes software developed by the University of | |
19 | * California, Berkeley and its contributors. | |
20 | * 4. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | |
23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | |
35 | * | |
36 | * @(#)vm_glue.c 7.8 (Berkeley) 5/15/91 | |
37 | * | |
38 | * | |
39 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. | |
40 | * All rights reserved. | |
41 | * | |
42 | * Permission to use, copy, modify and distribute this software and | |
43 | * its documentation is hereby granted, provided that both the copyright | |
44 | * notice and this permission notice appear in all copies of the | |
45 | * software, derivative works or modified versions, and any portions | |
46 | * thereof, and that both notices appear in supporting documentation. | |
47 | * | |
48 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
49 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | |
50 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
51 | * | |
52 | * Carnegie Mellon requests users of this software to return to | |
53 | * | |
54 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
55 | * School of Computer Science | |
56 | * Carnegie Mellon University | |
57 | * Pittsburgh PA 15213-3890 | |
58 | * | |
59 | * any improvements or extensions that they make and grant Carnegie the | |
60 | * rights to redistribute these changes. | |
61 | */ | |
/* RCS identification string for this file. */
static char rcsid[] =
	"$Header: /usr/bill/working/sys/vm/RCS/vm_glue.c,v 1.2 92/01/21 21:58:21 william Exp $";
63 | ||
64 | #include "param.h" | |
65 | #include "systm.h" | |
66 | #include "proc.h" | |
67 | #include "resourcevar.h" | |
68 | #include "buf.h" | |
69 | #include "user.h" | |
70 | ||
71 | #include "vm.h" | |
72 | #include "vm_page.h" | |
73 | #include "vm_kern.h" | |
74 | ||
75 | int avefree = 0; /* XXX */ | |
76 | unsigned maxdmap = MAXDSIZ; /* XXX */ | |
77 | int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */ | |
78 | ||
79 | kernacc(addr, len, rw) | |
80 | caddr_t addr; | |
81 | int len, rw; | |
82 | { | |
83 | boolean_t rv; | |
84 | vm_offset_t saddr, eaddr; | |
85 | vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; | |
86 | ||
87 | saddr = trunc_page(addr); | |
88 | eaddr = round_page(addr+len-1); | |
89 | rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); | |
90 | /* | |
91 | * XXX there are still some things (e.g. the buffer cache) that | |
92 | * are managed behind the VM system's back so even though an | |
93 | * address is accessible in the mind of the VM system, there may | |
94 | * not be physical pages where the VM thinks there is. This can | |
95 | * lead to bogus allocation of pages in the kernel address space | |
96 | * or worse, inconsistencies at the pmap level. We only worry | |
97 | * about the buffer cache for now. | |
98 | */ | |
99 | if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers && | |
100 | saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf)) | |
101 | rv = FALSE; | |
102 | return(rv == TRUE); | |
103 | } | |
104 | ||
105 | useracc(addr, len, rw) | |
106 | caddr_t addr; | |
107 | int len, rw; | |
108 | { | |
109 | boolean_t rv; | |
110 | vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; | |
111 | ||
112 | rv = vm_map_check_protection(&curproc->p_vmspace->vm_map, | |
113 | trunc_page(addr), round_page(addr+len-1), prot); | |
114 | return(rv == TRUE); | |
115 | } | |
116 | ||
117 | #ifdef KGDB | |
118 | /* | |
119 | * Change protections on kernel pages from addr to addr+len | |
120 | * (presumably so debugger can plant a breakpoint). | |
121 | * All addresses are assumed to reside in the Sysmap, | |
122 | */ | |
123 | chgkprot(addr, len, rw) | |
124 | register caddr_t addr; | |
125 | int len, rw; | |
126 | { | |
127 | vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; | |
128 | ||
129 | vm_map_protect(kernel_map, trunc_page(addr), | |
130 | round_page(addr+len-1), prot, FALSE); | |
131 | } | |
132 | #endif | |
133 | ||
134 | vslock(addr, len) | |
135 | caddr_t addr; | |
136 | u_int len; | |
137 | { | |
138 | vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), | |
139 | round_page(addr+len-1), FALSE); | |
140 | } | |
141 | ||
142 | vsunlock(addr, len, dirtied) | |
143 | caddr_t addr; | |
144 | u_int len; | |
145 | int dirtied; | |
146 | { | |
147 | #ifdef lint | |
148 | dirtied++; | |
149 | #endif lint | |
150 | vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), | |
151 | round_page(addr+len-1), TRUE); | |
152 | } | |
153 | ||
154 | /* | |
155 | * Implement fork's actions on an address space. | |
156 | * Here we arrange for the address space to be copied or referenced, | |
157 | * allocate a user struct (pcb and kernel stack), then call the | |
158 | * machine-dependent layer to fill those in and make the new process | |
159 | * ready to run. | |
160 | * NOTE: the kernel stack may be at a different location in the child | |
161 | * process, and thus addresses of automatic variables may be invalid | |
162 | * after cpu_fork returns in the child process. We do nothing here | |
163 | * after cpu_fork returns. | |
164 | */ | |
165 | vm_fork(p1, p2, isvfork) | |
166 | register struct proc *p1, *p2; | |
167 | int isvfork; | |
168 | { | |
169 | register struct user *up; | |
170 | vm_offset_t addr; | |
171 | ||
172 | #ifdef i386 | |
173 | /* | |
174 | * avoid copying any of the parent's pagetables or other per-process | |
175 | * objects that reside in the map by marking all of them non-inheritable | |
176 | */ | |
177 | (void)vm_map_inherit(&p1->p_vmspace->vm_map, | |
178 | UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE); | |
179 | #endif | |
180 | p2->p_vmspace = vmspace_fork(p1->p_vmspace); | |
181 | ||
182 | #ifdef SYSVSHM | |
183 | if (p1->p_vmspace->vm_shm) | |
184 | shmfork(p1, p2, isvfork); | |
185 | #endif | |
186 | ||
187 | /* | |
188 | * Allocate a wired-down (for now) pcb and kernel stack for the process | |
189 | */ | |
190 | #ifdef notyet | |
191 | addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES)); | |
192 | vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE); | |
193 | #else | |
194 | addr = kmem_alloc(kernel_map, ctob(UPAGES)); | |
195 | #endif | |
196 | up = (struct user *)addr; | |
197 | p2->p_addr = up; | |
198 | ||
199 | /* | |
200 | * p_stats and p_sigacts currently point at fields | |
201 | * in the user struct but not at &u, instead at p_addr. | |
202 | * Copy p_sigacts and parts of p_stats; zero the rest | |
203 | * of p_stats (statistics). | |
204 | */ | |
205 | p2->p_stats = &up->u_stats; | |
206 | p2->p_sigacts = &up->u_sigacts; | |
207 | up->u_sigacts = *p1->p_sigacts; | |
208 | bzero(&up->u_stats.pstat_startzero, | |
209 | (unsigned) ((caddr_t)&up->u_stats.pstat_endzero - | |
210 | (caddr_t)&up->u_stats.pstat_startzero)); | |
211 | bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy, | |
212 | ((caddr_t)&up->u_stats.pstat_endcopy - | |
213 | (caddr_t)&up->u_stats.pstat_startcopy)); | |
214 | ||
215 | #ifdef i386 | |
216 | { u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp; | |
217 | ||
218 | vp = &p2->p_vmspace->vm_map; | |
219 | ||
220 | /* ream out old pagetables and kernel stack */ | |
221 | (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr); | |
222 | (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE); | |
223 | } | |
224 | #endif | |
225 | /* | |
226 | * cpu_fork will copy and update the kernel stack and pcb, | |
227 | * and make the child ready to run. It marks the child | |
228 | * so that it can return differently than the parent. | |
229 | * It returns twice, once in the parent process and | |
230 | * once in the child. | |
231 | */ | |
232 | return (cpu_fork(p1, p2)); | |
233 | } | |
234 | ||
235 | /* | |
236 | * Set default limits for VM system. | |
237 | * Called for proc 0, and then inherited by all others. | |
238 | */ | |
239 | vm_init_limits(p) | |
240 | register struct proc *p; | |
241 | { | |
242 | ||
243 | /* | |
244 | * Set up the initial limits on process VM. | |
245 | * Set the maximum resident set size to be all | |
246 | * of (reasonably) available memory. This causes | |
247 | * any single, large process to start random page | |
248 | * replacement once it fills memory. | |
249 | */ | |
250 | p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ; | |
251 | p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; | |
252 | p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; | |
253 | p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; | |
254 | p->p_rlimit[RLIMIT_RSS].rlim_cur = p->p_rlimit[RLIMIT_RSS].rlim_max = | |
255 | ptoa(vm_page_free_count); | |
256 | } | |
257 | ||
258 | #include "../vm/vm_pageout.h" | |
259 | ||
#ifdef DEBUG
/*
 * Swap debugging controls.
 * enableswap: when zero, sched() finds nothing to swap in and
 *	swapout_threads() returns immediately.
 * swapdebug: mask of the SDB_* bits below, selecting debug printfs.
 */
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	0x1
#define	SDB_SWAPIN	0x2
#define	SDB_SWAPOUT	0x4
#endif
267 | ||
268 | /* | |
269 | * Brutally simple: | |
270 | * 1. Attempt to swapin every swaped-out, runnable process in | |
271 | * order of priority. | |
272 | * 2. If not enough memory, wake the pageout daemon and let it | |
273 | * clear some space. | |
274 | */ | |
275 | sched() | |
276 | { | |
277 | register struct proc *p; | |
278 | register int pri; | |
279 | struct proc *pp; | |
280 | int ppri; | |
281 | vm_offset_t addr; | |
282 | vm_size_t size; | |
283 | ||
284 | loop: | |
285 | #ifdef DEBUG | |
286 | if (!enableswap) { | |
287 | pp = NULL; | |
288 | goto noswap; | |
289 | } | |
290 | #endif | |
291 | pp = NULL; | |
292 | ppri = INT_MIN; | |
293 | for (p = allproc; p != NULL; p = p->p_nxt) | |
294 | if (p->p_stat == SRUN && (p->p_flag & SLOAD) == 0) { | |
295 | pri = p->p_time + p->p_slptime - p->p_nice * 8; | |
296 | if (pri > ppri) { | |
297 | pp = p; | |
298 | ppri = pri; | |
299 | } | |
300 | } | |
301 | #ifdef DEBUG | |
302 | if (swapdebug & SDB_FOLLOW) | |
303 | printf("sched: running, procp %x pri %d\n", pp, ppri); | |
304 | noswap: | |
305 | #endif | |
306 | /* | |
307 | * Nothing to do, back to sleep | |
308 | */ | |
309 | if ((p = pp) == NULL) { | |
310 | sleep((caddr_t)&proc0, PVM); | |
311 | goto loop; | |
312 | } | |
313 | ||
314 | /* | |
315 | * We would like to bring someone in. | |
316 | * This part is really bogus cuz we could deadlock on memory | |
317 | * despite our feeble check. | |
318 | */ | |
319 | size = round_page(ctob(UPAGES)); | |
320 | addr = (vm_offset_t) p->p_addr; | |
321 | if (vm_page_free_count > atop(size)) { | |
322 | #ifdef DEBUG | |
323 | if (swapdebug & SDB_SWAPIN) | |
324 | printf("swapin: pid %d(%s)@%x, pri %d free %d\n", | |
325 | p->p_pid, p->p_comm, p->p_addr, | |
326 | ppri, vm_page_free_count); | |
327 | #endif | |
328 | vm_map_pageable(kernel_map, addr, addr+size, FALSE); | |
329 | (void) splclock(); | |
330 | if (p->p_stat == SRUN) | |
331 | setrq(p); | |
332 | p->p_flag |= SLOAD; | |
333 | (void) spl0(); | |
334 | p->p_time = 0; | |
335 | goto loop; | |
336 | } | |
337 | /* | |
338 | * Not enough memory, jab the pageout daemon and wait til the | |
339 | * coast is clear. | |
340 | */ | |
341 | #ifdef DEBUG | |
342 | if (swapdebug & SDB_FOLLOW) | |
343 | printf("sched: no room for pid %d(%s), free %d\n", | |
344 | p->p_pid, p->p_comm, vm_page_free_count); | |
345 | #endif | |
346 | (void) splhigh(); | |
347 | VM_WAIT; | |
348 | (void) spl0(); | |
349 | #ifdef DEBUG | |
350 | if (swapdebug & SDB_FOLLOW) | |
351 | printf("sched: room again, free %d\n", vm_page_free_count); | |
352 | #endif | |
353 | goto loop; | |
354 | } | |
355 | ||
356 | #define swappable(p) \ | |
357 | (((p)->p_flag & (SSYS|SLOAD|SKEEP|SWEXIT|SPHYSIO)) == SLOAD) | |
358 | ||
359 | /* | |
360 | * Swapout is driven by the pageout daemon. Very simple, we find eligible | |
361 | * procs and unwire their u-areas. We try to always "swap" at least one | |
362 | * process in case we need the room for a swapin. | |
363 | * If any procs have been sleeping/stopped for at least maxslp seconds, | |
364 | * they are swapped. Else, we swap the longest-sleeping or stopped process, | |
365 | * if any, otherwise the longest-resident process. | |
366 | */ | |
367 | swapout_threads() | |
368 | { | |
369 | register struct proc *p; | |
370 | struct proc *outp, *outp2; | |
371 | int outpri, outpri2; | |
372 | int didswap = 0; | |
373 | extern int maxslp; | |
374 | ||
375 | #ifdef DEBUG | |
376 | if (!enableswap) | |
377 | return; | |
378 | #endif | |
379 | outp = outp2 = NULL; | |
380 | outpri = outpri2 = 0; | |
381 | for (p = allproc; p != NULL; p = p->p_nxt) { | |
382 | if (!swappable(p)) | |
383 | continue; | |
384 | switch (p->p_stat) { | |
385 | case SRUN: | |
386 | if (p->p_time > outpri2) { | |
387 | outp2 = p; | |
388 | outpri2 = p->p_time; | |
389 | } | |
390 | continue; | |
391 | ||
392 | case SSLEEP: | |
393 | case SSTOP: | |
394 | if (p->p_slptime > maxslp) { | |
395 | swapout(p); | |
396 | didswap++; | |
397 | } else if (p->p_slptime > outpri) { | |
398 | outp = p; | |
399 | outpri = p->p_slptime; | |
400 | } | |
401 | continue; | |
402 | } | |
403 | } | |
404 | /* | |
405 | * If we didn't get rid of any real duds, toss out the next most | |
406 | * likely sleeping/stopped or running candidate. We only do this | |
407 | * if we are real low on memory since we don't gain much by doing | |
408 | * it (UPAGES pages). | |
409 | */ | |
410 | if (didswap == 0 && | |
411 | vm_page_free_count <= atop(round_page(ctob(UPAGES)))) { | |
412 | if ((p = outp) == 0) | |
413 | p = outp2; | |
414 | #ifdef DEBUG | |
415 | if (swapdebug & SDB_SWAPOUT) | |
416 | printf("swapout_threads: no duds, try procp %x\n", p); | |
417 | #endif | |
418 | if (p) | |
419 | swapout(p); | |
420 | } | |
421 | } | |
422 | ||
423 | swapout(p) | |
424 | register struct proc *p; | |
425 | { | |
426 | vm_offset_t addr; | |
427 | vm_size_t size; | |
428 | ||
429 | #ifdef DEBUG | |
430 | if (swapdebug & SDB_SWAPOUT) | |
431 | printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n", | |
432 | p->p_pid, p->p_comm, p->p_addr, p->p_stat, | |
433 | p->p_slptime, vm_page_free_count); | |
434 | #endif | |
435 | size = round_page(ctob(UPAGES)); | |
436 | addr = (vm_offset_t) p->p_addr; | |
437 | #ifdef notyet | |
438 | #ifdef hp300 | |
439 | /* | |
440 | * Ugh! u-area is double mapped to a fixed address behind the | |
441 | * back of the VM system and accesses are usually through that | |
442 | * address rather than the per-process address. Hence reference | |
443 | * and modify information are recorded at the fixed address and | |
444 | * lost at context switch time. We assume the u-struct and | |
445 | * kernel stack are always accessed/modified and force it to be so. | |
446 | */ | |
447 | { | |
448 | register int i; | |
449 | volatile long tmp; | |
450 | ||
451 | for (i = 0; i < UPAGES; i++) { | |
452 | tmp = *(long *)addr; *(long *)addr = tmp; | |
453 | addr += NBPG; | |
454 | } | |
455 | addr = (vm_offset_t) p->p_addr; | |
456 | } | |
457 | #endif | |
458 | vm_map_pageable(kernel_map, addr, addr+size, TRUE); | |
459 | pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map)); | |
460 | #endif | |
461 | (void) splhigh(); | |
462 | p->p_flag &= ~SLOAD; | |
463 | if (p->p_stat == SRUN) | |
464 | remrq(p); | |
465 | (void) spl0(); | |
466 | p->p_time = 0; | |
467 | } | |
468 | ||
469 | /* | |
470 | * The rest of these routines fake thread handling | |
471 | */ | |
472 | ||
473 | void | |
474 | assert_wait(event, ruptible) | |
475 | int event; | |
476 | boolean_t ruptible; | |
477 | { | |
478 | #ifdef lint | |
479 | ruptible++; | |
480 | #endif | |
481 | curproc->p_thread = event; | |
482 | } | |
483 | ||
484 | void | |
485 | thread_block() | |
486 | { | |
487 | int s = splhigh(); | |
488 | ||
489 | if (curproc->p_thread) | |
490 | sleep((caddr_t)curproc->p_thread, PVM); | |
491 | splx(s); | |
492 | } | |
493 | ||
494 | thread_sleep(event, lock, ruptible) | |
495 | int event; | |
496 | simple_lock_t lock; | |
497 | boolean_t ruptible; | |
498 | { | |
499 | #ifdef lint | |
500 | ruptible++; | |
501 | #endif | |
502 | int s = splhigh(); | |
503 | ||
504 | curproc->p_thread = event; | |
505 | simple_unlock(lock); | |
506 | if (curproc->p_thread) | |
507 | sleep((caddr_t)event, PVM); | |
508 | splx(s); | |
509 | } | |
510 | ||
511 | thread_wakeup(event) | |
512 | int event; | |
513 | { | |
514 | int s = splhigh(); | |
515 | ||
516 | wakeup((caddr_t)event); | |
517 | splx(s); | |
518 | } | |
519 | ||
520 | /* | |
521 | * DEBUG stuff | |
522 | */ | |
523 | ||
/* Current indentation, in columns, applied by iprintf(). */
int indent = 0;

/*
 * printf with leading indentation: emits "indent" columns of white space
 * (one tab per full 8 columns, then single spaces for the remainder) and
 * passes a through h on to printf, with a as the format string.
 */
/*ARGSUSED2*/
iprintf(a, b, c, d, e, f, g, h)
	char *a;
{
	register int col;

	for (col = indent; col >= 8; col -= 8)
		printf("\t");
	while (col > 0) {
		printf(" ");
		col--;
	}
	printf(a, b, c, d, e, f, g, h);
}