Commit | Line | Data |
---|---|---|
175f072e | 1 | /* |
175f072e KM |
2 | * Copyright (c) 1991 Regents of the University of California. |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * The Mach Operating System project at Carnegie-Mellon University. | |
7 | * | |
0e24ad83 | 8 | * %sccs.include.redist.c% |
175f072e | 9 | * |
8edfaa67 | 10 | * @(#)vm_glue.c 7.9 (Berkeley) %G% |
0e24ad83 KM |
11 | * |
12 | * | |
13 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. | |
14 | * All rights reserved. | |
15 | * | |
16 | * Permission to use, copy, modify and distribute this software and | |
17 | * its documentation is hereby granted, provided that both the copyright | |
18 | * notice and this permission notice appear in all copies of the | |
19 | * software, derivative works or modified versions, and any portions | |
20 | * thereof, and that both notices appear in supporting documentation. | |
21 | * | |
22 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
23 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | |
24 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
25 | * | |
26 | * Carnegie Mellon requests users of this software to return to | |
27 | * | |
28 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
29 | * School of Computer Science | |
30 | * Carnegie Mellon University | |
31 | * Pittsburgh PA 15213-3890 | |
32 | * | |
33 | * any improvements or extensions that they make and grant Carnegie the | |
34 | * rights to redistribute these changes. | |
175f072e KM |
35 | */ |
36 | ||
37 | #include "param.h" | |
38 | #include "systm.h" | |
175f072e | 39 | #include "proc.h" |
ed5c84ba | 40 | #include "resourcevar.h" |
175f072e | 41 | #include "buf.h" |
ed5c84ba | 42 | #include "user.h" |
175f072e | 43 | |
ed5c84ba MK |
44 | #include "vm.h" |
45 | #include "vm_page.h" | |
46 | #include "vm_kern.h" | |
175f072e KM |
47 | |
48 | int avefree = 0; /* XXX */ | |
49 | unsigned maxdmap = MAXDSIZ; /* XXX */ | |
c3385412 | 50 | int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */ |
175f072e KM |
51 | |
52 | kernacc(addr, len, rw) | |
53 | caddr_t addr; | |
54 | int len, rw; | |
55 | { | |
56 | boolean_t rv; | |
165f38d6 | 57 | vm_offset_t saddr, eaddr; |
175f072e KM |
58 | vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; |
59 | ||
165f38d6 MH |
60 | saddr = trunc_page(addr); |
61 | eaddr = round_page(addr+len-1); | |
62 | rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); | |
63 | /* | |
64 | * XXX there are still some things (e.g. the buffer cache) that | |
65 | * are managed behind the VM system's back so even though an | |
66 | * address is accessible in the mind of the VM system, there may | |
67 | * not be physical pages where the VM thinks there is. This can | |
68 | * lead to bogus allocation of pages in the kernel address space | |
69 | * or worse, inconsistencies at the pmap level. We only worry | |
70 | * about the buffer cache for now. | |
71 | */ | |
c3385412 | 72 | if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers && |
db341dbf | 73 | saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf)) |
165f38d6 | 74 | rv = FALSE; |
175f072e KM |
75 | return(rv == TRUE); |
76 | } | |
77 | ||
78 | useracc(addr, len, rw) | |
79 | caddr_t addr; | |
80 | int len, rw; | |
81 | { | |
82 | boolean_t rv; | |
83 | vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; | |
84 | ||
ed5c84ba MK |
85 | rv = vm_map_check_protection(&curproc->p_vmspace->vm_map, |
86 | trunc_page(addr), round_page(addr+len-1), prot); | |
175f072e KM |
87 | return(rv == TRUE); |
88 | } | |
89 | ||
#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so debugger can plant a breakpoint).
 * All addresses are assumed to reside in the Sysmap.
 *
 * rw == B_READ selects VM_PROT_READ; any other value selects
 * VM_PROT_WRITE.
 */
chgkprot(addr, len, rw)
	register caddr_t addr;
	int len, rw;
{
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	/*
	 * Round the end up from addr+len, not addr+len-1, so that a range
	 * whose last byte is the first byte of a page still covers that
	 * page.
	 */
	vm_map_protect(kernel_map, trunc_page(addr),
	    round_page(addr+len), prot, FALSE);
}
#endif
106 | ||
107 | vslock(addr, len) | |
108 | caddr_t addr; | |
109 | u_int len; | |
110 | { | |
ed5c84ba | 111 | vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), |
175f072e KM |
112 | round_page(addr+len-1), FALSE); |
113 | } | |
114 | ||
115 | vsunlock(addr, len, dirtied) | |
116 | caddr_t addr; | |
117 | u_int len; | |
118 | int dirtied; | |
119 | { | |
120 | #ifdef lint | |
121 | dirtied++; | |
122 | #endif lint | |
ed5c84ba | 123 | vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), |
175f072e KM |
124 | round_page(addr+len-1), TRUE); |
125 | } | |
126 | ||
9dd0b816 MK |
127 | /* |
128 | * Implement fork's actions on an address space. | |
129 | * Here we arrange for the address space to be copied or referenced, | |
130 | * allocate a user struct (pcb and kernel stack), then call the | |
131 | * machine-dependent layer to fill those in and make the new process | |
132 | * ready to run. | |
133 | * NOTE: the kernel stack may be at a different location in the child | |
134 | * process, and thus addresses of automatic variables may be invalid | |
135 | * after cpu_fork returns in the child process. We do nothing here | |
136 | * after cpu_fork returns. | |
137 | */ | |
ed5c84ba MK |
138 | vm_fork(p1, p2, isvfork) |
139 | register struct proc *p1, *p2; | |
175f072e KM |
140 | int isvfork; |
141 | { | |
142 | register struct user *up; | |
143 | vm_offset_t addr; | |
175f072e | 144 | |
1e1f624c WN |
145 | #ifdef i386 |
146 | /* | |
147 | * avoid copying any of the parent's pagetables or other per-process | |
148 | * objects that reside in the map by marking all of them non-inheritable | |
149 | */ | |
150 | (void)vm_map_inherit(&p1->p_vmspace->vm_map, | |
151 | UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE); | |
152 | #endif | |
ed5c84ba MK |
153 | p2->p_vmspace = vmspace_fork(p1->p_vmspace); |
154 | ||
155 | #ifdef SYSVSHM | |
156 | if (p1->p_vmspace->vm_shm) | |
157 | shmfork(p1, p2, isvfork); | |
175f072e | 158 | #endif |
ed5c84ba | 159 | |
175f072e | 160 | /* |
9dd0b816 | 161 | * Allocate a wired-down (for now) pcb and kernel stack for the process |
175f072e | 162 | */ |
9dd0b816 MK |
163 | addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES)); |
164 | vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE); | |
175f072e | 165 | up = (struct user *)addr; |
9dd0b816 | 166 | p2->p_addr = up; |
175f072e | 167 | |
ed5c84ba MK |
168 | /* |
169 | * p_stats and p_sigacts currently point at fields | |
170 | * in the user struct but not at &u, instead at p_addr. | |
9dd0b816 MK |
171 | * Copy p_sigacts and parts of p_stats; zero the rest |
172 | * of p_stats (statistics). | |
175f072e | 173 | */ |
9dd0b816 MK |
174 | p2->p_stats = &up->u_stats; |
175 | p2->p_sigacts = &up->u_sigacts; | |
176 | up->u_sigacts = *p1->p_sigacts; | |
177 | bzero(&up->u_stats.pstat_startzero, | |
178 | (unsigned) ((caddr_t)&up->u_stats.pstat_endzero - | |
179 | (caddr_t)&up->u_stats.pstat_startzero)); | |
180 | bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy, | |
181 | ((caddr_t)&up->u_stats.pstat_endcopy - | |
182 | (caddr_t)&up->u_stats.pstat_startcopy)); | |
175f072e | 183 | |
165f38d6 | 184 | #ifdef i386 |
165f38d6 MH |
185 | { u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp; |
186 | ||
187 | vp = &p2->p_vmspace->vm_map; | |
188 | (void)vm_map_pageable(vp, addr, 0xfe000000 - addr, TRUE); | |
189 | (void)vm_deallocate(vp, addr, 0xfe000000 - addr); | |
190 | (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE); | |
191 | (void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE); | |
192 | } | |
193 | #endif | |
175f072e | 194 | /* |
9dd0b816 MK |
195 | * cpu_fork will copy and update the kernel stack and pcb, |
196 | * and make the child ready to run. It marks the child | |
197 | * so that it can return differently than the parent. | |
198 | * It returns twice, once in the parent process and | |
199 | * once in the child. | |
175f072e | 200 | */ |
9dd0b816 | 201 | return (cpu_fork(p1, p2)); |
175f072e KM |
202 | } |
203 | ||
204 | /* | |
ed5c84ba MK |
205 | * Set default limits for VM system. |
206 | * Called for proc 0, and then inherited by all others. | |
175f072e | 207 | */ |
ed5c84ba MK |
208 | vm_init_limits(p) |
209 | register struct proc *p; | |
175f072e | 210 | { |
ed5c84ba | 211 | |
175f072e KM |
212 | /* |
213 | * Set up the initial limits on process VM. | |
214 | * Set the maximum resident set size to be all | |
215 | * of (reasonably) available memory. This causes | |
216 | * any single, large process to start random page | |
217 | * replacement once it fills memory. | |
218 | */ | |
ed5c84ba MK |
219 | p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ; |
220 | p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; | |
221 | p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; | |
222 | p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; | |
223 | p->p_rlimit[RLIMIT_RSS].rlim_cur = p->p_rlimit[RLIMIT_RSS].rlim_max = | |
8edfaa67 | 224 | ptoa(vm_stat.free_count); |
175f072e KM |
225 | } |
226 | ||
227 | #include "../vm/vm_pageout.h" | |
228 | ||
#ifdef DEBUG
/*
 * Debugging knobs for the swap code in this file.
 */
int	enableswap = 1;		/* zero disables sched() swapin and swapout_threads() */
int	swapdebug = 0;		/* bit mask of the SDB_* flags below */
#define	SDB_FOLLOW	0x1	/* trace sched() progress */
#define	SDB_SWAPIN	0x2	/* report each swapin */
#define	SDB_SWAPOUT	0x4	/* report each swapout */
#endif
236 | ||
237 | /* | |
238 | * Brutally simple: | |
239 | * 1. Attempt to swapin every swaped-out, runnable process in | |
240 | * order of priority. | |
241 | * 2. If not enough memory, wake the pageout daemon and let it | |
242 | * clear some space. | |
243 | */ | |
244 | sched() | |
245 | { | |
ed5c84ba MK |
246 | register struct proc *p; |
247 | register int pri; | |
248 | struct proc *pp; | |
249 | int ppri; | |
175f072e KM |
250 | vm_offset_t addr; |
251 | vm_size_t size; | |
252 | ||
253 | loop: | |
254 | #ifdef DEBUG | |
255 | if (!enableswap) { | |
ed5c84ba | 256 | pp = NULL; |
175f072e KM |
257 | goto noswap; |
258 | } | |
259 | #endif | |
ed5c84ba MK |
260 | pp = NULL; |
261 | ppri = INT_MIN; | |
262 | for (p = allproc; p != NULL; p = p->p_nxt) | |
263 | if (p->p_stat == SRUN && (p->p_flag & SLOAD) == 0) { | |
264 | pri = p->p_time + p->p_slptime - p->p_nice * 8; | |
265 | if (pri > ppri) { | |
266 | pp = p; | |
267 | ppri = pri; | |
175f072e KM |
268 | } |
269 | } | |
270 | #ifdef DEBUG | |
271 | if (swapdebug & SDB_FOLLOW) | |
ed5c84ba | 272 | printf("sched: running, procp %x pri %d\n", pp, ppri); |
175f072e KM |
273 | noswap: |
274 | #endif | |
275 | /* | |
276 | * Nothing to do, back to sleep | |
277 | */ | |
ed5c84ba MK |
278 | if ((p = pp) == NULL) { |
279 | sleep((caddr_t)&proc0, PVM); | |
175f072e KM |
280 | goto loop; |
281 | } | |
ed5c84ba | 282 | |
175f072e KM |
283 | /* |
284 | * We would like to bring someone in. | |
285 | * This part is really bogus cuz we could deadlock on memory | |
286 | * despite our feeble check. | |
287 | */ | |
288 | size = round_page(ctob(UPAGES)); | |
ed5c84ba | 289 | addr = (vm_offset_t) p->p_addr; |
8edfaa67 | 290 | if (vm_stat.free_count > atop(size)) { |
175f072e KM |
291 | #ifdef DEBUG |
292 | if (swapdebug & SDB_SWAPIN) | |
293 | printf("swapin: pid %d(%s)@%x, pri %d free %d\n", | |
ed5c84ba | 294 | p->p_pid, p->p_comm, p->p_addr, |
8edfaa67 | 295 | ppri, vm_stat.free_count); |
175f072e KM |
296 | #endif |
297 | vm_map_pageable(kernel_map, addr, addr+size, FALSE); | |
298 | (void) splclock(); | |
ed5c84ba MK |
299 | if (p->p_stat == SRUN) |
300 | setrq(p); | |
301 | p->p_flag |= SLOAD; | |
175f072e | 302 | (void) spl0(); |
ed5c84ba | 303 | p->p_time = 0; |
175f072e KM |
304 | goto loop; |
305 | } | |
306 | /* | |
307 | * Not enough memory, jab the pageout daemon and wait til the | |
308 | * coast is clear. | |
309 | */ | |
310 | #ifdef DEBUG | |
311 | if (swapdebug & SDB_FOLLOW) | |
312 | printf("sched: no room for pid %d(%s), free %d\n", | |
8edfaa67 | 313 | p->p_pid, p->p_comm, vm_stat.free_count); |
175f072e KM |
314 | #endif |
315 | (void) splhigh(); | |
316 | VM_WAIT; | |
317 | (void) spl0(); | |
318 | #ifdef DEBUG | |
319 | if (swapdebug & SDB_FOLLOW) | |
8edfaa67 | 320 | printf("sched: room again, free %d\n", vm_stat.free_count); |
175f072e KM |
321 | #endif |
322 | goto loop; | |
323 | } | |
324 | ||
/*
 * A process is swappable when SLOAD is set and none of SSYS, SKEEP,
 * SWEXIT or SPHYSIO is.
 */
#define	swappable(p) \
	(((p)->p_flag & (SLOAD|SSYS|SKEEP|SWEXIT|SPHYSIO)) == SLOAD)
175f072e KM |
327 | |
328 | /* | |
329 | * Swapout is driven by the pageout daemon. Very simple, we find eligible | |
330 | * procs and unwire their u-areas. We try to always "swap" at least one | |
331 | * process in case we need the room for a swapin. | |
ed5c84ba MK |
332 | * If any procs have been sleeping/stopped for at least maxslp seconds, |
333 | * they are swapped. Else, we swap the longest-sleeping or stopped process, | |
334 | * if any, otherwise the longest-resident process. | |
175f072e KM |
335 | */ |
336 | swapout_threads() | |
337 | { | |
ed5c84ba | 338 | register struct proc *p; |
175f072e KM |
339 | struct proc *outp, *outp2; |
340 | int outpri, outpri2; | |
341 | int didswap = 0; | |
342 | extern int maxslp; | |
343 | ||
344 | #ifdef DEBUG | |
345 | if (!enableswap) | |
346 | return; | |
347 | #endif | |
348 | outp = outp2 = NULL; | |
ed5c84ba MK |
349 | outpri = outpri2 = 0; |
350 | for (p = allproc; p != NULL; p = p->p_nxt) { | |
351 | if (!swappable(p)) | |
175f072e | 352 | continue; |
ed5c84ba | 353 | switch (p->p_stat) { |
175f072e | 354 | case SRUN: |
ed5c84ba MK |
355 | if (p->p_time > outpri2) { |
356 | outp2 = p; | |
357 | outpri2 = p->p_time; | |
175f072e KM |
358 | } |
359 | continue; | |
360 | ||
361 | case SSLEEP: | |
362 | case SSTOP: | |
ed5c84ba MK |
363 | if (p->p_slptime > maxslp) { |
364 | swapout(p); | |
175f072e | 365 | didswap++; |
ed5c84ba MK |
366 | } else if (p->p_slptime > outpri) { |
367 | outp = p; | |
368 | outpri = p->p_slptime; | |
175f072e KM |
369 | } |
370 | continue; | |
371 | } | |
372 | } | |
373 | /* | |
374 | * If we didn't get rid of any real duds, toss out the next most | |
375 | * likely sleeping/stopped or running candidate. We only do this | |
376 | * if we are real low on memory since we don't gain much by doing | |
377 | * it (UPAGES pages). | |
378 | */ | |
379 | if (didswap == 0 && | |
8edfaa67 | 380 | vm_stat.free_count <= atop(round_page(ctob(UPAGES)))) { |
ed5c84ba MK |
381 | if ((p = outp) == 0) |
382 | p = outp2; | |
175f072e KM |
383 | #ifdef DEBUG |
384 | if (swapdebug & SDB_SWAPOUT) | |
ed5c84ba | 385 | printf("swapout_threads: no duds, try procp %x\n", p); |
175f072e | 386 | #endif |
ed5c84ba MK |
387 | if (p) |
388 | swapout(p); | |
175f072e KM |
389 | } |
390 | } | |
391 | ||
392 | swapout(p) | |
393 | register struct proc *p; | |
394 | { | |
395 | vm_offset_t addr; | |
396 | vm_size_t size; | |
397 | ||
398 | #ifdef DEBUG | |
399 | if (swapdebug & SDB_SWAPOUT) | |
400 | printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n", | |
401 | p->p_pid, p->p_comm, p->p_addr, p->p_stat, | |
8edfaa67 | 402 | p->p_slptime, vm_stat.free_count); |
175f072e KM |
403 | #endif |
404 | size = round_page(ctob(UPAGES)); | |
405 | addr = (vm_offset_t) p->p_addr; | |
165f38d6 MH |
406 | #ifdef hp300 |
407 | /* | |
408 | * Ugh! u-area is double mapped to a fixed address behind the | |
409 | * back of the VM system and accesses are usually through that | |
410 | * address rather than the per-process address. Hence reference | |
411 | * and modify information are recorded at the fixed address and | |
412 | * lost at context switch time. We assume the u-struct and | |
413 | * kernel stack are always accessed/modified and force it to be so. | |
414 | */ | |
415 | { | |
416 | register int i; | |
417 | volatile long tmp; | |
418 | ||
419 | for (i = 0; i < UPAGES; i++) { | |
420 | tmp = *(long *)addr; *(long *)addr = tmp; | |
421 | addr += NBPG; | |
422 | } | |
423 | addr = (vm_offset_t) p->p_addr; | |
424 | } | |
425 | #endif | |
175f072e | 426 | vm_map_pageable(kernel_map, addr, addr+size, TRUE); |
ed5c84ba | 427 | pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map)); |
175f072e KM |
428 | (void) splhigh(); |
429 | p->p_flag &= ~SLOAD; | |
430 | if (p->p_stat == SRUN) | |
431 | remrq(p); | |
432 | (void) spl0(); | |
433 | p->p_time = 0; | |
434 | } | |
435 | ||
436 | /* | |
437 | * The rest of these routines fake thread handling | |
438 | */ | |
439 | ||
440 | void | |
441 | assert_wait(event, ruptible) | |
442 | int event; | |
443 | boolean_t ruptible; | |
444 | { | |
445 | #ifdef lint | |
446 | ruptible++; | |
447 | #endif | |
ed5c84ba | 448 | curproc->p_thread = event; |
175f072e KM |
449 | } |
450 | ||
451 | void | |
452 | thread_block() | |
453 | { | |
454 | int s = splhigh(); | |
455 | ||
ed5c84ba MK |
456 | if (curproc->p_thread) |
457 | sleep((caddr_t)curproc->p_thread, PVM); | |
175f072e KM |
458 | splx(s); |
459 | } | |
460 | ||
175f072e KM |
461 | thread_sleep(event, lock, ruptible) |
462 | int event; | |
463 | simple_lock_t lock; | |
464 | boolean_t ruptible; | |
465 | { | |
466 | #ifdef lint | |
467 | ruptible++; | |
468 | #endif | |
469 | int s = splhigh(); | |
470 | ||
ed5c84ba | 471 | curproc->p_thread = event; |
175f072e | 472 | simple_unlock(lock); |
ed5c84ba MK |
473 | if (curproc->p_thread) |
474 | sleep((caddr_t)event, PVM); | |
175f072e KM |
475 | splx(s); |
476 | } | |
477 | ||
175f072e KM |
478 | thread_wakeup(event) |
479 | int event; | |
480 | { | |
481 | int s = splhigh(); | |
482 | ||
483 | wakeup((caddr_t)event); | |
484 | splx(s); | |
485 | } | |
486 | ||
487 | /* | |
488 | * DEBUG stuff | |
489 | */ | |
490 | ||
/* Current indentation, in columns, applied by iprintf(). */
int indent = 0;

/*
 * printf() preceded by `indent' columns of leading white space:
 * one tab for every full 8 columns, then single spaces for the rest.
 * a is the format string; b..h are passed through to printf() untouched.
 */
/*ARGSUSED2*/
iprintf(a, b, c, d, e, f, g, h)
	char *a;
{
	register int left;

	for (left = indent; left >= 8; left -= 8)
		printf("\t");
	while (left-- > 0)
		printf(" ");
	printf(a, b, c, d, e, f, g, h);
}