Commit | Line | Data |
---|---|---|
175f072e | 1 | /* |
175f072e KM |
2 | * Copyright (c) 1991 Regents of the University of California. |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * The Mach Operating System project at Carnegie-Mellon University. | |
7 | * | |
0e24ad83 | 8 | * %sccs.include.redist.c% |
175f072e | 9 | * |
5f446058 | 10 | * @(#)vm_glue.c 7.12 (Berkeley) %G% |
0e24ad83 KM |
11 | * |
12 | * | |
13 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. | |
14 | * All rights reserved. | |
15 | * | |
16 | * Permission to use, copy, modify and distribute this software and | |
17 | * its documentation is hereby granted, provided that both the copyright | |
18 | * notice and this permission notice appear in all copies of the | |
19 | * software, derivative works or modified versions, and any portions | |
20 | * thereof, and that both notices appear in supporting documentation. | |
21 | * | |
22 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
23 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | |
24 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
25 | * | |
26 | * Carnegie Mellon requests users of this software to return to | |
27 | * | |
28 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
29 | * School of Computer Science | |
30 | * Carnegie Mellon University | |
31 | * Pittsburgh PA 15213-3890 | |
32 | * | |
33 | * any improvements or extensions that they make and grant Carnegie the | |
34 | * rights to redistribute these changes. | |
175f072e KM |
35 | */ |
36 | ||
37 | #include "param.h" | |
38 | #include "systm.h" | |
175f072e | 39 | #include "proc.h" |
ed5c84ba | 40 | #include "resourcevar.h" |
175f072e | 41 | #include "buf.h" |
ed5c84ba | 42 | #include "user.h" |
175f072e | 43 | |
ed5c84ba MK |
44 | #include "vm.h" |
45 | #include "vm_page.h" | |
46 | #include "vm_kern.h" | |
175f072e KM |
47 | |
48 | int avefree = 0; /* XXX */ | |
49 | unsigned maxdmap = MAXDSIZ; /* XXX */ | |
c3385412 | 50 | int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */ |
175f072e KM |
51 | |
52 | kernacc(addr, len, rw) | |
53 | caddr_t addr; | |
54 | int len, rw; | |
55 | { | |
56 | boolean_t rv; | |
165f38d6 | 57 | vm_offset_t saddr, eaddr; |
175f072e KM |
58 | vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; |
59 | ||
165f38d6 MH |
60 | saddr = trunc_page(addr); |
61 | eaddr = round_page(addr+len-1); | |
62 | rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); | |
63 | /* | |
64 | * XXX there are still some things (e.g. the buffer cache) that | |
65 | * are managed behind the VM system's back so even though an | |
66 | * address is accessible in the mind of the VM system, there may | |
67 | * not be physical pages where the VM thinks there is. This can | |
68 | * lead to bogus allocation of pages in the kernel address space | |
69 | * or worse, inconsistencies at the pmap level. We only worry | |
70 | * about the buffer cache for now. | |
71 | */ | |
c3385412 | 72 | if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers && |
db341dbf | 73 | saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf)) |
165f38d6 | 74 | rv = FALSE; |
175f072e KM |
75 | return(rv == TRUE); |
76 | } | |
77 | ||
78 | useracc(addr, len, rw) | |
79 | caddr_t addr; | |
80 | int len, rw; | |
81 | { | |
82 | boolean_t rv; | |
83 | vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; | |
84 | ||
ed5c84ba MK |
85 | rv = vm_map_check_protection(&curproc->p_vmspace->vm_map, |
86 | trunc_page(addr), round_page(addr+len-1), prot); | |
175f072e KM |
87 | return(rv == TRUE); |
88 | } | |
89 | ||
#ifdef KGDB
/*
 * Change protections on the kernel pages covering addr to addr+len
 * (presumably so debugger can plant a breakpoint).
 * All addresses are assumed to reside in the Sysmap.
 *
 * rw == B_READ sets the pages to VM_PROT_READ; any other value sets
 * them to VM_PROT_WRITE.
 */
chgkprot(addr, len, rw)
	register caddr_t addr;
	int len, rw;
{
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	/*
	 * Round the (exclusive) end up from one past the last byte.
	 * Using addr+len-1 would skip the final page whenever the last
	 * byte is the first byte of a page, leaving it unchanged.
	 */
	vm_map_protect(kernel_map, trunc_page(addr),
		       round_page(addr+len), prot, FALSE);
}
#endif
106 | ||
107 | vslock(addr, len) | |
108 | caddr_t addr; | |
109 | u_int len; | |
110 | { | |
ed5c84ba | 111 | vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), |
175f072e KM |
112 | round_page(addr+len-1), FALSE); |
113 | } | |
114 | ||
115 | vsunlock(addr, len, dirtied) | |
116 | caddr_t addr; | |
117 | u_int len; | |
118 | int dirtied; | |
119 | { | |
120 | #ifdef lint | |
121 | dirtied++; | |
122 | #endif lint | |
ed5c84ba | 123 | vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), |
175f072e KM |
124 | round_page(addr+len-1), TRUE); |
125 | } | |
126 | ||
9dd0b816 MK |
127 | /* |
128 | * Implement fork's actions on an address space. | |
129 | * Here we arrange for the address space to be copied or referenced, | |
130 | * allocate a user struct (pcb and kernel stack), then call the | |
131 | * machine-dependent layer to fill those in and make the new process | |
132 | * ready to run. | |
133 | * NOTE: the kernel stack may be at a different location in the child | |
134 | * process, and thus addresses of automatic variables may be invalid | |
135 | * after cpu_fork returns in the child process. We do nothing here | |
136 | * after cpu_fork returns. | |
137 | */ | |
ed5c84ba MK |
138 | vm_fork(p1, p2, isvfork) |
139 | register struct proc *p1, *p2; | |
175f072e KM |
140 | int isvfork; |
141 | { | |
142 | register struct user *up; | |
143 | vm_offset_t addr; | |
175f072e | 144 | |
1e1f624c WN |
145 | #ifdef i386 |
146 | /* | |
147 | * avoid copying any of the parent's pagetables or other per-process | |
148 | * objects that reside in the map by marking all of them non-inheritable | |
149 | */ | |
150 | (void)vm_map_inherit(&p1->p_vmspace->vm_map, | |
151 | UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE); | |
152 | #endif | |
ed5c84ba MK |
153 | p2->p_vmspace = vmspace_fork(p1->p_vmspace); |
154 | ||
155 | #ifdef SYSVSHM | |
156 | if (p1->p_vmspace->vm_shm) | |
157 | shmfork(p1, p2, isvfork); | |
175f072e | 158 | #endif |
ed5c84ba | 159 | |
cb5fb9b0 | 160 | #ifndef i386 |
175f072e | 161 | /* |
9dd0b816 | 162 | * Allocate a wired-down (for now) pcb and kernel stack for the process |
175f072e | 163 | */ |
9dd0b816 MK |
164 | addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES)); |
165 | vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE); | |
cb5fb9b0 WN |
166 | #else |
167 | /* XXX somehow, on 386, ocassionally pageout removes active, wired down kstack, | |
168 | and pagetables, WITHOUT going thru vm_page_unwire! Why this appears to work is | |
169 | not yet clear, yet it does... */ | |
170 | addr = kmem_alloc(kernel_map, ctob(UPAGES)); | |
171 | #endif | |
175f072e | 172 | up = (struct user *)addr; |
9dd0b816 | 173 | p2->p_addr = up; |
175f072e | 174 | |
ed5c84ba MK |
175 | /* |
176 | * p_stats and p_sigacts currently point at fields | |
177 | * in the user struct but not at &u, instead at p_addr. | |
9dd0b816 MK |
178 | * Copy p_sigacts and parts of p_stats; zero the rest |
179 | * of p_stats (statistics). | |
175f072e | 180 | */ |
9dd0b816 MK |
181 | p2->p_stats = &up->u_stats; |
182 | p2->p_sigacts = &up->u_sigacts; | |
183 | up->u_sigacts = *p1->p_sigacts; | |
184 | bzero(&up->u_stats.pstat_startzero, | |
185 | (unsigned) ((caddr_t)&up->u_stats.pstat_endzero - | |
186 | (caddr_t)&up->u_stats.pstat_startzero)); | |
187 | bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy, | |
188 | ((caddr_t)&up->u_stats.pstat_endcopy - | |
189 | (caddr_t)&up->u_stats.pstat_startcopy)); | |
175f072e | 190 | |
165f38d6 | 191 | #ifdef i386 |
165f38d6 MH |
192 | { u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp; |
193 | ||
194 | vp = &p2->p_vmspace->vm_map; | |
cb5fb9b0 | 195 | (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr); |
165f38d6 MH |
196 | (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE); |
197 | (void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE); | |
198 | } | |
199 | #endif | |
175f072e | 200 | /* |
9dd0b816 MK |
201 | * cpu_fork will copy and update the kernel stack and pcb, |
202 | * and make the child ready to run. It marks the child | |
203 | * so that it can return differently than the parent. | |
204 | * It returns twice, once in the parent process and | |
205 | * once in the child. | |
175f072e | 206 | */ |
9dd0b816 | 207 | return (cpu_fork(p1, p2)); |
175f072e KM |
208 | } |
209 | ||
210 | /* | |
ed5c84ba MK |
211 | * Set default limits for VM system. |
212 | * Called for proc 0, and then inherited by all others. | |
175f072e | 213 | */ |
ed5c84ba MK |
214 | vm_init_limits(p) |
215 | register struct proc *p; | |
175f072e | 216 | { |
ed5c84ba | 217 | |
175f072e KM |
218 | /* |
219 | * Set up the initial limits on process VM. | |
220 | * Set the maximum resident set size to be all | |
221 | * of (reasonably) available memory. This causes | |
222 | * any single, large process to start random page | |
223 | * replacement once it fills memory. | |
224 | */ | |
ed5c84ba MK |
225 | p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ; |
226 | p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; | |
227 | p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; | |
228 | p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; | |
229 | p->p_rlimit[RLIMIT_RSS].rlim_cur = p->p_rlimit[RLIMIT_RSS].rlim_max = | |
01733b29 | 230 | ptoa(cnt.v_free_count); |
175f072e KM |
231 | } |
232 | ||
233 | #include "../vm/vm_pageout.h" | |
234 | ||
#ifdef DEBUG
/*
 * Debugging knobs for the swap code below.
 * enableswap: when zero, sched() never selects a process to swap in and
 *	swapout_threads() returns immediately.
 * swapdebug: bit mask of SDB_* flags selecting which messages to print.
 */
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1	/* progress messages from sched() */
#define	SDB_SWAPIN	2	/* report each swapin */
#define	SDB_SWAPOUT	4	/* report each swapout */
#endif
242 | ||
243 | /* | |
244 | * Brutally simple: | |
245 | * 1. Attempt to swapin every swaped-out, runnable process in | |
246 | * order of priority. | |
247 | * 2. If not enough memory, wake the pageout daemon and let it | |
248 | * clear some space. | |
249 | */ | |
250 | sched() | |
251 | { | |
ed5c84ba MK |
252 | register struct proc *p; |
253 | register int pri; | |
254 | struct proc *pp; | |
255 | int ppri; | |
175f072e KM |
256 | vm_offset_t addr; |
257 | vm_size_t size; | |
258 | ||
259 | loop: | |
260 | #ifdef DEBUG | |
261 | if (!enableswap) { | |
ed5c84ba | 262 | pp = NULL; |
175f072e KM |
263 | goto noswap; |
264 | } | |
265 | #endif | |
ed5c84ba MK |
266 | pp = NULL; |
267 | ppri = INT_MIN; | |
268 | for (p = allproc; p != NULL; p = p->p_nxt) | |
269 | if (p->p_stat == SRUN && (p->p_flag & SLOAD) == 0) { | |
270 | pri = p->p_time + p->p_slptime - p->p_nice * 8; | |
271 | if (pri > ppri) { | |
272 | pp = p; | |
273 | ppri = pri; | |
175f072e KM |
274 | } |
275 | } | |
276 | #ifdef DEBUG | |
277 | if (swapdebug & SDB_FOLLOW) | |
ed5c84ba | 278 | printf("sched: running, procp %x pri %d\n", pp, ppri); |
175f072e KM |
279 | noswap: |
280 | #endif | |
281 | /* | |
282 | * Nothing to do, back to sleep | |
283 | */ | |
ed5c84ba MK |
284 | if ((p = pp) == NULL) { |
285 | sleep((caddr_t)&proc0, PVM); | |
175f072e KM |
286 | goto loop; |
287 | } | |
ed5c84ba | 288 | |
175f072e KM |
289 | /* |
290 | * We would like to bring someone in. | |
291 | * This part is really bogus cuz we could deadlock on memory | |
292 | * despite our feeble check. | |
293 | */ | |
294 | size = round_page(ctob(UPAGES)); | |
ed5c84ba | 295 | addr = (vm_offset_t) p->p_addr; |
01733b29 | 296 | if (cnt.v_free_count > atop(size)) { |
175f072e KM |
297 | #ifdef DEBUG |
298 | if (swapdebug & SDB_SWAPIN) | |
299 | printf("swapin: pid %d(%s)@%x, pri %d free %d\n", | |
ed5c84ba | 300 | p->p_pid, p->p_comm, p->p_addr, |
01733b29 | 301 | ppri, cnt.v_free_count); |
175f072e KM |
302 | #endif |
303 | vm_map_pageable(kernel_map, addr, addr+size, FALSE); | |
304 | (void) splclock(); | |
ed5c84ba MK |
305 | if (p->p_stat == SRUN) |
306 | setrq(p); | |
307 | p->p_flag |= SLOAD; | |
175f072e | 308 | (void) spl0(); |
ed5c84ba | 309 | p->p_time = 0; |
175f072e KM |
310 | goto loop; |
311 | } | |
312 | /* | |
313 | * Not enough memory, jab the pageout daemon and wait til the | |
314 | * coast is clear. | |
315 | */ | |
316 | #ifdef DEBUG | |
317 | if (swapdebug & SDB_FOLLOW) | |
318 | printf("sched: no room for pid %d(%s), free %d\n", | |
01733b29 | 319 | p->p_pid, p->p_comm, cnt.v_free_count); |
175f072e KM |
320 | #endif |
321 | (void) splhigh(); | |
322 | VM_WAIT; | |
323 | (void) spl0(); | |
324 | #ifdef DEBUG | |
325 | if (swapdebug & SDB_FOLLOW) | |
01733b29 | 326 | printf("sched: room again, free %d\n", cnt.v_free_count); |
175f072e KM |
327 | #endif |
328 | goto loop; | |
329 | } | |
330 | ||
331 | #define swappable(p) \ | |
ed5c84ba | 332 | (((p)->p_flag & (SSYS|SLOAD|SKEEP|SWEXIT|SPHYSIO)) == SLOAD) |
175f072e KM |
333 | |
334 | /* | |
335 | * Swapout is driven by the pageout daemon. Very simple, we find eligible | |
336 | * procs and unwire their u-areas. We try to always "swap" at least one | |
337 | * process in case we need the room for a swapin. | |
ed5c84ba MK |
338 | * If any procs have been sleeping/stopped for at least maxslp seconds, |
339 | * they are swapped. Else, we swap the longest-sleeping or stopped process, | |
340 | * if any, otherwise the longest-resident process. | |
175f072e KM |
341 | */ |
342 | swapout_threads() | |
343 | { | |
ed5c84ba | 344 | register struct proc *p; |
175f072e KM |
345 | struct proc *outp, *outp2; |
346 | int outpri, outpri2; | |
347 | int didswap = 0; | |
348 | extern int maxslp; | |
349 | ||
350 | #ifdef DEBUG | |
351 | if (!enableswap) | |
352 | return; | |
353 | #endif | |
354 | outp = outp2 = NULL; | |
ed5c84ba MK |
355 | outpri = outpri2 = 0; |
356 | for (p = allproc; p != NULL; p = p->p_nxt) { | |
357 | if (!swappable(p)) | |
175f072e | 358 | continue; |
ed5c84ba | 359 | switch (p->p_stat) { |
175f072e | 360 | case SRUN: |
ed5c84ba MK |
361 | if (p->p_time > outpri2) { |
362 | outp2 = p; | |
363 | outpri2 = p->p_time; | |
175f072e KM |
364 | } |
365 | continue; | |
366 | ||
367 | case SSLEEP: | |
368 | case SSTOP: | |
ed5c84ba MK |
369 | if (p->p_slptime > maxslp) { |
370 | swapout(p); | |
175f072e | 371 | didswap++; |
ed5c84ba MK |
372 | } else if (p->p_slptime > outpri) { |
373 | outp = p; | |
374 | outpri = p->p_slptime; | |
175f072e KM |
375 | } |
376 | continue; | |
377 | } | |
378 | } | |
379 | /* | |
380 | * If we didn't get rid of any real duds, toss out the next most | |
381 | * likely sleeping/stopped or running candidate. We only do this | |
382 | * if we are real low on memory since we don't gain much by doing | |
383 | * it (UPAGES pages). | |
384 | */ | |
385 | if (didswap == 0 && | |
01733b29 | 386 | cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) { |
ed5c84ba MK |
387 | if ((p = outp) == 0) |
388 | p = outp2; | |
175f072e KM |
389 | #ifdef DEBUG |
390 | if (swapdebug & SDB_SWAPOUT) | |
ed5c84ba | 391 | printf("swapout_threads: no duds, try procp %x\n", p); |
175f072e | 392 | #endif |
ed5c84ba MK |
393 | if (p) |
394 | swapout(p); | |
175f072e KM |
395 | } |
396 | } | |
397 | ||
398 | swapout(p) | |
399 | register struct proc *p; | |
400 | { | |
401 | vm_offset_t addr; | |
402 | vm_size_t size; | |
403 | ||
404 | #ifdef DEBUG | |
405 | if (swapdebug & SDB_SWAPOUT) | |
406 | printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n", | |
407 | p->p_pid, p->p_comm, p->p_addr, p->p_stat, | |
01733b29 | 408 | p->p_slptime, cnt.v_free_count); |
175f072e KM |
409 | #endif |
410 | size = round_page(ctob(UPAGES)); | |
411 | addr = (vm_offset_t) p->p_addr; | |
165f38d6 MH |
412 | #ifdef hp300 |
413 | /* | |
414 | * Ugh! u-area is double mapped to a fixed address behind the | |
415 | * back of the VM system and accesses are usually through that | |
416 | * address rather than the per-process address. Hence reference | |
417 | * and modify information are recorded at the fixed address and | |
418 | * lost at context switch time. We assume the u-struct and | |
419 | * kernel stack are always accessed/modified and force it to be so. | |
420 | */ | |
421 | { | |
422 | register int i; | |
423 | volatile long tmp; | |
424 | ||
425 | for (i = 0; i < UPAGES; i++) { | |
426 | tmp = *(long *)addr; *(long *)addr = tmp; | |
427 | addr += NBPG; | |
428 | } | |
429 | addr = (vm_offset_t) p->p_addr; | |
430 | } | |
431 | #endif | |
5f446058 MT |
432 | #ifdef mips |
433 | /* | |
434 | * Be sure to save the floating point coprocessor state before | |
435 | * paging out the u-struct. | |
436 | */ | |
437 | { | |
438 | extern struct proc *machFPCurProcPtr; | |
439 | ||
440 | if (p == machFPCurProcPtr) { | |
441 | MachSaveCurFPState(p); | |
442 | machFPCurProcPtr = (struct proc *)0; | |
443 | } | |
444 | } | |
445 | #endif | |
cb5fb9b0 | 446 | #ifndef i386 /* temporary measure till we find spontaineous unwire of kstack */ |
175f072e | 447 | vm_map_pageable(kernel_map, addr, addr+size, TRUE); |
ed5c84ba | 448 | pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map)); |
cb5fb9b0 | 449 | #endif |
175f072e KM |
450 | (void) splhigh(); |
451 | p->p_flag &= ~SLOAD; | |
452 | if (p->p_stat == SRUN) | |
453 | remrq(p); | |
454 | (void) spl0(); | |
455 | p->p_time = 0; | |
456 | } | |
457 | ||
458 | /* | |
459 | * The rest of these routines fake thread handling | |
460 | */ | |
461 | ||
462 | void | |
463 | assert_wait(event, ruptible) | |
464 | int event; | |
465 | boolean_t ruptible; | |
466 | { | |
467 | #ifdef lint | |
468 | ruptible++; | |
469 | #endif | |
ed5c84ba | 470 | curproc->p_thread = event; |
175f072e KM |
471 | } |
472 | ||
473 | void | |
474 | thread_block() | |
475 | { | |
476 | int s = splhigh(); | |
477 | ||
ed5c84ba MK |
478 | if (curproc->p_thread) |
479 | sleep((caddr_t)curproc->p_thread, PVM); | |
175f072e KM |
480 | splx(s); |
481 | } | |
482 | ||
175f072e KM |
483 | thread_sleep(event, lock, ruptible) |
484 | int event; | |
485 | simple_lock_t lock; | |
486 | boolean_t ruptible; | |
487 | { | |
488 | #ifdef lint | |
489 | ruptible++; | |
490 | #endif | |
491 | int s = splhigh(); | |
492 | ||
ed5c84ba | 493 | curproc->p_thread = event; |
175f072e | 494 | simple_unlock(lock); |
ed5c84ba MK |
495 | if (curproc->p_thread) |
496 | sleep((caddr_t)event, PVM); | |
175f072e KM |
497 | splx(s); |
498 | } | |
499 | ||
175f072e KM |
500 | thread_wakeup(event) |
501 | int event; | |
502 | { | |
503 | int s = splhigh(); | |
504 | ||
505 | wakeup((caddr_t)event); | |
506 | splx(s); | |
507 | } | |
508 | ||
509 | /* | |
510 | * DEBUG stuff | |
511 | */ | |
512 | ||
/* Current indentation, in columns, applied by iprintf(). */
int	indent = 0;

/*
 * printf preceded by "indent" columns of leading white space: one tab
 * for every full 8 columns, then single spaces for the remainder.
 * A zero or negative indent produces no leading white space.
 * a is the format string; b through h are passed along untouched as
 * its arguments.
 */
/*ARGSUSED2*/
iprintf(a, b, c, d, e, f, g, h)
	char *a;
{
	register int cols, tabs, spaces;

	cols = indent;
	tabs = spaces = 0;
	if (cols > 0) {
		tabs = cols / 8;
		spaces = cols % 8;
	}
	while (tabs-- > 0)
		printf("\t");
	while (spaces-- > 0)
		printf(" ");
	printf(a, b, c, d, e, f, g, h);
}