/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_glue.c	8.2 (Berkeley) %G%
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

int		avefree = 0;		/* XXX */
unsigned	maxdmap = MAXDSIZ;	/* XXX */
int		readbuffers = 0;	/* XXX allow kgdb to read kernel buffer pool */

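/*
 * Check whether the kernel can access the range [addr, addr+len)
 * for read (rw == B_READ) or write (otherwise).
 */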
int
kernacc(addr, len, rw)
        caddr_t addr;
        int len, rw;
{
        boolean_t rv;
        vm_offset_t saddr, eaddr;
        vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

        saddr = trunc_page(addr);
        eaddr = round_page(addr+len);
        rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
        /*
         * XXX there are still some things (e.g. the buffer cache) that
         * are managed behind the VM system's back so even though an
         * address is accessible in the mind of the VM system, there may
         * not be physical pages where the VM thinks there is.  This can
         * lead to bogus allocation of pages in the kernel address space
         * or worse, inconsistencies at the pmap level.  We only worry
         * about the buffer cache for now.
         */
        if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers &&
            saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf))
                rv = FALSE;
        return(rv == TRUE);
}

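/*
 * Check whether the current process can access the range
 * [addr, addr+len) in its own address space, read or write as above.
 */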
int
useracc(addr, len, rw)
        caddr_t addr;
        int len, rw;
{
        boolean_t rv;
        vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

        rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
            trunc_page(addr), round_page(addr+len), prot);
        return(rv == TRUE);
}

#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so debugger can plant a breakpoint).
 *
 * We force the protection change at the pmap level.  If we were
 * to use vm_map_protect a change to allow writing would be lazily-
 * applied meaning we would still take a protection fault, something
 * we really don't want to do.  It would also fragment the kernel
 * map unnecessarily.  We cannot use pmap_protect since it also won't
 * enforce a write-enable request.  Using pmap_enter is the only way
 * we can ensure the change takes place properly.
 */
void
chgkprot(addr, len, rw)
        register caddr_t addr;
        int len, rw;
{
        vm_prot_t prot;
        vm_offset_t pa, sva, eva;

        prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
        eva = round_page(addr + len);
        for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) {
                /*
                 * Extract physical address for the page.
                 * We use a cheezy hack to differentiate physical
                 * page 0 from an invalid mapping, not that it
                 * really matters...
                 */
                pa = pmap_extract(kernel_pmap, sva|1);
                if (pa == 0)
                        panic("chgkprot: invalid page");
                pmap_enter(kernel_pmap, sva, pa&~1, prot, TRUE);
        }
}
#endif

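/*
 * Wire the current process's pages in the range [addr, addr+len)
 * into memory so they cannot be paged out.
 */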
void
vslock(addr, len)
        caddr_t addr;
        u_int len;
{
        vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
            round_page(addr+len), FALSE);
}

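/*
 * Unwire a range previously wired by vslock; the dirtied argument
 * is not used here.
 */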
void
vsunlock(addr, len, dirtied)
        caddr_t addr;
        u_int len;
        int dirtied;
{
#ifdef lint
        dirtied++;
#endif
        vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
            round_page(addr+len), TRUE);
}

/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.
 * NOTE: the kernel stack may be at a different location in the child
 * process, and thus addresses of automatic variables may be invalid
 * after cpu_fork returns in the child process.  We do nothing here
 * after cpu_fork returns.
 */
int
vm_fork(p1, p2, isvfork)
        register struct proc *p1, *p2;
        int isvfork;
{
        register struct user *up;
        vm_offset_t addr;

#ifdef i386
        /*
         * avoid copying any of the parent's pagetables or other per-process
         * objects that reside in the map by marking all of them non-inheritable
         */
        (void)vm_map_inherit(&p1->p_vmspace->vm_map,
            UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
#endif
        p2->p_vmspace = vmspace_fork(p1->p_vmspace);

#ifdef SYSVSHM
        if (p1->p_vmspace->vm_shm)
                shmfork(p1, p2, isvfork);
#endif

#ifndef i386
        /*
         * Allocate a wired-down (for now) pcb and kernel stack for the process
         */
        addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
        if (addr == 0)
                panic("vm_fork: no more kernel virtual memory");
        vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
#else
/* XXX somehow, on 386, occasionally pageout removes active, wired down kstack
and pagetables, WITHOUT going thru vm_page_unwire!  Why this appears to work is
not yet clear, yet it does... */
        addr = kmem_alloc(kernel_map, ctob(UPAGES));
        if (addr == 0)
                panic("vm_fork: no more kernel virtual memory");
#endif
        up = (struct user *)addr;
        p2->p_addr = up;

        /*
         * p_stats and p_sigacts currently point at fields
         * in the user struct but not at &u, instead at p_addr.
         * Copy p_sigacts and parts of p_stats; zero the rest
         * of p_stats (statistics).
         */
        p2->p_stats = &up->u_stats;
        p2->p_sigacts = &up->u_sigacts;
        up->u_sigacts = *p1->p_sigacts;
        bzero(&up->u_stats.pstat_startzero,
            (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
            (caddr_t)&up->u_stats.pstat_startzero));
        bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
            ((caddr_t)&up->u_stats.pstat_endcopy -
            (caddr_t)&up->u_stats.pstat_startcopy));

#ifdef i386
        { u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp;

          vp = &p2->p_vmspace->vm_map;
          (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
          (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
          (void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
        }
#endif
        /*
         * cpu_fork will copy and update the kernel stack and pcb,
         * and make the child ready to run.  It marks the child
         * so that it can return differently than the parent.
         * It returns twice, once in the parent process and
         * once in the child.
         */
        return (cpu_fork(p1, p2));
}

/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 */
void
vm_init_limits(p)
        register struct proc *p;
{

        /*
         * Set up the initial limits on process VM.
         * Set the maximum resident set size to be all
         * of (reasonably) available memory.  This causes
         * any single, large process to start random page
         * replacement once it fills memory.
         */
        p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
        p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
        p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
        p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
        p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(cnt.v_free_count);
}

#include <vm/vm_pageout.h>

#ifdef DEBUG
int enableswap = 1;
int swapdebug = 0;
#define SDB_FOLLOW      1
#define SDB_SWAPIN      2
#define SDB_SWAPOUT     4
#endif

/*
 * Brutally simple:
 *	1. Attempt to swapin every swapped-out, runnable process in
 *	   order of priority.
 *	2. If not enough memory, wake the pageout daemon and let it
 *	   clear some space.
 */
void
scheduler()
{
        register struct proc *p;
        register int pri;
        struct proc *pp;
        int ppri;
        vm_offset_t addr;
        vm_size_t size;

loop:
#ifdef DEBUG
        while (!enableswap)
                sleep((caddr_t)&proc0, PVM);
#endif
        pp = NULL;
        ppri = INT_MIN;
        for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
                if (p->p_stat == SRUN && (p->p_flag & SLOAD) == 0) {
                        pri = p->p_time + p->p_slptime - p->p_nice * 8;
                        if (pri > ppri) {
                                pp = p;
                                ppri = pri;
                        }
                }
        }
#ifdef DEBUG
        if (swapdebug & SDB_FOLLOW)
                printf("sched: running, procp %x pri %d\n", pp, ppri);
#endif
        /*
         * Nothing to do, back to sleep
         */
        if ((p = pp) == NULL) {
                sleep((caddr_t)&proc0, PVM);
                goto loop;
        }

        /*
         * We would like to bring someone in.
         * This part is really bogus cuz we could deadlock on memory
         * despite our feeble check.
         */
        size = round_page(ctob(UPAGES));
        addr = (vm_offset_t) p->p_addr;
        if (cnt.v_free_count > atop(size)) {
#ifdef DEBUG
                if (swapdebug & SDB_SWAPIN)
                        printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
                            p->p_pid, p->p_comm, p->p_addr,
                            ppri, cnt.v_free_count);
#endif
                vm_map_pageable(kernel_map, addr, addr+size, FALSE);
                (void) splstatclock();
                if (p->p_stat == SRUN)
                        setrq(p);
                p->p_flag |= SLOAD;
                (void) spl0();
                p->p_time = 0;
                goto loop;
        }
        /*
         * Not enough memory, jab the pageout daemon and wait til the
         * coast is clear.
         */
#ifdef DEBUG
        if (swapdebug & SDB_FOLLOW)
                printf("sched: no room for pid %d(%s), free %d\n",
                    p->p_pid, p->p_comm, cnt.v_free_count);
#endif
        (void) splhigh();
        VM_WAIT;
        (void) spl0();
#ifdef DEBUG
        if (swapdebug & SDB_FOLLOW)
                printf("sched: room again, free %d\n", cnt.v_free_count);
#endif
        goto loop;
}

#define swappable(p) \
        (((p)->p_flag & (SSYS|SLOAD|SKEEP|SWEXIT|SPHYSIO)) == SLOAD)

/*
 * Swapout is driven by the pageout daemon.  Very simple, we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Else, we swap the longest-sleeping or stopped process,
 * if any, otherwise the longest-resident process.
 */
void
swapout_threads()
{
        register struct proc *p;
        struct proc *outp, *outp2;
        int outpri, outpri2;
        int didswap = 0;
        extern int maxslp;

#ifdef DEBUG
        if (!enableswap)
                return;
#endif
        outp = outp2 = NULL;
        outpri = outpri2 = 0;
        for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
                if (!swappable(p))
                        continue;
                switch (p->p_stat) {
                case SRUN:
                        if (p->p_time > outpri2) {
                                outp2 = p;
                                outpri2 = p->p_time;
                        }
                        continue;

                case SSLEEP:
                case SSTOP:
                        if (p->p_slptime >= maxslp) {
                                swapout(p);
                                didswap++;
                        } else if (p->p_slptime > outpri) {
                                outp = p;
                                outpri = p->p_slptime;
                        }
                        continue;
                }
        }
        /*
         * If we didn't get rid of any real duds, toss out the next most
         * likely sleeping/stopped or running candidate.  We only do this
         * if we are real low on memory since we don't gain much by doing
         * it (UPAGES pages).
         */
        if (didswap == 0 &&
            cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) {
                if ((p = outp) == 0)
                        p = outp2;
#ifdef DEBUG
                if (swapdebug & SDB_SWAPOUT)
                        printf("swapout_threads: no duds, try procp %x\n", p);
#endif
                if (p)
                        swapout(p);
        }
}

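/*
 * Unwire process p's u-area and mark the process as swapped out
 * (clear SLOAD and take it off the run queue if necessary).
 */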
void
swapout(p)
        register struct proc *p;
{
        vm_offset_t addr;
        vm_size_t size;

#ifdef DEBUG
        if (swapdebug & SDB_SWAPOUT)
                printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
                    p->p_pid, p->p_comm, p->p_addr, p->p_stat,
                    p->p_slptime, cnt.v_free_count);
#endif
        size = round_page(ctob(UPAGES));
        addr = (vm_offset_t) p->p_addr;
#if defined(hp300) || defined(luna68k)
        /*
         * Ugh! u-area is double mapped to a fixed address behind the
         * back of the VM system and accesses are usually through that
         * address rather than the per-process address.  Hence reference
         * and modify information are recorded at the fixed address and
         * lost at context switch time.  We assume the u-struct and
         * kernel stack are always accessed/modified and force it to be so.
         */
        {
                register int i;
                volatile long tmp;

                for (i = 0; i < UPAGES; i++) {
                        tmp = *(long *)addr; *(long *)addr = tmp;
                        addr += NBPG;
                }
                addr = (vm_offset_t) p->p_addr;
        }
#endif
#ifdef mips
        /*
         * Be sure to save the floating point coprocessor state before
         * paging out the u-struct.
         */
        {
                extern struct proc *machFPCurProcPtr;

                if (p == machFPCurProcPtr) {
                        MachSaveCurFPState(p);
                        machFPCurProcPtr = (struct proc *)0;
                }
        }
#endif
#ifndef i386    /* temporary measure till we find spontaneous unwire of kstack */
        vm_map_pageable(kernel_map, addr, addr+size, TRUE);
        pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
#endif
        (void) splhigh();
        p->p_flag &= ~SLOAD;
        if (p->p_stat == SRUN)
                remrq(p);
        (void) spl0();
        p->p_time = 0;
}

/*
 * The rest of these routines fake thread handling
 */

void
assert_wait(event, ruptible)
        int event;
        boolean_t ruptible;
{
#ifdef lint
        ruptible++;
#endif
        curproc->p_thread = event;
}

void
thread_block()
{
        int s = splhigh();

        if (curproc->p_thread)
                sleep((caddr_t)curproc->p_thread, PVM);
        splx(s);
}

void
thread_sleep(event, lock, ruptible)
        int event;
        simple_lock_t lock;
        boolean_t ruptible;
{
#ifdef lint
        ruptible++;
#endif
        int s = splhigh();

        curproc->p_thread = event;
        simple_unlock(lock);
        if (curproc->p_thread)
                sleep((caddr_t)event, PVM);
        splx(s);
}

void
thread_wakeup(event)
        int event;
{
        int s = splhigh();

        wakeup((caddr_t)event);
        splx(s);
}

/*
 * DEBUG stuff
 */

int indent = 0;

#include <machine/stdarg.h>		/* see subr_prf.c */

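/*
 * Printf indented by the global "indent": one tab for each eight
 * columns of indent, then single spaces for the remainder.
 */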
/*ARGSUSED2*/
void
#if __STDC__
iprintf(const char *fmt, ...)
#else
iprintf(fmt /* , va_alist */)
        char *fmt;
        /* va_dcl */
#endif
{
        register int i;
        va_list ap;

        for (i = indent; i >= 8; i -= 8)
                printf("\t");
        while (--i >= 0)
                printf(" ");
        va_start(ap, fmt);
        printf("%r", fmt, ap);
        va_end(ap);
}