/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_glue.c	7.12 (Berkeley) %G%
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "resourcevar.h"
#include "buf.h"
#include "user.h"

#include "vm.h"
#include "vm_page.h"
#include "vm_kern.h"

int avefree = 0;		/* XXX */
unsigned maxdmap = MAXDSIZ;	/* XXX */
int readbuffers = 0;		/* XXX allow kgdb to read kernel buffer pool */

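/*
 * Check whether the kernel can read (rw == B_READ) or write the range
 * of kernel virtual addresses [addr, addr+len).
 */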
kernacc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_offset_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	saddr = trunc_page(addr);
	eaddr = round_page(addr+len-1);
	rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
	/*
	 * XXX there are still some things (e.g. the buffer cache) that
	 * are managed behind the VM system's back so even though an
	 * address is accessible in the mind of the VM system, there may
	 * not be physical pages where the VM thinks there are.  This can
	 * lead to bogus allocation of pages in the kernel address space
	 * or worse, inconsistencies at the pmap level.  We only worry
	 * about the buffer cache for now.
	 */
	if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers &&
	    saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf))
		rv = FALSE;
	return(rv == TRUE);
}

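/*
 * As kernacc, but checks access in the current process's address space.
 */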
useracc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
	    trunc_page(addr), round_page(addr+len-1), prot);
	return(rv == TRUE);
}

#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so the debugger can plant a breakpoint).
 * All addresses are assumed to reside in the Sysmap.
 */
chgkprot(addr, len, rw)
	register caddr_t addr;
	int len, rw;
{
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	vm_map_protect(kernel_map, trunc_page(addr),
	    round_page(addr+len-1), prot, FALSE);
}
#endif

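/*
 * Wire down the user-space range [addr, addr+len) so it cannot be
 * paged out (e.g. for the duration of a raw I/O operation).
 */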
vslock(addr, len)
	caddr_t addr;
	u_int len;
{
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
	    round_page(addr+len-1), FALSE);
}

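/*
 * Undo vslock: make the range pageable again.  The dirtied argument
 * is currently unused.
 */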
vsunlock(addr, len, dirtied)
	caddr_t addr;
	u_int len;
	int dirtied;
{
#ifdef lint
	dirtied++;
#endif /* lint */
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
	    round_page(addr+len-1), TRUE);
}

/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.
 * NOTE: the kernel stack may be at a different location in the child
 * process, and thus addresses of automatic variables may be invalid
 * after cpu_fork returns in the child process.  We do nothing here
 * after cpu_fork returns.
 */
vm_fork(p1, p2, isvfork)
	register struct proc *p1, *p2;
	int isvfork;
{
	register struct user *up;
	vm_offset_t addr;

#ifdef i386
	/*
	 * avoid copying any of the parent's pagetables or other
	 * per-process objects that reside in the map by marking all of
	 * them non-inheritable
	 */
	(void)vm_map_inherit(&p1->p_vmspace->vm_map,
	    UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
#endif
	p2->p_vmspace = vmspace_fork(p1->p_vmspace);

#ifdef SYSVSHM
	if (p1->p_vmspace->vm_shm)
		shmfork(p1, p2, isvfork);
#endif

#ifndef i386
	/*
	 * Allocate a wired-down (for now) pcb and kernel stack for the
	 * process.
	 */
	addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
	vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
#else
	/*
	 * XXX somehow, on the 386, pageout occasionally removes the
	 * active, wired-down kernel stack and pagetables WITHOUT going
	 * through vm_page_unwire!  Why this appears to work is not yet
	 * clear, yet it does...
	 */
	addr = kmem_alloc(kernel_map, ctob(UPAGES));
#endif
	up = (struct user *)addr;
	p2->p_addr = up;

	/*
	 * p_stats and p_sigacts currently point at fields
	 * in the user struct but not at &u, instead at p_addr.
	 * Copy p_sigacts and parts of p_stats; zero the rest
	 * of p_stats (statistics).
	 */
	p2->p_stats = &up->u_stats;
	p2->p_sigacts = &up->u_sigacts;
	up->u_sigacts = *p1->p_sigacts;
	bzero(&up->u_stats.pstat_startzero,
	    (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
	    (caddr_t)&up->u_stats.pstat_startzero));
	bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
	    ((caddr_t)&up->u_stats.pstat_endcopy -
	    (caddr_t)&up->u_stats.pstat_startcopy));

#ifdef i386
	{ u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp;

	  vp = &p2->p_vmspace->vm_map;
	  (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
	  (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
	  (void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
	}
#endif
	/*
	 * cpu_fork will copy and update the kernel stack and pcb,
	 * and make the child ready to run.  It marks the child
	 * so that it can return differently than the parent.
	 * It returns twice, once in the parent process and
	 * once in the child.
	 */
	return (cpu_fork(p1, p2));
}

/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 */
vm_init_limits(p)
	register struct proc *p;
{

	/*
	 * Set up the initial limits on process VM.
	 * Set the maximum resident set size to be all
	 * of (reasonably) available memory.  This causes
	 * any single, large process to start random page
	 * replacement once it fills memory.
	 */
	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	p->p_rlimit[RLIMIT_RSS].rlim_cur = p->p_rlimit[RLIMIT_RSS].rlim_max =
	    ptoa(cnt.v_free_count);
}

#include "../vm/vm_pageout.h"

#ifdef DEBUG
int enableswap = 1;
int swapdebug = 0;
#define	SDB_FOLLOW	1
#define	SDB_SWAPIN	2
#define	SDB_SWAPOUT	4
#endif

/*
 * Brutally simple:
 *	1. Attempt to swap in every swapped-out, runnable process in
 *	   order of priority.
 *	2. If not enough memory, wake the pageout daemon and let it
 *	   clear some space.
 */
sched()
{
	register struct proc *p;
	register int pri;
	struct proc *pp;
	int ppri;
	vm_offset_t addr;
	vm_size_t size;

loop:
#ifdef DEBUG
	if (!enableswap) {
		pp = NULL;
		goto noswap;
	}
#endif
	pp = NULL;
	ppri = INT_MIN;
	for (p = allproc; p != NULL; p = p->p_nxt)
		if (p->p_stat == SRUN && (p->p_flag & SLOAD) == 0) {
			pri = p->p_time + p->p_slptime - p->p_nice * 8;
			if (pri > ppri) {
				pp = p;
				ppri = pri;
			}
		}
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: running, procp %x pri %d\n", pp, ppri);
noswap:
#endif
	/*
	 * Nothing to do, back to sleep
	 */
	if ((p = pp) == NULL) {
		sleep((caddr_t)&proc0, PVM);
		goto loop;
	}

	/*
	 * We would like to bring someone in.
	 * This part is really bogus because we could deadlock on memory
	 * despite our feeble check.
	 */
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
	if (cnt.v_free_count > atop(size)) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPIN)
			printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
			    p->p_pid, p->p_comm, p->p_addr,
			    ppri, cnt.v_free_count);
#endif
		vm_map_pageable(kernel_map, addr, addr+size, FALSE);
		(void) splclock();
		if (p->p_stat == SRUN)
			setrq(p);
		p->p_flag |= SLOAD;
		(void) spl0();
		p->p_time = 0;
		goto loop;
	}
	/*
	 * Not enough memory, jab the pageout daemon and wait until the
	 * coast is clear.
	 */
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: no room for pid %d(%s), free %d\n",
		    p->p_pid, p->p_comm, cnt.v_free_count);
#endif
	(void) splhigh();
	VM_WAIT;
	(void) spl0();
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: room again, free %d\n", cnt.v_free_count);
#endif
	goto loop;
}

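/*
 * A process is eligible for swapping if it is loaded (SLOAD) and is
 * not a system process, locked in core, exiting, or doing physical I/O.
 */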
#define	swappable(p) \
	(((p)->p_flag & (SSYS|SLOAD|SKEEP|SWEXIT|SPHYSIO)) == SLOAD)

/*
 * Swapout is driven by the pageout daemon.  Very simple: we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Else, we swap the longest-sleeping or stopped process,
 * if any, otherwise the longest-resident process.
 */
swapout_threads()
{
	register struct proc *p;
	struct proc *outp, *outp2;
	int outpri, outpri2;
	int didswap = 0;
	extern int maxslp;

#ifdef DEBUG
	if (!enableswap)
		return;
#endif
	outp = outp2 = NULL;
	outpri = outpri2 = 0;
	for (p = allproc; p != NULL; p = p->p_nxt) {
		if (!swappable(p))
			continue;
		switch (p->p_stat) {
		case SRUN:
			if (p->p_time > outpri2) {
				outp2 = p;
				outpri2 = p->p_time;
			}
			continue;

		case SSLEEP:
		case SSTOP:
			if (p->p_slptime > maxslp) {
				swapout(p);
				didswap++;
			} else if (p->p_slptime > outpri) {
				outp = p;
				outpri = p->p_slptime;
			}
			continue;
		}
	}
	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are really low on memory since we don't gain much by
	 * doing it (UPAGES pages).
	 */
	if (didswap == 0 &&
	    cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) {
		if ((p = outp) == 0)
			p = outp2;
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procp %x\n", p);
#endif
		if (p)
			swapout(p);
	}
}

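/*
 * Unwire a process's u-area (pcb and kernel stack), clear SLOAD, and
 * take the process off the run queue if it was runnable, so its pages
 * can be reclaimed by the pageout daemon.
 */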
swapout(p)
	register struct proc *p;
{
	vm_offset_t addr;
	vm_size_t size;

#ifdef DEBUG
	if (swapdebug & SDB_SWAPOUT)
		printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
		    p->p_pid, p->p_comm, p->p_addr, p->p_stat,
		    p->p_slptime, cnt.v_free_count);
#endif
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
#ifdef hp300
	/*
	 * Ugh!  u-area is double mapped to a fixed address behind the
	 * back of the VM system and accesses are usually through that
	 * address rather than the per-process address.  Hence reference
	 * and modify information are recorded at the fixed address and
	 * lost at context switch time.  We assume the u-struct and
	 * kernel stack are always accessed/modified and force it to be so.
	 */
	{
		register int i;
		volatile long tmp;

		for (i = 0; i < UPAGES; i++) {
			tmp = *(long *)addr; *(long *)addr = tmp;
			addr += NBPG;
		}
		addr = (vm_offset_t) p->p_addr;
	}
#endif
#ifdef mips
	/*
	 * Be sure to save the floating point coprocessor state before
	 * paging out the u-struct.
	 */
	{
		extern struct proc *machFPCurProcPtr;

		if (p == machFPCurProcPtr) {
			MachSaveCurFPState(p);
			machFPCurProcPtr = (struct proc *)0;
		}
	}
#endif
#ifndef i386 /* temporary measure till we find spontaneous unwire of kstack */
	vm_map_pageable(kernel_map, addr, addr+size, TRUE);
	pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
#endif
	(void) splhigh();
	p->p_flag &= ~SLOAD;
	if (p->p_stat == SRUN)
		remrq(p);
	(void) spl0();
	p->p_time = 0;
}

/*
 * The rest of these routines fake thread handling
 */

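/*
 * Record the event the current process will wait on;
 * the actual sleep is performed by thread_block or thread_sleep.
 */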
void
assert_wait(event, ruptible)
	int event;
	boolean_t ruptible;
{
#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
}

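/*
 * Sleep on the event recorded by assert_wait, if any.
 */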
void
thread_block()
{
	int s = splhigh();

	if (curproc->p_thread)
		sleep((caddr_t)curproc->p_thread, PVM);
	splx(s);
}

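/*
 * Record the event, release the given simple lock, and sleep on the event.
 */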
thread_sleep(event, lock, ruptible)
	int event;
	simple_lock_t lock;
	boolean_t ruptible;
{
	int s = splhigh();

#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
	simple_unlock(lock);
	if (curproc->p_thread)
		sleep((caddr_t)event, PVM);
	splx(s);
}

thread_wakeup(event)
	int event;
{
	int s = splhigh();

	wakeup((caddr_t)event);
	splx(s);
}

/*
 * DEBUG stuff
 */

int indent = 0;

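/*
 * printf prefixed by the current indentation: a tab for each eight
 * columns of indent, then a space for each remaining column.
 */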
/*ARGSUSED2*/
iprintf(a, b, c, d, e, f, g, h)
	char *a;
{
	register int i;

	i = indent;
	while (i >= 8) {
		printf("\t");
		i -= 8;
	}
	for (; i > 0; --i)
		printf(" ");
	printf(a, b, c, d, e, f, g, h);
}