/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_glue.c	7.19 (Berkeley) %G%
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

int	avefree = 0;		/* XXX */
unsigned	maxdmap = MAXDSIZ;	/* XXX */
int	readbuffers = 0;	/* XXX allow kgdb to read kernel buffer pool */

int
kernacc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_offset_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	saddr = trunc_page(addr);
	eaddr = round_page(addr+len-1);
	rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
	/*
	 * XXX there are still some things (e.g. the buffer cache) that
	 * are managed behind the VM system's back so even though an
	 * address is accessible in the mind of the VM system, there may
	 * not be physical pages where the VM thinks there are.  This can
	 * lead to bogus allocation of pages in the kernel address space
	 * or worse, inconsistencies at the pmap level.  We only worry
	 * about the buffer cache for now.
	 */
	if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers &&
		   saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf))
		rv = FALSE;
	return(rv == TRUE);
}

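/*
 * Example (hypothetical, for illustration only): how a debugger stub
 * might use kernacc() to validate a kernel address before touching it.
 * The helper name and its caller protocol are assumptions, not part of
 * this file; kgdb's real read path lives in its own support code.
 */
#ifdef notdef
int
kdb_read(addr, buf, len)
	caddr_t addr, buf;
	int len;
{

	if (!kernacc(addr, len, B_READ))
		return (0);		/* would fault; refuse the access */
	bcopy(addr, buf, (unsigned)len);
	return (1);
}
#endif /* notdef */
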
int
useracc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
	    trunc_page(addr), round_page(addr+len-1), prot);
	return(rv == TRUE);
}

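/*
 * Example (hypothetical): a character driver validating a user buffer
 * before raw I/O into it.  The device and names are made up; in
 * practice the raw-I/O path (physio()) performs a check along these
 * lines on the driver's behalf.  Note the direction: a device *read*
 * writes user memory, hence write access must be checked.
 */
#ifdef notdef
int
exdev_read(addr, len)
	caddr_t addr;
	int len;
{

	if (!useracc(addr, len, B_WRITE))
		return (EFAULT);	/* assumes <sys/errno.h> */
	/* ... start the transfer into the user buffer ... */
	return (0);
}
#endif /* notdef */
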
#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so debugger can plant a breakpoint).
 *
 * We force the protection change at the pmap level.  If we were
 * to use vm_map_protect a change to allow writing would be lazily
 * applied, meaning we would still take a protection fault, something
 * we really don't want to do.  It would also fragment the kernel
 * map unnecessarily.  We cannot use pmap_protect since it also won't
 * enforce a write-enable request.  Using pmap_enter is the only way
 * we can ensure the change takes place properly.
 */
void
chgkprot(addr, len, rw)
	register caddr_t addr;
	int len, rw;
{
	vm_prot_t prot;
	vm_offset_t pa, sva, eva;

	prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
	eva = round_page(addr + len - 1);
	for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) {
		/*
		 * Extract physical address for the page.
		 * We use a cheezy hack to differentiate physical
		 * page 0 from an invalid mapping, not that it
		 * really matters...
		 */
		pa = pmap_extract(kernel_pmap, sva|1);
		if (pa == 0)
			panic("chgkprot: invalid page");
		pmap_enter(kernel_pmap, sva, pa&~1, prot, TRUE);
	}
}
#endif

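/*
 * Example (hypothetical): how a kernel debugger might use chgkprot()
 * to plant a breakpoint in the normally read-only kernel text.
 * BPT_INST stands in for the machine's breakpoint instruction and is
 * an assumption here, as is the helper itself.
 */
#ifdef notdef
void
kdb_plant_breakpoint(addr)
	caddr_t addr;
{

	chgkprot(addr, sizeof(int), B_WRITE);	/* write-enable the page */
	*(int *)addr = BPT_INST;		/* overwrite the instruction */
	chgkprot(addr, sizeof(int), B_READ);	/* back to read-only */
}
#endif /* notdef */
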
void
vslock(addr, len)
	caddr_t addr;
	u_int len;
{
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
	    round_page(addr+len-1), FALSE);
}

void
vsunlock(addr, len, dirtied)
	caddr_t addr;
	u_int len;
	int dirtied;
{
#ifdef lint
	dirtied++;
#endif /* lint */
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
	    round_page(addr+len-1), TRUE);
}

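/*
 * Example (hypothetical): the usual vslock()/vsunlock() pairing around
 * a transfer that addresses user memory directly, so the pages cannot
 * be paged out mid-transfer.  A sketch only; physio() is where this
 * pattern actually occurs, and the function below is made up.
 */
#ifdef notdef
void
exdev_transfer(bp, base, count)
	struct buf *bp;
	caddr_t base;
	u_int count;
{

	vslock(base, count);			/* wire the user pages */
	/* ... start the I/O and sleep until it completes ... */
	vsunlock(base, count, (bp->b_flags & B_READ) != 0);
}
#endif /* notdef */
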
/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.
 * NOTE: the kernel stack may be at a different location in the child
 * process, and thus addresses of automatic variables may be invalid
 * after cpu_fork returns in the child process.  We do nothing here
 * after cpu_fork returns.
 */
int
vm_fork(p1, p2, isvfork)
	register struct proc *p1, *p2;
	int isvfork;
{
	register struct user *up;
	vm_offset_t addr;

#ifdef i386
	/*
	 * avoid copying any of the parent's pagetables or other
	 * per-process objects that reside in the map by marking all of
	 * them non-inheritable
	 */
	(void)vm_map_inherit(&p1->p_vmspace->vm_map,
		UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
#endif
	p2->p_vmspace = vmspace_fork(p1->p_vmspace);

#ifdef SYSVSHM
	if (p1->p_vmspace->vm_shm)
		shmfork(p1, p2, isvfork);
#endif

#ifndef i386
	/*
	 * Allocate a wired-down (for now) pcb and kernel stack for the
	 * process
	 */
	addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
	vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
#else
	/*
	 * XXX somehow, on 386, occasionally pageout removes active,
	 * wired down kstack and pagetables, WITHOUT going thru
	 * vm_page_unwire!  Why this appears to work is not yet clear,
	 * yet it does...
	 */
	addr = kmem_alloc(kernel_map, ctob(UPAGES));
#endif
	up = (struct user *)addr;
	p2->p_addr = up;

	/*
	 * p_stats and p_sigacts currently point at fields
	 * in the user struct but not at &u, instead at p_addr.
	 * Copy p_sigacts and parts of p_stats; zero the rest
	 * of p_stats (statistics).
	 */
	p2->p_stats = &up->u_stats;
	p2->p_sigacts = &up->u_sigacts;
	up->u_sigacts = *p1->p_sigacts;
	bzero(&up->u_stats.pstat_startzero,
	    (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
	    (caddr_t)&up->u_stats.pstat_startzero));
	bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
	    ((caddr_t)&up->u_stats.pstat_endcopy -
	     (caddr_t)&up->u_stats.pstat_startcopy));

#ifdef i386
	{ u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp;

	  vp = &p2->p_vmspace->vm_map;
	  (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
	  (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
	  (void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
	}
#endif
	/*
	 * cpu_fork will copy and update the kernel stack and pcb,
	 * and make the child ready to run.  It marks the child
	 * so that it can return differently than the parent.
	 * It returns twice, once in the parent process and
	 * once in the child.
	 */
	return (cpu_fork(p1, p2));
}

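/*
 * The pstat_startzero/endzero and pstat_startcopy/endcopy markers used
 * above are labels inside struct pstats (sys/resourcevar.h) bracketing
 * the ranges to clear and to inherit at fork.  A minimal sketch of the
 * idiom with made-up field names (the real field list differs):
 */
#ifdef notdef
struct example_stats {
#define	ex_startzero	ex_faults
	long	ex_faults;		/* cleared in the child */
	long	ex_swaps;
#define	ex_endzero	ex_startcopy
#define	ex_startcopy	ex_prio
	long	ex_prio;		/* inherited from the parent */
	long	ex_nice;
#define	ex_endcopy	ex_end
	long	ex_end;			/* exclusive end of the copy range */
};
#endif /* notdef */
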
/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 */
void
vm_init_limits(p)
	register struct proc *p;
{

	/*
	 * Set up the initial limits on process VM.
	 * Set the maximum resident set size to be all
	 * of (reasonably) available memory.  This causes
	 * any single, large process to start random page
	 * replacement once it fills memory.
	 */
	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	p->p_rlimit[RLIMIT_RSS].rlim_cur = p->p_rlimit[RLIMIT_RSS].rlim_max =
		ptoa(cnt.v_free_count);
}

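/*
 * ptoa() above converts the free page count to bytes.  For example
 * (hypothetical numbers, page size is machine-dependent): with
 * 4096-byte pages and 2048 free pages, the initial RSS limit is
 * ptoa(2048) = 2048 * 4096 = 8388608 bytes (8 MB).
 */
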
#include <vm/vm_pageout.h>

#ifdef DEBUG
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1
#define	SDB_SWAPIN	2
#define	SDB_SWAPOUT	4
#endif

/*
 * Brutally simple:
 *	1. Attempt to swapin every swapped-out, runnable process in
 *	   order of priority.
 *	2. If not enough memory, wake the pageout daemon and let it
 *	   clear some space.
 */
void
sched()
{
	register struct proc *p;
	register int pri;
	struct proc *pp;
	int ppri;
	vm_offset_t addr;
	vm_size_t size;

loop:
#ifdef DEBUG
	while (!enableswap)
		sleep((caddr_t)&proc0, PVM);
#endif
	pp = NULL;
	ppri = INT_MIN;
	for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
		if (p->p_stat == SRUN && (p->p_flag & SLOAD) == 0) {
			pri = p->p_time + p->p_slptime - p->p_nice * 8;
			if (pri > ppri) {
				pp = p;
				ppri = pri;
			}
		}
	}
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: running, procp %x pri %d\n", pp, ppri);
#endif
	/*
	 * Nothing to do, back to sleep
	 */
	if ((p = pp) == NULL) {
		sleep((caddr_t)&proc0, PVM);
		goto loop;
	}

	/*
	 * We would like to bring someone in.
	 * This part is really bogus cuz we could deadlock on memory
	 * despite our feeble check.
	 */
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
	if (cnt.v_free_count > atop(size)) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPIN)
			printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
			       p->p_pid, p->p_comm, p->p_addr,
			       ppri, cnt.v_free_count);
#endif
		vm_map_pageable(kernel_map, addr, addr+size, FALSE);
		(void) splstatclock();
		if (p->p_stat == SRUN)
			setrq(p);
		p->p_flag |= SLOAD;
		(void) spl0();
		p->p_time = 0;
		goto loop;
	}
	/*
	 * Not enough memory, jab the pageout daemon and wait til the
	 * coast is clear.
	 */
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: no room for pid %d(%s), free %d\n",
		       p->p_pid, p->p_comm, cnt.v_free_count);
#endif
	(void) splhigh();
	VM_WAIT;
	(void) spl0();
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: room again, free %d\n", cnt.v_free_count);
#endif
	goto loop;
}

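/*
 * A worked example of the swapin priority computed above
 * (pri = p_time + p_slptime - p_nice * 8), with hypothetical numbers:
 * a process swapped out for 20 seconds with 5 seconds of accumulated
 * sleep time at nice 0 scores 25, and is chosen over one swapped out
 * for 30 seconds at nice 4, which scores 30 - 32 = -2.
 */
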
#define	swappable(p) \
	(((p)->p_flag & (SSYS|SLOAD|SKEEP|SWEXIT|SPHYSIO)) == SLOAD)

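/*
 * The macro requires SLOAD set with none of SSYS, SKEEP, SWEXIT or
 * SPHYSIO: e.g. a plain resident process (p_flag == SLOAD) is
 * swappable, but the same process during raw I/O (SLOAD|SPHYSIO) or
 * one already swapped out (SLOAD clear) is not.
 */
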
/*
 * Swapout is driven by the pageout daemon.  Very simple, we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Else, we swap the longest-sleeping or stopped process,
 * if any, otherwise the longest-resident process.
 */
void
swapout_threads()
{
	register struct proc *p;
	struct proc *outp, *outp2;
	int outpri, outpri2;
	int didswap = 0;
	extern int maxslp;

#ifdef DEBUG
	if (!enableswap)
		return;
#endif
	outp = outp2 = NULL;
	outpri = outpri2 = 0;
	for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
		if (!swappable(p))
			continue;
		switch (p->p_stat) {
		case SRUN:
			if (p->p_time > outpri2) {
				outp2 = p;
				outpri2 = p->p_time;
			}
			continue;

		case SSLEEP:
		case SSTOP:
			if (p->p_slptime > maxslp) {
				swapout(p);
				didswap++;
			} else if (p->p_slptime > outpri) {
				outp = p;
				outpri = p->p_slptime;
			}
			continue;
		}
	}
	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are real low on memory since we don't gain much by doing
	 * it (UPAGES pages).
	 */
	if (didswap == 0 &&
	    cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) {
		if ((p = outp) == 0)
			p = outp2;
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procp %x\n", p);
#endif
		if (p)
			swapout(p);
	}
}

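/*
 * E.g. with maxslp at its traditional value of 20: every eligible
 * process asleep or stopped for more than 20 seconds is swapped
 * outright; if none qualified and memory is still tight, the longest
 * sleeper goes, failing that the longest-resident runnable process.
 * (Hypothetical walk-through of the policy above.)
 */
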
void
swapout(p)
	register struct proc *p;
{
	vm_offset_t addr;
	vm_size_t size;

#ifdef DEBUG
	if (swapdebug & SDB_SWAPOUT)
		printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
		       p->p_pid, p->p_comm, p->p_addr, p->p_stat,
		       p->p_slptime, cnt.v_free_count);
#endif
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
#if defined(hp300) || defined(luna68k)
	/*
	 * Ugh!  u-area is double mapped to a fixed address behind the
	 * back of the VM system and accesses are usually through that
	 * address rather than the per-process address.  Hence reference
	 * and modify information are recorded at the fixed address and
	 * lost at context switch time.  We assume the u-struct and
	 * kernel stack are always accessed/modified and force it to be so.
	 */
	{
		register int i;
		volatile long tmp;

		for (i = 0; i < UPAGES; i++) {
			tmp = *(long *)addr; *(long *)addr = tmp;
			addr += NBPG;
		}
		addr = (vm_offset_t) p->p_addr;
	}
#endif
#ifdef mips
	/*
	 * Be sure to save the floating point coprocessor state before
	 * paging out the u-struct.
	 */
	{
		extern struct proc *machFPCurProcPtr;

		if (p == machFPCurProcPtr) {
			MachSaveCurFPState(p);
			machFPCurProcPtr = (struct proc *)0;
		}
	}
#endif
#ifndef i386 /* temporary measure till we find spontaneous unwire of kstack */
	vm_map_pageable(kernel_map, addr, addr+size, TRUE);
	pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
#endif
	(void) splhigh();
	p->p_flag &= ~SLOAD;
	if (p->p_stat == SRUN)
		remrq(p);
	(void) spl0();
	p->p_time = 0;
}

/*
 * The rest of these routines fake thread handling
 */

void
assert_wait(event, ruptible)
	int event;
	boolean_t ruptible;
{
#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
}

void
thread_block()
{
	int s = splhigh();

	if (curproc->p_thread)
		sleep((caddr_t)curproc->p_thread, PVM);
	splx(s);
}

void
thread_sleep(event, lock, ruptible)
	int event;
	simple_lock_t lock;
	boolean_t ruptible;
{
	int s = splhigh();

#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
	simple_unlock(lock);
	if (curproc->p_thread)
		sleep((caddr_t)event, PVM);
	splx(s);
}

void
thread_wakeup(event)
	int event;
{
	int s = splhigh();

	wakeup((caddr_t)event);
	splx(s);
}

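/*
 * Example (hypothetical fragment): how code written to the Mach
 * interface uses the shims above; both halves reduce to sleep() and
 * wakeup() on the event address.  "example_event" is made up here.
 */
#ifdef notdef
	/* waiting side */
	assert_wait((int)&example_event, FALSE);
	thread_block();			/* sleeps until the event is woken */

	/* waking side, run from another process or an interrupt */
	thread_wakeup((int)&example_event);
#endif /* notdef */
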
/*
 * DEBUG stuff
 */

int indent = 0;

#include <machine/stdarg.h>		/* see subr_prf.c */

/*ARGSUSED2*/
void
#if __STDC__
iprintf(const char *fmt, ...)
#else
iprintf(fmt /* , va_alist */)
	char *fmt;
	/* va_dcl */
#endif
{
	register int i;
	va_list ap;

	for (i = indent; i >= 8; i -= 8)
		printf("\t");
	while (--i >= 0)
		printf(" ");
	va_start(ap, fmt);
	printf("%r", fmt, ap);
	va_end(ap);
}
175f072e 567}