unmap (user) address space early enough to allow sleep (if releasing
[unix-history] / usr / src / sys / kern / kern_synch.c
CommitLineData
f406ae69
KB
1/*-
2 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
3 * Copyright (c) 1991 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * %sccs.include.redist.c%
da7c5cc6 7 *
f406ae69 8 * @(#)kern_synch.c 7.17 (Berkeley) %G%
da7c5cc6 9 */
961945a8 10
94368568
JB
11#include "param.h"
12#include "systm.h"
94368568 13#include "proc.h"
94368568
JB
14#include "kernel.h"
15#include "buf.h"
c081e302
MK
16#include "signalvar.h"
17#include "resourcevar.h"
1edb1cf8 18
132d8a6d 19#include "machine/cpu.h"
9db58063 20
70ca6a82
MK
21u_char curpri; /* usrpri of curproc */
22
1edb1cf8
BJ
/*
 * Force switch among equal priority processes every 100ms.
 */
roundrobin()
{

	/*
	 * Ask for a reschedule at the next opportunity, then re-arm
	 * ourselves to fire again in hz/10 clock ticks (100ms).
	 */
	need_resched();
	timeout(roundrobin, (caddr_t)0, hz / 10);
}
32
d048c9b6
KM
33/*
34 * constants for digital decay and forget
35 * 90% of (p_cpu) usage in 5*loadav time
36 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
37 * Note that, as ps(1) mentions, this can let percentages
38 * total over 100% (I've seen 137.9% for 3 processes).
39 *
40 * Note that hardclock updates p_cpu and p_cpticks independently.
41 *
42 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
43 * That is, the system wants to compute a value of decay such
44 * that the following for loop:
45 * for (i = 0; i < (5 * loadavg); i++)
46 * p_cpu *= decay;
47 * will compute
48 * p_cpu *= 0.1;
49 * for all values of loadavg:
50 *
51 * Mathematically this loop can be expressed by saying:
52 * decay ** (5 * loadavg) ~= .1
53 *
54 * The system computes decay as:
55 * decay = (2 * loadavg) / (2 * loadavg + 1)
56 *
57 * We wish to prove that the system's computation of decay
58 * will always fulfill the equation:
59 * decay ** (5 * loadavg) ~= .1
60 *
61 * If we compute b as:
62 * b = 2 * loadavg
63 * then
64 * decay = b / (b + 1)
65 *
66 * We now need to prove two things:
67 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
68 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
69 *
70 * Facts:
71 * For x close to zero, exp(x) =~ 1 + x, since
72 * exp(x) = 0! + x**1/1! + x**2/2! + ... .
73 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
74 * For x close to zero, ln(1+x) =~ x, since
75 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
76 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
77 * ln(.1) =~ -2.30
78 *
79 * Proof of (1):
80 * Solve (factor)**(power) =~ .1 given power (5*loadav):
81 * solving for factor,
82 * ln(factor) =~ (-2.30/5*loadav), or
132d8a6d 83 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
d048c9b6
KM
84 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
85 *
86 * Proof of (2):
87 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
88 * solving for power,
89 * power*ln(b/(b+1)) =~ -2.30, or
90 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
91 *
92 * Actual power values for the implemented algorithm are as follows:
93 * loadav: 1 2 3 4
94 * power: 5.68 10.32 14.94 19.55
95 */
1e35e051 96
80b6b780 97/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
132d8a6d
MK
98#define loadfactor(loadav) (2 * (loadav))
99#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
80b6b780
KM
100
101/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
102fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
103
104/*
105 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
106 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
107 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
108 *
109 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
110 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
111 *
112 * If you don't want to bother with the faster/more-accurate formula, you
113 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
114 * (more general) method of calculating the %age of CPU used by a process.
115 */
116#define CCPU_SHIFT 11
1edb1cf8 117
1edb1cf8
BJ
/*
 * Recompute process priorities, once a second
 */
schedcpu()
{
	register fixpt_t loadfac = loadfactor(averunnable[0]);
	register struct proc *p;
	register int s;
	register unsigned int newcpu;

	/* Wake anyone sleeping on the once-a-second "lightning bolt". */
	wakeup((caddr_t)&lbolt);
	for (p = allproc; p != NULL; p = p->p_nxt) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_time++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		/* Decay the %cpu estimate by ccpu (~exp(-1/20)) per second. */
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up
		 * (updatepri() will catch it up then).
		 */
		if (p->p_slptime > 1)
			continue;
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		/* Fixed-point shortcut; exact when hz == 100. */
		p->p_pctcpu += (hz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
			100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / hz;
#else
		/* Slower, more general formula; see CCPU_SHIFT comment. */
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / hz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		/* Digitally decay p_cpu, then bias by nice; clamp to a u_char. */
		newcpu = (u_int) decay_cpu(loadfac, p->p_cpu) + p->p_nice;
		p->p_cpu = min(newcpu, UCHAR_MAX);
		setpri(p);
		s = splhigh();	/* prevent state changes */
		if (p->p_pri >= PUSER) {
#define	PPQ	(128 / NQS)		/* priorities per queue */
			/*
			 * If a runnable, resident process (other than
			 * ourselves) is moving between run queues, requeue
			 * it; otherwise just update its priority in place.
			 */
			if ((p != curproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & SLOAD) &&
			    (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
				remrq(p);
				p->p_pri = p->p_usrpri;
				setrq(p);
			} else
				p->p_pri = p->p_usrpri;
		}
		splx(s);
	}
	vmmeter();
	/* Kick the pageout daemon if there are clean pages to reclaim. */
	if (bclnlist != NULL)
		wakeup((caddr_t)pageproc);
	/* Re-arm ourselves for one second from now. */
	timeout(schedcpu, (caddr_t)0, hz);
}
a379cce8 181
1e35e051
MK
/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_cpu of 255, sleeping for at least
 * six times the loadfactor will decay p_cpu to zero.
 */
updatepri(p)
	register struct proc *p;
{
	register unsigned int newcpu = p->p_cpu;
	register fixpt_t loadfac = loadfactor(averunnable[0]);

	if (p->p_slptime > 5 * loadfac)
		/* Slept long enough that p_cpu would decay to nothing. */
		p->p_cpu = 0;
	else {
		/* Apply one second of decay per second slept. */
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int) decay_cpu(loadfac, newcpu);
		p->p_cpu = min(newcpu, UCHAR_MAX);
	}
	setpri(p);
}
203
a379cce8
BJ
204#define SQSIZE 0100 /* Must be power of 2 */
205#define HASH(x) (( (int) x >> 5) & (SQSIZE-1))
3abb418a
KM
206struct slpque {
207 struct proc *sq_head;
208 struct proc **sq_tailp;
209} slpque[SQSIZE];
a379cce8 210
ffa9c89a
MK
211/*
212 * During autoconfiguration or after a panic, a sleep will simply
213 * lower the priority briefly to allow interrupts, then return.
214 * The priority to be used (safepri) is machine-dependent, thus this
215 * value is initialized and maintained in the machine-dependent layers.
216 * This priority will typically be 0, or the lowest priority
217 * that is safe for use on the interrupt stack; it can be made
218 * higher to block network software interrupts after panics.
219 */
220int safepri;
221
a379cce8 222/*
25667a4a
MK
223 * General sleep call.
224 * Suspends current process until a wakeup is made on chan.
225 * The process will then be made runnable with priority pri.
226 * Sleeps at most timo/hz seconds (0 means no timeout).
227 * If pri includes PCATCH flag, signals are checked
228 * before and after sleeping, else signals are not checked.
229 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.
230 * If PCATCH is set and a signal needs to be delivered,
231 * ERESTART is returned if the current system call should be restarted
232 * if possible, and EINTR is returned if the system call should
233 * be interrupted by the signal (return EINTR).
a379cce8 234 */
25667a4a 235tsleep(chan, pri, wmesg, timo)
67e9a600
MT
236 caddr_t chan;
237 int pri;
238 char *wmesg;
239 int timo;
240{
c081e302 241 register struct proc *p = curproc;
67e9a600
MT
242 register struct slpque *qp;
243 register s;
25667a4a 244 int sig, catch = pri & PCATCH;
67e9a600
MT
245 extern int cold;
246 int endtsleep();
247
67e9a600
MT
248 s = splhigh();
249 if (cold || panicstr) {
250 /*
251 * After a panic, or during autoconfiguration,
252 * just give interrupts a chance, then just return;
253 * don't run any other procs or panic below,
254 * in case this is the idle process and already asleep.
67e9a600 255 */
ffa9c89a 256 splx(safepri);
67e9a600
MT
257 splx(s);
258 return (0);
259 }
260#ifdef DIAGNOSTIC
132d8a6d 261 if (chan == 0 || p->p_stat != SRUN || p->p_rlink)
25667a4a 262 panic("tsleep");
67e9a600 263#endif
132d8a6d
MK
264 p->p_wchan = chan;
265 p->p_wmesg = wmesg;
266 p->p_slptime = 0;
267 p->p_pri = pri & PRIMASK;
67e9a600
MT
268 qp = &slpque[HASH(chan)];
269 if (qp->sq_head == 0)
132d8a6d 270 qp->sq_head = p;
67e9a600 271 else
132d8a6d
MK
272 *qp->sq_tailp = p;
273 *(qp->sq_tailp = &p->p_link) = 0;
ffa9c89a 274 if (timo)
132d8a6d 275 timeout(endtsleep, (caddr_t)p, timo);
67e9a600 276 /*
132d8a6d
MK
277 * We put ourselves on the sleep queue and start our timeout
278 * before calling CURSIG, as we could stop there, and a wakeup
279 * or a SIGCONT (or both) could occur while we were stopped.
ffa9c89a
MK
280 * A SIGCONT would cause us to be marked as SSLEEP
281 * without resuming us, thus we must be ready for sleep
282 * when CURSIG is called. If the wakeup happens while we're
132d8a6d 283 * stopped, p->p_wchan will be 0 upon return from CURSIG.
67e9a600 284 */
25667a4a 285 if (catch) {
132d8a6d
MK
286 p->p_flag |= SSINTR;
287 if (sig = CURSIG(p)) {
288 if (p->p_wchan)
289 unsleep(p);
290 p->p_stat = SRUN;
ffa9c89a 291 goto resume;
25667a4a 292 }
132d8a6d 293 if (p->p_wchan == 0) {
ffa9c89a
MK
294 catch = 0;
295 goto resume;
25667a4a 296 }
67e9a600 297 }
132d8a6d 298 p->p_stat = SSLEEP;
67e9a600 299 (void) spl0();
132d8a6d 300 p->p_stats->p_ru.ru_nvcsw++;
67e9a600 301 swtch();
ffa9c89a 302resume:
132d8a6d 303 curpri = p->p_usrpri;
67e9a600 304 splx(s);
132d8a6d
MK
305 p->p_flag &= ~SSINTR;
306 if (p->p_flag & STIMO) {
307 p->p_flag &= ~STIMO;
ffa9c89a
MK
308 if (catch == 0 || sig == 0)
309 return (EWOULDBLOCK);
310 } else if (timo)
132d8a6d
MK
311 untimeout(endtsleep, (caddr_t)p);
312 if (catch && (sig != 0 || (sig = CURSIG(p)))) {
313 if (p->p_sigacts->ps_sigintr & sigmask(sig))
25667a4a
MK
314 return (EINTR);
315 return (ERESTART);
316 }
67e9a600
MT
317 return (0);
318}
319
/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
endtsleep(p)
	register struct proc *p;
{
	int s = splhigh();

	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrun(p);	/* make it runnable again */
		else
			unsleep(p);	/* stopped: dequeue but stay stopped */
		p->p_flag |= STIMO;	/* tell tsleep the timeout expired */
	}
	splx(s);
}
340
25667a4a
MK
341/*
342 * Short-term, non-interruptable sleep.
343 */
a379cce8 344sleep(chan, pri)
bd76c595
BJ
345 caddr_t chan;
346 int pri;
a379cce8 347{
c081e302 348 register struct proc *p = curproc;
3abb418a 349 register struct slpque *qp;
6fdc0335 350 register s;
79a4402e 351 extern int cold;
a379cce8 352
25667a4a
MK
353#ifdef DIAGNOSTIC
354 if (pri > PZERO) {
355 printf("sleep called with pri %d > PZERO, wchan: %x\n",
356 pri, chan);
357 panic("old sleep");
358 }
359#endif
1e35e051 360 s = splhigh();
79a4402e 361 if (cold || panicstr) {
76acd871 362 /*
79a4402e
MK
363 * After a panic, or during autoconfiguration,
364 * just give interrupts a chance, then just return;
365 * don't run any other procs or panic below,
366 * in case this is the idle process and already asleep.
76acd871 367 */
ffa9c89a 368 splx(safepri);
76acd871
MK
369 splx(s);
370 return;
371 }
67e9a600 372#ifdef DIAGNOSTIC
132d8a6d 373 if (chan==0 || p->p_stat != SRUN || p->p_rlink)
a379cce8 374 panic("sleep");
67e9a600 375#endif
132d8a6d
MK
376 p->p_wchan = chan;
377 p->p_wmesg = NULL;
378 p->p_slptime = 0;
379 p->p_pri = pri;
3abb418a
KM
380 qp = &slpque[HASH(chan)];
381 if (qp->sq_head == 0)
132d8a6d 382 qp->sq_head = p;
3abb418a 383 else
132d8a6d
MK
384 *qp->sq_tailp = p;
385 *(qp->sq_tailp = &p->p_link) = 0;
386 p->p_stat = SSLEEP;
25667a4a 387 (void) spl0();
132d8a6d 388 p->p_stats->p_ru.ru_nvcsw++;
25667a4a 389 swtch();
132d8a6d 390 curpri = p->p_usrpri;
a379cce8 391 splx(s);
a379cce8
BJ
392}
393
87d0f32e
BJ
/*
 * Remove a process from its wait queue
 */
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		/* Walk the hash chain's link pointers until we find p. */
		hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_link;
		*hp = p->p_link;
		if (qp->sq_tailp == &p->p_link)
			qp->sq_tailp = hp;	/* p was last; back up the tail */
		p->p_wchan = 0;
	}
	splx(s);
}
416
a379cce8 417/*
132d8a6d
MK
418 * Wakeup on "chan"; set all processes
419 * sleeping on chan to run state.
a379cce8
BJ
420 */
421wakeup(chan)
18a4549b 422 register caddr_t chan;
a379cce8 423{
3abb418a
KM
424 register struct slpque *qp;
425 register struct proc *p, **q;
a379cce8
BJ
426 int s;
427
1e35e051 428 s = splhigh();
3abb418a 429 qp = &slpque[HASH(chan)];
a379cce8 430restart:
3abb418a 431 for (q = &qp->sq_head; p = *q; ) {
67e9a600 432#ifdef DIAGNOSTIC
87d0f32e 433 if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
a379cce8 434 panic("wakeup");
67e9a600 435#endif
132d8a6d 436 if (p->p_wchan == chan) {
a379cce8 437 p->p_wchan = 0;
e5df4be8 438 *q = p->p_link;
3abb418a
KM
439 if (qp->sq_tailp == &p->p_link)
440 qp->sq_tailp = q;
87d0f32e
BJ
441 if (p->p_stat == SSLEEP) {
442 /* OPTIMIZED INLINE EXPANSION OF setrun(p) */
6f414c22
MK
443 if (p->p_slptime > 1)
444 updatepri(p);
132d8a6d 445 p->p_slptime = 0;
87d0f32e 446 p->p_stat = SRUN;
c74c8a79 447 if (p->p_flag & SLOAD)
87d0f32e 448 setrq(p);
fab25db3
MK
449 /*
450 * Since curpri is a usrpri,
451 * p->p_pri is always better than curpri.
452 */
132d8a6d
MK
453 if ((p->p_flag&SLOAD) == 0)
454 wakeup((caddr_t)&proc0);
455 else
456 need_resched();
87d0f32e 457 /* END INLINE EXPANSION */
e5df4be8 458 goto restart;
a379cce8 459 }
e5df4be8
BJ
460 } else
461 q = &p->p_link;
a379cce8
BJ
462 }
463 splx(s);
464}
465
a379cce8
BJ
466/*
467 * Initialize the (doubly-linked) run queues
468 * to be empty.
469 */
470rqinit()
471{
472 register int i;
473
474 for (i = 0; i < NQS; i++)
475 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
476}
a379cce8
BJ
477
/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
setrun(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {

	case 0:
	case SWAIT:
	case SRUN:
	case SZOMB:
	default:
		/* Already runnable, dead, or nonsense: caller error. */
		panic("setrun");

	case SSTOP:
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & SLOAD)
		setrq(p);
	splx(s);
	/* Catch up on priority decay missed while sleeping. */
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag&SLOAD) == 0)
		wakeup((caddr_t)&proc0);	/* not resident: wake the swapper */
	else if (p->p_pri < curpri)
		need_resched();
}
518
519/*
132d8a6d
MK
520 * Compute priority of process when running in user mode.
521 * Arrange to reschedule if the resulting priority
522 * is better than that of the current process.
a379cce8 523 */
132d8a6d
MK
524setpri(p)
525 register struct proc *p;
a379cce8 526{
132d8a6d
MK
527 register unsigned int newpri;
528
529 newpri = PUSER + p->p_cpu / 4 + 2 * p->p_nice;
530 newpri = min(newpri, MAXPRI);
531 p->p_usrpri = newpri;
532 if (newpri < curpri)
533 need_resched();
a379cce8 534}