/*
 * Copyright (c) 1982, 1986, 1990, 1991 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *      @(#)kern_synch.c        7.16 (Berkeley) %G%
 */

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "kernel.h"
#include "buf.h"
#include "signalvar.h"
#include "resourcevar.h"

#include "machine/cpu.h"

u_char  curpri;                 /* usrpri of curproc */

/*
 * Force switch among equal priority processes every 100ms.
 */
roundrobin()
{

        need_resched();
        timeout(roundrobin, (caddr_t)0, hz / 10);
}

/*
 * constants for digital decay and forget
 *      90% of (p_cpu) usage in 5*loadav time
 *      95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *      Note that, as ps(1) mentions, this can let percentages
 *      total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_cpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *      for (i = 0; i < (5 * loadavg); i++)
 *              p_cpu *= decay;
 * will compute
 *      p_cpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *      decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *      decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *      decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *      b = 2 * loadavg
 * then
 *      decay = b / (b + 1)
 *
 * We now need to prove two things:
 *      1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *      2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *      For x close to zero, exp(x) =~ 1 + x, since
 *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *      For x close to zero, ln(1+x) =~ x, since
 *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
 *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *      ln(.1) =~ -2.30
 *
 * Proof of (1):
 *      Solve (factor)**(power) =~ .1 given power (5*loadav):
 *          solving for factor,
 *          ln(factor) =~ (-2.30/5*loadav), or
 *          factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *              exp(-1/b) =~ (b-1)/b =~ b/(b+1).  QED
 *
 * Proof of (2):
 *      Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *          solving for power,
 *          power*ln(b/(b+1)) =~ -2.30, or
 *          power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *      loadav: 1       2       3       4
 *      power:  5.68    10.32   14.94   19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define loadfactor(loadav)      (2 * (loadav))
#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
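
/*
 * Illustrative worked example (not part of the original source): assuming
 * FSHIFT is 11, so FSCALE == 2048, and a load average of 1.0 (averunnable[0]
 * == 2048 in fixed point), then
 *      loadfac = loadfactor(2048) = 4096
 *      decay_cpu(4096, p_cpu) = (4096 * p_cpu) / (4096 + 2048) =~ (2/3) * p_cpu
 * which matches decay = b/(b+1) with b = 2 * loadavg = 2, and
 * (2/3)**5 =~ 0.13, i.e. roughly the 90% decay in 5 seconds derived above.
 */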

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t ccpu = 0.95122942450071400909 * FSCALE;         /* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *      1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define CCPU_SHIFT      11
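
/*
 * Illustrative check (not in the original source), assuming FSHIFT == 11 and
 * hz == 100: the fast formula in schedcpu() below reduces to
 *      p_pctcpu = (p_pctcpu * ccpu) >> FSHIFT;  (decay by exp(-1/20))
 *      p_pctcpu += p_cpticks;                   (FSHIFT - CCPU_SHIFT == 0)
 * For a process using the CPU flat out (p_cpticks == hz == 100 every second)
 * the steady state is p_pctcpu =~ 100 / (1 - exp(-1/20)) =~ 2051 =~ FSCALE,
 * i.e. 100%, which is why adding the raw tick count works when FSHIFT is 11.
 */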

/*
 * Recompute process priorities, once a second
 */
schedcpu()
{
        register fixpt_t loadfac = loadfactor(averunnable[0]);
        register struct proc *p;
        register int s;
        register unsigned int newcpu;

        wakeup((caddr_t)&lbolt);
        for (p = allproc; p != NULL; p = p->p_nxt) {
                /*
                 * Increment time in/out of memory and sleep time
                 * (if sleeping).  We ignore overflow; with 16-bit int's
                 * (remember them?) overflow takes 45 days.
                 */
                p->p_time++;
                if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
                        p->p_slptime++;
                p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
                /*
                 * If the process has slept the entire second,
                 * stop recalculating its priority until it wakes up.
                 */
                if (p->p_slptime > 1)
                        continue;
                /*
                 * p_pctcpu is only for ps.
                 */
#if     (FSHIFT >= CCPU_SHIFT)
                p->p_pctcpu += (hz == 100)?
                        ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
                        100 * (((fixpt_t) p->p_cpticks)
                                << (FSHIFT - CCPU_SHIFT)) / hz;
#else
                p->p_pctcpu += ((FSCALE - ccpu) *
                        (p->p_cpticks * FSCALE / hz)) >> FSHIFT;
#endif
                p->p_cpticks = 0;
                newcpu = (u_int) decay_cpu(loadfac, p->p_cpu) + p->p_nice;
                p->p_cpu = min(newcpu, UCHAR_MAX);
                setpri(p);
                s = splhigh();  /* prevent state changes */
                if (p->p_pri >= PUSER) {
#define PPQ     (128 / NQS)             /* priorities per queue */
                        if ((p != curproc) &&
                            p->p_stat == SRUN &&
                            (p->p_flag & SLOAD) &&
                            (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
                                remrq(p);
                                p->p_pri = p->p_usrpri;
                                setrq(p);
                        } else
                                p->p_pri = p->p_usrpri;
                }
                splx(s);
        }
        vmmeter();
        if (bclnlist != NULL)
                wakeup((caddr_t)pageproc);
        timeout(schedcpu, (caddr_t)0, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_cpu of 255, sleeping for at least
 * six times the loadfactor will decay p_cpu to zero.
 */
updatepri(p)
        register struct proc *p;
{
        register unsigned int newcpu = p->p_cpu;
        register fixpt_t loadfac = loadfactor(averunnable[0]);

        if (p->p_slptime > 5 * loadfac)
                p->p_cpu = 0;
        else {
                p->p_slptime--; /* the first time was done in schedcpu */
                while (newcpu && --p->p_slptime)
                        newcpu = (int) decay_cpu(loadfac, newcpu);
                p->p_cpu = min(newcpu, UCHAR_MAX);
        }
        setpri(p);
}

#define SQSIZE 0100     /* Must be power of 2 */
#define HASH(x) (( (int) x >> 5) & (SQSIZE-1))
struct slpque {
        struct proc *sq_head;
        struct proc **sq_tailp;
} slpque[SQSIZE];
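
/*
 * Illustrative note (not in the original source): wait channels are kernel
 * addresses, so HASH() presumably discards the low 5 bits (distinct sleep
 * objects are rarely closer than 32 bytes apart) and keeps the next 6 bits
 * to select one of the SQSIZE (0100 == 64) sleep queues.  For example a
 * hypothetical chan of 0xc0012345 hashes to
 *      ((0xc0012345 >> 5) & 077) == 032.
 */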

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.
 * Suspends current process until a wakeup is made on chan.
 * The process will then be made runnable with priority pri.
 * Sleeps at most timo/hz seconds (0 means no timeout).
 * If pri includes the PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.
 * If PCATCH is set and a signal needs to be delivered,
 * ERESTART is returned if the current system call should be restarted
 * if possible, and EINTR is returned if the system call should
 * be interrupted by the signal.
 */
tsleep(chan, pri, wmesg, timo)
        caddr_t chan;
        int pri;
        char *wmesg;
        int timo;
{
        register struct proc *p = curproc;
        register struct slpque *qp;
        register s;
        int sig, catch = pri & PCATCH;
        extern int cold;
        int endtsleep();

        s = splhigh();
        if (cold || panicstr) {
                /*
                 * After a panic, or during autoconfiguration,
                 * just give interrupts a chance, then just return;
                 * don't run any other procs or panic below,
                 * in case this is the idle process and already asleep.
                 */
                splx(safepri);
                splx(s);
                return (0);
        }
#ifdef DIAGNOSTIC
        if (chan == 0 || p->p_stat != SRUN || p->p_rlink)
                panic("tsleep");
#endif
        p->p_wchan = chan;
        p->p_wmesg = wmesg;
        p->p_slptime = 0;
        p->p_pri = pri & PRIMASK;
        qp = &slpque[HASH(chan)];
        if (qp->sq_head == 0)
                qp->sq_head = p;
        else
                *qp->sq_tailp = p;
        *(qp->sq_tailp = &p->p_link) = 0;
        if (timo)
                timeout(endtsleep, (caddr_t)p, timo);
        /*
         * We put ourselves on the sleep queue and start our timeout
         * before calling CURSIG, as we could stop there, and a wakeup
         * or a SIGCONT (or both) could occur while we were stopped.
         * A SIGCONT would cause us to be marked as SSLEEP
         * without resuming us, thus we must be ready for sleep
         * when CURSIG is called.  If the wakeup happens while we're
         * stopped, p->p_wchan will be 0 upon return from CURSIG.
         */
        if (catch) {
                p->p_flag |= SSINTR;
                if (sig = CURSIG(p)) {
                        if (p->p_wchan)
                                unsleep(p);
                        p->p_stat = SRUN;
                        goto resume;
                }
                if (p->p_wchan == 0) {
                        catch = 0;
                        goto resume;
                }
        }
        p->p_stat = SSLEEP;
        (void) spl0();
        p->p_stats->p_ru.ru_nvcsw++;
        swtch();
resume:
        curpri = p->p_usrpri;
        splx(s);
        p->p_flag &= ~SSINTR;
        if (p->p_flag & STIMO) {
                p->p_flag &= ~STIMO;
                if (catch == 0 || sig == 0)
                        return (EWOULDBLOCK);
        } else if (timo)
                untimeout(endtsleep, (caddr_t)p);
        if (catch && (sig != 0 || (sig = CURSIG(p)))) {
                if (p->p_sigacts->ps_sigintr & sigmask(sig))
                        return (EINTR);
                return (ERESTART);
        }
        return (0);
}
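
/*
 * Illustrative (hypothetical) caller of tsleep(), not part of this file:
 * a driver waiting up to five seconds for an event, letting signals
 * interrupt the wait.  The names xx_softc, xxintr and "xxwait" are made up.
 *
 *      error = tsleep((caddr_t)&xx_softc, (PZERO + 1) | PCATCH,
 *          "xxwait", 5 * hz);
 *
 * error is then 0 if xxintr() did a wakeup((caddr_t)&xx_softc),
 * EWOULDBLOCK if the five seconds expired first, and EINTR or ERESTART
 * if a signal was delivered (because PCATCH was given).
 */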

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
endtsleep(p)
        register struct proc *p;
{
        int s = splhigh();

        if (p->p_wchan) {
                if (p->p_stat == SSLEEP)
                        setrun(p);
                else
                        unsleep(p);
                p->p_flag |= STIMO;
        }
        splx(s);
}

/*
 * Short-term, non-interruptible sleep.
 */
sleep(chan, pri)
        caddr_t chan;
        int pri;
{
        register struct proc *p = curproc;
        register struct slpque *qp;
        register s;
        extern int cold;

#ifdef DIAGNOSTIC
        if (pri > PZERO) {
                printf("sleep called with pri %d > PZERO, wchan: %x\n",
                        pri, chan);
                panic("old sleep");
        }
#endif
        s = splhigh();
        if (cold || panicstr) {
                /*
                 * After a panic, or during autoconfiguration,
                 * just give interrupts a chance, then just return;
                 * don't run any other procs or panic below,
                 * in case this is the idle process and already asleep.
                 */
                splx(safepri);
                splx(s);
                return;
        }
#ifdef DIAGNOSTIC
        if (chan == 0 || p->p_stat != SRUN || p->p_rlink)
                panic("sleep");
#endif
        p->p_wchan = chan;
        p->p_wmesg = NULL;
        p->p_slptime = 0;
        p->p_pri = pri;
        qp = &slpque[HASH(chan)];
        if (qp->sq_head == 0)
                qp->sq_head = p;
        else
                *qp->sq_tailp = p;
        *(qp->sq_tailp = &p->p_link) = 0;
        p->p_stat = SSLEEP;
        (void) spl0();
        p->p_stats->p_ru.ru_nvcsw++;
        swtch();
        curpri = p->p_usrpri;
        splx(s);
}
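
/*
 * Illustrative (hypothetical) caller of the old sleep() interface, not part
 * of this file: because pri must be <= PZERO, the wait cannot be interrupted
 * by signals and there is no timeout or wait message, e.g.
 *
 *      sleep((caddr_t)bp, PRIBIO);     ...     wakeup((caddr_t)bp);
 *
 * New code is expected to use tsleep() above instead.
 */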

/*
 * Remove a process from its wait queue
 */
unsleep(p)
        register struct proc *p;
{
        register struct slpque *qp;
        register struct proc **hp;
        int s;

        s = splhigh();
        if (p->p_wchan) {
                hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
                while (*hp != p)
                        hp = &(*hp)->p_link;
                *hp = p->p_link;
                if (qp->sq_tailp == &p->p_link)
                        qp->sq_tailp = hp;
                p->p_wchan = 0;
        }
        splx(s);
}

/*
 * Wakeup on "chan"; set all processes
 * sleeping on chan to run state.
 */
wakeup(chan)
        register caddr_t chan;
{
        register struct slpque *qp;
        register struct proc *p, **q;
        int s;

        s = splhigh();
        qp = &slpque[HASH(chan)];
restart:
        for (q = &qp->sq_head; p = *q; ) {
#ifdef DIAGNOSTIC
                if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
                        panic("wakeup");
#endif
                if (p->p_wchan == chan) {
                        p->p_wchan = 0;
                        *q = p->p_link;
                        if (qp->sq_tailp == &p->p_link)
                                qp->sq_tailp = q;
                        if (p->p_stat == SSLEEP) {
                                /* OPTIMIZED INLINE EXPANSION OF setrun(p) */
                                if (p->p_slptime > 1)
                                        updatepri(p);
                                p->p_slptime = 0;
                                p->p_stat = SRUN;
                                if (p->p_flag & SLOAD)
                                        setrq(p);
                                /*
                                 * Since curpri is a usrpri,
                                 * p->p_pri is always better than curpri.
                                 */
                                if ((p->p_flag & SLOAD) == 0)
                                        wakeup((caddr_t)&proc0);
                                else
                                        need_resched();
                                /* END INLINE EXPANSION */
                                goto restart;
                        }
                } else
                        q = &p->p_link;
        }
        splx(s);
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
rqinit()
{
        register int i;

        for (i = 0; i < NQS; i++)
                qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
setrun(p)
        register struct proc *p;
{
        register int s;

        s = splhigh();
        switch (p->p_stat) {

        case 0:
        case SWAIT:
        case SRUN:
        case SZOMB:
        default:
                panic("setrun");

        case SSTOP:
        case SSLEEP:
                unsleep(p);             /* e.g. when sending signals */
                break;

        case SIDL:
                break;
        }
        p->p_stat = SRUN;
        if (p->p_flag & SLOAD)
                setrq(p);
        splx(s);
        if (p->p_slptime > 1)
                updatepri(p);
        p->p_slptime = 0;
        if ((p->p_flag & SLOAD) == 0)
                wakeup((caddr_t)&proc0);
        else if (p->p_pri < curpri)
                need_resched();
}

/*
 * Compute priority of process when running in user mode.
 * Arrange to reschedule if the resulting priority
 * is better than that of the current process.
 */
setpri(p)
        register struct proc *p;
{
        register unsigned int newpri;

        newpri = PUSER + p->p_cpu / 4 + 2 * p->p_nice;
        newpri = min(newpri, MAXPRI);
        p->p_usrpri = newpri;
        if (newpri < curpri)
                need_resched();
}
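
/*
 * Illustrative worked example (not in the original source), assuming
 * PUSER == 50 and MAXPRI == 127 as in the param.h of this era: a process
 * with p_nice == 0 that has accumulated the maximum p_cpu of 255 gets
 *      newpri = 50 + 255 / 4 + 0 = 113,
 * while a freshly woken, long-sleeping process (p_cpu decayed to 0) returns
 * to 50, so CPU hogs sink toward the bottom of the user-priority range.
 */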