minor optimization
[unix-history] / usr / src / sys / kern / kern_synch.c
CommitLineData
da7c5cc6 1/*
0880b18e 2 * Copyright (c) 1982, 1986 Regents of the University of California.
da7c5cc6
KM
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 *
80b6b780 6 * @(#)kern_synch.c 7.9 (Berkeley) %G%
da7c5cc6 7 */
961945a8 8
40ed2c45
KM
9#include "machine/pte.h"
10#include "machine/psl.h"
11#include "machine/mtpr.h"
a379cce8 12
94368568
JB
13#include "param.h"
14#include "systm.h"
94368568
JB
15#include "user.h"
16#include "proc.h"
94368568
JB
17#include "vm.h"
18#include "kernel.h"
19#include "buf.h"
1edb1cf8
BJ
20
21/*
22 * Force switch among equal priority processes every 100ms.
23 */
24roundrobin()
25{
26
27 runrun++;
28 aston();
b32450f4 29 timeout(roundrobin, (caddr_t)0, hz / 10);
1edb1cf8
BJ
30}
31
d048c9b6
KM
32/*
33 * constants for digital decay and forget
34 * 90% of (p_cpu) usage in 5*loadav time
35 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
36 * Note that, as ps(1) mentions, this can let percentages
37 * total over 100% (I've seen 137.9% for 3 processes).
38 *
39 * Note that hardclock updates p_cpu and p_cpticks independently.
40 *
41 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
42 * That is, the system wants to compute a value of decay such
43 * that the following for loop:
44 * for (i = 0; i < (5 * loadavg); i++)
45 * p_cpu *= decay;
46 * will compute
47 * p_cpu *= 0.1;
48 * for all values of loadavg:
49 *
50 * Mathematically this loop can be expressed by saying:
51 * decay ** (5 * loadavg) ~= .1
52 *
53 * The system computes decay as:
54 * decay = (2 * loadavg) / (2 * loadavg + 1)
55 *
56 * We wish to prove that the system's computation of decay
57 * will always fulfill the equation:
58 * decay ** (5 * loadavg) ~= .1
59 *
60 * If we compute b as:
61 * b = 2 * loadavg
62 * then
63 * decay = b / (b + 1)
64 *
65 * We now need to prove two things:
66 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
67 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
68 *
69 * Facts:
70 * For x close to zero, exp(x) =~ 1 + x, since
71 * exp(x) = 0! + x**1/1! + x**2/2! + ... .
72 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
73 * For x close to zero, ln(1+x) =~ x, since
74 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
75 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
76 * ln(.1) =~ -2.30
77 *
78 * Proof of (1):
79 * Solve (factor)**(power) =~ .1 given power (5*loadav):
80 * solving for factor,
81 * ln(factor) =~ (-2.30/5*loadav), or
82 * factor =~ exp(-1/((5/2.30)*loadav) =~ exp(-1/(2*loadav)) =
83 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
84 *
85 * Proof of (2):
86 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
87 * solving for power,
88 * power*ln(b/(b+1)) =~ -2.30, or
89 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
90 *
91 * Actual power values for the implemented algorithm are as follows:
92 * loadav: 1 2 3 4
93 * power: 5.68 10.32 14.94 19.55
94 */
1e35e051 95
80b6b780
KM
96/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
97#define get_b(loadav) (2 * (loadav))
98#define get_pcpu(b, cpu) (((b) * ((cpu) & 0377)) / ((b) + FSCALE))
99
100/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
101fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
102
103/*
104 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
105 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
106 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
107 *
108 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
109 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
110 *
111 * If you dont want to bother with the faster/more-accurate formula, you
112 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
113 * (more general) method of calculating the %age of CPU used by a process.
114 */
115#define CCPU_SHIFT 11
1edb1cf8 116
1edb1cf8
BJ
117/*
118 * Recompute process priorities, once a second
119 */
120schedcpu()
121{
80b6b780 122 register fixpt_t b = get_b(averunnable[0]);
1edb1cf8
BJ
123 register struct proc *p;
124 register int s, a;
125
1edb1cf8 126 wakeup((caddr_t)&lbolt);
1d348849 127 for (p = allproc; p != NULL; p = p->p_nxt) {
1edb1cf8
BJ
128 if (p->p_time != 127)
129 p->p_time++;
1edb1cf8
BJ
130 if (p->p_stat==SSLEEP || p->p_stat==SSTOP)
131 if (p->p_slptime != 127)
132 p->p_slptime++;
80b6b780 133 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
1e35e051
MK
134 /*
135 * If the process has slept the entire second,
136 * stop recalculating its priority until it wakes up.
137 */
80b6b780 138 if (p->p_slptime > 1)
1e35e051 139 continue;
1e35e051
MK
140 /*
141 * p_pctcpu is only for ps.
142 */
80b6b780
KM
143#if (FSHIFT >= CCPU_SHIFT)
144 p->p_pctcpu += (hz == 100)?
145 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
146 100 * (((fixpt_t) p->p_cpticks)
147 << (FSHIFT - CCPU_SHIFT)) / hz;
148#else
149 p->p_pctcpu += ((FSCALE - ccpu) *
150 (p->p_cpticks * FSCALE / hz)) >> FSHIFT;
151#endif
1edb1cf8 152 p->p_cpticks = 0;
80b6b780 153 a = (int) get_pcpu(b, p->p_cpu) + p->p_nice;
1edb1cf8
BJ
154 if (a < 0)
155 a = 0;
156 if (a > 255)
157 a = 255;
158 p->p_cpu = a;
159 (void) setpri(p);
1e35e051 160 s = splhigh(); /* prevent state changes */
1edb1cf8 161 if (p->p_pri >= PUSER) {
fab25db3 162#define PPQ (128 / NQS)
1edb1cf8
BJ
163 if ((p != u.u_procp || noproc) &&
164 p->p_stat == SRUN &&
165 (p->p_flag & SLOAD) &&
fab25db3 166 (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
1edb1cf8
BJ
167 remrq(p);
168 p->p_pri = p->p_usrpri;
169 setrq(p);
170 } else
171 p->p_pri = p->p_usrpri;
172 }
173 splx(s);
174 }
175 vmmeter();
176 if (runin!=0) {
177 runin = 0;
178 wakeup((caddr_t)&runin);
179 }
180 if (bclnlist != NULL)
181 wakeup((caddr_t)&proc[2]);
b32450f4 182 timeout(schedcpu, (caddr_t)0, hz);
1edb1cf8 183}
a379cce8 184
1e35e051
MK
185/*
186 * Recalculate the priority of a process after it has slept for a while.
187 */
188updatepri(p)
189 register struct proc *p;
190{
191 register int a = p->p_cpu & 0377;
80b6b780 192 register fixpt_t b = get_b(averunnable[0]);
1e35e051
MK
193
194 p->p_slptime--; /* the first time was done in schedcpu */
195 while (a && --p->p_slptime)
80b6b780 196 a = (int) get_pcpu(b, a) /* + p->p_nice */;
27bc21f7 197 p->p_slptime = 0;
1e35e051
MK
198 if (a < 0)
199 a = 0;
200 if (a > 255)
201 a = 255;
202 p->p_cpu = a;
203 (void) setpri(p);
204}
205
a379cce8
BJ
206#define SQSIZE 0100 /* Must be power of 2 */
207#define HASH(x) (( (int) x >> 5) & (SQSIZE-1))
3abb418a
KM
208struct slpque {
209 struct proc *sq_head;
210 struct proc **sq_tailp;
211} slpque[SQSIZE];
a379cce8
BJ
212
213/*
214 * Give up the processor till a wakeup occurs
215 * on chan, at which time the process
216 * enters the scheduling queue at priority pri.
217 * The most important effect of pri is that when
218 * pri<=PZERO a signal cannot disturb the sleep;
219 * if pri>PZERO signals will be processed.
220 * Callers of this routine must be prepared for
221 * premature return, and check that the reason for
222 * sleeping has gone away.
223 */
224sleep(chan, pri)
bd76c595
BJ
225 caddr_t chan;
226 int pri;
a379cce8 227{
3abb418a
KM
228 register struct proc *rp;
229 register struct slpque *qp;
6fdc0335 230 register s;
79a4402e 231 extern int cold;
a379cce8
BJ
232
233 rp = u.u_procp;
1e35e051 234 s = splhigh();
79a4402e 235 if (cold || panicstr) {
76acd871 236 /*
79a4402e
MK
237 * After a panic, or during autoconfiguration,
238 * just give interrupts a chance, then just return;
239 * don't run any other procs or panic below,
240 * in case this is the idle process and already asleep.
76acd871
MK
241 * The splnet should be spl0 if the network was being used
242 * by the filesystem, but for now avoid network interrupts
243 * that might cause another panic.
244 */
245 (void) splnet();
246 splx(s);
247 return;
248 }
249 if (chan==0 || rp->p_stat != SRUN || rp->p_rlink)
a379cce8 250 panic("sleep");
a379cce8
BJ
251 rp->p_wchan = chan;
252 rp->p_slptime = 0;
253 rp->p_pri = pri;
3abb418a
KM
254 qp = &slpque[HASH(chan)];
255 if (qp->sq_head == 0)
256 qp->sq_head = rp;
257 else
258 *qp->sq_tailp = rp;
259 *(qp->sq_tailp = &rp->p_link) = 0;
18a4549b 260 if (pri > PZERO) {
6f414c22
MK
261 /*
262 * If we stop in issig(), wakeup may already have happened
263 * when we return (rp->p_wchan will then be 0).
264 */
18a4549b 265 if (ISSIG(rp)) {
e5df4be8
BJ
266 if (rp->p_wchan)
267 unsleep(rp);
a379cce8 268 rp->p_stat = SRUN;
81263dba 269 (void) spl0();
a379cce8
BJ
270 goto psig;
271 }
e5df4be8
BJ
272 if (rp->p_wchan == 0)
273 goto out;
274 rp->p_stat = SSLEEP;
81263dba 275 (void) spl0();
bd76c595 276 u.u_ru.ru_nvcsw++;
a379cce8 277 swtch();
18a4549b 278 if (ISSIG(rp))
a379cce8
BJ
279 goto psig;
280 } else {
6fdc0335 281 rp->p_stat = SSLEEP;
81263dba 282 (void) spl0();
bd76c595 283 u.u_ru.ru_nvcsw++;
a379cce8
BJ
284 swtch();
285 }
fab25db3 286 curpri = rp->p_usrpri;
e5df4be8 287out:
a379cce8
BJ
288 splx(s);
289 return;
290
291 /*
292 * If priority was low (>PZERO) and
18a4549b 293 * there has been a signal, execute non-local goto through
d01b68d6 294 * u.u_qsave, aborting the system call in progress (see trap.c)
a379cce8
BJ
295 */
296psig:
d01b68d6 297 longjmp(&u.u_qsave);
a379cce8
BJ
298 /*NOTREACHED*/
299}
300
87d0f32e
BJ
301/*
302 * Remove a process from its wait queue
303 */
304unsleep(p)
18a4549b 305 register struct proc *p;
87d0f32e 306{
3abb418a 307 register struct slpque *qp;
87d0f32e 308 register struct proc **hp;
3abb418a 309 int s;
87d0f32e 310
1e35e051 311 s = splhigh();
87d0f32e 312 if (p->p_wchan) {
3abb418a 313 hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
87d0f32e
BJ
314 while (*hp != p)
315 hp = &(*hp)->p_link;
316 *hp = p->p_link;
3abb418a
KM
317 if (qp->sq_tailp == &p->p_link)
318 qp->sq_tailp = hp;
87d0f32e
BJ
319 p->p_wchan = 0;
320 }
321 splx(s);
322}
323
a379cce8
BJ
324/*
325 * Wake up all processes sleeping on chan.
326 */
327wakeup(chan)
18a4549b 328 register caddr_t chan;
a379cce8 329{
3abb418a
KM
330 register struct slpque *qp;
331 register struct proc *p, **q;
a379cce8
BJ
332 int s;
333
1e35e051 334 s = splhigh();
3abb418a 335 qp = &slpque[HASH(chan)];
a379cce8 336restart:
3abb418a 337 for (q = &qp->sq_head; p = *q; ) {
87d0f32e 338 if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
a379cce8 339 panic("wakeup");
6fdc0335 340 if (p->p_wchan==chan) {
a379cce8 341 p->p_wchan = 0;
e5df4be8 342 *q = p->p_link;
3abb418a
KM
343 if (qp->sq_tailp == &p->p_link)
344 qp->sq_tailp = q;
87d0f32e
BJ
345 if (p->p_stat == SSLEEP) {
346 /* OPTIMIZED INLINE EXPANSION OF setrun(p) */
6f414c22
MK
347 if (p->p_slptime > 1)
348 updatepri(p);
87d0f32e 349 p->p_stat = SRUN;
c74c8a79 350 if (p->p_flag & SLOAD)
87d0f32e 351 setrq(p);
fab25db3
MK
352 /*
353 * Since curpri is a usrpri,
354 * p->p_pri is always better than curpri.
355 */
356 runrun++;
357 aston();
7eb2e67e
BJ
358 if ((p->p_flag&SLOAD) == 0) {
359 if (runout != 0) {
360 runout = 0;
361 wakeup((caddr_t)&runout);
362 }
363 wantin++;
87d0f32e
BJ
364 }
365 /* END INLINE EXPANSION */
e5df4be8 366 goto restart;
a379cce8 367 }
e5df4be8
BJ
368 } else
369 q = &p->p_link;
a379cce8
BJ
370 }
371 splx(s);
372}
373
a379cce8
BJ
374/*
375 * Initialize the (doubly-linked) run queues
376 * to be empty.
377 */
378rqinit()
379{
380 register int i;
381
382 for (i = 0; i < NQS; i++)
383 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
384}
a379cce8
BJ
385
386/*
387 * Set the process running;
388 * arrange for it to be swapped in if necessary.
389 */
390setrun(p)
18a4549b 391 register struct proc *p;
a379cce8 392{
18a4549b 393 register int s;
a379cce8 394
1e35e051 395 s = splhigh();
a379cce8
BJ
396 switch (p->p_stat) {
397
398 case 0:
399 case SWAIT:
400 case SRUN:
401 case SZOMB:
402 default:
403 panic("setrun");
404
6fdc0335 405 case SSTOP:
a379cce8 406 case SSLEEP:
87d0f32e 407 unsleep(p); /* e.g. when sending signals */
a379cce8
BJ
408 break;
409
410 case SIDL:
a379cce8
BJ
411 break;
412 }
413 p->p_stat = SRUN;
414 if (p->p_flag & SLOAD)
415 setrq(p);
416 splx(s);
27bc21f7
MK
417 if (p->p_slptime > 1)
418 updatepri(p);
18a4549b 419 if (p->p_pri < curpri) {
a379cce8 420 runrun++;
534d9295
BJ
421 aston();
422 }
7eb2e67e 423 if ((p->p_flag&SLOAD) == 0) {
18a4549b 424 if (runout != 0) {
7eb2e67e
BJ
425 runout = 0;
426 wakeup((caddr_t)&runout);
427 }
428 wantin++;
a379cce8
BJ
429 }
430}
431
432/*
433 * Set user priority.
434 * The rescheduling flag (runrun)
435 * is set if the priority is better
436 * than the currently running process.
437 */
438setpri(pp)
18a4549b 439 register struct proc *pp;
a379cce8 440{
18a4549b 441 register int p;
a379cce8 442
16a64baa 443 p = (pp->p_cpu & 0377)/4;
1e35e051 444 p += PUSER + 2 * pp->p_nice;
9afea775
BJ
445 if (pp->p_rssize > pp->p_maxrss && freemem < desfree)
446 p += 2*4; /* effectively, nice(4) */
18a4549b 447 if (p > 127)
a379cce8 448 p = 127;
18a4549b 449 if (p < curpri) {
a379cce8 450 runrun++;
a51a6e74
BJ
451 aston();
452 }
a379cce8 453 pp->p_usrpri = p;
18a4549b 454 return (p);
a379cce8 455}