/*-
 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)kern_synch.c	7.18 (Berkeley) 6/27/91
 * $Id$
 */

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "kernel.h"
#include "buf.h"
#include "signalvar.h"
#include "resourcevar.h"

#include "machine/cpu.h"

u_char	curpri;			/* usrpri of curproc */

/*
 * Force switch among equal priority processes every 100ms.
 */
roundrobin()
{

        need_resched();
        timeout(roundrobin, (caddr_t)0, hz / 10);
}
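
/*
 * roundrobin() re-arms itself through timeout() every hz/10 ticks, so it
 * only needs to be primed once; presumably the machine-independent startup
 * code kicks off both roundrobin() and schedcpu() at boot.
 */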

/*
 * constants for digital decay and forget
 * 90% of (p_cpu) usage in 5*loadav time
 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 * Note that, as ps(1) mentions, this can let percentages
 * total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_cpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_cpu *= decay;
 * will compute
 * 	p_cpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *		therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *		therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *	Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).			QED
 *
 * Proof of (2):
 *	Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.	QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav: 1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
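
/*
 * Illustrative check of the decay filter: with a load average of 1.0,
 * averunnable[0] == FSCALE, so loadfac == 2 * FSCALE and
 *
 *	decay_cpu(loadfac, cpu) == (2 * FSCALE * cpu) / (3 * FSCALE)
 *				== (2 * cpu) / 3
 *
 * i.e. a p_cpu of 90 decays to 60 after one schedcpu() pass.  Since
 * ln(.1) / ln(2/3) =~ 5.68, about 5.68 passes (seconds) shed 90% of the
 * accumulated usage, matching the "power" table above for loadav 1.
 */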

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11
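
/*
 * Worked reading of the p_pctcpu update in schedcpu() below: each second
 * p_pctcpu is decayed by ccpu and then credited with roughly
 * (1 - exp(-1/20)) * (p_cpticks / hz) of a CPU.  Assuming FSHIFT is 11
 * (so FSHIFT == CCPU_SHIFT) and hz == 100, (1 - exp(-1/20)) / 100 is about
 * 0.000488 == 1/FSCALE, so the credit collapses to p_cpticks shifted by
 * FSHIFT - CCPU_SHIFT == 0; that is the fast path taken when the #if in
 * schedcpu() is true and hz == 100.
 */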

/*
 * Recompute process priorities, once a second
 */
schedcpu()
{
        register fixpt_t loadfac = loadfactor(averunnable[0]);
        register struct proc *p;
        register int s;
        register unsigned int newcpu;

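        /*
         * Wake up anything sleeping on lbolt, the traditional once-a-second
         * "lightning bolt" channel.
         */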
        wakeup((caddr_t)&lbolt);
        for (p = allproc; p != NULL; p = p->p_nxt) {
                /*
                 * Increment time in/out of memory and sleep time
                 * (if sleeping).  We ignore overflow; with 16-bit int's
                 * (remember them?) overflow takes 45 days.
                 */
                p->p_time++;
                if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
                        p->p_slptime++;
                p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
                /*
                 * If the process has slept the entire second,
                 * stop recalculating its priority until it wakes up.
                 */
                if (p->p_slptime > 1)
                        continue;
                /*
                 * p_pctcpu is only for ps.
                 */
#if	(FSHIFT >= CCPU_SHIFT)
                p->p_pctcpu += (hz == 100)?
                        ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
                        100 * (((fixpt_t) p->p_cpticks)
                                << (FSHIFT - CCPU_SHIFT)) / hz;
#else
                p->p_pctcpu += ((FSCALE - ccpu) *
                        (p->p_cpticks * FSCALE / hz)) >> FSHIFT;
#endif
                p->p_cpticks = 0;
                newcpu = (u_int) decay_cpu(loadfac, p->p_cpu) + p->p_nice;
                p->p_cpu = min(newcpu, UCHAR_MAX);
                setpri(p);
                s = splhigh();	/* prevent state changes */
                if (p->p_pri >= PUSER) {
#define	PPQ	(128 / NQS)		/* priorities per queue */
                        if ((p != curproc) &&
                            p->p_stat == SRUN &&
                            (p->p_flag & (SLOAD|SWEXIT)) == SLOAD &&
                            (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
                                remrq(p);
                                p->p_pri = p->p_usrpri;
                                setrq(p);
                        } else
                                p->p_pri = p->p_usrpri;
                }
                splx(s);
        }
        vmmeter();
        if (bclnlist != NULL)
                wakeup((caddr_t)pageproc);
        timeout(schedcpu, (caddr_t)0, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_cpu of 255, sleeping for at least
 * six times the loadfactor will decay p_cpu to zero.
 */
updatepri(p)
        register struct proc *p;
{
        register unsigned int newcpu = p->p_cpu;
        register fixpt_t loadfac = loadfactor(averunnable[0]);

        if (p->p_slptime > 5 * loadfac)
                p->p_cpu = 0;
        else {
                p->p_slptime--;	/* the first time was done in schedcpu */
                while (newcpu && --p->p_slptime)
                        newcpu = (int) decay_cpu(loadfac, newcpu);
                p->p_cpu = min(newcpu, UCHAR_MAX);
        }
        setpri(p);
}

#define SQSIZE 0100	/* Must be power of 2 */
#define HASH(x)	(( (int) x >> 5) & (SQSIZE-1))
struct slpque {
        struct proc *sq_head;
        struct proc **sq_tailp;
} slpque[SQSIZE];
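
/*
 * Sleeping processes hang off one of SQSIZE (0100 octal == 64) hashed wait
 * queues.  HASH() drops the low five bits of the channel address (wait
 * channels are normally addresses of kernel data structures, so the low
 * bits carry little information) and masks the rest into a bucket index;
 * e.g. HASH((caddr_t)&lbolt) picks the bucket that schedcpu()'s lbolt
 * sleepers wait on.
 */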

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.
 * Suspends current process until a wakeup is made on chan.
 * The process will then be made runnable with priority pri.
 * Sleeps at most timo/hz seconds (0 means no timeout).
 * If pri includes PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.
 * If PCATCH is set and a signal needs to be delivered,
 * ERESTART is returned if the current system call should be restarted
 * if possible, and EINTR is returned if the system call should
 * be interrupted by the signal (return EINTR).
 */
tsleep(chan, pri, wmesg, timo)
        caddr_t chan;
        int pri;
        char *wmesg;
        int timo;
{
        register struct proc *p = curproc;
        register struct slpque *qp;
        register s;
        int sig, catch = pri & PCATCH;
        extern int cold;
        int endtsleep();

        s = splhigh();
        if (cold || panicstr) {
                /*
                 * After a panic, or during autoconfiguration,
                 * just give interrupts a chance, then just return;
                 * don't run any other procs or panic below,
                 * in case this is the idle process and already asleep.
                 */
                splx(safepri);
                splx(s);
                return (0);
        }
#ifdef DIAGNOSTIC
        if (chan == 0 || p->p_stat != SRUN || p->p_rlink)
                panic("tsleep");
#endif
        p->p_wchan = chan;
        p->p_wmesg = wmesg;
        p->p_slptime = 0;
        p->p_pri = pri & PRIMASK;
        qp = &slpque[HASH(chan)];
        if (qp->sq_head == 0)
                qp->sq_head = p;
        else
                *qp->sq_tailp = p;
        *(qp->sq_tailp = &p->p_link) = 0;
        if (timo)
                timeout(endtsleep, (caddr_t)p, timo);
        /*
         * We put ourselves on the sleep queue and start our timeout
         * before calling CURSIG, as we could stop there, and a wakeup
         * or a SIGCONT (or both) could occur while we were stopped.
         * A SIGCONT would cause us to be marked as SSLEEP
         * without resuming us, thus we must be ready for sleep
         * when CURSIG is called.  If the wakeup happens while we're
         * stopped, p->p_wchan will be 0 upon return from CURSIG.
         */
        if (catch) {
                p->p_flag |= SSINTR;
                if (sig = CURSIG(p)) {
                        if (p->p_wchan)
                                unsleep(p);
                        p->p_stat = SRUN;
                        goto resume;
                }
                if (p->p_wchan == 0) {
                        catch = 0;
                        goto resume;
                }
        }
        p->p_stat = SSLEEP;
        p->p_stats->p_ru.ru_nvcsw++;
        swtch();
#include "ddb.h"
#ifdef NDDB
        /* handy breakpoint location after process "wakes" */
        asm(".globl bpendtsleep ; bpendtsleep:");
#endif
resume:
        curpri = p->p_usrpri;
        splx(s);
        p->p_flag &= ~SSINTR;
        if (p->p_flag & STIMO) {
                p->p_flag &= ~STIMO;
                if (catch == 0 || sig == 0)
                        return (EWOULDBLOCK);
        } else if (timo)
                untimeout(endtsleep, (caddr_t)p);
        if (catch && (sig != 0 || (sig = CURSIG(p)))) {
                if (p->p_sigacts->ps_sigintr & sigmask(sig))
                        return (EINTR);
                return (ERESTART);
        }
        return (0);
}
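
/*
 * Illustrative use of tsleep() (the names "xx_softc" and "xxwait" are
 * hypothetical, not from this file): wait for an event, allow signals to
 * interrupt the wait, and give up after 5 seconds:
 *
 *	error = tsleep((caddr_t)&xx_softc, (PZERO + 1) | PCATCH, "xxwait",
 *	    5 * hz);
 *	if (error == EWOULDBLOCK)
 *		... the timeout expired ...
 *	else if (error == EINTR || error == ERESTART)
 *		... a signal arrived; abort or restart the system call ...
 *
 * The matching wakeup((caddr_t)&xx_softc) elsewhere makes the process
 * runnable again at the priority given here.
 */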

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
endtsleep(p)
        register struct proc *p;
{
        int s = splhigh();

        if (p->p_wchan) {
                if (p->p_stat == SSLEEP)
                        setrun(p);
                else
                        unsleep(p);
                p->p_flag |= STIMO;
        }
        splx(s);
}

/*
 * Short-term, non-interruptible sleep.
 */
sleep(chan, pri)
        caddr_t chan;
        int pri;
{
        register struct proc *p = curproc;
        register struct slpque *qp;
        register s;
        extern int cold;

#ifdef DIAGNOSTIC
        if (pri > PZERO) {
                printf("sleep called with pri %d > PZERO, wchan: %x\n",
                    pri, chan);
                panic("old sleep");
        }
#endif
        s = splhigh();
        if (cold || panicstr) {
                /*
                 * After a panic, or during autoconfiguration,
                 * just give interrupts a chance, then just return;
                 * don't run any other procs or panic below,
                 * in case this is the idle process and already asleep.
                 */
                splx(safepri);
                splx(s);
                return;
        }
#ifdef DIAGNOSTIC
        if (chan == 0 || p->p_stat != SRUN || p->p_rlink)
                panic("sleep");
#endif
        p->p_wchan = chan;
        p->p_wmesg = NULL;
        p->p_slptime = 0;
        p->p_pri = pri;
        qp = &slpque[HASH(chan)];
        if (qp->sq_head == 0)
                qp->sq_head = p;
        else
                *qp->sq_tailp = p;
        *(qp->sq_tailp = &p->p_link) = 0;
        p->p_stat = SSLEEP;
        p->p_stats->p_ru.ru_nvcsw++;
        swtch();
#ifdef NDDB
        /* handy breakpoint location after process "wakes" */
        asm(".globl bpendsleep ; bpendsleep:");
#endif
        curpri = p->p_usrpri;
        splx(s);
}
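
/*
 * sleep() is the historical interface; it is essentially
 * tsleep(chan, pri, (char *)NULL, 0): no PCATCH, no timeout, so the caller
 * blocks until an explicit wakeup() on chan.
 */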

/*
 * Remove a process from its wait queue
 */
unsleep(p)
        register struct proc *p;
{
        register struct slpque *qp;
        register struct proc **hp;
        int s;

        s = splhigh();
        if (p->p_wchan) {
                hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
                while (*hp != p)
                        hp = &(*hp)->p_link;
                *hp = p->p_link;
                if (qp->sq_tailp == &p->p_link)
                        qp->sq_tailp = hp;
                p->p_wchan = 0;
        }
        splx(s);
}

/*
 * Wakeup on "chan"; set all processes
 * sleeping on chan to run state.
 */
wakeup(chan)
        register caddr_t chan;
{
        register struct slpque *qp;
        register struct proc *p, **q;
        int s;

        s = splhigh();
        qp = &slpque[HASH(chan)];
restart:
        for (q = &qp->sq_head; p = *q; ) {
#ifdef DIAGNOSTIC
                if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
                        panic("wakeup");
#endif
                if (p->p_wchan == chan) {
                        p->p_wchan = 0;
                        *q = p->p_link;
                        if (qp->sq_tailp == &p->p_link)
                                qp->sq_tailp = q;
                        if (p->p_stat == SSLEEP) {
                                /* OPTIMIZED INLINE EXPANSION OF setrun(p) */
                                if (p->p_slptime > 1)
                                        updatepri(p);
                                p->p_slptime = 0;
                                p->p_stat = SRUN;
                                if (p->p_flag & SLOAD)
                                        setrq(p);
                                /*
                                 * Since curpri is a usrpri,
                                 * p->p_pri is always better than curpri.
                                 */
                                if ((p->p_flag&SLOAD) == 0)
                                        wakeup((caddr_t)&proc0);
                                else
                                        need_resched();
                                /* END INLINE EXPANSION */
                                goto restart;
                        }
                } else
                        q = &p->p_link;
        }
        splx(s);
}
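
/*
 * In the inline expansion above, a process that is not resident (SLOAD
 * clear) cannot simply be placed on a run queue; instead the swapper,
 * which sleeps on &proc0, is awakened to bring it back in.  Since that
 * recursive wakeup() may modify the sleep queue being scanned, the scan
 * is restarted after each process is made runnable.
 */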

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
rqinit()
{
        register int i;

        for (i = 0; i < NQS; i++)
                qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}
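
/*
 * The NQS run queues together span the 0..127 priority range, with
 * PPQ == 128 / NQS adjacent priorities sharing a queue (see the PPQ
 * definition in schedcpu() above).
 */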

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
setrun(p)
        register struct proc *p;
{
        register int s;

        s = splhigh();
        switch (p->p_stat) {

        case 0:
        case SWAIT:
        case SRUN:
        case SZOMB:
        default:
                panic("setrun");

        case SSTOP:
        case SSLEEP:
                unsleep(p);		/* e.g. when sending signals */
                break;

        case SIDL:
                break;
        }
        p->p_stat = SRUN;
        if (p->p_flag & SLOAD)
                setrq(p);
        splx(s);
        if (p->p_slptime > 1)
                updatepri(p);
        p->p_slptime = 0;
        if ((p->p_flag&SLOAD) == 0)
                wakeup((caddr_t)&proc0);
        else if (p->p_pri < curpri)
                need_resched();
}

/*
 * Compute priority of process when running in user mode.
 * Arrange to reschedule if the resulting priority
 * is better than that of the current process.
 */
setpri(p)
        register struct proc *p;
{
        register unsigned int newpri;

        newpri = PUSER + p->p_cpu / 4 + 2 * p->p_nice;
        newpri = min(newpri, MAXPRI);
        p->p_usrpri = newpri;
        if (newpri < curpri)
                need_resched();
}
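
/*
 * For example, working the formula above: p_cpu == 40 at p_nice == 0 gives
 * PUSER + 40/4 + 0 == PUSER + 10, while the same usage at p_nice == 4 gives
 * PUSER + 18.  Larger numbers are worse priorities, and the result is
 * clamped to MAXPRI.
 */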

#ifdef NDDB
#define DDBFUNC(s)	ddb_##s
DDBFUNC(ps) () {
        int np;
        struct proc *ap, *p, *pp;
        np = nprocs;
        p = ap = allproc;
        printf(" pid proc addr uid ppid pgrp flag stat comm wchan\n");
        while (--np >= 0) {
                pp = p->p_pptr;
                if (pp == 0)
                        pp = p;
                if (p->p_stat) {
                        printf("%5d %06x %06x %3d %5d %5d %06x %d %s ",
                            p->p_pid, ap, p->p_addr, p->p_cred->p_ruid, pp->p_pid,
                            p->p_pgrp->pg_id, p->p_flag, p->p_stat,
                            p->p_comm);
                        if (p->p_wchan) {
                                if (p->p_wmesg)
                                        printf("%s ", p->p_wmesg);
                                printf("%x", p->p_wchan);
                        }
                        printf("\n");
                }
                ap = p->p_nxt;
                if (ap == 0 && np > 0)
                        ap = zombproc;
                p = ap;
        }
}
#endif