add internet-common options in pcb, add ip header prototype for ttl and tos;
[unix-history] / usr / src / sys / kern / kern_synch.c
CommitLineData
da7c5cc6 1/*
25667a4a 2 * Copyright (c) 1982, 1986, 1990 Regents of the University of California.
da7c5cc6
KM
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 *
25667a4a 6 * @(#)kern_synch.c 7.11 (Berkeley) %G%
da7c5cc6 7 */
961945a8 8
40ed2c45
KM
9#include "machine/pte.h"
10#include "machine/psl.h"
11#include "machine/mtpr.h"
a379cce8 12
94368568
JB
13#include "param.h"
14#include "systm.h"
94368568
JB
15#include "user.h"
16#include "proc.h"
94368568
JB
17#include "vm.h"
18#include "kernel.h"
19#include "buf.h"
1edb1cf8
BJ
20
21/*
22 * Force switch among equal priority processes every 100ms.
23 */
24roundrobin()
25{
26
27 runrun++;
28 aston();
b32450f4 29 timeout(roundrobin, (caddr_t)0, hz / 10);
1edb1cf8
BJ
30}
31
d048c9b6
KM
32/*
33 * constants for digital decay and forget
34 * 90% of (p_cpu) usage in 5*loadav time
35 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
36 * Note that, as ps(1) mentions, this can let percentages
37 * total over 100% (I've seen 137.9% for 3 processes).
38 *
39 * Note that hardclock updates p_cpu and p_cpticks independently.
40 *
41 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
42 * That is, the system wants to compute a value of decay such
43 * that the following for loop:
44 * for (i = 0; i < (5 * loadavg); i++)
45 * p_cpu *= decay;
46 * will compute
47 * p_cpu *= 0.1;
48 * for all values of loadavg:
49 *
50 * Mathematically this loop can be expressed by saying:
51 * decay ** (5 * loadavg) ~= .1
52 *
53 * The system computes decay as:
54 * decay = (2 * loadavg) / (2 * loadavg + 1)
55 *
56 * We wish to prove that the system's computation of decay
57 * will always fulfill the equation:
58 * decay ** (5 * loadavg) ~= .1
59 *
60 * If we compute b as:
61 * b = 2 * loadavg
62 * then
63 * decay = b / (b + 1)
64 *
65 * We now need to prove two things:
66 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
67 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
68 *
69 * Facts:
70 * For x close to zero, exp(x) =~ 1 + x, since
71 * exp(x) = 0! + x**1/1! + x**2/2! + ... .
72 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
73 * For x close to zero, ln(1+x) =~ x, since
74 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
75 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
76 * ln(.1) =~ -2.30
77 *
78 * Proof of (1):
79 * Solve (factor)**(power) =~ .1 given power (5*loadav):
80 * solving for factor,
81 * ln(factor) =~ (-2.30/5*loadav), or
82 * factor =~ exp(-1/((5/2.30)*loadav) =~ exp(-1/(2*loadav)) =
83 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
84 *
85 * Proof of (2):
86 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
87 * solving for power,
88 * power*ln(b/(b+1)) =~ -2.30, or
89 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
90 *
91 * Actual power values for the implemented algorithm are as follows:
92 * loadav: 1 2 3 4
93 * power: 5.68 10.32 14.94 19.55
94 */
1e35e051 95
80b6b780
KM
96/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
97#define get_b(loadav) (2 * (loadav))
98#define get_pcpu(b, cpu) (((b) * ((cpu) & 0377)) / ((b) + FSCALE))
99
100/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
101fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
102
103/*
104 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
105 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
106 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
107 *
108 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
109 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
110 *
111 * If you dont want to bother with the faster/more-accurate formula, you
112 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
113 * (more general) method of calculating the %age of CPU used by a process.
114 */
115#define CCPU_SHIFT 11
1edb1cf8 116
1edb1cf8
BJ
117/*
118 * Recompute process priorities, once a second
119 */
120schedcpu()
121{
80b6b780 122 register fixpt_t b = get_b(averunnable[0]);
1edb1cf8
BJ
123 register struct proc *p;
124 register int s, a;
125
1edb1cf8 126 wakeup((caddr_t)&lbolt);
1d348849 127 for (p = allproc; p != NULL; p = p->p_nxt) {
1edb1cf8
BJ
128 if (p->p_time != 127)
129 p->p_time++;
1edb1cf8
BJ
130 if (p->p_stat==SSLEEP || p->p_stat==SSTOP)
131 if (p->p_slptime != 127)
132 p->p_slptime++;
80b6b780 133 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
1e35e051
MK
134 /*
135 * If the process has slept the entire second,
136 * stop recalculating its priority until it wakes up.
137 */
80b6b780 138 if (p->p_slptime > 1)
1e35e051 139 continue;
1e35e051
MK
140 /*
141 * p_pctcpu is only for ps.
142 */
80b6b780
KM
143#if (FSHIFT >= CCPU_SHIFT)
144 p->p_pctcpu += (hz == 100)?
145 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
146 100 * (((fixpt_t) p->p_cpticks)
147 << (FSHIFT - CCPU_SHIFT)) / hz;
148#else
149 p->p_pctcpu += ((FSCALE - ccpu) *
150 (p->p_cpticks * FSCALE / hz)) >> FSHIFT;
151#endif
1edb1cf8 152 p->p_cpticks = 0;
80b6b780 153 a = (int) get_pcpu(b, p->p_cpu) + p->p_nice;
1edb1cf8
BJ
154 if (a < 0)
155 a = 0;
156 if (a > 255)
157 a = 255;
158 p->p_cpu = a;
159 (void) setpri(p);
1e35e051 160 s = splhigh(); /* prevent state changes */
1edb1cf8 161 if (p->p_pri >= PUSER) {
fab25db3 162#define PPQ (128 / NQS)
1edb1cf8
BJ
163 if ((p != u.u_procp || noproc) &&
164 p->p_stat == SRUN &&
165 (p->p_flag & SLOAD) &&
fab25db3 166 (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
1edb1cf8
BJ
167 remrq(p);
168 p->p_pri = p->p_usrpri;
169 setrq(p);
170 } else
171 p->p_pri = p->p_usrpri;
172 }
173 splx(s);
174 }
175 vmmeter();
176 if (runin!=0) {
177 runin = 0;
178 wakeup((caddr_t)&runin);
179 }
180 if (bclnlist != NULL)
181 wakeup((caddr_t)&proc[2]);
b32450f4 182 timeout(schedcpu, (caddr_t)0, hz);
1edb1cf8 183}
a379cce8 184
1e35e051
MK
185/*
186 * Recalculate the priority of a process after it has slept for a while.
187 */
188updatepri(p)
189 register struct proc *p;
190{
191 register int a = p->p_cpu & 0377;
80b6b780 192 register fixpt_t b = get_b(averunnable[0]);
1e35e051
MK
193
194 p->p_slptime--; /* the first time was done in schedcpu */
195 while (a && --p->p_slptime)
80b6b780 196 a = (int) get_pcpu(b, a) /* + p->p_nice */;
27bc21f7 197 p->p_slptime = 0;
1e35e051
MK
198 if (a < 0)
199 a = 0;
200 if (a > 255)
201 a = 255;
202 p->p_cpu = a;
203 (void) setpri(p);
204}
205
a379cce8
BJ
206#define SQSIZE 0100 /* Must be power of 2 */
207#define HASH(x) (( (int) x >> 5) & (SQSIZE-1))
3abb418a
KM
208struct slpque {
209 struct proc *sq_head;
210 struct proc **sq_tailp;
211} slpque[SQSIZE];
a379cce8
BJ
212
213/*
25667a4a
MK
214 * General sleep call.
215 * Suspends current process until a wakeup is made on chan.
216 * The process will then be made runnable with priority pri.
217 * Sleeps at most timo/hz seconds (0 means no timeout).
218 * If pri includes PCATCH flag, signals are checked
219 * before and after sleeping, else signals are not checked.
220 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.
221 * If PCATCH is set and a signal needs to be delivered,
222 * ERESTART is returned if the current system call should be restarted
223 * if possible, and EINTR is returned if the system call should
224 * be interrupted by the signal (return EINTR).
a379cce8 225 */
25667a4a 226tsleep(chan, pri, wmesg, timo)
67e9a600
MT
227 caddr_t chan;
228 int pri;
229 char *wmesg;
230 int timo;
231{
232 register struct proc *rp;
233 register struct slpque *qp;
234 register s;
25667a4a 235 int sig, catch = pri & PCATCH;
67e9a600
MT
236 extern int cold;
237 int endtsleep();
238
239 rp = u.u_procp;
240 s = splhigh();
241 if (cold || panicstr) {
242 /*
243 * After a panic, or during autoconfiguration,
244 * just give interrupts a chance, then just return;
245 * don't run any other procs or panic below,
246 * in case this is the idle process and already asleep.
67e9a600 247 */
25667a4a 248 (void) spl0();
67e9a600
MT
249 splx(s);
250 return (0);
251 }
252#ifdef DIAGNOSTIC
25667a4a
MK
253 if (chan == 0 || rp->p_stat != SRUN || rp->p_rlink)
254 panic("tsleep");
67e9a600
MT
255#endif
256 rp->p_wchan = chan;
257 rp->p_wmesg = wmesg;
258 rp->p_slptime = 0;
25667a4a 259 rp->p_pri = pri & PRIMASK;
67e9a600
MT
260 qp = &slpque[HASH(chan)];
261 if (qp->sq_head == 0)
262 qp->sq_head = rp;
263 else
264 *qp->sq_tailp = rp;
265 *(qp->sq_tailp = &rp->p_link) = 0;
266 /*
25667a4a
MK
267 * If we stop in CURSIG/issig(), wakeup may already
268 * have happened when we return.
269 * rp->p_wchan will then be 0.
67e9a600 270 */
25667a4a
MK
271 if (catch) {
272 if (sig = CURSIG(rp)) {
273 if (rp->p_wchan)
274 unsleep(rp);
275 rp->p_stat = SRUN;
276 splx(s);
277 if (u.u_sigintr & sigmask(sig))
278 return (EINTR);
279 return (ERESTART);
280 }
281 if (rp->p_wchan == 0) {
282 splx(s);
283 return (0);
284 }
285 rp->p_flag |= SSINTR;
67e9a600
MT
286 }
287 rp->p_stat = SSLEEP;
288 if (timo)
289 timeout(endtsleep, (caddr_t)rp, timo);
290 (void) spl0();
291 u.u_ru.ru_nvcsw++;
292 swtch();
293 curpri = rp->p_usrpri;
294 splx(s);
25667a4a 295 rp->p_flag &= ~SSINTR;
67e9a600
MT
296 if (rp->p_flag & STIMO) {
297 rp->p_flag &= ~STIMO;
298 return (EWOULDBLOCK);
299 }
300 if (timo)
301 untimeout(endtsleep, (caddr_t)rp);
25667a4a
MK
302 if (catch && (sig = CURSIG(rp))) {
303 if (u.u_sigintr & sigmask(sig))
304 return (EINTR);
305 return (ERESTART);
306 }
67e9a600
MT
307 return (0);
308}
309
310/*
311 * Implement timeout for tsleep.
312 * If process hasn't been awakened (wchan non-zero),
313 * set timeout flag and undo the sleep. If proc
314 * is stopped, just unsleep so it will remain stopped.
315 */
316endtsleep(p)
317 register struct proc *p;
318{
319 int s = splhigh();
320
321 if (p->p_wchan) {
322 if (p->p_stat == SSLEEP)
323 setrun(p);
324 else
325 unsleep(p);
326 p->p_flag |= STIMO;
327 }
328 splx(s);
329}
330
25667a4a
MK
331/*
332 * Short-term, non-interruptable sleep.
333 */
a379cce8 334sleep(chan, pri)
bd76c595
BJ
335 caddr_t chan;
336 int pri;
a379cce8 337{
3abb418a
KM
338 register struct proc *rp;
339 register struct slpque *qp;
6fdc0335 340 register s;
79a4402e 341 extern int cold;
a379cce8 342
25667a4a
MK
343#ifdef DIAGNOSTIC
344 if (pri > PZERO) {
345 printf("sleep called with pri %d > PZERO, wchan: %x\n",
346 pri, chan);
347 panic("old sleep");
348 }
349#endif
a379cce8 350 rp = u.u_procp;
1e35e051 351 s = splhigh();
79a4402e 352 if (cold || panicstr) {
76acd871 353 /*
79a4402e
MK
354 * After a panic, or during autoconfiguration,
355 * just give interrupts a chance, then just return;
356 * don't run any other procs or panic below,
357 * in case this is the idle process and already asleep.
76acd871 358 */
25667a4a 359 (void) spl0();
76acd871
MK
360 splx(s);
361 return;
362 }
67e9a600 363#ifdef DIAGNOSTIC
76acd871 364 if (chan==0 || rp->p_stat != SRUN || rp->p_rlink)
a379cce8 365 panic("sleep");
67e9a600 366#endif
a379cce8 367 rp->p_wchan = chan;
67e9a600 368 rp->p_wmesg = NULL;
a379cce8
BJ
369 rp->p_slptime = 0;
370 rp->p_pri = pri;
3abb418a
KM
371 qp = &slpque[HASH(chan)];
372 if (qp->sq_head == 0)
373 qp->sq_head = rp;
374 else
375 *qp->sq_tailp = rp;
376 *(qp->sq_tailp = &rp->p_link) = 0;
25667a4a
MK
377 rp->p_stat = SSLEEP;
378 (void) spl0();
379 u.u_ru.ru_nvcsw++;
380 swtch();
fab25db3 381 curpri = rp->p_usrpri;
a379cce8 382 splx(s);
a379cce8
BJ
383}
384
87d0f32e
BJ
385/*
386 * Remove a process from its wait queue
387 */
388unsleep(p)
18a4549b 389 register struct proc *p;
87d0f32e 390{
3abb418a 391 register struct slpque *qp;
87d0f32e 392 register struct proc **hp;
3abb418a 393 int s;
87d0f32e 394
1e35e051 395 s = splhigh();
87d0f32e 396 if (p->p_wchan) {
3abb418a 397 hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
87d0f32e
BJ
398 while (*hp != p)
399 hp = &(*hp)->p_link;
400 *hp = p->p_link;
3abb418a
KM
401 if (qp->sq_tailp == &p->p_link)
402 qp->sq_tailp = hp;
87d0f32e
BJ
403 p->p_wchan = 0;
404 }
405 splx(s);
406}
407
a379cce8
BJ
408/*
409 * Wake up all processes sleeping on chan.
410 */
411wakeup(chan)
18a4549b 412 register caddr_t chan;
a379cce8 413{
3abb418a
KM
414 register struct slpque *qp;
415 register struct proc *p, **q;
a379cce8
BJ
416 int s;
417
1e35e051 418 s = splhigh();
3abb418a 419 qp = &slpque[HASH(chan)];
a379cce8 420restart:
3abb418a 421 for (q = &qp->sq_head; p = *q; ) {
67e9a600 422#ifdef DIAGNOSTIC
87d0f32e 423 if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
a379cce8 424 panic("wakeup");
67e9a600 425#endif
6fdc0335 426 if (p->p_wchan==chan) {
a379cce8 427 p->p_wchan = 0;
e5df4be8 428 *q = p->p_link;
3abb418a
KM
429 if (qp->sq_tailp == &p->p_link)
430 qp->sq_tailp = q;
87d0f32e
BJ
431 if (p->p_stat == SSLEEP) {
432 /* OPTIMIZED INLINE EXPANSION OF setrun(p) */
6f414c22
MK
433 if (p->p_slptime > 1)
434 updatepri(p);
87d0f32e 435 p->p_stat = SRUN;
c74c8a79 436 if (p->p_flag & SLOAD)
87d0f32e 437 setrq(p);
fab25db3
MK
438 /*
439 * Since curpri is a usrpri,
440 * p->p_pri is always better than curpri.
441 */
442 runrun++;
443 aston();
7eb2e67e
BJ
444 if ((p->p_flag&SLOAD) == 0) {
445 if (runout != 0) {
446 runout = 0;
447 wakeup((caddr_t)&runout);
448 }
449 wantin++;
87d0f32e
BJ
450 }
451 /* END INLINE EXPANSION */
e5df4be8 452 goto restart;
a379cce8 453 }
e5df4be8
BJ
454 } else
455 q = &p->p_link;
a379cce8
BJ
456 }
457 splx(s);
458}
459
a379cce8
BJ
460/*
461 * Initialize the (doubly-linked) run queues
462 * to be empty.
463 */
464rqinit()
465{
466 register int i;
467
468 for (i = 0; i < NQS; i++)
469 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
470}
a379cce8
BJ
471
472/*
473 * Set the process running;
474 * arrange for it to be swapped in if necessary.
475 */
476setrun(p)
18a4549b 477 register struct proc *p;
a379cce8 478{
18a4549b 479 register int s;
a379cce8 480
1e35e051 481 s = splhigh();
a379cce8
BJ
482 switch (p->p_stat) {
483
484 case 0:
485 case SWAIT:
486 case SRUN:
487 case SZOMB:
488 default:
489 panic("setrun");
490
6fdc0335 491 case SSTOP:
a379cce8 492 case SSLEEP:
87d0f32e 493 unsleep(p); /* e.g. when sending signals */
a379cce8
BJ
494 break;
495
496 case SIDL:
a379cce8
BJ
497 break;
498 }
499 p->p_stat = SRUN;
500 if (p->p_flag & SLOAD)
501 setrq(p);
502 splx(s);
27bc21f7
MK
503 if (p->p_slptime > 1)
504 updatepri(p);
18a4549b 505 if (p->p_pri < curpri) {
a379cce8 506 runrun++;
534d9295
BJ
507 aston();
508 }
7eb2e67e 509 if ((p->p_flag&SLOAD) == 0) {
18a4549b 510 if (runout != 0) {
7eb2e67e
BJ
511 runout = 0;
512 wakeup((caddr_t)&runout);
513 }
514 wantin++;
a379cce8
BJ
515 }
516}
517
518/*
519 * Set user priority.
520 * The rescheduling flag (runrun)
521 * is set if the priority is better
522 * than the currently running process.
523 */
524setpri(pp)
18a4549b 525 register struct proc *pp;
a379cce8 526{
18a4549b 527 register int p;
a379cce8 528
16a64baa 529 p = (pp->p_cpu & 0377)/4;
1e35e051 530 p += PUSER + 2 * pp->p_nice;
9afea775
BJ
531 if (pp->p_rssize > pp->p_maxrss && freemem < desfree)
532 p += 2*4; /* effectively, nice(4) */
18a4549b 533 if (p > 127)
a379cce8 534 p = 127;
18a4549b 535 if (p < curpri) {
a379cce8 536 runrun++;
a51a6e74
BJ
537 aston();
538 }
a379cce8 539 pp->p_usrpri = p;
18a4549b 540 return (p);
a379cce8 541}