/*	kern_clock.c	4.52	83/03/03	*/

#include "../machine/reg.h"
#include "../machine/psl.h"

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/kernel.h"
#include "../h/proc.h"
#include "../h/vm.h"
#include "../h/text.h"
#ifdef MUSH
#include "../h/quota.h"
#include "../h/share.h"
#endif

#ifdef vax
#include "../vax/mtpr.h"
#endif

#ifdef GPROF
#include "../h/gprof.h"
#endif

#ifdef KGCLOCK
extern int phz;
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz), however the
 * statistics will be much more prone to error.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow an
 * exact measure of resource utilization.
 */

/*
 * TODO:
 *	* Keep more accurate statistics by simulating good interval timers.
 *	* Use the time-of-day clock on the VAX to keep more accurate time
 *	  than is possible by repeated use of the interval timer.
 *	* Allocate more timeout table slots when table overflows.
 *	* Get all resource allocation to use second timer.
 */

/* bump a timeval by a small number of usec's */
#define	bumptime(tp, usec) { \
	(tp)->tv_usec += usec; \
	if ((tp)->tv_usec >= 1000000) { \
		(tp)->tv_usec -= 1000000; \
		(tp)->tv_sec++; \
	} \
}
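
/*
 * For example, bumptime(&time, tick) in hardclock() below advances
 * the time-of-day by one clock tick (tick microseconds), carrying
 * from tv_usec into tv_sec when tv_usec passes one million.
 */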

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
#ifdef vax
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
#endif
#ifdef sun
hardclock(regs)
	struct regs regs;
{
	int ps = regs.r_sr;
	caddr_t pc = (caddr_t)regs.r_pc;
#endif
	register struct callout *p1;
	register struct proc *p;
	register int s, cpstate;

#ifdef sun
	if (USERMODE(ps))		/* aston needs ar0 */
		u.u_ar0 = &regs.r_r0;
#endif
	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since the event was supposed
	 * to happen.
	 * The rest of the queue elements (times > 0) are events yet to
	 * happen, where the time for each is given as a delta from the
	 * previous.  Decrementing just the first of these serves to
	 * decrement the time to all events.
	 */
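	/*
	 * For example, events due 2, 5, and 7 ticks from now are kept
	 * with c_time deltas of 2, 3, and 2; decrementing the first
	 * delta to 1 moves all three events one tick closer at once.
	 */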
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		--p1->c_time;
	if (p1)
		--p1->c_time;

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		bumptime(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc) {
			if (BASEPRI(ps))
				cpstate = CP_IDLE;
		} else {
			bumptime(&u.u_ru.ru_stime, tick);
		}
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0 && cpstate != CP_IDLE) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
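		/*
		 * Note: raising the soft limit by 5 above means the
		 * process gets another SIGXCPU after each further 5
		 * seconds of cpu time, until the hard limit stops it.
		 */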
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0;	/* XXX */
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
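		/* saturate the usage estimator rather than wrapping to 0 */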
		if (++p->p_cpu == 0)
			p->p_cpu--;
#ifdef MUSH
		p->p_quota->q_cost += (p->p_nice > NZERO ?
		    (shconsts.sc_tic * ((2*NZERO)-p->p_nice)) / NZERO :
		    shconsts.sc_tic) * (((int)avenrun[0]+2)/3);
#endif
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If this is the only timer then we have to use it to
	 * gather statistics.
	 */
#ifndef KGCLOCK
	gatherstats(pc, ps);
#else
	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);
#endif

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	bumptime(&time, tick);
	setsoftclock();
}

/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
#ifdef vax
softclock(pc, ps)
	caddr_t pc;
	int ps;
{
#endif
#ifdef sun
softclock()
{
	int ps = u.u_ar0[PS];
	caddr_t pc = (caddr_t)u.u_ar0[PC];
#endif

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = spl7();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Arrange that (*fun)(arg) is called in tim/hz seconds.
 */
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
	int tim;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

	t = tim;
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
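	/*
	 * Walk the delta-encoded queue to the insertion point,
	 * reducing t by each earlier event's c_time so that it
	 * becomes a delta relative to the new entry's predecessor.
	 */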
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = spl7();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
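
/*
 * Example usage (illustrative only; myfunc and myarg are hypothetical):
 * schedule myfunc(myarg) to run roughly two seconds from now, then
 * cancel it if it has not yet fired:
 *
 *	timeout(myfunc, (caddr_t)myarg, 2 * hz);
 *	...
 *	untimeout(myfunc, (caddr_t)myarg);
 */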

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = spl7();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
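	/*
	 * Worked example (assuming hz = 100, so tick = 10000 usec):
	 * a target 1.5 seconds in the future yields 1500 ms, and
	 * 1500 / (tick / 1000) = 1500 / 10 = 150 ticks.
	 */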
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
		    (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}