usr/src/sys/kern/kern_clock.c
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_clock.c	7.2 (Berkeley) %G%
 */

#include "../machine/reg.h"
#include "../machine/psl.h"

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "dir.h"
#include "user.h"
#include "kernel.h"
#include "proc.h"
#include "vm.h"
#include "text.h"

#if defined(vax) || defined(tahoe)
#include "../machine/mtpr.h"
#include "../machine/clock.h"
#endif

#ifdef GPROF
#include "gprof.h"
#endif

#define ADJTIME		/* For now... */
#define	ADJ_TICK 1000
int	adjtimedelta;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (ie hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */
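
/*
 * Illustrative note (editor's addition, not in the original source):
 * on a machine with only one clock, phz stays 0 and hardclock() below
 * calls gatherstats() itself on every tick; a port with a second,
 * independent timer sets phz and drives gatherstats() from that timer
 * instead.
 */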

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}
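
/*
 * Worked example (editor's illustration, not in the original source):
 * with tick = 10000 (hz = 100) and *t = { 0 sec, 995000 usec },
 * BUMPTIME(t, tick) yields 1005000 usec, which normalizes to
 * { 1 sec, 5000 usec }.  A single subtraction suffices because the
 * increment is always well under one second.
 */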

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
	register struct callout *p1;
	register struct proc *p;
	register int s;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
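	/*
	 * Worked example (editor's illustration): events due 3, 5, and
	 * 5 ticks from now are stored with deltas 3, 2, 0.  Decrementing
	 * the head (3 becomes 2) moves all three events one tick closer;
	 * entries already at or below zero are ``due'' and are drained
	 * by softclock().
	 */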
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (!noproc)
			BUMPTIME(&u.u_ru.ru_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s;
#ifdef notdef
		u.u_ru.ru_isrss += 0;		/* XXX (haven't got this) */
#endif
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
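	/*
	 * Editor's note (hedged, from the companion code in
	 * kern_synch.c): once per second each process has its
	 * estimator decayed roughly as
	 *
	 *	p_cpu = p_cpu * (2*loadav) / (2*loadav + 1) + p_nice;
	 *
	 * which is what yields the ``90% forgotten in 5*loadav
	 * seconds'' behavior described above.
	 */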
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
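	/*
	 * Worked example (editor's illustration, assuming the adjtime(2)
	 * convention that tickdelta is the per-tick slew): with
	 * tick = 10000, tickdelta = 5 and timedelta = -1000, the clock
	 * advances 9995 usec per tick for 200 ticks, absorbing the
	 * requested 1000 usec adjustment without stepping time backwards.
	 */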
#ifdef ADJTIME
	if (adjtimedelta == 0)
		BUMPTIME(&time, tick)
	else {
		if (adjtimedelta < 0) {
			BUMPTIME(&time, tick-ADJ_TICK);
			adjtimedelta++;
		} else {
			BUMPTIME(&time, tick+ADJ_TICK);
			adjtimedelta--;
		}
	}
#else
	if (timedelta == 0)
		BUMPTIME(&time, tick)
	else {
		register delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
	}
#endif
	setsoftclock();
}

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
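	/*
	 * Worked example (editor's illustration): if dk_busy is 05
	 * (drives 0 and 2 have transfers outstanding), this tick bumps
	 * dk_time[0] and dk_time[2]; user-level tools such as iostat
	 * later turn these tick counts into percent-busy figures.
	 */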
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
	int ps;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.
	 */
	if (USERMODE(ps)) {
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			p->p_flag |= SOWEUPC;
			aston();
		}
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    u.u_ru.ru_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
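
/*
 * Usage sketch (editor's illustration, not in the original source):
 * a driver that wants wakeup() called on a channel roughly two
 * seconds from now, with the option of backing out, would do:
 *
 *	timeout(wakeup, (caddr_t)chan, 2 * hz);
 *	...
 *	untimeout(wakeup, (caddr_t)chan);
 *
 * The third argument is in ticks, so intervals scale with hz.
 */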

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
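	/*
	 * Worked arithmetic (editor's illustration): 0x7fffffff
	 * milliseconds is about 2147483 seconds, or roughly 24.8 days,
	 * hence the 25-day bound on the exact path; 0x7fffffff ticks at
	 * hz = 100 (10ms each) is about 248 days, hence the 250-day cap.
	 */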
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

profil()
{
	register struct a {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap = (struct a *)u.u_ap;
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
}
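
/*
 * Editor's note (hedged): these are the raw arguments of the
 * profil(2) system call.  Per the profil(2) manual, pcscale is an
 * unsigned fixed-point fraction with the binary point at the left,
 * 0177777 (octal) giving a one-to-one mapping of pc's to words of
 * the buffer; the bucketing itself is done by addupc(), outside this
 * file.  A pr_scale of 0 disables the SOWEUPC path in softclock()
 * above.
 */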

#ifdef COMPAT
opause()
{

	for (;;)
		sleep((caddr_t)&u, PSLEP);
}
#endif