/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	8.5 (Berkeley)	%G%
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#define	ADJTIME		/* For now... */
#define	ADJ_TICK	1000
int	adjtimedelta;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
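
/*
 * Illustrative example (not part of the original source): with
 * tv = { 1, 999900 }, BUMPTIME(&tv, 200) computes us = 1000100,
 * carries the overflow, and leaves tv = { 2, 100 }.  Callers must
 * keep ``usec'' small enough that at most one carry is needed.
 */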

int	stathz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

volatile struct timeval time;
volatile struct timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}
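
/*
 * Illustrative example (assumed rates, not from this file): with
 * stathz = 128 and profhz = 1024, psratio is 8; while profiling,
 * the statistics clock runs at profhz and statclock() counts only
 * every 8th tick toward statistics (see psdiv/pscnt above and
 * startprofclock() below).
 */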

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}
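	/*
	 * Illustrative example: callouts due 2, 5, and 9 ticks from now
	 * are queued with c_time values 2, 3, and 4; decrementing the
	 * first entry's c_time ages all three at once.
	 */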

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
	ticks++;
#ifdef ADJTIME
	if (adjtimedelta == 0)
		BUMPTIME(&time, tick);
	else {
		if (adjtimedelta < 0) {
			BUMPTIME(&time, tick - ADJ_TICK);
			adjtimedelta++;
		} else {
			BUMPTIME(&time, tick + ADJ_TICK);
			adjtimedelta--;
		}
	}
#else
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);
#endif

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft)
		setsoftclock();
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that no identification
 *	value is returned from timeout, rather, the original arguments
 *	to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}

void
untimeout(ftn, arg)
	void (*ftn) __P((void *));
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/* Increment next entry's tick count. */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}
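
/*
 * Usage sketch (illustrative only; foo_tick and foo_softc are
 * hypothetical names).  Per the convention described above, the
 * (function, argument) pair passed to timeout() is what identifies
 * the entry for untimeout():
 *
 *	void
 *	foo_tick(arg)
 *		void *arg;
 *	{
 *		struct foo_softc *sc = arg;
 *
 *		... periodic work ...
 *		timeout(foo_tick, sc, hz);	(reschedule in 1 second)
 *	}
 *
 * and on detach:
 *
 *	untimeout(foo_tick, sc);
 */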

/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}
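
/*
 * Illustrative example: with hz = 100 (tick = 10000 usec), a tv that
 * lies 2.5 seconds past the current time yields hzto(&tv) == 250, so
 *
 *	timeout(ftn, arg, hzto(&tv));
 *
 * arms a callout for (approximately) the absolute time tv.
 */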

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

int	dk_ndrive = DK_NDRIVE;

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}
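
/*
 * Illustrative example (assuming stathz = 128, no profiling): a
 * compute-bound process collects 128 p_estcpu increments per second
 * here, so resetpriority() reevaluates its priority on every 4th
 * increment, about 32 times a second.
 */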

/*
 * Return information about system clocks.
 */
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}
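
/*
 * Usage sketch (illustrative, userland): these values are exported
 * through the kern.clockrate sysctl and can be read as follows:
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int mib[2] = { CTL_KERN, KERN_CLOCKRATE };
 *	struct clockinfo ci;
 *	size_t len = sizeof(ci);
 *
 *	if (sysctl(mib, 2, &ci, &len, NULL, 0) == 0)
 *		printf("hz=%d tick=%d stathz=%d profhz=%d\n",
 *		    ci.hz, ci.tick, ci.stathz, ci.profhz);
 */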