[unix-history] usr/src/sys/kern/kern_clock.c
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	8.4 (Berkeley) %G%
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#define ADJTIME		/* For now... */
#define	ADJ_TICK 1000
int	adjtimedelta;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
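
/*
 * Worked example (illustrative values, not requirements): with
 * stathz = 128 and profhz = 1024, psratio is 8, so while profiling
 * only every eighth statclock() tick is charged to the statistics;
 * the other seven ticks serve profiling alone.
 */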

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
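
/*
 * For example, BUMPTIME(&time, tick) advances the time-of-day clock by
 * one tick, carrying any microsecond overflow into tv_sec.  The usec
 * argument must stay below 1000000, since at most one carry is performed.
 */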

int	stathz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct timeval time;
volatile struct timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
	ticks++;
#ifdef ADJTIME
	if (adjtimedelta == 0) {
		BUMPTIME(&time, tick);
	} else if (adjtimedelta < 0) {
		BUMPTIME(&time, tick - ADJ_TICK);
		adjtimedelta++;
	} else {
		BUMPTIME(&time, tick + ADJ_TICK);
		adjtimedelta--;
	}
#else
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);
#endif

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft)
		setsoftclock();
}
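
/*
 * Worked example of the delta-encoded queue above (values illustrative):
 * events due in 2, 5, and 5 ticks are stored with c_time values 2, 3,
 * and 0; hardclock() decrements only the first entry, which ages every
 * queued event at once.
 */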

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that no identification
 *	value is returned from timeout, rather, the original arguments
 *	to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}
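
/*
 * Example usage (illustrative only; mydev_poll and sc are hypothetical
 * names, not defined in this file):
 *
 *	timeout(mydev_poll, (void *)sc, hz);	schedule in ~1 second
 *	untimeout(mydev_poll, (void *)sc);	cancel; entries are matched
 *						by the same ftn/arg pair
 */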

void
untimeout(ftn, arg)
	void (*ftn) __P((void *));
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/* Increment next entry's tick count. */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}

/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}
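
/*
 * Checking the bounds above (assuming hz = 100, so tick = 10000 usec):
 * the millisecond path handles deltas up to about 0x7fffffff ms, a bit
 * under 25 days; beyond that, ticks are computed from whole seconds,
 * capping at 0x7fffffff ticks, about 250 days of 10ms ticks.
 */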

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

int	dk_ndrive = DK_NDRIVE;

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}
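
/*
 * For reference (see schedcpu() in kern_synch.c): the decay applied
 * there is roughly p_estcpu *= (2 * loadav) / (2 * loadav + 1) once per
 * second, which produces the ``90% forgotten in 5 * loadav seconds''
 * behavior described above.
 */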

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}
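
/*
 * Example (illustrative): user programs normally reach this through
 * sysctl(2) with the mib { CTL_KERN, KERN_CLOCKRATE }, reading the
 * result into a struct clockinfo.
 */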