Commit | Line | Data |
---|---|---|
f406ae69 | 1 | /*- |
1acdbcea KB |
2 | * Copyright (c) 1982, 1986, 1991, 1993 |
3 | * The Regents of the University of California. All rights reserved. | |
adb35f79 KB |
4 | * (c) UNIX System Laboratories, Inc. |
5 | * All or some portions of this file are derived from material licensed | |
6 | * to the University of California by American Telephone and Telegraph | |
7 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
8 | * the permission of UNIX System Laboratories, Inc. | |
f406ae69 KB |
9 | * |
10 | * %sccs.include.redist.c% | |
da7c5cc6 | 11 | * |
adb35f79 | 12 | * @(#)kern_clock.c 8.5 (Berkeley) %G% |
da7c5cc6 | 13 | */ |
961945a8 | 14 | |
38a01dbe KB |
15 | #include <sys/param.h> |
16 | #include <sys/systm.h> | |
17 | #include <sys/dkstat.h> | |
18 | #include <sys/callout.h> | |
19 | #include <sys/kernel.h> | |
20 | #include <sys/proc.h> | |
21 | #include <sys/resourcevar.h> | |
83be5fac | 22 | |
38a01dbe | 23 | #include <machine/cpu.h> |
961945a8 | 24 | |
8487304f | 25 | #ifdef GPROF |
38a01dbe | 26 | #include <sys/gmon.h> |
8487304f KM |
27 | #endif |
28 | ||
#define	ADJTIME			/* For now... */
#define	ADJ_TICK	1000	/* usec of slew applied per tick while adjusting */
int	adjtimedelta;		/* ticks of clock adjustment still outstanding */
/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */
9c5cfb8b | 61 | |
/*
 * Bump a timeval by a small number of usec's.
 *
 * Wrapped in do { } while (0) so the macro expands as exactly one
 * statement; the original bare-brace form broke when used in an
 * unbraced if/else (the trailing semicolon ended the if).
 * Assumes (usec) is small enough that at most one carry is needed.
 */
#define BUMPTIME(t, usec) do { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
} while (0)
83be5fac | 75 | |
int	stathz;			/* statistics clock rate (Hz); 0 if no
				 * separate statistics clock exists */
int	profhz;			/* profiling clock rate (Hz) */
int	profprocs;		/* number of processes being profiled */
int	ticks;			/* hardclock ticks accumulated since boot */
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct timeval time;		/* time of day */
volatile struct timeval mono_time;	/* monotonically increasing time */
76b2a182 | 86 | /* |
b44234ac | 87 | * Initialize clock frequencies and start both clocks running. |
76b2a182 | 88 | */ |
b44234ac CT |
89 | void |
90 | initclocks() | |
91 | { | |
92 | register int i; | |
93 | ||
94 | /* | |
95 | * Set divisors to 1 (normal case) and let the machine-specific | |
96 | * code do its bit. | |
97 | */ | |
98 | psdiv = pscnt = 1; | |
99 | cpu_initclocks(); | |
100 | ||
101 | /* | |
102 | * Compute profhz/stathz, and fix profhz if needed. | |
103 | */ | |
104 | i = stathz ? stathz : hz; | |
105 | if (profhz == 0) | |
106 | profhz = i; | |
107 | psratio = profhz / i; | |
108 | } | |
109 | ||
110 | /* | |
111 | * The real-time timer, interrupting hz times per second. | |
112 | */ | |
113 | void | |
d293217c | 114 | hardclock(frame) |
b44234ac | 115 | register struct clockframe *frame; |
83be5fac | 116 | { |
0a34b6fd | 117 | register struct callout *p1; |
83be5fac | 118 | |
76b2a182 BJ |
119 | /* |
120 | * Update real-time timeout queue. | |
121 | * At front of queue are some number of events which are ``due''. | |
122 | * The time to these is <= 0 and if negative represents the | |
123 | * number of ticks which have passed since it was supposed to happen. | |
124 | * The rest of the q elements (times > 0) are events yet to happen, | |
125 | * where the time for each is given as a delta from the previous. | |
126 | * Decrementing just the first of these serves to decrement the time | |
127 | * to all events. | |
128 | */ | |
b44234ac CT |
129 | needsoft = 0; |
130 | for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) { | |
88a7a62a SL |
131 | if (--p1->c_time > 0) |
132 | break; | |
88a7a62a SL |
133 | if (p1->c_time == 0) |
134 | break; | |
88a7a62a | 135 | } |
5da67d35 | 136 | |
76b2a182 | 137 | /* |
b44234ac | 138 | * Run current process's virtual and profile time, as needed. |
76b2a182 | 139 | */ |
b44234ac CT |
140 | pstats = p->p_stats; |
141 | if (CLKF_USERMODE(frame) && | |
142 | timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && | |
0157085f | 143 | itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) |
53fbb3b3 | 144 | psignal(p, SIGVTALRM); |
0157085f MK |
145 | if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && |
146 | itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) | |
53fbb3b3 | 147 | psignal(p, SIGPROF); |
83be5fac | 148 | } |
76b2a182 | 149 | |
53a32545 | 150 | /* |
b44234ac | 151 | * If no separate statistics clock is available, run it from here. |
53a32545 | 152 | */ |
272cb936 | 153 | if (stathz == 0) |
b44234ac | 154 | statclock(frame); |
53a32545 | 155 | |
76b2a182 | 156 | /* |
cfd66a29 CT |
157 | * Increment the time-of-day. The increment is just ``tick'' unless |
158 | * we are still adjusting the clock; see adjtime(). | |
76b2a182 | 159 | */ |
5fe6e3b1 | 160 | ticks++; |
45e9acec MK |
161 | #ifdef ADJTIME |
162 | if (adjtimedelta == 0) | |
163 | bumptime(&time, tick); | |
164 | else { | |
165 | if (adjtimedelta < 0) { | |
166 | bumptime(&time, tick-ADJ_TICK); | |
167 | adjtimedelta++; | |
168 | } else { | |
169 | bumptime(&time, tick+ADJ_TICK); | |
170 | adjtimedelta--; | |
171 | } | |
172 | } | |
173 | #else | |
cfd66a29 CT |
174 | if (timedelta == 0) |
175 | delta = tick; | |
176 | else { | |
177 | delta = tick + tickdelta; | |
178 | timedelta -= tickdelta; | |
99e47f6b | 179 | } |
cfd66a29 CT |
180 | BUMPTIME(&time, delta); |
181 | BUMPTIME(&mono_time, delta); | |
53a32545 SL |
182 | |
183 | /* | |
b44234ac CT |
184 | * Process callouts at a very low cpu priority, so we don't keep the |
185 | * relatively high clock interrupt priority any longer than necessary. | |
53a32545 | 186 | */ |
53a32545 | 187 | #endif |
b44234ac | 188 | setsoftclock(); |
53a32545 SL |
189 | } |
190 | ||
76b2a182 | 191 | /* |
b44234ac | 192 | * Software (low priority) clock interrupt. |
76b2a182 BJ |
193 | * Run periodic events from timeout queue. |
194 | */ | |
260ea681 | 195 | /*ARGSUSED*/ |
b44234ac CT |
196 | void |
197 | softclock() | |
f403d99f | 198 | { |
b44234ac CT |
199 | register struct callout *c; |
200 | register void *arg; | |
201 | register void (*func) __P((void *)); | |
202 | register int s; | |
f403d99f | 203 | |
b44234ac CT |
204 | s = splhigh(); |
205 | while ((c = calltodo.c_next) != NULL && c->c_time <= 0) { | |
206 | func = c->c_func; | |
207 | arg = c->c_arg; | |
208 | calltodo.c_next = c->c_next; | |
209 | c->c_next = callfree; | |
210 | callfree = c; | |
4f083fd7 | 211 | splx(s); |
b44234ac CT |
212 | (*func)(arg); |
213 | (void) splhigh(); | |
877ef342 | 214 | } |
b44234ac | 215 | splx(s); |
37d40d06 KM |
216 | } |
217 | ||
88a7a62a | 218 | /* |
5685f766 KB |
219 | * timeout -- |
220 | * Execute a function after a specified length of time. | |
221 | * | |
222 | * untimeout -- | |
223 | * Cancel previous timeout function call. | |
224 | * | |
225 | * See AT&T BCI Driver Reference Manual for specification. This | |
226 | * implementation differs from that one in that no identification | |
227 | * value is returned from timeout, rather, the original arguments | |
228 | * to timeout are used to identify entries for untimeout. | |
83be5fac | 229 | */ |
b44234ac | 230 | void |
5685f766 KB |
231 | timeout(ftn, arg, ticks) |
232 | void (*ftn) __P((void *)); | |
b44234ac | 233 | void *arg; |
5685f766 | 234 | register int ticks; |
83be5fac | 235 | { |
5685f766 | 236 | register struct callout *new, *p, *t; |
b44234ac | 237 | register int s; |
83be5fac | 238 | |
5685f766 KB |
239 | if (ticks <= 0) |
240 | ticks = 1; | |
241 | ||
242 | /* Lock out the clock. */ | |
b44234ac | 243 | s = splhigh(); |
5685f766 KB |
244 | |
245 | /* Fill in the next free callout structure. */ | |
246 | if (callfree == NULL) | |
247 | panic("timeout table full"); | |
248 | new = callfree; | |
249 | callfree = new->c_next; | |
250 | new->c_arg = arg; | |
251 | new->c_func = ftn; | |
252 | ||
253 | /* | |
254 | * The time for each event is stored as a difference from the time | |
255 | * of the previous event on the queue. Walk the queue, correcting | |
256 | * the ticks argument for queue entries passed. Correct the ticks | |
257 | * value for the queue entry immediately after the insertion point | |
54c401b6 CT |
258 | * as well. Watch out for negative c_time values; these represent |
259 | * overdue events. | |
5685f766 KB |
260 | */ |
261 | for (p = &calltodo; | |
262 | (t = p->c_next) != NULL && ticks > t->c_time; p = t) | |
54c401b6 CT |
263 | if (t->c_time > 0) |
264 | ticks -= t->c_time; | |
5685f766 KB |
265 | new->c_time = ticks; |
266 | if (t != NULL) | |
267 | t->c_time -= ticks; | |
268 | ||
269 | /* Insert the new entry into the queue. */ | |
270 | p->c_next = new; | |
271 | new->c_next = t; | |
83be5fac BJ |
272 | splx(s); |
273 | } | |
1fa9ff62 | 274 | |
b44234ac | 275 | void |
5685f766 KB |
276 | untimeout(ftn, arg) |
277 | void (*ftn) __P((void *)); | |
b44234ac | 278 | void *arg; |
1fa9ff62 | 279 | { |
5685f766 | 280 | register struct callout *p, *t; |
1fa9ff62 SL |
281 | register int s; |
282 | ||
9c5cfb8b | 283 | s = splhigh(); |
5685f766 KB |
284 | for (p = &calltodo; (t = p->c_next) != NULL; p = t) |
285 | if (t->c_func == ftn && t->c_arg == arg) { | |
286 | /* Increment next entry's tick count. */ | |
287 | if (t->c_next && t->c_time > 0) | |
288 | t->c_next->c_time += t->c_time; | |
289 | ||
290 | /* Move entry from callout queue to callfree queue. */ | |
291 | p->c_next = t->c_next; | |
292 | t->c_next = callfree; | |
293 | callfree = t; | |
1fa9ff62 SL |
294 | break; |
295 | } | |
1fa9ff62 SL |
296 | splx(s); |
297 | } | |
d01b68d6 | 298 | |
76b2a182 | 299 | /* |
5685f766 KB |
300 | * Compute number of hz until specified time. Used to |
301 | * compute third argument to timeout() from an absolute time. | |
76b2a182 | 302 | */ |
b44234ac | 303 | int |
d01b68d6 BJ |
304 | hzto(tv) |
305 | struct timeval *tv; | |
306 | { | |
b44234ac CT |
307 | register long ticks, sec; |
308 | int s; | |
d01b68d6 | 309 | |
76b2a182 BJ |
310 | /* |
311 | * If number of milliseconds will fit in 32 bit arithmetic, | |
312 | * then compute number of milliseconds to time and scale to | |
313 | * ticks. Otherwise just compute number of hz in time, rounding | |
314 | * times greater than representible to maximum value. | |
315 | * | |
316 | * Delta times less than 25 days can be computed ``exactly''. | |
317 | * Maximum value for any timeout in 10ms ticks is 250 days. | |
318 | */ | |
b44234ac | 319 | s = splhigh(); |
76b2a182 BJ |
320 | sec = tv->tv_sec - time.tv_sec; |
321 | if (sec <= 0x7fffffff / 1000 - 1000) | |
322 | ticks = ((tv->tv_sec - time.tv_sec) * 1000 + | |
323 | (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); | |
324 | else if (sec <= 0x7fffffff / hz) | |
325 | ticks = sec * hz; | |
326 | else | |
327 | ticks = 0x7fffffff; | |
d01b68d6 BJ |
328 | splx(s); |
329 | return (ticks); | |
330 | } | |
014adec1 | 331 | |
b44234ac CT |
332 | /* |
333 | * Start profiling on a process. | |
334 | * | |
335 | * Kernel profiling passes proc0 which never exits and hence | |
336 | * keeps the profile clock running constantly. | |
337 | */ | |
338 | void | |
339 | startprofclock(p) | |
340 | register struct proc *p; | |
341 | { | |
342 | int s; | |
343 | ||
cf5ef508 KB |
344 | if ((p->p_flag & P_PROFIL) == 0) { |
345 | p->p_flag |= P_PROFIL; | |
b44234ac CT |
346 | if (++profprocs == 1 && stathz != 0) { |
347 | s = splstatclock(); | |
348 | psdiv = pscnt = psratio; | |
349 | setstatclockrate(profhz); | |
350 | splx(s); | |
351 | } | |
352 | } | |
353 | } | |
354 | ||
355 | /* | |
356 | * Stop profiling on a process. | |
357 | */ | |
358 | void | |
359 | stopprofclock(p) | |
360 | register struct proc *p; | |
361 | { | |
362 | int s; | |
363 | ||
cf5ef508 KB |
364 | if (p->p_flag & P_PROFIL) { |
365 | p->p_flag &= ~P_PROFIL; | |
b44234ac CT |
366 | if (--profprocs == 0 && stathz != 0) { |
367 | s = splstatclock(); | |
368 | psdiv = pscnt = 1; | |
369 | setstatclockrate(stathz); | |
370 | splx(s); | |
371 | } | |
372 | } | |
373 | } | |
374 | ||
375 | int dk_ndrive = DK_NDRIVE; | |
376 | ||
377 | /* | |
378 | * Statistics clock. Grab profile sample, and if divider reaches 0, | |
379 | * do process and kernel statistics. | |
380 | */ | |
381 | void | |
382 | statclock(frame) | |
383 | register struct clockframe *frame; | |
384 | { | |
385 | #ifdef GPROF | |
386 | register struct gmonparam *g; | |
387 | #endif | |
388 | register struct proc *p; | |
389 | register int i; | |
390 | ||
391 | if (CLKF_USERMODE(frame)) { | |
392 | p = curproc; | |
cf5ef508 | 393 | if (p->p_flag & P_PROFIL) |
b44234ac CT |
394 | addupc_intr(p, CLKF_PC(frame), 1); |
395 | if (--pscnt > 0) | |
396 | return; | |
397 | /* | |
398 | * Came from user mode; CPU was in user state. | |
399 | * If this process is being profiled record the tick. | |
400 | */ | |
401 | p->p_uticks++; | |
402 | if (p->p_nice > NZERO) | |
403 | cp_time[CP_NICE]++; | |
404 | else | |
405 | cp_time[CP_USER]++; | |
406 | } else { | |
407 | #ifdef GPROF | |
408 | /* | |
409 | * Kernel statistics are just like addupc_intr, only easier. | |
410 | */ | |
411 | g = &_gmonparam; | |
412 | if (g->state == GMON_PROF_ON) { | |
413 | i = CLKF_PC(frame) - g->lowpc; | |
f929d58c KM |
414 | if (i < g->textsize) { |
415 | i /= HISTFRACTION * sizeof(*g->kcount); | |
416 | g->kcount[i]++; | |
417 | } | |
b44234ac CT |
418 | } |
419 | #endif | |
420 | if (--pscnt > 0) | |
421 | return; | |
422 | /* | |
423 | * Came from kernel mode, so we were: | |
424 | * - handling an interrupt, | |
425 | * - doing syscall or trap work on behalf of the current | |
426 | * user process, or | |
427 | * - spinning in the idle loop. | |
428 | * Whichever it is, charge the time as appropriate. | |
429 | * Note that we charge interrupts to the current process, | |
430 | * regardless of whether they are ``for'' that process, | |
431 | * so that we know how much of its real time was spent | |
432 | * in ``non-process'' (i.e., interrupt) work. | |
433 | */ | |
434 | p = curproc; | |
435 | if (CLKF_INTR(frame)) { | |
436 | if (p != NULL) | |
437 | p->p_iticks++; | |
438 | cp_time[CP_INTR]++; | |
439 | } else if (p != NULL) { | |
440 | p->p_sticks++; | |
441 | cp_time[CP_SYS]++; | |
442 | } else | |
443 | cp_time[CP_IDLE]++; | |
444 | } | |
445 | pscnt = psdiv; | |
446 | ||
447 | /* | |
448 | * We maintain statistics shown by user-level statistics | |
449 | * programs: the amount of time in each cpu state, and | |
450 | * the amount of time each of DK_NDRIVE ``drives'' is busy. | |
451 | * | |
452 | * XXX should either run linked list of drives, or (better) | |
453 | * grab timestamps in the start & done code. | |
454 | */ | |
455 | for (i = 0; i < DK_NDRIVE; i++) | |
456 | if (dk_busy & (1 << i)) | |
457 | dk_time[i]++; | |
458 | ||
459 | /* | |
cf5ef508 KB |
460 | * We adjust the priority of the current process. The priority of |
461 | * a process gets worse as it accumulates CPU time. The cpu usage | |
462 | * estimator (p_estcpu) is increased here. The formula for computing | |
463 | * priorities (in kern_synch.c) will compute a different value each | |
464 | * time p_estcpu increases by 4. The cpu usage estimator ramps up | |
465 | * quite quickly when the process is running (linearly), and decays | |
466 | * away exponentially, at a rate which is proportionally slower when | |
467 | * the system is busy. The basic principal is that the system will | |
468 | * 90% forget that the process used a lot of CPU time in 5 * loadav | |
469 | * seconds. This causes the system to favor processes which haven't | |
470 | * run much recently, and to round-robin among other processes. | |
b44234ac CT |
471 | */ |
472 | if (p != NULL) { | |
473 | p->p_cpticks++; | |
cf5ef508 KB |
474 | if (++p->p_estcpu == 0) |
475 | p->p_estcpu--; | |
476 | if ((p->p_estcpu & 3) == 0) { | |
5685f766 | 477 | resetpriority(p); |
cf5ef508 KB |
478 | if (p->p_priority >= PUSER) |
479 | p->p_priority = p->p_usrpri; | |
b44234ac CT |
480 | } |
481 | } | |
482 | } | |
483 | ||
014adec1 KM |
484 | /* |
485 | * Return information about system clocks. | |
486 | */ | |
4f522b46 | 487 | sysctl_clockrate(where, sizep) |
014adec1 | 488 | register char *where; |
c1909da4 | 489 | size_t *sizep; |
014adec1 | 490 | { |
4f522b46 | 491 | struct clockinfo clkinfo; |
014adec1 | 492 | |
014adec1 | 493 | /* |
4f522b46 | 494 | * Construct clockinfo structure. |
014adec1 | 495 | */ |
4f522b46 KM |
496 | clkinfo.hz = hz; |
497 | clkinfo.tick = tick; | |
498 | clkinfo.profhz = profhz; | |
499 | clkinfo.stathz = stathz ? stathz : hz; | |
500 | return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo))); | |
014adec1 | 501 | } |