Commit | Line | Data |
---|---|---|
f406ae69 | 1 | /*- |
1acdbcea KB |
2 | * Copyright (c) 1982, 1986, 1991, 1993 |
3 | * The Regents of the University of California. All rights reserved. | |
f406ae69 KB |
4 | * |
5 | * %sccs.include.redist.c% | |
da7c5cc6 | 6 | * |
54c401b6 | 7 | * @(#)kern_clock.c 8.4 (Berkeley) %G% |
da7c5cc6 | 8 | */ |
961945a8 | 9 | |
38a01dbe KB |
10 | #include <sys/param.h> |
11 | #include <sys/systm.h> | |
12 | #include <sys/dkstat.h> | |
13 | #include <sys/callout.h> | |
14 | #include <sys/kernel.h> | |
15 | #include <sys/proc.h> | |
16 | #include <sys/resourcevar.h> | |
83be5fac | 17 | |
38a01dbe | 18 | #include <machine/cpu.h> |
961945a8 | 19 | |
8487304f | 20 | #ifdef GPROF |
38a01dbe | 21 | #include <sys/gmon.h> |
8487304f KM |
22 | #endif |
23 | ||
#define ADJTIME			/* For now... */
#define ADJ_TICK 1000		/* usec slewed per tick while an adjtime() is pending */
int	adjtimedelta;		/* usec remaining to slew; driven toward 0 by hardclock() */
76b2a182 BJ |
28 | /* |
29 | * Clock handling routines. | |
30 | * | |
b44234ac CT |
31 | * This code is written to operate with two timers that run independently of |
32 | * each other. The main clock, running hz times per second, is used to keep | |
33 | * track of real time. The second timer handles kernel and user profiling, | |
34 | * and does resource use estimation. If the second timer is programmable, | |
35 | * it is randomized to avoid aliasing between the two clocks. For example, | |
36 | * the randomization prevents an adversary from always giving up the cpu | |
37 | * just before its quantum expires. Otherwise, it would never accumulate | |
38 | * cpu ticks. The mean frequency of the second timer is stathz. | |
39 | * | |
40 | * If no second timer exists, stathz will be zero; in this case we drive | |
41 | * profiling and statistics off the main clock. This WILL NOT be accurate; | |
42 | * do not do it unless absolutely necessary. | |
43 | * | |
44 | * The statistics clock may (or may not) be run at a higher rate while | |
45 | * profiling. This profile clock runs at profhz. We require that profhz | |
46 | * be an integral multiple of stathz. | |
47 | * | |
48 | * If the statistics clock is running fast, it must be divided by the ratio | |
49 | * profhz/stathz for statistics. (For profiling, every tick counts.) | |
76b2a182 | 50 | */ |
6602c75b | 51 | |
76b2a182 BJ |
52 | /* |
53 | * TODO: | |
88a7a62a | 54 | * allocate more timeout table slots when table overflows. |
76b2a182 | 55 | */ |
9c5cfb8b | 56 | |
/*
 * Bump a timeval by a small number of usec's.
 *
 * Wrapped in do { ... } while (0) so that an invocation followed by a
 * semicolon is a single statement and composes safely with if/else
 * (the old bare-brace form broke `if (x) BUMPTIME(...); else ...').
 * Assumes the increment keeps tv_usec below 2000000, so at most one
 * carry into tv_sec is needed; a negative increment is not normalized
 * (NOTE(review): the ADJTIME path can pass tick-ADJ_TICK, which is
 * negative only if tick < 1000 -- confirm hz for such configurations).
 */
#define BUMPTIME(t, usec) do { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
} while (0)
83be5fac | 70 | |
272cb936 | 71 | int stathz; |
cbd9e613 | 72 | int profhz; |
37d40d06 | 73 | int profprocs; |
5fe6e3b1 | 74 | int ticks; |
3916e002 | 75 | static int psdiv, pscnt; /* prof => stat divider */ |
be389c2a | 76 | int psratio; /* ratio: prof / stat */ |
b44234ac CT |
77 | |
78 | volatile struct timeval time; | |
79 | volatile struct timeval mono_time; | |
80 | ||
76b2a182 | 81 | /* |
b44234ac | 82 | * Initialize clock frequencies and start both clocks running. |
76b2a182 | 83 | */ |
b44234ac CT |
84 | void |
85 | initclocks() | |
86 | { | |
87 | register int i; | |
88 | ||
89 | /* | |
90 | * Set divisors to 1 (normal case) and let the machine-specific | |
91 | * code do its bit. | |
92 | */ | |
93 | psdiv = pscnt = 1; | |
94 | cpu_initclocks(); | |
95 | ||
96 | /* | |
97 | * Compute profhz/stathz, and fix profhz if needed. | |
98 | */ | |
99 | i = stathz ? stathz : hz; | |
100 | if (profhz == 0) | |
101 | profhz = i; | |
102 | psratio = profhz / i; | |
103 | } | |
104 | ||
105 | /* | |
106 | * The real-time timer, interrupting hz times per second. | |
107 | */ | |
108 | void | |
d293217c | 109 | hardclock(frame) |
b44234ac | 110 | register struct clockframe *frame; |
83be5fac | 111 | { |
0a34b6fd | 112 | register struct callout *p1; |
83be5fac | 113 | |
76b2a182 BJ |
114 | /* |
115 | * Update real-time timeout queue. | |
116 | * At front of queue are some number of events which are ``due''. | |
117 | * The time to these is <= 0 and if negative represents the | |
118 | * number of ticks which have passed since it was supposed to happen. | |
119 | * The rest of the q elements (times > 0) are events yet to happen, | |
120 | * where the time for each is given as a delta from the previous. | |
121 | * Decrementing just the first of these serves to decrement the time | |
122 | * to all events. | |
123 | */ | |
b44234ac CT |
124 | needsoft = 0; |
125 | for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) { | |
88a7a62a SL |
126 | if (--p1->c_time > 0) |
127 | break; | |
88a7a62a SL |
128 | if (p1->c_time == 0) |
129 | break; | |
88a7a62a | 130 | } |
5da67d35 | 131 | |
76b2a182 | 132 | /* |
b44234ac | 133 | * Run current process's virtual and profile time, as needed. |
76b2a182 | 134 | */ |
b44234ac CT |
135 | pstats = p->p_stats; |
136 | if (CLKF_USERMODE(frame) && | |
137 | timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && | |
0157085f | 138 | itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) |
53fbb3b3 | 139 | psignal(p, SIGVTALRM); |
0157085f MK |
140 | if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && |
141 | itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) | |
53fbb3b3 | 142 | psignal(p, SIGPROF); |
83be5fac | 143 | } |
76b2a182 | 144 | |
53a32545 | 145 | /* |
b44234ac | 146 | * If no separate statistics clock is available, run it from here. |
53a32545 | 147 | */ |
272cb936 | 148 | if (stathz == 0) |
b44234ac | 149 | statclock(frame); |
53a32545 | 150 | |
76b2a182 | 151 | /* |
cfd66a29 CT |
152 | * Increment the time-of-day. The increment is just ``tick'' unless |
153 | * we are still adjusting the clock; see adjtime(). | |
76b2a182 | 154 | */ |
5fe6e3b1 | 155 | ticks++; |
45e9acec MK |
156 | #ifdef ADJTIME |
157 | if (adjtimedelta == 0) | |
158 | bumptime(&time, tick); | |
159 | else { | |
160 | if (adjtimedelta < 0) { | |
161 | bumptime(&time, tick-ADJ_TICK); | |
162 | adjtimedelta++; | |
163 | } else { | |
164 | bumptime(&time, tick+ADJ_TICK); | |
165 | adjtimedelta--; | |
166 | } | |
167 | } | |
168 | #else | |
cfd66a29 CT |
169 | if (timedelta == 0) |
170 | delta = tick; | |
171 | else { | |
172 | delta = tick + tickdelta; | |
173 | timedelta -= tickdelta; | |
99e47f6b | 174 | } |
cfd66a29 CT |
175 | BUMPTIME(&time, delta); |
176 | BUMPTIME(&mono_time, delta); | |
53a32545 SL |
177 | |
178 | /* | |
b44234ac CT |
179 | * Process callouts at a very low cpu priority, so we don't keep the |
180 | * relatively high clock interrupt priority any longer than necessary. | |
53a32545 | 181 | */ |
53a32545 | 182 | #endif |
b44234ac | 183 | setsoftclock(); |
53a32545 SL |
184 | } |
185 | ||
76b2a182 | 186 | /* |
b44234ac | 187 | * Software (low priority) clock interrupt. |
76b2a182 BJ |
188 | * Run periodic events from timeout queue. |
189 | */ | |
260ea681 | 190 | /*ARGSUSED*/ |
b44234ac CT |
191 | void |
192 | softclock() | |
f403d99f | 193 | { |
b44234ac CT |
194 | register struct callout *c; |
195 | register void *arg; | |
196 | register void (*func) __P((void *)); | |
197 | register int s; | |
f403d99f | 198 | |
b44234ac CT |
199 | s = splhigh(); |
200 | while ((c = calltodo.c_next) != NULL && c->c_time <= 0) { | |
201 | func = c->c_func; | |
202 | arg = c->c_arg; | |
203 | calltodo.c_next = c->c_next; | |
204 | c->c_next = callfree; | |
205 | callfree = c; | |
4f083fd7 | 206 | splx(s); |
b44234ac CT |
207 | (*func)(arg); |
208 | (void) splhigh(); | |
877ef342 | 209 | } |
b44234ac | 210 | splx(s); |
37d40d06 KM |
211 | } |
212 | ||
88a7a62a | 213 | /* |
5685f766 KB |
214 | * timeout -- |
215 | * Execute a function after a specified length of time. | |
216 | * | |
217 | * untimeout -- | |
218 | * Cancel previous timeout function call. | |
219 | * | |
220 | * See AT&T BCI Driver Reference Manual for specification. This | |
221 | * implementation differs from that one in that no identification | |
222 | * value is returned from timeout, rather, the original arguments | |
223 | * to timeout are used to identify entries for untimeout. | |
83be5fac | 224 | */ |
b44234ac | 225 | void |
5685f766 KB |
226 | timeout(ftn, arg, ticks) |
227 | void (*ftn) __P((void *)); | |
b44234ac | 228 | void *arg; |
5685f766 | 229 | register int ticks; |
83be5fac | 230 | { |
5685f766 | 231 | register struct callout *new, *p, *t; |
b44234ac | 232 | register int s; |
83be5fac | 233 | |
5685f766 KB |
234 | if (ticks <= 0) |
235 | ticks = 1; | |
236 | ||
237 | /* Lock out the clock. */ | |
b44234ac | 238 | s = splhigh(); |
5685f766 KB |
239 | |
240 | /* Fill in the next free callout structure. */ | |
241 | if (callfree == NULL) | |
242 | panic("timeout table full"); | |
243 | new = callfree; | |
244 | callfree = new->c_next; | |
245 | new->c_arg = arg; | |
246 | new->c_func = ftn; | |
247 | ||
248 | /* | |
249 | * The time for each event is stored as a difference from the time | |
250 | * of the previous event on the queue. Walk the queue, correcting | |
251 | * the ticks argument for queue entries passed. Correct the ticks | |
252 | * value for the queue entry immediately after the insertion point | |
54c401b6 CT |
253 | * as well. Watch out for negative c_time values; these represent |
254 | * overdue events. | |
5685f766 KB |
255 | */ |
256 | for (p = &calltodo; | |
257 | (t = p->c_next) != NULL && ticks > t->c_time; p = t) | |
54c401b6 CT |
258 | if (t->c_time > 0) |
259 | ticks -= t->c_time; | |
5685f766 KB |
260 | new->c_time = ticks; |
261 | if (t != NULL) | |
262 | t->c_time -= ticks; | |
263 | ||
264 | /* Insert the new entry into the queue. */ | |
265 | p->c_next = new; | |
266 | new->c_next = t; | |
83be5fac BJ |
267 | splx(s); |
268 | } | |
1fa9ff62 | 269 | |
b44234ac | 270 | void |
5685f766 KB |
271 | untimeout(ftn, arg) |
272 | void (*ftn) __P((void *)); | |
b44234ac | 273 | void *arg; |
1fa9ff62 | 274 | { |
5685f766 | 275 | register struct callout *p, *t; |
1fa9ff62 SL |
276 | register int s; |
277 | ||
9c5cfb8b | 278 | s = splhigh(); |
5685f766 KB |
279 | for (p = &calltodo; (t = p->c_next) != NULL; p = t) |
280 | if (t->c_func == ftn && t->c_arg == arg) { | |
281 | /* Increment next entry's tick count. */ | |
282 | if (t->c_next && t->c_time > 0) | |
283 | t->c_next->c_time += t->c_time; | |
284 | ||
285 | /* Move entry from callout queue to callfree queue. */ | |
286 | p->c_next = t->c_next; | |
287 | t->c_next = callfree; | |
288 | callfree = t; | |
1fa9ff62 SL |
289 | break; |
290 | } | |
1fa9ff62 SL |
291 | splx(s); |
292 | } | |
d01b68d6 | 293 | |
76b2a182 | 294 | /* |
5685f766 KB |
295 | * Compute number of hz until specified time. Used to |
296 | * compute third argument to timeout() from an absolute time. | |
76b2a182 | 297 | */ |
b44234ac | 298 | int |
d01b68d6 BJ |
299 | hzto(tv) |
300 | struct timeval *tv; | |
301 | { | |
b44234ac CT |
302 | register long ticks, sec; |
303 | int s; | |
d01b68d6 | 304 | |
76b2a182 BJ |
305 | /* |
306 | * If number of milliseconds will fit in 32 bit arithmetic, | |
307 | * then compute number of milliseconds to time and scale to | |
308 | * ticks. Otherwise just compute number of hz in time, rounding | |
309 | * times greater than representible to maximum value. | |
310 | * | |
311 | * Delta times less than 25 days can be computed ``exactly''. | |
312 | * Maximum value for any timeout in 10ms ticks is 250 days. | |
313 | */ | |
b44234ac | 314 | s = splhigh(); |
76b2a182 BJ |
315 | sec = tv->tv_sec - time.tv_sec; |
316 | if (sec <= 0x7fffffff / 1000 - 1000) | |
317 | ticks = ((tv->tv_sec - time.tv_sec) * 1000 + | |
318 | (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); | |
319 | else if (sec <= 0x7fffffff / hz) | |
320 | ticks = sec * hz; | |
321 | else | |
322 | ticks = 0x7fffffff; | |
d01b68d6 BJ |
323 | splx(s); |
324 | return (ticks); | |
325 | } | |
014adec1 | 326 | |
b44234ac CT |
327 | /* |
328 | * Start profiling on a process. | |
329 | * | |
330 | * Kernel profiling passes proc0 which never exits and hence | |
331 | * keeps the profile clock running constantly. | |
332 | */ | |
333 | void | |
334 | startprofclock(p) | |
335 | register struct proc *p; | |
336 | { | |
337 | int s; | |
338 | ||
cf5ef508 KB |
339 | if ((p->p_flag & P_PROFIL) == 0) { |
340 | p->p_flag |= P_PROFIL; | |
b44234ac CT |
341 | if (++profprocs == 1 && stathz != 0) { |
342 | s = splstatclock(); | |
343 | psdiv = pscnt = psratio; | |
344 | setstatclockrate(profhz); | |
345 | splx(s); | |
346 | } | |
347 | } | |
348 | } | |
349 | ||
350 | /* | |
351 | * Stop profiling on a process. | |
352 | */ | |
353 | void | |
354 | stopprofclock(p) | |
355 | register struct proc *p; | |
356 | { | |
357 | int s; | |
358 | ||
cf5ef508 KB |
359 | if (p->p_flag & P_PROFIL) { |
360 | p->p_flag &= ~P_PROFIL; | |
b44234ac CT |
361 | if (--profprocs == 0 && stathz != 0) { |
362 | s = splstatclock(); | |
363 | psdiv = pscnt = 1; | |
364 | setstatclockrate(stathz); | |
365 | splx(s); | |
366 | } | |
367 | } | |
368 | } | |
369 | ||
370 | int dk_ndrive = DK_NDRIVE; | |
371 | ||
372 | /* | |
373 | * Statistics clock. Grab profile sample, and if divider reaches 0, | |
374 | * do process and kernel statistics. | |
375 | */ | |
376 | void | |
377 | statclock(frame) | |
378 | register struct clockframe *frame; | |
379 | { | |
380 | #ifdef GPROF | |
381 | register struct gmonparam *g; | |
382 | #endif | |
383 | register struct proc *p; | |
384 | register int i; | |
385 | ||
386 | if (CLKF_USERMODE(frame)) { | |
387 | p = curproc; | |
cf5ef508 | 388 | if (p->p_flag & P_PROFIL) |
b44234ac CT |
389 | addupc_intr(p, CLKF_PC(frame), 1); |
390 | if (--pscnt > 0) | |
391 | return; | |
392 | /* | |
393 | * Came from user mode; CPU was in user state. | |
394 | * If this process is being profiled record the tick. | |
395 | */ | |
396 | p->p_uticks++; | |
397 | if (p->p_nice > NZERO) | |
398 | cp_time[CP_NICE]++; | |
399 | else | |
400 | cp_time[CP_USER]++; | |
401 | } else { | |
402 | #ifdef GPROF | |
403 | /* | |
404 | * Kernel statistics are just like addupc_intr, only easier. | |
405 | */ | |
406 | g = &_gmonparam; | |
407 | if (g->state == GMON_PROF_ON) { | |
408 | i = CLKF_PC(frame) - g->lowpc; | |
f929d58c KM |
409 | if (i < g->textsize) { |
410 | i /= HISTFRACTION * sizeof(*g->kcount); | |
411 | g->kcount[i]++; | |
412 | } | |
b44234ac CT |
413 | } |
414 | #endif | |
415 | if (--pscnt > 0) | |
416 | return; | |
417 | /* | |
418 | * Came from kernel mode, so we were: | |
419 | * - handling an interrupt, | |
420 | * - doing syscall or trap work on behalf of the current | |
421 | * user process, or | |
422 | * - spinning in the idle loop. | |
423 | * Whichever it is, charge the time as appropriate. | |
424 | * Note that we charge interrupts to the current process, | |
425 | * regardless of whether they are ``for'' that process, | |
426 | * so that we know how much of its real time was spent | |
427 | * in ``non-process'' (i.e., interrupt) work. | |
428 | */ | |
429 | p = curproc; | |
430 | if (CLKF_INTR(frame)) { | |
431 | if (p != NULL) | |
432 | p->p_iticks++; | |
433 | cp_time[CP_INTR]++; | |
434 | } else if (p != NULL) { | |
435 | p->p_sticks++; | |
436 | cp_time[CP_SYS]++; | |
437 | } else | |
438 | cp_time[CP_IDLE]++; | |
439 | } | |
440 | pscnt = psdiv; | |
441 | ||
442 | /* | |
443 | * We maintain statistics shown by user-level statistics | |
444 | * programs: the amount of time in each cpu state, and | |
445 | * the amount of time each of DK_NDRIVE ``drives'' is busy. | |
446 | * | |
447 | * XXX should either run linked list of drives, or (better) | |
448 | * grab timestamps in the start & done code. | |
449 | */ | |
450 | for (i = 0; i < DK_NDRIVE; i++) | |
451 | if (dk_busy & (1 << i)) | |
452 | dk_time[i]++; | |
453 | ||
454 | /* | |
cf5ef508 KB |
455 | * We adjust the priority of the current process. The priority of |
456 | * a process gets worse as it accumulates CPU time. The cpu usage | |
457 | * estimator (p_estcpu) is increased here. The formula for computing | |
458 | * priorities (in kern_synch.c) will compute a different value each | |
459 | * time p_estcpu increases by 4. The cpu usage estimator ramps up | |
460 | * quite quickly when the process is running (linearly), and decays | |
461 | * away exponentially, at a rate which is proportionally slower when | |
462 | * the system is busy. The basic principal is that the system will | |
463 | * 90% forget that the process used a lot of CPU time in 5 * loadav | |
464 | * seconds. This causes the system to favor processes which haven't | |
465 | * run much recently, and to round-robin among other processes. | |
b44234ac CT |
466 | */ |
467 | if (p != NULL) { | |
468 | p->p_cpticks++; | |
cf5ef508 KB |
469 | if (++p->p_estcpu == 0) |
470 | p->p_estcpu--; | |
471 | if ((p->p_estcpu & 3) == 0) { | |
5685f766 | 472 | resetpriority(p); |
cf5ef508 KB |
473 | if (p->p_priority >= PUSER) |
474 | p->p_priority = p->p_usrpri; | |
b44234ac CT |
475 | } |
476 | } | |
477 | } | |
478 | ||
014adec1 KM |
479 | /* |
480 | * Return information about system clocks. | |
481 | */ | |
4f522b46 | 482 | sysctl_clockrate(where, sizep) |
014adec1 | 483 | register char *where; |
c1909da4 | 484 | size_t *sizep; |
014adec1 | 485 | { |
4f522b46 | 486 | struct clockinfo clkinfo; |
014adec1 | 487 | |
014adec1 | 488 | /* |
4f522b46 | 489 | * Construct clockinfo structure. |
014adec1 | 490 | */ |
4f522b46 KM |
491 | clkinfo.hz = hz; |
492 | clkinfo.tick = tick; | |
493 | clkinfo.profhz = profhz; | |
494 | clkinfo.stathz = stathz ? stathz : hz; | |
495 | return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo))); | |
014adec1 | 496 | } |