Commit | Line | Data |
---|---|---|
f406ae69 | 1 | /*- |
1acdbcea KB |
2 | * Copyright (c) 1982, 1986, 1991, 1993 |
3 | * The Regents of the University of California. All rights reserved. | |
f406ae69 KB |
4 | * |
5 | * %sccs.include.redist.c% | |
da7c5cc6 | 6 | * |
1acdbcea | 7 | * @(#)kern_clock.c 8.1 (Berkeley) %G% |
da7c5cc6 | 8 | */ |
961945a8 | 9 | |
38a01dbe KB |
10 | #include <sys/param.h> |
11 | #include <sys/systm.h> | |
12 | #include <sys/dkstat.h> | |
13 | #include <sys/callout.h> | |
14 | #include <sys/kernel.h> | |
15 | #include <sys/proc.h> | |
16 | #include <sys/resourcevar.h> | |
83be5fac | 17 | |
38a01dbe | 18 | #include <machine/cpu.h> |
961945a8 | 19 | |
8487304f | 20 | #ifdef GPROF |
38a01dbe | 21 | #include <sys/gmon.h> |
8487304f KM |
22 | #endif |
23 | ||
/*
 * Interim clock-adjustment support used by hardclock().  While
 * adjtimedelta is nonzero, every hardclock tick advances the time of day
 * by tick - ADJ_TICK (adjtimedelta negative) or tick + ADJ_TICK
 * (adjtimedelta positive) and moves adjtimedelta one step toward zero.
 * ADJ_TICK is therefore expressed in the same units as tick.
 */
#define ADJTIME /* For now... */
#define ADJ_TICK 1000
int adjtimedelta;
27 | ||
76b2a182 BJ |
28 | /* |
29 | * Clock handling routines. | |
30 | * | |
b44234ac CT |
31 | * This code is written to operate with two timers that run independently of |
32 | * each other. The main clock, running hz times per second, is used to keep | |
33 | * track of real time. The second timer handles kernel and user profiling, | |
34 | * and does resource use estimation. If the second timer is programmable, | |
35 | * it is randomized to avoid aliasing between the two clocks. For example, | |
36 | * the randomization prevents an adversary from always giving up the cpu | |
37 | * just before its quantum expires. Otherwise, it would never accumulate | |
38 | * cpu ticks. The mean frequency of the second timer is stathz. | |
39 | * | |
40 | * If no second timer exists, stathz will be zero; in this case we drive | |
41 | * profiling and statistics off the main clock. This WILL NOT be accurate; | |
42 | * do not do it unless absolutely necessary. | |
43 | * | |
44 | * The statistics clock may (or may not) be run at a higher rate while | |
45 | * profiling. This profile clock runs at profhz. We require that profhz | |
46 | * be an integral multiple of stathz. | |
47 | * | |
48 | * If the statistics clock is running fast, it must be divided by the ratio | |
49 | * profhz/stathz for statistics. (For profiling, every tick counts.) | |
76b2a182 | 50 | */ |
6602c75b | 51 | |
76b2a182 BJ |
52 | /* |
53 | * TODO: | |
88a7a62a | 54 | * allocate more timeout table slots when table overflows. |
76b2a182 | 55 | */ |
9c5cfb8b | 56 | |
/*
 * Bump a timeval by a small number of usec's.
 *
 * t is a pointer to the timeval to advance and usec is the increment.
 * At most one carry into tv_sec is performed, so the increment must be
 * small enough that tv_usec ends up below 2000000.
 *
 * The body is wrapped in do { } while (0) so that "BUMPTIME(t, u);" is a
 * single statement and may be used as the unbraced arm of an if/else.
 * The locals carry a bumptime_ prefix so that an argument expression
 * naming a caller variable such as "tp" or "us" is not captured.
 */
#define	BUMPTIME(t, usec) do { \
	register volatile struct timeval *bumptime_tp = (t); \
	register long bumptime_us; \
 \
	bumptime_tp->tv_usec = bumptime_us = bumptime_tp->tv_usec + (usec); \
	if (bumptime_us >= 1000000) { \
		bumptime_tp->tv_usec = bumptime_us - 1000000; \
		bumptime_tp->tv_sec++; \
	} \
} while (0)
83be5fac | 70 | |
/*
 * Clock state shared by the routines in this file.
 */
int stathz;	/* statistics clock rate; 0 means hardclock() drives statclock() */
int profhz;	/* profiling clock rate; defaulted in initclocks() when left 0 */
int profprocs;	/* count maintained by startprofclock()/stopprofclock() */
int ticks;	/* incremented once per hardclock() call */
static int psdiv, pscnt; /* prof => stat divider */
int psratio; /* ratio: prof / stat */

/* Both are advanced from hardclock(), hence volatile. */
volatile struct timeval time;
volatile struct timeval mono_time;
80 | ||
76b2a182 | 81 | /* |
b44234ac | 82 | * Initialize clock frequencies and start both clocks running. |
76b2a182 | 83 | */ |
b44234ac CT |
84 | void |
85 | initclocks() | |
86 | { | |
87 | register int i; | |
88 | ||
89 | /* | |
90 | * Set divisors to 1 (normal case) and let the machine-specific | |
91 | * code do its bit. | |
92 | */ | |
93 | psdiv = pscnt = 1; | |
94 | cpu_initclocks(); | |
95 | ||
96 | /* | |
97 | * Compute profhz/stathz, and fix profhz if needed. | |
98 | */ | |
99 | i = stathz ? stathz : hz; | |
100 | if (profhz == 0) | |
101 | profhz = i; | |
102 | psratio = profhz / i; | |
103 | } | |
104 | ||
105 | /* | |
106 | * The real-time timer, interrupting hz times per second. | |
107 | */ | |
108 | void | |
d293217c | 109 | hardclock(frame) |
b44234ac | 110 | register struct clockframe *frame; |
83be5fac | 111 | { |
0a34b6fd | 112 | register struct callout *p1; |
83be5fac | 113 | |
76b2a182 BJ |
114 | /* |
115 | * Update real-time timeout queue. | |
116 | * At front of queue are some number of events which are ``due''. | |
117 | * The time to these is <= 0 and if negative represents the | |
118 | * number of ticks which have passed since it was supposed to happen. | |
119 | * The rest of the q elements (times > 0) are events yet to happen, | |
120 | * where the time for each is given as a delta from the previous. | |
121 | * Decrementing just the first of these serves to decrement the time | |
122 | * to all events. | |
123 | */ | |
b44234ac CT |
124 | needsoft = 0; |
125 | for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) { | |
88a7a62a SL |
126 | if (--p1->c_time > 0) |
127 | break; | |
88a7a62a SL |
128 | if (p1->c_time == 0) |
129 | break; | |
88a7a62a | 130 | } |
5da67d35 | 131 | |
76b2a182 | 132 | /* |
b44234ac | 133 | * Run current process's virtual and profile time, as needed. |
76b2a182 | 134 | */ |
b44234ac CT |
135 | pstats = p->p_stats; |
136 | if (CLKF_USERMODE(frame) && | |
137 | timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && | |
0157085f | 138 | itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) |
53fbb3b3 | 139 | psignal(p, SIGVTALRM); |
0157085f MK |
140 | if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && |
141 | itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) | |
53fbb3b3 | 142 | psignal(p, SIGPROF); |
83be5fac | 143 | } |
76b2a182 | 144 | |
53a32545 | 145 | /* |
b44234ac | 146 | * If no separate statistics clock is available, run it from here. |
53a32545 | 147 | */ |
272cb936 | 148 | if (stathz == 0) |
b44234ac | 149 | statclock(frame); |
53a32545 | 150 | |
76b2a182 | 151 | /* |
cfd66a29 CT |
152 | * Increment the time-of-day. The increment is just ``tick'' unless |
153 | * we are still adjusting the clock; see adjtime(). | |
76b2a182 | 154 | */ |
5fe6e3b1 | 155 | ticks++; |
45e9acec MK |
156 | #ifdef ADJTIME |
157 | if (adjtimedelta == 0) | |
158 | bumptime(&time, tick); | |
159 | else { | |
160 | if (adjtimedelta < 0) { | |
161 | bumptime(&time, tick-ADJ_TICK); | |
162 | adjtimedelta++; | |
163 | } else { | |
164 | bumptime(&time, tick+ADJ_TICK); | |
165 | adjtimedelta--; | |
166 | } | |
167 | } | |
168 | #else | |
cfd66a29 CT |
169 | if (timedelta == 0) |
170 | delta = tick; | |
171 | else { | |
172 | delta = tick + tickdelta; | |
173 | timedelta -= tickdelta; | |
99e47f6b | 174 | } |
cfd66a29 CT |
175 | BUMPTIME(&time, delta); |
176 | BUMPTIME(&mono_time, delta); | |
53a32545 SL |
177 | |
178 | /* | |
b44234ac CT |
179 | * Process callouts at a very low cpu priority, so we don't keep the |
180 | * relatively high clock interrupt priority any longer than necessary. | |
53a32545 | 181 | */ |
53a32545 | 182 | #endif |
b44234ac | 183 | setsoftclock(); |
53a32545 SL |
184 | } |
185 | ||
76b2a182 | 186 | /* |
b44234ac | 187 | * Software (low priority) clock interrupt. |
76b2a182 BJ |
188 | * Run periodic events from timeout queue. |
189 | */ | |
260ea681 | 190 | /*ARGSUSED*/ |
b44234ac CT |
191 | void |
192 | softclock() | |
f403d99f | 193 | { |
b44234ac CT |
194 | register struct callout *c; |
195 | register void *arg; | |
196 | register void (*func) __P((void *)); | |
197 | register int s; | |
f403d99f | 198 | |
b44234ac CT |
199 | s = splhigh(); |
200 | while ((c = calltodo.c_next) != NULL && c->c_time <= 0) { | |
201 | func = c->c_func; | |
202 | arg = c->c_arg; | |
203 | calltodo.c_next = c->c_next; | |
204 | c->c_next = callfree; | |
205 | callfree = c; | |
4f083fd7 | 206 | splx(s); |
b44234ac CT |
207 | (*func)(arg); |
208 | (void) splhigh(); | |
877ef342 | 209 | } |
b44234ac | 210 | splx(s); |
37d40d06 KM |
211 | } |
212 | ||
88a7a62a | 213 | /* |
0157085f | 214 | * Arrange that (*func)(arg) is called in t/hz seconds. |
83be5fac | 215 | */ |
b44234ac | 216 | void |
0157085f | 217 | timeout(func, arg, t) |
b44234ac CT |
218 | void (*func) __P((void *)); |
219 | void *arg; | |
88a7a62a | 220 | register int t; |
83be5fac | 221 | { |
c4710996 | 222 | register struct callout *p1, *p2, *pnew; |
b44234ac | 223 | register int s; |
83be5fac | 224 | |
b44234ac | 225 | s = splhigh(); |
ba96129b | 226 | if (t <= 0) |
88a7a62a | 227 | t = 1; |
c4710996 BJ |
228 | pnew = callfree; |
229 | if (pnew == NULL) | |
230 | panic("timeout table overflow"); | |
231 | callfree = pnew->c_next; | |
232 | pnew->c_arg = arg; | |
0157085f | 233 | pnew->c_func = func; |
c4710996 | 234 | for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) |
d45b61eb SL |
235 | if (p2->c_time > 0) |
236 | t -= p2->c_time; | |
c4710996 BJ |
237 | p1->c_next = pnew; |
238 | pnew->c_next = p2; | |
239 | pnew->c_time = t; | |
240 | if (p2) | |
241 | p2->c_time -= t; | |
83be5fac BJ |
242 | splx(s); |
243 | } | |
1fa9ff62 SL |
244 | |
245 | /* | |
246 | * untimeout is called to remove a function timeout call | |
247 | * from the callout structure. | |
248 | */ | |
b44234ac | 249 | void |
0157085f | 250 | untimeout(func, arg) |
b44234ac CT |
251 | void (*func) __P((void *)); |
252 | void *arg; | |
1fa9ff62 | 253 | { |
1fa9ff62 SL |
254 | register struct callout *p1, *p2; |
255 | register int s; | |
256 | ||
9c5cfb8b | 257 | s = splhigh(); |
b44234ac | 258 | for (p1 = &calltodo; (p2 = p1->c_next) != NULL; p1 = p2) { |
0157085f | 259 | if (p2->c_func == func && p2->c_arg == arg) { |
d01b68d6 | 260 | if (p2->c_next && p2->c_time > 0) |
1fa9ff62 SL |
261 | p2->c_next->c_time += p2->c_time; |
262 | p1->c_next = p2->c_next; | |
263 | p2->c_next = callfree; | |
264 | callfree = p2; | |
265 | break; | |
266 | } | |
267 | } | |
268 | splx(s); | |
269 | } | |
d01b68d6 | 270 | |
76b2a182 BJ |
271 | /* |
272 | * Compute number of hz until specified time. | |
273 | * Used to compute third argument to timeout() from an | |
274 | * absolute time. | |
275 | */ | |
b44234ac | 276 | int |
d01b68d6 BJ |
277 | hzto(tv) |
278 | struct timeval *tv; | |
279 | { | |
b44234ac CT |
280 | register long ticks, sec; |
281 | int s; | |
d01b68d6 | 282 | |
76b2a182 BJ |
283 | /* |
284 | * If number of milliseconds will fit in 32 bit arithmetic, | |
285 | * then compute number of milliseconds to time and scale to | |
286 | * ticks. Otherwise just compute number of hz in time, rounding | |
287 | * times greater than representible to maximum value. | |
288 | * | |
289 | * Delta times less than 25 days can be computed ``exactly''. | |
290 | * Maximum value for any timeout in 10ms ticks is 250 days. | |
291 | */ | |
b44234ac | 292 | s = splhigh(); |
76b2a182 BJ |
293 | sec = tv->tv_sec - time.tv_sec; |
294 | if (sec <= 0x7fffffff / 1000 - 1000) | |
295 | ticks = ((tv->tv_sec - time.tv_sec) * 1000 + | |
296 | (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); | |
297 | else if (sec <= 0x7fffffff / hz) | |
298 | ticks = sec * hz; | |
299 | else | |
300 | ticks = 0x7fffffff; | |
d01b68d6 BJ |
301 | splx(s); |
302 | return (ticks); | |
303 | } | |
014adec1 | 304 | |
b44234ac CT |
305 | /* |
306 | * Start profiling on a process. | |
307 | * | |
308 | * Kernel profiling passes proc0 which never exits and hence | |
309 | * keeps the profile clock running constantly. | |
310 | */ | |
311 | void | |
312 | startprofclock(p) | |
313 | register struct proc *p; | |
314 | { | |
315 | int s; | |
316 | ||
317 | if ((p->p_flag & SPROFIL) == 0) { | |
318 | p->p_flag |= SPROFIL; | |
319 | if (++profprocs == 1 && stathz != 0) { | |
320 | s = splstatclock(); | |
321 | psdiv = pscnt = psratio; | |
322 | setstatclockrate(profhz); | |
323 | splx(s); | |
324 | } | |
325 | } | |
326 | } | |
327 | ||
328 | /* | |
329 | * Stop profiling on a process. | |
330 | */ | |
331 | void | |
332 | stopprofclock(p) | |
333 | register struct proc *p; | |
334 | { | |
335 | int s; | |
336 | ||
337 | if (p->p_flag & SPROFIL) { | |
338 | p->p_flag &= ~SPROFIL; | |
339 | if (--profprocs == 0 && stathz != 0) { | |
340 | s = splstatclock(); | |
341 | psdiv = pscnt = 1; | |
342 | setstatclockrate(stathz); | |
343 | splx(s); | |
344 | } | |
345 | } | |
346 | } | |
347 | ||
/*
 * Run-time visible copy of the compile-time DK_NDRIVE constant, which is
 * the number of drive slots statclock() scans when accumulating dk_time[].
 */
int dk_ndrive = DK_NDRIVE;
349 | ||
/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 *
 * frame describes the interrupted context; it is examined only through
 * the CLKF_* macros.  While the clock is running at the faster profiling
 * rate, pscnt counts down from psdiv so that the statistics below are
 * gathered on only one call in psdiv; the profile sample is taken on
 * every call.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		/* Profile sample: taken on every call, before the divider. */
		if (p->p_flag & SPROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * Charge a user tick to the process and to the system-wide
		 * nice or user bucket, depending on the process's nice value.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 * The pc offset from lowpc selects the histogram bucket.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	/* Divider expired: reload it for the next statistics interval. */
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away
	 * exponentially, at a rate which is proportionally slower
	 * when the system is busy.  The basic principle is that the
	 * system will 90% forget that a process used a lot of CPU
	 * time in 5*loadav seconds.  This causes the system to favor
	 * processes which haven't run much recently, and to
	 * round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* If the increment wrapped to zero, step back again. */
		if (++p->p_cpu == 0)
			p->p_cpu--;
		/* Recompute the priority on every fourth p_cpu value. */
		if ((p->p_cpu & 3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}
}
458 | ||
014adec1 KM |
459 | /* |
460 | * Return information about system clocks. | |
461 | */ | |
4f522b46 | 462 | sysctl_clockrate(where, sizep) |
014adec1 | 463 | register char *where; |
c1909da4 | 464 | size_t *sizep; |
014adec1 | 465 | { |
4f522b46 | 466 | struct clockinfo clkinfo; |
014adec1 | 467 | |
014adec1 | 468 | /* |
4f522b46 | 469 | * Construct clockinfo structure. |
014adec1 | 470 | */ |
4f522b46 KM |
471 | clkinfo.hz = hz; |
472 | clkinfo.tick = tick; | |
473 | clkinfo.profhz = profhz; | |
474 | clkinfo.stathz = stathz ? stathz : hz; | |
475 | return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo))); | |
014adec1 | 476 | } |