Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /*- |
2 | * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * 3. All advertising materials mentioning features or use of this software | |
14 | * must display the following acknowledgement: | |
15 | * This product includes software developed by the University of | |
16 | * California, Berkeley and its contributors. | |
17 | * 4. Neither the name of the University nor the names of its contributors | |
18 | * may be used to endorse or promote products derived from this software | |
19 | * without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
32 | * | |
600f7f07 | 33 | * from: @(#)kern_clock.c 7.16 (Berkeley) 5/9/91 |
9e85cc83 | 34 | * $Id: kern_clock.c,v 1.12 1994/03/01 23:21:44 phk Exp $ |
15637ed4 RG |
35 | */ |
36 | ||
9e85cc83 GW |
37 | /* Portions of this software are covered by the following: */ |
38 | /****************************************************************************** | |
39 | * * | |
40 | * Copyright (c) David L. Mills 1993, 1994 * | |
41 | * * | |
42 | * Permission to use, copy, modify, and distribute this software and its * | |
43 | * documentation for any purpose and without fee is hereby granted, provided * | |
44 | * that the above copyright notice appears in all copies and that both the * | |
45 | * copyright notice and this permission notice appear in supporting * | |
46 | * documentation, and that the name University of Delaware not be used in * | |
47 | * advertising or publicity pertaining to distribution of the software * | |
48 | * without specific, written prior permission. The University of Delaware * | |
49 | * makes no representations about the suitability this software for any * | |
50 | * purpose. It is provided "as is" without express or implied warranty. * | |
51 | * * | |
52 | *****************************************************************************/ | |
53 | ||
54 | ||
15637ed4 RG |
55 | #include "param.h" |
56 | #include "systm.h" | |
57 | #include "dkstat.h" | |
58 | #include "callout.h" | |
59 | #include "kernel.h" | |
60 | #include "proc.h" | |
fde1aeb2 | 61 | #include "signalvar.h" |
15637ed4 | 62 | #include "resourcevar.h" |
9e85cc83 | 63 | #include "timex.h" |
15637ed4 RG |
64 | |
65 | #include "machine/cpu.h" | |
66 | ||
a702c214 NW |
67 | #include "resource.h" |
68 | #include "vm/vm.h" | |
69 | ||
15637ed4 RG |
70 | #ifdef GPROF |
71 | #include "gprof.h" | |
72 | #endif | |
73 | ||
4c45483e GW |
74 | static void gatherstats(clockframe *); |
75 | ||
bbc3f849 GW |
76 | /* From callout.h */ |
77 | struct callout *callfree, *callout, calltodo; | |
78 | int ncallout; | |
79 | ||
15637ed4 RG |
80 | /* |
81 | * Clock handling routines. | |
82 | * | |
83 | * This code is written to operate with two timers which run | |
84 | * independently of each other. The main clock, running at hz | |
85 | * times per second, is used to do scheduling and timeout calculations. | |
86 | * The second timer does resource utilization estimation statistically | |
87 | * based on the state of the machine phz times a second. Both functions | |
88 | * can be performed by a single clock (ie hz == phz), however the | |
89 | * statistics will be much more prone to errors. Ideally a machine | |
90 | * would have separate clocks measuring time spent in user state, system | |
91 | * state, interrupt state, and idle state. These clocks would allow a non- | |
92 | * approximate measure of resource utilization. | |
93 | */ | |
94 | ||
95 | /* | |
96 | * TODO: | |
97 | * time of day, system/user timing, timeouts, profiling on separate timers | |
98 | * allocate more timeout table slots when table overflows. | |
99 | */ | |
100 | ||
101 | /* | |
102 | * Bump a timeval by a small number of usec's. | |
103 | */ | |
104 | #define BUMPTIME(t, usec) { \ | |
105 | register struct timeval *tp = (t); \ | |
106 | \ | |
107 | tp->tv_usec += (usec); \ | |
108 | if (tp->tv_usec >= 1000000) { \ | |
109 | tp->tv_usec -= 1000000; \ | |
110 | tp->tv_sec++; \ | |
111 | } \ | |
112 | } | |
113 | ||
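As a quick illustration of the macro above, here is a minimal user-space sketch of the same arithmetic (the struct and values are illustrative only; BUMPTIME itself assumes the increment is small enough that at most one carry is needed, per the "small number of usec's" comment):

```c
#include <stdio.h>
#include <sys/time.h>

/* Stand-alone equivalent of BUMPTIME(&tv, usec): add a small number of
 * microseconds and carry into the seconds field at 1000000. */
static void
bumptime(struct timeval *tp, long usec)
{
	tp->tv_usec += usec;
	if (tp->tv_usec >= 1000000) {
		tp->tv_usec -= 1000000;
		tp->tv_sec++;
	}
}

int
main(void)
{
	struct timeval tv = { 100, 999900 };	/* 100 s + 999900 us */

	bumptime(&tv, 200);			/* crosses the second boundary */
	printf("%ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
	/* prints 101.000100: exactly one carry into tv_sec */
	return (0);
}
```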
9e85cc83 GW |
114 | /* |
115 | * Phase-lock loop (PLL) definitions | |
116 | * | |
117 | * The following defines establish the performance envelope of the PLL. | |
118 | * They specify the maximum phase error (MAXPHASE), maximum frequency | |
119 | * error (MAXFREQ), minimum interval between updates (MINSEC) and | |
120 | * maximum interval between updates (MAXSEC). The intent of these bounds | |
121 | * is to force the PLL to operate within predefined limits in order to | |
122 | * satisfy correctness assertions. An excursion which exceeds these | |
123 | * bounds is clamped to the bound and operation proceeds accordingly. In | |
124 | * practice, this can occur only if something has failed or is operating | |
125 | * out of tolerance, but otherwise the PLL continues to operate in a | |
126 | * stable mode. | |
127 | * | |
128 | * MAXPHASE must be set greater than or equal to CLOCK.MAX (128 ms), as | |
129 | * defined in the NTP specification. CLOCK.MAX establishes the maximum | |
130 | * time offset allowed before the system time is reset, rather than | |
131 | * incrementally adjusted. Here, the maximum offset is clamped to | |
132 | * MAXPHASE only in order to prevent overflow errors due to defective | |
133 | * protocol implementations. | |
134 | * | |
135 | * MAXFREQ reflects the manufacturing frequency tolerance of the CPU | |
136 | * clock oscillator plus the maximum slew rate allowed by the protocol. | |
137 | * It should be set to at least the frequency tolerance of the | |
138 | * oscillator plus 100 ppm for vernier frequency adjustments. If the | |
139 | * kernel frequency discipline code is installed (PPS_SYNC), the CPU | |
140 | * oscillator frequency is disciplined to an external source, presumably | |
141 | * with negligible frequency error, and MAXFREQ can be reduced. | |
142 | */ | |
143 | #define MAXPHASE 512000L /* max phase error (us) */ | |
144 | #ifdef PPS_SYNC | |
145 | #define MAXFREQ (100L << SHIFT_USEC) /* max freq error (scaled ppm) */ | |
146 | #else | |
147 | #define MAXFREQ (200L << SHIFT_USEC) /* max freq error (scaled ppm) */ | |
148 | #endif /* PPS_SYNC */ | |
149 | #define MINSEC 16L /* min interval between updates (s) */ | |
150 | #define MAXSEC 1200L /* max interval between updates (s) */ | |
151 | ||
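The frequency bounds above are stored in "scaled ppm", that is, parts per million shifted left by SHIFT_USEC from timex.h. A small sketch of the conversion, assuming SHIFT_USEC is 16 (its customary value in this kernel model; verify against the local timex.h):

```c
#include <stdio.h>

#define SHIFT_USEC	16			/* assumed; defined in timex.h */
#define MAXFREQ		(200L << SHIFT_USEC)	/* 200 ppm in scaled-ppm units */

int
main(void)
{
	long scaled = MAXFREQ;

	/* 200 ppm of frequency error is 200 us of drift per second */
	printf("scaled = %ld, ppm = %ld\n", scaled, scaled >> SHIFT_USEC);
	return (0);
}
```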
152 | /* | |
153 | * The following variables are read and set by the ntp_adjtime() system | |
154 | * call. The ntp_pll.status variable defines the synchronization status of | |
155 | * the system clock, with codes defined in the timex.h header file. The | |
156 | * time_offset variable is used by the PLL to adjust the system time in | |
157 | * small increments. The time_constant variable determines the bandwidth | |
158 | * or "stiffness" of the PLL. The time_tolerance variable is the maximum | |
159 | * frequency error or tolerance of the CPU clock oscillator and is a | |
160 | * property of the architecture; however, in principle it could change | |
161 | * as result of the presence of external discipline signals, for | |
162 | * instance. The time_precision variable is usually equal to the kernel | |
163 | * tick variable; however, in cases where a precision clock counter or | |
164 | * external clock is available, the resolution can be much less than | |
165 | * this and depend on whether the external clock is working or not. The | |
166 | * time_maxerror variable is initialized by a ntp_adjtime() call and | |
167 | * increased by the kernel once each second to reflect the maximum error | |
168 | * bound growth. The time_esterror variable is set and read by the | |
169 | * ntp_adjtime() call, but otherwise not used by the kernel. | |
170 | */ | |
171 | /* - use appropriate fields in ntp_pll instead */ | |
172 | #if 0 | |
173 | int ntp_pll.status = TIME_BAD; /* clock synchronization status */ | |
174 | long time_offset = 0; /* time adjustment (us) */ | |
175 | long time_constant = 0; /* pll time constant */ | |
176 | long time_tolerance = MAXFREQ; /* frequency tolerance (scaled ppm) */ | |
177 | long time_precision = 1; /* clock precision (us) */ | |
178 | long time_maxerror = MAXPHASE; /* maximum error (us) */ | |
179 | long time_esterror = MAXPHASE; /* estimated error (us) */ | |
180 | #endif | |
181 | ||
182 | /* | |
183 | * The following variables establish the state of the PLL and the | |
184 | * residual time and frequency offset of the local clock. The time_phase | |
185 | * variable is the phase increment and the ntp_pll.frequency variable is the | |
186 | * frequency increment of the kernel time variable at each tick of the | |
187 | * clock. The ntp_pll.frequency variable is set via ntp_adjtime() from a value | |
188 | * stored in a file when the synchronization daemon is first started. | |
189 | * Its value is retrieved via ntp_adjtime() and written to the file | |
190 | * about once per hour by the daemon. The time_adj variable is the | |
191 | * adjustment added to the value of tick at each timer interrupt and is | |
192 | * recomputed at each timer interrupt. The time_reftime variable is the | |
193 | * second's portion of the system time on the last call to | |
194 | * ntp_adjtime(). It is used to adjust the ntp_pll.frequency variable and to | |
195 | * increase the time_maxerror as the time since last update increases. | |
196 | * The scale factors are defined in the timex.h header file. | |
197 | */ | |
198 | long time_phase = 0; /* phase offset (scaled us) */ | |
199 | #if 0 | |
200 | long ntp_pll.frequency = 0; /* frequency offset (scaled ppm) */ | |
201 | #endif | |
202 | long time_adj = 0; /* tick adjust (scaled 1 / hz) */ | |
203 | long time_reftime; /* time at last adjustment (s) */ | |
204 | ||
205 | #ifdef PPS_SYNC | |
206 | /* | |
207 | * The following defines and declarations are used only if a pulse-per- | |
208 | * second (PPS) signal is available and connected via a modem control | |
209 | * lead, such as produced by the optional ppsclock feature incorporated | |
210 | * in the asynch driver. They establish the design parameters of the PPS | |
211 | * frequency-lock loop used to discipline the CPU clock oscillator to | |
212 | * the PPS signal. PPS_AVG is the averaging factor for the frequency | |
213 | * loop. PPS_SHIFT and PPS_SHIFTMAX specify the minimum and maximum | |
214 | * intervals, respectively, in seconds as a power of two. The | |
215 | * PPS_DISPINC is the initial increment to pps_disp at each second. | |
216 | */ | |
217 | #define PPS_AVG 2 /* pps averaging constant (shift) */ | |
218 | #define PPS_SHIFT 2 /* min interval duration (s) (shift) */ | |
219 | #define PPS_SHIFTMAX 8 /* max interval duration (s) (shift) */ | |
220 | #define PPS_DISPINC 0L /* dispersion increment (us/s) */ | |
221 | ||
222 | /* | |
223 | * The pps_time variable contains the time at each calibration as read | |
224 | * by microtime(). The pps_usec variable is latched from a high | |
225 | * resolution counter or external clock at pps_time. Here we want the | |
226 | * hardware counter contents only, not the contents plus the | |
227 | * time_tv.usec as usual. The pps_ybar variable is the current CPU | |
228 | * oscillator frequency offset estimate relative to the PPS signal. The | |
229 | * pps_disp variable is the current error estimate, which is increased | |
230 | * by pps_dispinc once each second. Frequency updates are permitted only | |
231 | * when pps_disp is below the pps_dispmax threshold. The pps_mf[] array | |
232 | * is used as a median filter for the frequency estimate and to derive | |
233 | * the error estimate. | |
234 | */ | |
235 | struct timeval pps_time; /* kernel time at last interval */ | |
236 | long pps_usec = 0; /* usec counter at last interval */ | |
237 | #if 0 | |
238 | long pps_ybar = 0; /* frequency estimate (scaled ppm) */ | |
239 | long pps_disp = MAXFREQ; /* dispersion estimate (scaled ppm) */ | |
240 | #endif | |
241 | long pps_dispmax = MAXFREQ / 2; /* dispersion threshold */ | |
242 | long pps_dispinc = PPS_DISPINC; /* pps dispersion increment/sec */ | |
243 | long pps_mf[] = {0, 0, 0}; /* pps median filter */ | |
244 | ||
245 | /* | |
246 | * The pps_count variable counts the seconds of the calibration | |
247 | * interval, the duration of which is pps_shift (s) in powers of two. | |
248 | * The pps_intcnt variable counts the calibration intervals for use in | |
249 | * the interval-adaptation algorithm. It's just too complicated for | |
250 | * words. | |
251 | */ | |
252 | int pps_count = 0; /* calibration interval counter (s) */ | |
253 | #if 0 | |
254 | int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */ | |
255 | #endif | |
256 | int pps_intcnt = 0; /* intervals at current duration */ | |
257 | ||
258 | /* | |
259 | * PPS signal quality monitors | |
260 | */ | |
261 | #if 0 | |
262 | long pps_calcnt; /* calibration intervals */ | |
263 | long pps_jitcnt; /* jitter limit exceeded */ | |
264 | long pps_discnt; /* dispersion limit exceeded */ | |
265 | #endif | |
266 | #endif /* PPS_SYNC */ | |
267 | ||
268 | struct timex ntp_pll = { | |
269 | 0, /* mode */ | |
270 | 0, /* offset */ | |
271 | 0, /* frequency */ | |
272 | MAXPHASE, /* maxerror */ | |
273 | MAXPHASE, /* esterror */ | |
274 | TIME_BAD, /* status */ | |
275 | 0, /* time_constant */ | |
276 | 1, /* precision */ | |
277 | MAXFREQ, /* tolerance */ | |
278 | 0, /* ybar */ | |
279 | #ifdef PPS_SYNC | |
280 | MAXFREQ, /* disp */ | |
281 | PPS_SHIFT, /* shift */ | |
282 | 0, /* calcnt */ | |
283 | 0, /* jitcnt */ | |
284 | 0 /* discnt */ | |
285 | #endif | |
286 | }; | |
287 | ||
288 | /* | |
289 | * hardupdate() - local clock update | |
290 | * | |
291 | * This routine is called by ntp_adjtime() to update the local clock | |
292 | * phase and frequency. This is used to implement an adaptive-parameter, | |
293 | * first-order, type-II phase-lock loop. The code computes the time | |
294 | * since the last update and clamps to a maximum (for robustness). Then | |
295 | * it multiplies by the offset (sorry about the ugly multiply), scales | |
296 | * by the time constant, and adds to the frequency variable. Then, it | |
297 | * computes the phase variable as the offset scaled by the time | |
298 | * constant. Note that all shifts are assumed to be positive. Only | |
299 | * enough error checking is done to prevent bizarre behavior due to | |
300 | * overflow problems. | |
301 | * | |
302 | * For default SHIFT_UPDATE = 12, the offset is limited to +-512 ms, the | |
303 | * maximum interval between updates is 4096 s and the maximum frequency | |
304 | * offset is +-31.25 ms/s. | |
305 | */ | |
306 | void | |
307 | hardupdate(offset) | |
308 | long offset; | |
309 | { | |
310 | long mtemp; | |
311 | ||
312 | if (offset > MAXPHASE) | |
313 | ntp_pll.offset = MAXPHASE << SHIFT_UPDATE; | |
314 | else if (offset < -MAXPHASE) | |
315 | ntp_pll.offset = -(MAXPHASE << SHIFT_UPDATE); | |
316 | else | |
317 | ntp_pll.offset = offset << SHIFT_UPDATE; | |
318 | mtemp = time.tv_sec - time_reftime; | |
319 | time_reftime = time.tv_sec; | |
320 | if (mtemp > MAXSEC) | |
321 | mtemp = 0; | |
322 | ||
323 | /* ugly multiply should be replaced */ | |
324 | if (offset < 0) | |
325 | ntp_pll.frequency -= | |
326 | (-offset * mtemp) >> (ntp_pll.time_constant | |
327 | + ntp_pll.time_constant | |
328 | + SHIFT_KF | |
329 | - SHIFT_USEC); | |
330 | else | |
331 | ntp_pll.frequency += | |
332 | (offset * mtemp) >> (ntp_pll.time_constant | |
333 | + ntp_pll.time_constant | |
334 | + SHIFT_KF | |
335 | - SHIFT_USEC); | |
336 | if (ntp_pll.frequency > ntp_pll.tolerance) | |
337 | ntp_pll.frequency = ntp_pll.tolerance; | |
338 | else if (ntp_pll.frequency < -ntp_pll.tolerance) | |
339 | ntp_pll.frequency = -ntp_pll.tolerance; | |
340 | if (ntp_pll.status == TIME_BAD) | |
341 | ntp_pll.status = TIME_OK; | |
342 | } | |
343 | ||
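To make the scaling concrete, here is a worked example of the frequency and phase terms computed above, with the timex.h shift constants written out under assumed values (SHIFT_UPDATE 12, SHIFT_KF 16, SHIFT_USEC 16; verify against the local timex.h):

```c
#include <stdio.h>

/* Assumed timex.h values; verify locally. */
#define SHIFT_UPDATE	12
#define SHIFT_KF	16
#define SHIFT_USEC	16

int
main(void)
{
	long offset = 10000;		/* measured phase error: +10 ms */
	long mtemp = 64;		/* 64 s since the previous update */
	long time_constant = 0;		/* stiffest PLL setting */
	long freq_step, scaled_offset;

	/* frequency gain, as in hardupdate() for a positive offset */
	freq_step = (offset * mtemp) >>
	    (time_constant + time_constant + SHIFT_KF - SHIFT_USEC);
	/* phase term later consumed by hardclock(), scaled by SHIFT_UPDATE */
	scaled_offset = offset << SHIFT_UPDATE;

	printf("freq step = %ld scaled ppm (~%ld ppm), offset = %ld\n",
	    freq_step, freq_step >> SHIFT_USEC, scaled_offset);
	return (0);
}
```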
15637ed4 RG |
344 | /* |
345 | * The hz hardware interval timer. | |
346 | * We update the events relating to real time. | |
347 | * If this timer is also being used to gather statistics, | |
348 | * we run through the statistics gathering routine as well. | |
349 | */ | |
4c45483e | 350 | void |
15637ed4 RG |
351 | hardclock(frame) |
352 | clockframe frame; | |
353 | { | |
354 | register struct callout *p1; | |
355 | register struct proc *p = curproc; | |
4c45483e | 356 | register struct pstats *pstats = 0; |
a702c214 NW |
357 | register struct rusage *ru; |
358 | register struct vmspace *vm; | |
15637ed4 RG |
359 | register int s; |
360 | int needsoft = 0; | |
361 | extern int tickdelta; | |
362 | extern long timedelta; | |
9e85cc83 | 363 | long ltemp, time_update = 0; |
15637ed4 RG |
364 | |
365 | /* | |
366 | * Update real-time timeout queue. | |
367 | * At front of queue are some number of events which are ``due''. | |
368 | * The time to these is <= 0 and if negative represents the | |
369 | * number of ticks which have passed since it was supposed to happen. | |
370 | * The rest of the q elements (times > 0) are events yet to happen, | |
371 | * where the time for each is given as a delta from the previous. | |
372 | * Decrementing just the first of these serves to decrement the time | |
373 | * to all events. | |
374 | */ | |
375 | p1 = calltodo.c_next; | |
376 | while (p1) { | |
377 | if (--p1->c_time > 0) | |
378 | break; | |
379 | needsoft = 1; | |
380 | if (p1->c_time == 0) | |
381 | break; | |
382 | p1 = p1->c_next; | |
383 | } | |
384 | ||
385 | /* | |
386 | * Curproc (now in p) is null if no process is running. | |
387 | * We assume that curproc is set in user mode! | |
388 | */ | |
389 | if (p) | |
390 | pstats = p->p_stats; | |
391 | /* | |
392 | * Charge the time out based on the mode the cpu is in. | |
393 | * Here again we fudge for the lack of proper interval timers | |
394 | * assuming that the current state has been around at least | |
395 | * one tick. | |
396 | */ | |
397 | if (CLKF_USERMODE(&frame)) { | |
398 | if (pstats->p_prof.pr_scale) | |
399 | needsoft = 1; | |
400 | /* | |
401 | * CPU was in user state. Increment | |
402 | * user time counter, and process process-virtual time | |
403 | * interval timer. | |
404 | */ | |
405 | BUMPTIME(&p->p_utime, tick); | |
406 | if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && | |
407 | itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) | |
408 | psignal(p, SIGVTALRM); | |
409 | } else { | |
410 | /* | |
411 | * CPU was in system state. | |
412 | */ | |
413 | if (p) | |
414 | BUMPTIME(&p->p_stime, tick); | |
415 | } | |
416 | ||
a702c214 NW |
417 | /* bump the resource usage of integral space use */ |
418 | if (p && pstats && (ru = &pstats->p_ru) && (vm = p->p_vmspace)) { | |
d2ca7c11 DG |
419 | ru->ru_ixrss += vm->vm_tsize * NBPG / 1024; |
420 | ru->ru_idrss += vm->vm_dsize * NBPG / 1024; | |
421 | ru->ru_isrss += vm->vm_ssize * NBPG / 1024; | |
422 | if ((vm->vm_pmap.pm_stats.resident_count * NBPG / 1024) > | |
423 | ru->ru_maxrss) { | |
424 | ru->ru_maxrss = | |
425 | vm->vm_pmap.pm_stats.resident_count * NBPG / 1024; | |
426 | } | |
a702c214 NW |
427 | } |
428 | ||
15637ed4 RG |
429 | /* |
430 | * If the cpu is currently scheduled to a process, then | |
431 | * charge it with resource utilization for a tick, updating | |
432 | * statistics which run in (user+system) virtual time, | |
433 | * such as the cpu time limit and profiling timers. | |
434 | * This assumes that the current process has been running | |
435 | * the entire last tick. | |
436 | */ | |
437 | if (p) { | |
438 | if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) > | |
439 | p->p_rlimit[RLIMIT_CPU].rlim_cur) { | |
440 | psignal(p, SIGXCPU); | |
441 | if (p->p_rlimit[RLIMIT_CPU].rlim_cur < | |
442 | p->p_rlimit[RLIMIT_CPU].rlim_max) | |
443 | p->p_rlimit[RLIMIT_CPU].rlim_cur += 5; | |
444 | } | |
445 | if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && | |
446 | itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) | |
447 | psignal(p, SIGPROF); | |
448 | ||
449 | /* | |
450 | * We adjust the priority of the current process. | |
451 | * The priority of a process gets worse as it accumulates | |
452 | * CPU time. The cpu usage estimator (p_cpu) is increased here | |
453 | * and the formula for computing priorities (in kern_synch.c) | |
454 | * will compute a different value each time the p_cpu increases | |
455 | * by 4. The cpu usage estimator ramps up quite quickly when | |
456 | * the process is running (linearly), and decays away | |
457 | * exponentially, at a rate which is proportionally slower | |
458 | * when the system is busy. The basic principle is that the | |
459 | * system will 90% forget that a process used a lot of CPU | |
460 | * time in 5*loadav seconds. This causes the system to favor | |
461 | * processes which haven't run much recently, and to | |
462 | * round-robin among other processes. | |
463 | */ | |
464 | p->p_cpticks++; | |
465 | if (++p->p_cpu == 0) | |
466 | p->p_cpu--; | |
467 | if ((p->p_cpu&3) == 0) { | |
468 | setpri(p); | |
469 | if (p->p_pri >= PUSER) | |
470 | p->p_pri = p->p_usrpri; | |
471 | } | |
472 | } | |
473 | ||
474 | /* | |
475 | * If the alternate clock has not made itself known then | |
476 | * we must gather the statistics. | |
477 | */ | |
478 | if (phz == 0) | |
479 | gatherstats(&frame); | |
480 | ||
481 | /* | |
482 | * Increment the time-of-day, and schedule | |
483 | * processing of the callouts at a very low cpu priority, | |
484 | * so we don't keep the relatively high clock interrupt | |
485 | * priority any longer than necessary. | |
486 | */ | |
9e85cc83 GW |
487 | { |
488 | int delta; | |
489 | if (timedelta == 0) { | |
490 | delta = tick; | |
15637ed4 | 491 | } else { |
9e85cc83 GW |
492 | if (timedelta < 0) { |
493 | delta = tick - tickdelta; | |
494 | timedelta += tickdelta; | |
495 | } else { | |
496 | delta = tick + tickdelta; | |
497 | timedelta -= tickdelta; | |
498 | } | |
499 | } | |
500 | /* | |
501 | * Logic from ``Precision Time and Frequency Synchronization | |
502 | * Using Modified Kernels'' by David L. Mills, University | |
503 | * of Delaware. | |
504 | */ | |
505 | time_phase += time_adj; | |
506 | if(time_phase <= -FINEUSEC) { | |
507 | ltemp = -time_phase >> SHIFT_SCALE; | |
508 | time_phase += ltemp << SHIFT_SCALE; | |
509 | time_update -= ltemp; | |
510 | } else if(time_phase >= FINEUSEC) { | |
511 | ltemp = time_phase >> SHIFT_SCALE; | |
512 | time_phase -= ltemp << SHIFT_SCALE; | |
513 | time_update += ltemp; | |
514 | } | |
515 | ||
516 | time.tv_usec += delta + time_update; | |
517 | /* | |
518 | * On rollover of the second the phase adjustment to be used for | |
519 | * the next second is calculated. Also, the maximum error is | |
520 | * increased by the tolerance. If the PPS frequency discipline | |
521 | * code is present, the phase is increased to compensate for the | |
522 | * CPU clock oscillator frequency error. | |
523 | * | |
524 | * With SHIFT_SCALE = 23, the maximum frequency adjustment is | |
525 | * +-256 us per tick, or 25.6 ms/s at a clock frequency of 100 | |
526 | * Hz. The time contribution is shifted right a minimum of two | |
527 | * bits, while the frequency contribution is a right shift. | |
528 | * Thus, overflow is prevented if the frequency contribution is | |
529 | * limited to half the maximum or 15.625 ms/s. | |
530 | */ | |
531 | if (time.tv_usec >= 1000000) { | |
532 | time.tv_usec -= 1000000; | |
533 | time.tv_sec++; | |
534 | ntp_pll.maxerror += ntp_pll.tolerance >> SHIFT_USEC; | |
535 | if (ntp_pll.offset < 0) { | |
536 | ltemp = -ntp_pll.offset >> | |
537 | (SHIFT_KG + ntp_pll.time_constant); | |
538 | ntp_pll.offset += ltemp; | |
539 | time_adj = -ltemp << | |
540 | (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); | |
541 | } else { | |
542 | ltemp = ntp_pll.offset >> | |
543 | (SHIFT_KG + ntp_pll.time_constant); | |
544 | ntp_pll.offset -= ltemp; | |
545 | time_adj = ltemp << | |
546 | (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); | |
547 | } | |
548 | #ifdef PPS_SYNC | |
549 | /* | |
550 | * Grow the pps error by pps_dispinc ppm and clamp to | |
551 | * MAXFREQ. The hardpps() routine will pull it down as | |
552 | * long as the PPS signal is good. | |
553 | */ | |
554 | ntp_pll.disp += pps_dispinc; | |
555 | if (ntp_pll.disp > MAXFREQ) | |
556 | ntp_pll.disp = MAXFREQ; | |
557 | ltemp = ntp_pll.frequency + ntp_pll.ybar; | |
558 | #else | |
559 | ltemp = ntp_pll.frequency; | |
560 | #endif /* PPS_SYNC */ | |
561 | if (ltemp < 0) | |
562 | time_adj -= -ltemp >> | |
563 | (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); | |
564 | else | |
565 | time_adj += ltemp >> | |
566 | (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); | |
567 | #if 0 | |
568 | time_adj += fixtick << (SHIFT_SCALE - SHIFT_HZ); | |
569 | #endif | |
570 | ||
571 | /* | |
572 | * When the CPU clock oscillator frequency is not a | |
573 | * power of two in Hz, the SHIFT_HZ is only an | |
574 | * approximate scale factor. In the SunOS kernel, this | |
575 | * results in a PLL gain factor of 1/1.28 = 0.78 of what it | |
576 | * should be. In the following code the overall gain is | |
577 | * increased by a factor of 1.25, which results in a | |
578 | * residual error less than 3 percent. | |
579 | */ | |
580 | if (hz == 100) { | |
581 | if (time_adj < 0) | |
582 | time_adj -= -time_adj >> 2; | |
583 | else | |
584 | time_adj += time_adj >> 2; | |
585 | } | |
15637ed4 | 586 | } |
15637ed4 | 587 | } |
9e85cc83 | 588 | |
15637ed4 RG |
589 | if (needsoft) { |
590 | #if 0 | |
591 | /* | |
592 | * XXX - hardclock runs at splhigh, so the splsoftclock is useless and | |
593 | * softclock runs at splhigh as well if we do this. It is not much of | |
594 | * an optimization, since the "software interrupt" is done with a call | |
595 | * from doreti, and the overhead of checking there is sometimes less | |
596 | * than checking here. Moreover, the whole %$$%$^ frame is passed by | |
597 | * value here. | |
598 | */ | |
599 | if (CLKF_BASEPRI(&frame)) { | |
600 | /* | |
601 | * Save the overhead of a software interrupt; | |
602 | * it will happen as soon as we return, so do it now. | |
603 | */ | |
604 | (void) splsoftclock(); | |
605 | softclock(frame); | |
606 | } else | |
607 | #endif | |
608 | setsoftclock(); | |
609 | } | |
610 | } | |
611 | ||
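The delta computation at the top of hardclock() is what spreads an adjtime()-style correction over many ticks. A stand-alone sketch with illustrative numbers (hz, tick and tickdelta here are assumptions; tickdelta is chosen by the adjtime() implementation, not by this file):

```c
#include <stdio.h>

int
main(void)
{
	long tick = 10000;	/* us per tick at an assumed hz = 100 */
	long tickdelta = 5;	/* assumed slew applied per tick (us) */
	long timedelta = 1000;	/* outstanding correction requested (us) */
	long ticks = 0, delta;

	/* Same branch structure as hardclock(): nudge each tick by
	 * tickdelta and run timedelta toward zero. */
	while (timedelta != 0) {
		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		ticks++;
		(void)delta;	/* this is what is added to time.tv_usec */
	}
	printf("correction consumed after %ld ticks (%.1f s at hz = 100)\n",
	    ticks, ticks / 100.0);
	return (0);
}
```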
612 | int dk_ndrive = DK_NDRIVE; | |
613 | /* | |
614 | * Gather statistics on resource utilization. | |
615 | * | |
616 | * We make a gross assumption: that the system has been in the | |
617 | * state it is in (user state, kernel state, interrupt state, | |
618 | * or idle state) for the entire last time interval, and | |
619 | * update statistics accordingly. | |
620 | */ | |
4c45483e | 621 | void |
15637ed4 RG |
622 | gatherstats(framep) |
623 | clockframe *framep; | |
624 | { | |
625 | register int cpstate, s; | |
626 | ||
627 | /* | |
628 | * Determine what state the cpu is in. | |
629 | */ | |
630 | if (CLKF_USERMODE(framep)) { | |
631 | /* | |
632 | * CPU was in user state. | |
633 | */ | |
634 | if (curproc->p_nice > NZERO) | |
635 | cpstate = CP_NICE; | |
636 | else | |
637 | cpstate = CP_USER; | |
638 | } else { | |
639 | /* | |
640 | * CPU was in system state. If profiling kernel | |
641 | * increment a counter. If no process is running | |
642 | * then this is a system tick if we were running | |
643 | * at a non-zero IPL (in a driver). If a process is running, | |
644 | * then we charge it with system time even if we were | |
645 | * at a non-zero IPL, since the system often runs | |
646 | * this way during processing of system calls. | |
647 | * This is approximate, but the lack of true interval | |
648 | * timers makes doing anything else difficult. | |
649 | */ | |
650 | cpstate = CP_SYS; | |
651 | if (curproc == NULL && CLKF_BASEPRI(framep)) | |
652 | cpstate = CP_IDLE; | |
653 | #ifdef GPROF | |
654 | s = (u_long) CLKF_PC(framep) - (u_long) s_lowpc; | |
655 | if (profiling < 2 && s < s_textsize) | |
656 | kcount[s / (HISTFRACTION * sizeof (*kcount))]++; | |
657 | #endif | |
658 | } | |
659 | /* | |
660 | * We maintain statistics shown by user-level statistics | |
661 | * programs: the amount of time in each cpu state, and | |
662 | * the amount of time each of DK_NDRIVE ``drives'' is busy. | |
663 | */ | |
664 | cp_time[cpstate]++; | |
665 | for (s = 0; s < DK_NDRIVE; s++) | |
666 | if (dk_busy&(1<<s)) | |
667 | dk_time[s]++; | |
668 | } | |
669 | ||
670 | /* | |
671 | * Software priority level clock interrupt. | |
672 | * Run periodic events from timeout queue. | |
673 | */ | |
674 | /*ARGSUSED*/ | |
4c45483e | 675 | void |
15637ed4 RG |
676 | softclock(frame) |
677 | clockframe frame; | |
678 | { | |
679 | ||
680 | for (;;) { | |
681 | register struct callout *p1; | |
682 | register caddr_t arg; | |
4c45483e | 683 | register timeout_func_t func; |
15637ed4 RG |
684 | register int a, s; |
685 | ||
686 | s = splhigh(); | |
687 | if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) { | |
688 | splx(s); | |
689 | break; | |
690 | } | |
691 | arg = p1->c_arg; func = p1->c_func; a = p1->c_time; | |
692 | calltodo.c_next = p1->c_next; | |
693 | p1->c_next = callfree; | |
694 | callfree = p1; | |
695 | splx(s); | |
696 | (*func)(arg, a); | |
697 | } | |
698 | ||
699 | /* | |
700 | * If no process to work with, we're finished. | |
701 | */ | |
702 | if (curproc == 0) return; | |
703 | ||
704 | /* | |
705 | * If trapped user-mode and profiling, give it | |
706 | * a profiling tick. | |
707 | */ | |
708 | if (CLKF_USERMODE(&frame)) { | |
709 | register struct proc *p = curproc; | |
710 | ||
711 | if (p->p_stats->p_prof.pr_scale) | |
712 | profile_tick(p, &frame); | |
713 | /* | |
714 | * Check to see if process has accumulated | |
715 | * more than 10 minutes of user time. If so | |
716 | * reduce priority to give others a chance. | |
717 | */ | |
718 | if (p->p_ucred->cr_uid && p->p_nice == NZERO && | |
719 | p->p_utime.tv_sec > 10 * 60) { | |
720 | p->p_nice = NZERO + 4; | |
721 | setpri(p); | |
722 | p->p_pri = p->p_usrpri; | |
723 | } | |
724 | } | |
725 | } | |
726 | ||
727 | /* | |
728 | * Arrange that (*func)(arg) is called in t/hz seconds. | |
729 | */ | |
4c45483e | 730 | void |
15637ed4 | 731 | timeout(func, arg, t) |
4c45483e | 732 | timeout_func_t func; |
15637ed4 RG |
733 | caddr_t arg; |
734 | register int t; | |
735 | { | |
736 | register struct callout *p1, *p2, *pnew; | |
737 | register int s = splhigh(); | |
738 | ||
739 | if (t <= 0) | |
740 | t = 1; | |
741 | pnew = callfree; | |
742 | if (pnew == NULL) | |
743 | panic("timeout table overflow"); | |
744 | callfree = pnew->c_next; | |
745 | pnew->c_arg = arg; | |
746 | pnew->c_func = func; | |
747 | for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) | |
748 | if (p2->c_time > 0) | |
749 | t -= p2->c_time; | |
750 | p1->c_next = pnew; | |
751 | pnew->c_next = p2; | |
752 | pnew->c_time = t; | |
753 | if (p2) | |
754 | p2->c_time -= t; | |
755 | splx(s); | |
756 | } | |
757 | ||
758 | /* | |
759 | * untimeout is called to remove a function timeout call | |
760 | * from the callout structure. | |
761 | */ | |
4c45483e | 762 | void |
15637ed4 | 763 | untimeout(func, arg) |
4c45483e | 764 | timeout_func_t func; |
15637ed4 RG |
765 | caddr_t arg; |
766 | { | |
767 | register struct callout *p1, *p2; | |
768 | register int s; | |
769 | ||
770 | s = splhigh(); | |
771 | for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) { | |
772 | if (p2->c_func == func && p2->c_arg == arg) { | |
773 | if (p2->c_next && p2->c_time > 0) | |
774 | p2->c_next->c_time += p2->c_time; | |
775 | p1->c_next = p2->c_next; | |
776 | p2->c_next = callfree; | |
777 | callfree = p2; | |
778 | break; | |
779 | } | |
780 | } | |
781 | splx(s); | |
782 | } | |
783 | ||
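A hedged usage sketch for the two entry points above, as they might appear in a driver: the device names and handler are hypothetical, and the handler signature (argument plus the residual tick count that softclock() passes) is an assumption about timeout_func_t. Only the calling pattern, ticks expressed as multiples of hz and cancellation keyed on the same func/arg pair, is taken from this file.

```c
/* Hypothetical driver fragment; timeout(), untimeout(), hz and caddr_t
 * come from the surrounding kernel environment. */
static void	mydev_watchdog(caddr_t arg, int ticks_late);

static void
mydev_start(caddr_t softc)
{
	/* run mydev_watchdog(softc, ...) roughly two seconds from now */
	timeout(mydev_watchdog, softc, 2 * hz);
}

static void
mydev_stop(caddr_t softc)
{
	/* remove it again; untimeout() matches on both func and arg */
	untimeout(mydev_watchdog, softc);
}
```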
784 | /* | |
785 | * Compute number of hz until specified time. | |
786 | * Used to compute third argument to timeout() from an | |
787 | * absolute time. | |
788 | */ | |
c95f3c5b AC |
789 | |
790 | /* XXX clock_t */ | |
4c45483e | 791 | u_long |
15637ed4 RG |
792 | hzto(tv) |
793 | struct timeval *tv; | |
794 | { | |
c95f3c5b | 795 | register unsigned long ticks; |
15637ed4 | 796 | register long sec; |
c95f3c5b AC |
797 | register long usec; |
798 | int s; | |
15637ed4 RG |
799 | |
800 | /* | |
c95f3c5b AC |
801 | * If the number of usecs in the whole seconds part of the time |
802 | * difference fits in a long, then the total number of usecs will | |
803 | * fit in an unsigned long. Compute the total and convert it to | |
804 | * ticks, rounding up and adding 1 to allow for the current tick | |
805 | * to expire. Rounding also depends on unsigned long arithmetic | |
806 | * to avoid overflow. | |
807 | * | |
808 | * Otherwise, if the number of ticks in the whole seconds part of | |
809 | * the time difference fits in a long, then convert the parts to | |
810 | * ticks separately and add, using similar rounding methods and | |
811 | * overflow avoidance. This method would work in the previous | |
812 | * case but it is slightly slower and assumes that hz is integral. | |
15637ed4 | 813 | * |
c95f3c5b AC |
814 | * Otherwise, round the time difference down to the maximum |
815 | * representable value. | |
816 | * | |
817 | * Maximum value for any timeout in 10ms ticks is 248 days. | |
15637ed4 | 818 | */ |
c95f3c5b | 819 | s = splhigh(); |
15637ed4 | 820 | sec = tv->tv_sec - time.tv_sec; |
c95f3c5b | 821 | usec = tv->tv_usec - time.tv_usec; |
da6d4c06 | 822 | splx(s); |
c95f3c5b AC |
823 | if (usec < 0) { |
824 | sec--; | |
825 | usec += 1000000; | |
826 | } | |
827 | if (sec < 0) { | |
90945f8b | 828 | #ifdef DIAGNOSTIC |
c95f3c5b AC |
829 | printf("hzto: negative time difference %ld sec %ld usec\n", |
830 | sec, usec); | |
831 | #endif | |
832 | ticks = 1; | |
833 | } else if (sec <= LONG_MAX / 1000000) | |
834 | ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) | |
835 | / tick + 1; | |
836 | else if (sec <= LONG_MAX / hz) | |
837 | ticks = sec * hz | |
838 | + ((unsigned long)usec + (tick - 1)) / tick + 1; | |
839 | else | |
840 | ticks = LONG_MAX; | |
841 | #define CLOCK_T_MAX INT_MAX /* XXX should be ULONG_MAX */ | |
842 | if (ticks > CLOCK_T_MAX) | |
843 | ticks = CLOCK_T_MAX; | |
15637ed4 RG |
844 | return (ticks); |
845 | } | |
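A short sketch tying hzto() to timeout(): converting an absolute deadline into the relative tick count the callout code expects (the wrapper and handler names are hypothetical):

```c
/* Hypothetical helper: fire func(arg) at the absolute time *deadline. */
static void
schedule_at(struct timeval *deadline, timeout_func_t func, caddr_t arg)
{
	/* hzto() turns "now until *deadline" into ticks, rounded up and
	 * clamped, which is exactly the third argument timeout() takes. */
	timeout(func, arg, (int)hzto(deadline));
}
```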
9e85cc83 GW |
846 | |
847 | #ifdef PPS_SYNC | |
848 | /* | |
849 | * hardpps() - discipline CPU clock oscillator to external pps signal | |
850 | * | |
851 | * This routine is called at each PPS interrupt in order to discipline | |
852 | * the CPU clock oscillator to the PPS signal. It integrates successive | |
853 | * phase differences between the two oscillators and calculates the | |
854 | * frequency offset. This is used in hardclock() to discipline the CPU | |
855 | * clock oscillator so that intrinsic frequency error is cancelled out. | |
856 | * The code requires the caller to capture the time and hardware | |
857 | * counter value at the designated PPS signal transition. | |
858 | */ | |
859 | void | |
860 | hardpps(tvp, usec) | |
861 | struct timeval *tvp; /* time at PPS */ | |
862 | long usec; /* hardware counter at PPS */ | |
863 | { | |
864 | long u_usec, v_usec, bigtick; | |
865 | long cal_sec, cal_usec; | |
866 | ||
867 | /* | |
868 | * During the calibration interval adjust the starting time when | |
869 | * the tick overflows. At the end of the interval compute the | |
870 | * duration of the interval and the difference of the hardware | |
871 | * counters at the beginning and end of the interval. This code | |
872 | * is deliciously complicated by the fact that valid differences may | |
873 | * exceed the value of tick when using long calibration | |
874 | * intervals and small ticks. Note that the counter can be | |
875 | * greater than tick if caught at just the wrong instant, but | |
876 | * the values returned and used here are correct. | |
877 | */ | |
878 | bigtick = (long)tick << SHIFT_USEC; | |
879 | pps_usec -= ntp_pll.ybar; | |
880 | if (pps_usec >= bigtick) | |
881 | pps_usec -= bigtick; | |
882 | if (pps_usec < 0) | |
883 | pps_usec += bigtick; | |
884 | pps_time.tv_sec++; | |
885 | pps_count++; | |
886 | if (pps_count < (1 << pps_shift)) | |
887 | return; | |
888 | pps_count = 0; | |
889 | ntp_pll.calcnt++; | |
890 | u_usec = usec << SHIFT_USEC; | |
891 | v_usec = pps_usec - u_usec; | |
892 | if (v_usec >= bigtick >> 1) | |
893 | v_usec -= bigtick; | |
894 | if (v_usec < -(bigtick >> 1)) | |
895 | v_usec += bigtick; | |
896 | if (v_usec < 0) | |
897 | v_usec = -(-v_usec >> ntp_pll.shift); | |
898 | else | |
899 | v_usec = v_usec >> ntp_pll.shift; | |
900 | pps_usec = u_usec; | |
901 | cal_sec = tvp->tv_sec; | |
902 | cal_usec = tvp->tv_usec; | |
903 | cal_sec -= pps_time.tv_sec; | |
904 | cal_usec -= pps_time.tv_usec; | |
905 | if (cal_usec < 0) { | |
906 | cal_usec += 1000000; | |
907 | cal_sec--; | |
908 | } | |
909 | pps_time = *tvp; | |
910 | ||
911 | /* | |
912 | * Check for lost interrupts, noise, excessive jitter and | |
913 | * excessive frequency error. The number of timer ticks during | |
914 | * the interval may vary +-1 tick. Add to this a margin of one | |
915 | * tick for the PPS signal jitter and maximum frequency | |
916 | * deviation. If the limits are exceeded, the calibration | |
917 | * interval is reset to the minimum and we start over. | |
918 | */ | |
919 | u_usec = (long)tick << 1; | |
920 | if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec)) | |
921 | || (cal_sec == 0 && cal_usec < u_usec)) | |
922 | || v_usec > ntp_pll.tolerance || v_usec < -ntp_pll.tolerance) { | |
923 | ntp_pll.jitcnt++; | |
924 | ntp_pll.shift = PPS_SHIFT; | |
925 | pps_dispinc = PPS_DISPINC; | |
926 | ntp_pll.intcnt = 0; | |
927 | return; | |
928 | } | |
929 | ||
930 | /* | |
931 | * A three-stage median filter is used to help deglitch the pps | |
932 | * signal. The median sample becomes the offset estimate; the | |
933 | * difference between the other two samples becomes the | |
934 | * dispersion estimate. | |
935 | */ | |
936 | pps_mf[2] = pps_mf[1]; | |
937 | pps_mf[1] = pps_mf[0]; | |
938 | pps_mf[0] = v_usec; | |
939 | if (pps_mf[0] > pps_mf[1]) { | |
940 | if (pps_mf[1] > pps_mf[2]) { | |
941 | u_usec = pps_mf[1]; /* 0 1 2 */ | |
942 | v_usec = pps_mf[0] - pps_mf[2]; | |
943 | } else if (pps_mf[2] > pps_mf[0]) { | |
944 | u_usec = pps_mf[0]; /* 2 0 1 */ | |
945 | v_usec = pps_mf[2] - pps_mf[1]; | |
946 | } else { | |
947 | u_usec = pps_mf[2]; /* 0 2 1 */ | |
948 | v_usec = pps_mf[0] - pps_mf[1]; | |
949 | } | |
950 | } else { | |
951 | if (pps_mf[1] < pps_mf[2]) { | |
952 | u_usec = pps_mf[1]; /* 2 1 0 */ | |
953 | v_usec = pps_mf[2] - pps_mf[0]; | |
954 | } else if (pps_mf[2] < pps_mf[0]) { | |
955 | u_usec = pps_mf[0]; /* 1 0 2 */ | |
956 | v_usec = pps_mf[1] - pps_mf[2]; | |
957 | } else { | |
958 | u_usec = pps_mf[2]; /* 1 2 0 */ | |
959 | v_usec = pps_mf[1] - pps_mf[0]; | |
960 | } | |
961 | } | |
962 | ||
963 | /* | |
964 | * Here the dispersion average is updated. If it is less than | |
965 | * the threshold pps_dispmax, the frequency average is updated | |
966 | * as well, but clamped to the tolerance. | |
967 | */ | |
968 | v_usec = (v_usec >> 1) - ntp_pll.disp; | |
969 | if (v_usec < 0) | |
970 | ntp_pll.disp -= -v_usec >> PPS_AVG; | |
971 | else | |
972 | ntp_pll.disp += v_usec >> PPS_AVG; | |
973 | if (ntp_pll.disp > pps_dispmax) { | |
974 | ntp_pll.discnt++; | |
975 | return; | |
976 | } | |
977 | if (u_usec < 0) { | |
978 | ntp_pll.ybar -= -u_usec >> PPS_AVG; | |
979 | if (ntp_pll.ybar < -ntp_pll.tolerance) | |
980 | ntp_pll.ybar = -ntp_pll.tolerance; | |
981 | u_usec = -u_usec; | |
982 | } else { | |
983 | ntp_pll.ybar += u_usec >> PPS_AVG; | |
984 | if (ntp_pll.ybar > ntp_pll.tolerance) | |
985 | ntp_pll.ybar = ntp_pll.tolerance; | |
986 | } | |
987 | ||
988 | /* | |
989 | * Here the calibration interval is adjusted. If the maximum | |
990 | * time difference is greater than tick/4, reduce the interval | |
991 | * by half. If this is not the case for four consecutive | |
992 | * intervals, double the interval. | |
993 | */ | |
994 | if (u_usec << ntp_pll.shift > bigtick >> 2) { | |
995 | ntp_pll.intcnt = 0; | |
996 | if (ntp_pll.shift > PPS_SHIFT) { | |
997 | ntp_pll.shift--; | |
998 | pps_dispinc <<= 1; | |
999 | } | |
1000 | } else if (ntp_pll.intcnt >= 4) { | |
1001 | ntp_pll.intcnt = 0; | |
1002 | if (ntp_pll.shift < PPS_SHIFTMAX) { | |
1003 | ntp_pll.shift++; | |
1004 | pps_dispinc >>= 1; | |
1005 | } | |
1006 | } else | |
1007 | ntp_pll.intcnt++; | |
1008 | } | |
1009 | #endif /* PPS_SYNC */ |
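For completeness, a minimal sketch of the calling convention hardpps() documents above: the driver captures both the kernel time and the raw microsecond counter at the PPS edge and hands them over. The driver hook and counter-read function named here are hypothetical; only microtime() and hardpps() come from the kernel.

```c
#ifdef PPS_SYNC
/* Hypothetical interrupt-side hook in a tty/ppsclock driver. */
static void
mydriver_pps_edge(void)
{
	struct timeval tv;
	long counter_usec;

	microtime(&tv);				/* kernel time at the PPS edge */
	counter_usec = mydriver_read_counter();	/* hypothetical raw counter (us) */
	hardpps(&tv, counter_usec);
}
#endif /* PPS_SYNC */
```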