/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)kern_clock.c	7.16 (Berkeley) 5/9/91
 *	$Id: kern_clock.c,v 1.15 1994/04/02 08:39:20 davidg Exp $
 */

/* Portions of this software are covered by the following: */
/******************************************************************************
 *                                                                            *
 * Copyright (c) David L. Mills 1993, 1994                                    *
 *                                                                            *
 * Permission to use, copy, modify, and distribute this software and its     *
 * documentation for any purpose and without fee is hereby granted, provided *
 * that the above copyright notice appears in all copies and that both the   *
 * copyright notice and this permission notice appear in supporting          *
 * documentation, and that the name University of Delaware not be used in    *
 * advertising or publicity pertaining to distribution of the software       *
 * without specific, written prior permission.  The University of Delaware   *
 * makes no representations about the suitability of this software for any   *
 * purpose.  It is provided "as is" without express or implied warranty.     *
 *                                                                            *
 *****************************************************************************/


#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "kernel.h"
#include "proc.h"
#include "signalvar.h"
#include "resourcevar.h"
#include "timex.h"

#include "machine/cpu.h"

#include "resource.h"
#include "vm/vm.h"

#ifdef GPROF
#include "gprof.h"
#endif

static void gatherstats(clockframe *);

/* From callout.h */
struct callout *callfree, *callout, calltodo;
int ncallout;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

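/*
 * Typical use, as in hardclock() below: charge one tick of user time
 * to the current process.  Illustrative only:
 *
 *	BUMPTIME(&p->p_utime, tick);
 */
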
/*
 * Phase-lock loop (PLL) definitions
 *
 * The following variables are read and set by the ntp_adjtime() system
 * call.
 *
 * time_state shows the state of the system clock, with values defined
 * in the timex.h header file.
 *
 * time_status shows the status of the system clock, with bits defined
 * in the timex.h header file.
 *
 * time_offset is used by the PLL to adjust the system time in small
 * increments.
 *
 * time_constant determines the bandwidth or "stiffness" of the PLL.
 *
 * time_tolerance determines maximum frequency error or tolerance of the
 * CPU clock oscillator and is a property of the architecture; however,
 * in principle it could change as a result of the presence of external
 * discipline signals, for instance.
 *
 * time_precision is usually equal to the kernel tick variable; however,
 * in cases where a precision clock counter or external clock is
 * available, the resolution can be much less than this and depend on
 * whether the external clock is working or not.
 *
 * time_maxerror is initialized by an ntp_adjtime() call and increased by
 * the kernel once each second to reflect the maximum error bound growth.
 *
 * time_esterror is set and read by the ntp_adjtime() call, but
 * otherwise not used by the kernel.
 */
int time_status = STA_UNSYNC;	/* clock status bits */
int time_state = TIME_OK;	/* clock state */
long time_offset = 0;		/* time offset (us) */
long time_constant = 0;		/* pll time constant */
long time_tolerance = MAXFREQ;	/* frequency tolerance (scaled ppm) */
long time_precision = 1;	/* clock precision (us) */
long time_maxerror = MAXPHASE;	/* maximum error (us) */
long time_esterror = MAXPHASE;	/* estimated error (us) */

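/*
 * Scaling note (illustrative; assumes the conventional timex.h value
 * SHIFT_USEC = 16 for "scaled ppm"): a frequency quantity of +100 ppm
 * would be represented as the scaled value
 *
 *	100L << SHIFT_USEC	(== 6553600)
 *
 * and converted back to ppm by an arithmetic shift right of SHIFT_USEC.
 */
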
/*
 * The following variables establish the state of the PLL and the
 * residual time and frequency offset of the local clock.  The scale
 * factors are defined in the timex.h header file.
 *
 * time_phase and time_freq are the phase increment and the frequency
 * increment, respectively, of the kernel time variable at each tick of
 * the clock.
 *
 * time_freq is set via ntp_adjtime() from a value stored in a file when
 * the synchronization daemon is first started.  Its value is retrieved
 * via ntp_adjtime() and written to the file about once per hour by the
 * daemon.
 *
 * time_adj is the adjustment added to the value of tick at each timer
 * interrupt and is recomputed at each timer interrupt.
 *
 * time_reftime is the seconds portion of the system time on the last
 * call to ntp_adjtime().  It is used to adjust the time_freq variable
 * and to increase the time_maxerror as the time since last update
 * increases.
 */
long time_phase = 0;		/* phase offset (scaled us) */
long time_freq = 0;		/* frequency offset (scaled ppm) */
long time_adj = 0;		/* tick adjust (scaled 1 / hz) */
long time_reftime = 0;		/* time at last adjustment (s) */

#ifdef PPS_SYNC
/*
 * The following variables are used only if the kernel PPS discipline
 * code is configured (PPS_SYNC).  The scale factors are defined in the
 * timex.h header file.
 *
 * pps_time contains the time at each calibration interval, as read by
 * microtime().
 *
 * pps_offset is the time offset produced by the time median filter
 * pps_tf[], while pps_jitter is the dispersion measured by this
 * filter.
 *
 * pps_freq is the frequency offset produced by the frequency median
 * filter pps_ff[], while pps_stabil is the dispersion measured by
 * this filter.
 *
 * pps_usec is latched from a high resolution counter or external clock
 * at pps_time.  Here we want the hardware counter contents only, not the
 * contents plus the time_tv.usec as usual.
 *
 * pps_valid counts the number of seconds since the last PPS update.  It
 * is used as a watchdog timer to disable the PPS discipline should the
 * PPS signal be lost.
 *
 * pps_glitch counts the number of seconds since the beginning of an
 * offset burst more than tick/2 from the current nominal offset.  It is
 * used mainly to suppress error bursts due to priority conflicts between
 * the PPS interrupt and timer interrupt.
 *
 * pps_count counts the seconds of the calibration interval, the
 * duration of which is pps_shift in powers of two.
 *
 * pps_intcnt counts the calibration intervals for use in the interval-
 * adaptation algorithm.  It's just too complicated for words.
 */
struct timeval pps_time;	/* kernel time at last interval */
long pps_offset = 0;		/* pps time offset (us) */
long pps_jitter = MAXTIME;	/* pps time dispersion (jitter) (us) */
long pps_tf[] = {0, 0, 0};	/* pps time offset median filter (us) */
long pps_freq = 0;		/* frequency offset (scaled ppm) */
long pps_stabil = MAXFREQ;	/* frequency dispersion (scaled ppm) */
long pps_ff[] = {0, 0, 0};	/* frequency offset median filter */
long pps_usec = 0;		/* microsec counter at last interval */
long pps_valid = PPS_VALID;	/* pps signal watchdog counter */
int pps_glitch = 0;		/* pps signal glitch counter */
int pps_count = 0;		/* calibration interval counter (s) */
int pps_shift = PPS_SHIFT;	/* interval duration (s) (shift) */
int pps_intcnt = 0;		/* intervals at current duration */

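/*
 * Illustrative note: the calibration interval is (1 << pps_shift)
 * seconds, so if PPS_SHIFT is the conventional 2 it starts at 4 s and,
 * as pps_shift is bumped by the interval-adaptation code in hardpps(),
 * can grow toward the 256 s mentioned with the quality monitors below.
 */
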
/*
 * PPS signal quality monitors
 *
 * pps_jitcnt counts the seconds that have been discarded because the
 * jitter measured by the time median filter exceeds the limit MAXTIME
 * (100 us).
 *
 * pps_calcnt counts the frequency calibration intervals, which are
 * variable from 4 s to 256 s.
 *
 * pps_errcnt counts the calibration intervals which have been discarded
 * because the wander exceeds the limit MAXFREQ (100 ppm) or where the
 * calibration interval jitter exceeds two ticks.
 *
 * pps_stbcnt counts the calibration intervals that have been discarded
 * because the frequency wander exceeds the limit MAXFREQ / 4 (25 ppm).
 */
long pps_jitcnt = 0;		/* jitter limit exceeded */
long pps_calcnt = 0;		/* calibration intervals */
long pps_errcnt = 0;		/* calibration errors */
long pps_stbcnt = 0;		/* stability limit exceeded */
#endif /* PPS_SYNC */

/* XXX none of this stuff works under FreeBSD */
#ifdef EXT_CLOCK
/*
 * External clock definitions
 *
 * The following definitions and declarations are used only if an
 * external clock (HIGHBALL or TPRO) is configured on the system.
 */
#define CLOCK_INTERVAL 30	/* CPU clock update interval (s) */

/*
 * The clock_count variable is set to CLOCK_INTERVAL at each PPS
 * interrupt and decremented once each second.
 */
int clock_count = 0;		/* CPU clock counter */

#ifdef HIGHBALL
/*
 * The clock_offset and clock_cpu variables are used by the HIGHBALL
 * interface.  The clock_offset variable defines the offset between
 * system time and the HIGHBALL counters.  The clock_cpu variable
 * contains the offset between the system clock and the HIGHBALL clock
 * for use in disciplining the kernel time variable.
 */
extern struct timeval clock_offset;	/* Highball clock offset */
long clock_cpu = 0;		/* CPU clock adjust */
#endif /* HIGHBALL */
#endif /* EXT_CLOCK */

/*
 * hardupdate() - local clock update
 *
 * This routine is called by ntp_adjtime() to update the local clock
 * phase and frequency.  This is used to implement an adaptive-parameter,
 * first-order, type-II phase-lock loop.  The code computes new time and
 * frequency offsets each time it is called.  The hardclock() routine
 * amortizes these offsets at each tick interrupt.  If the kernel PPS
 * discipline code is configured (PPS_SYNC), the PPS signal itself
 * determines the new time offset, instead of the calling argument.
 * Presumably, calls to ntp_adjtime() occur only when the caller
 * believes the local clock is valid within some bound (+-128 ms with
 * NTP).  If the caller's time is far different from the PPS time, an
 * argument will ensue, and it's not clear who will lose.
 *
 * For default SHIFT_UPDATE = 12, the offset is limited to +-512 ms, the
 * maximum interval between updates is 4096 s and the maximum frequency
 * offset is +-31.25 ms/s.
 *
 * Note: splclock() is in effect.
 */
void
hardupdate(offset)
	long offset;
{
	long ltemp, mtemp;

	if (!(time_status & STA_PLL) && !(time_status & STA_PPSTIME))
		return;
	ltemp = offset;
#ifdef PPS_SYNC
	if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
		ltemp = pps_offset;
#endif /* PPS_SYNC */
	if (ltemp > MAXPHASE)
		time_offset = MAXPHASE << SHIFT_UPDATE;
	else if (ltemp < -MAXPHASE)
		time_offset = -(MAXPHASE << SHIFT_UPDATE);
	else
		time_offset = ltemp << SHIFT_UPDATE;
	mtemp = time.tv_sec - time_reftime;
	time_reftime = time.tv_sec;
	if (mtemp > MAXSEC)
		mtemp = 0;

	/* ugly multiply should be replaced */
	if (ltemp < 0)
		time_freq -= (-ltemp * mtemp) >> (time_constant +
		    time_constant + SHIFT_KF - SHIFT_USEC);
	else
		time_freq += (ltemp * mtemp) >> (time_constant +
		    time_constant + SHIFT_KF - SHIFT_USEC);
	if (time_freq > time_tolerance)
		time_freq = time_tolerance;
	else if (time_freq < -time_tolerance)
		time_freq = -time_tolerance;
}
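
/*
 * Worked example (illustrative; assumes the conventional timex.h values
 * SHIFT_KF = 16 and SHIFT_USEC = 16): with time_constant = 0, an offset
 * of +100 us applied 64 s after the previous update gives
 *
 *	time_freq += (100 * 64) >> (0 + 0 + 16 - 16)  ==  6400
 *
 * or about 0.1 ppm (6400 / 2^16) of frequency correction per call.
 */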


/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
void
hardclock(frame)
	clockframe frame;
{
	register struct callout *p1;
	register struct proc *p = curproc;
	register struct pstats *pstats = 0;
	register struct rusage *ru;
	register struct vmspace *vm;
	register int s;
	int needsoft = 0;
	extern int tickdelta;
	extern long timedelta;
	long ltemp, time_update = 0;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
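	/*
	 * Illustrative example: events due in 5, 7, 7 and 10 ticks are
	 * kept on the queue as the deltas 5, 2, 0, 3.
	 */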
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Curproc (now in p) is null if no process is running.
	 * We assume that curproc is set in user mode!
	 */
	if (p)
		pstats = p->p_stats;
	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		if (pstats->p_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (p)
			BUMPTIME(&p->p_stime, tick);
	}

	/* bump the resource usage of integral space use */
	if (p && pstats && (ru = &pstats->p_ru) && (vm = p->p_vmspace)) {
		ru->ru_ixrss += vm->vm_tsize * NBPG / 1024;
		ru->ru_idrss += vm->vm_dsize * NBPG / 1024;
		ru->ru_isrss += vm->vm_ssize * NBPG / 1024;
		if ((vm->vm_pmap.pm_stats.resident_count * NBPG / 1024) >
		    ru->ru_maxrss) {
			ru->ru_maxrss =
			    vm->vm_pmap.pm_stats.resident_count * NBPG / 1024;
		}
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (p) {
		if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) >
		    p->p_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(p, SIGXCPU);
			if (p->p_rlimit[RLIMIT_CPU].rlim_cur <
			    p->p_rlimit[RLIMIT_CPU].rlim_max)
				p->p_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);

		/*
		 * We adjust the priority of the current process.
		 * The priority of a process gets worse as it accumulates
		 * CPU time.  The cpu usage estimator (p_cpu) is increased here
		 * and the formula for computing priorities (in kern_synch.c)
		 * will compute a different value each time the p_cpu increases
		 * by 4.  The cpu usage estimator ramps up quite quickly when
		 * the process is running (linearly), and decays away
		 * exponentially, at a rate which is proportionally slower
		 * when the system is busy.  The basic principle is that the
		 * system will 90% forget that a process used a lot of CPU
		 * time in 5*loadav seconds.  This causes the system to favor
		 * processes which haven't run much recently, and to
		 * round-robin among other processes.
		 */
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(&frame);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	{
		if (timedelta == 0) {
			time_update = tick;
		} else {
			if (timedelta < 0) {
				time_update = tick - tickdelta;
				timedelta += tickdelta;
			} else {
				time_update = tick + tickdelta;
				timedelta -= tickdelta;
			}
		}
		/*
		 * Compute the phase adjustment.  If the low-order bits
		 * (time_phase) of the update overflow, bump the high-order
		 * bits (time_update).
		 */
		time_phase += time_adj;
		if (time_phase <= -FINEUSEC) {
			ltemp = -time_phase >> SHIFT_SCALE;
			time_phase += ltemp << SHIFT_SCALE;
			time_update -= ltemp;
		} else if (time_phase >= FINEUSEC) {
			ltemp = time_phase >> SHIFT_SCALE;
			time_phase -= ltemp << SHIFT_SCALE;
			time_update += ltemp;
		}
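		/*
		 * Illustrative note: FINEUSEC (1 << SHIFT_SCALE in timex.h)
		 * is one microsecond in the scaled-us representation, so the
		 * shifts above move whole microseconds accumulated in
		 * time_phase into time_update and leave the sub-microsecond
		 * residue behind.
		 */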

		time.tv_usec += time_update;
		/*
		 * On rollover of the second the phase adjustment to be used
		 * for the next second is calculated.  Also, the maximum error
		 * is increased by the tolerance.  If the PPS frequency
		 * discipline code is present, the phase is increased to
		 * compensate for the CPU clock oscillator frequency error.
		 *
		 * With SHIFT_SCALE = 23, the maximum frequency adjustment is
		 * +-256 us per tick, or 25.6 ms/s at a clock frequency of 100
		 * Hz.  The time contribution is shifted right a minimum of two
		 * bits, while the frequency contribution is a right shift.
		 * Thus, overflow is prevented if the frequency contribution is
		 * limited to half the maximum or 15.625 ms/s.
		 */
		if (time.tv_usec >= 1000000) {
			time.tv_usec -= 1000000;
			time.tv_sec++;
			time_maxerror += time_tolerance >> SHIFT_USEC;
			if (time_offset < 0) {
				ltemp = -time_offset >>
				    (SHIFT_KG + time_constant);
				time_offset += ltemp;
				time_adj = -ltemp <<
				    (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
			} else {
				ltemp = time_offset >>
				    (SHIFT_KG + time_constant);
				time_offset -= ltemp;
				time_adj = ltemp <<
				    (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
			}
#ifdef PPS_SYNC
			/*
			 * Gnaw on the watchdog counter and update the
			 * frequency computed by the pll and the PPS signal.
			 */
			pps_valid++;
			if (pps_valid == PPS_VALID) {
				pps_jitter = MAXTIME;
				pps_stabil = MAXFREQ;
				time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
				    STA_PPSWANDER | STA_PPSERROR);
			}
			ltemp = time_freq + pps_freq;
#else
			ltemp = time_freq;
#endif /* PPS_SYNC */
			if (ltemp < 0)
				time_adj -= -ltemp >>
				    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
			else
				time_adj += ltemp >>
				    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

			/*
			 * When the CPU clock oscillator frequency is not a
			 * power of two in Hz, the SHIFT_HZ is only an
			 * approximate scale factor.  In the SunOS kernel, this
			 * results in a PLL gain factor of 1/1.28 = 0.78 of
			 * what it should be.  In the following code the
			 * overall gain is increased by a factor of 1.25, which
			 * results in a residual error less than 3 percent.
			 */
			/* Same thing applies for FreeBSD --GAW */
			if (hz == 100) {
				if (time_adj < 0)
					time_adj -= -time_adj >> 2;
				else
					time_adj += time_adj >> 2;
			}

			/* XXX - this is really bogus, but can't be fixed until
			   xntpd's idea of the system clock is fixed to know
			   how the user wants leap seconds handled; in the
			   meantime, we assume that users of NTP are running
			   without proper leap second support (this is now the
			   default anyway) */
			/*
			 * Leap second processing.  If in leap-insert state at
			 * the end of the day, the system clock is set back one
			 * second; if in leap-delete state, the system clock is
			 * set ahead one second.  The microtime() routine or
			 * external clock driver will insure that reported time
			 * is always monotonic.  The ugly divides should be
			 * replaced.
			 */
			switch (time_state) {

			case TIME_OK:
				if (time_status & STA_INS)
					time_state = TIME_INS;
				else if (time_status & STA_DEL)
					time_state = TIME_DEL;
				break;

			case TIME_INS:
				if (time.tv_sec % 86400 == 0) {
					time.tv_sec--;
					time_state = TIME_OOP;
				}
				break;

			case TIME_DEL:
				if ((time.tv_sec + 1) % 86400 == 0) {
					time.tv_sec++;
					time_state = TIME_WAIT;
				}
				break;

			case TIME_OOP:
				time_state = TIME_WAIT;
				break;

			case TIME_WAIT:
				if (!(time_status & (STA_INS | STA_DEL)))
					time_state = TIME_OK;
			}
		}
	}
	if (needsoft) {
		if (CLKF_BASEPRI(&frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(CLKF_USERMODE(&frame));
		} else
			setsoftclock();
	}
}

int dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
void
gatherstats(framep)
	clockframe *framep;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (CLKF_USERMODE(framep)) {
		/*
		 * CPU was in user state.
		 */
		if (curproc->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (curproc == NULL && CLKF_BASEPRI(framep))
			cpstate = CP_IDLE;
#if defined(GPROF) && !defined(GUPROF)
		s = (u_long) CLKF_PC(framep) - (u_long) s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
void
softclock(usermode)
	int usermode;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register timeout_func_t func;
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}

	/*
	 * If no process to work with, we're finished.
	 */
	if (curproc == 0) return;

	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.
	 */
	if (usermode) {
		register struct proc *p = curproc;

		if (p->p_stats->p_prof.pr_scale)
			/*
			 * Note: softclock() no longer receives a clockframe;
			 * profile_tick() is presumably a macro that discards
			 * its second argument, which records the old call.
			 */
			profile_tick(p, unused was &frame);
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 */
		if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO + 4;
			setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
void
timeout(func, arg, t)
	timeout_func_t func;
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

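/*
 * Illustrative use (hypothetical handler and softc names): arrange for
 * mydev_poll() to run about one second (hz ticks) from now, and later
 * cancel it with untimeout() below:
 *
 *	timeout(mydev_poll, (caddr_t)sc, hz);
 *	...
 *	untimeout(mydev_poll, (caddr_t)sc);
 */
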
/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
void
untimeout(func, arg)
	timeout_func_t func;
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */

/* XXX clock_t */
u_long
hzto(tv)
	struct timeval *tv;
{
	register unsigned long ticks;
	register long sec;
	register long usec;
	int s;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * Maximum value for any timeout in 10ms ticks is 248 days.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	usec = tv->tv_usec - time.tv_usec;
	splx(s);
	if (usec < 0) {
		sec--;
		usec += 1000000;
	}
	if (sec < 0) {
#ifdef DIAGNOSTIC
		printf("hzto: negative time difference %ld sec %ld usec\n",
		    sec, usec);
#endif
		ticks = 1;
	} else if (sec <= LONG_MAX / 1000000)
		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
		    / tick + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
		    + ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		ticks = LONG_MAX;
#define CLOCK_T_MAX INT_MAX	/* XXX should be ULONG_MAX */
	if (ticks > CLOCK_T_MAX)
		ticks = CLOCK_T_MAX;
	return (ticks);
}
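
/*
 * Illustrative use (hypothetical names): convert an absolute deadline
 * in a struct timeval to the relative tick count timeout() expects:
 *
 *	timeout(mydev_poll, (caddr_t)sc, hzto(&deadline));
 */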

#ifdef PPS_SYNC
/*
 * hardpps() - discipline CPU clock oscillator to external pps signal
 *
 * This routine is called at each PPS interrupt in order to discipline
 * the CPU clock oscillator to the PPS signal.  It integrates successive
 * phase differences between the two oscillators and calculates the
 * frequency offset.  This is used in hardclock() to discipline the CPU
 * clock oscillator so that intrinsic frequency error is cancelled out.
 * The code requires the caller to capture the time and hardware
 * counter value at the designated PPS signal transition.
 */
/*
 * XXX Note: this hardpps() still refers to the older ntp_pll structure
 * and to pps_mf[], pps_dispmax and pps_dispinc, which are not declared
 * in this file, rather than to the pps_* variables declared above.
 */
void
hardpps(tvp, usec)
	struct timeval *tvp;		/* time at PPS */
	long usec;			/* hardware counter at PPS */
{
	long u_usec, v_usec, bigtick;
	long cal_sec, cal_usec;

	/*
	 * During the calibration interval adjust the starting time when
	 * the tick overflows.  At the end of the interval compute the
	 * duration of the interval and the difference of the hardware
	 * counters at the beginning and end of the interval.  This code
	 * is deliciously complicated by the fact that valid differences
	 * may exceed the value of tick when using long calibration
	 * intervals and small ticks.  Note that the counter can be
	 * greater than tick if caught at just the wrong instant, but
	 * the values returned and used here are correct.
	 */
	bigtick = (long)tick << SHIFT_USEC;
	pps_usec -= ntp_pll.ybar;
	if (pps_usec >= bigtick)
		pps_usec -= bigtick;
	if (pps_usec < 0)
		pps_usec += bigtick;
	pps_time.tv_sec++;
	pps_count++;
	if (pps_count < (1 << pps_shift))
		return;
	pps_count = 0;
	ntp_pll.calcnt++;
	u_usec = usec << SHIFT_USEC;
	v_usec = pps_usec - u_usec;
	if (v_usec >= bigtick >> 1)
		v_usec -= bigtick;
	if (v_usec < -(bigtick >> 1))
		v_usec += bigtick;
	if (v_usec < 0)
		v_usec = -(-v_usec >> ntp_pll.shift);
	else
		v_usec = v_usec >> ntp_pll.shift;
	pps_usec = u_usec;
	cal_sec = tvp->tv_sec;
	cal_usec = tvp->tv_usec;
	cal_sec -= pps_time.tv_sec;
	cal_usec -= pps_time.tv_usec;
	if (cal_usec < 0) {
		cal_usec += 1000000;
		cal_sec--;
	}
	pps_time = *tvp;

	/*
	 * Check for lost interrupts, noise, excessive jitter and
	 * excessive frequency error.  The number of timer ticks during
	 * the interval may vary +-1 tick.  Add to this a margin of one
	 * tick for the PPS signal jitter and maximum frequency
	 * deviation.  If the limits are exceeded, the calibration
	 * interval is reset to the minimum and we start over.
	 */
	u_usec = (long)tick << 1;
	if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec))
	    || (cal_sec == 0 && cal_usec < u_usec))
	    || v_usec > ntp_pll.tolerance || v_usec < -ntp_pll.tolerance) {
		ntp_pll.jitcnt++;
		ntp_pll.shift = PPS_SHIFT;
		pps_dispinc = PPS_DISPINC;
		ntp_pll.intcnt = 0;
		return;
	}

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * signal.  The median sample becomes the offset estimate; the
	 * difference between the other two samples becomes the
	 * dispersion estimate.
	 */
	pps_mf[2] = pps_mf[1];
	pps_mf[1] = pps_mf[0];
	pps_mf[0] = v_usec;
	if (pps_mf[0] > pps_mf[1]) {
		if (pps_mf[1] > pps_mf[2]) {
			u_usec = pps_mf[1];		/* 0 1 2 */
			v_usec = pps_mf[0] - pps_mf[2];
		} else if (pps_mf[2] > pps_mf[0]) {
			u_usec = pps_mf[0];		/* 2 0 1 */
			v_usec = pps_mf[2] - pps_mf[1];
		} else {
			u_usec = pps_mf[2];		/* 0 2 1 */
			v_usec = pps_mf[0] - pps_mf[1];
		}
	} else {
		if (pps_mf[1] < pps_mf[2]) {
			u_usec = pps_mf[1];		/* 2 1 0 */
			v_usec = pps_mf[2] - pps_mf[0];
		} else if (pps_mf[2] < pps_mf[0]) {
			u_usec = pps_mf[0];		/* 1 0 2 */
			v_usec = pps_mf[1] - pps_mf[2];
		} else {
			u_usec = pps_mf[2];		/* 1 2 0 */
			v_usec = pps_mf[1] - pps_mf[0];
		}
	}

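	/*
	 * Illustrative example: with samples pps_mf[] = {5, -3, 2} the
	 * branches above select the median (u_usec = 2) and the spread
	 * of the other two samples (v_usec = 5 - (-3) = 8) as the
	 * dispersion.
	 */
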
	/*
	 * Here the dispersion average is updated.  If it is less than
	 * the threshold pps_dispmax, the frequency average is updated
	 * as well, but clamped to the tolerance.
	 */
	v_usec = (v_usec >> 1) - ntp_pll.disp;
	if (v_usec < 0)
		ntp_pll.disp -= -v_usec >> PPS_AVG;
	else
		ntp_pll.disp += v_usec >> PPS_AVG;
	if (ntp_pll.disp > pps_dispmax) {
		ntp_pll.discnt++;
		return;
	}
	if (u_usec < 0) {
		ntp_pll.ybar -= -u_usec >> PPS_AVG;
		if (ntp_pll.ybar < -ntp_pll.tolerance)
			ntp_pll.ybar = -ntp_pll.tolerance;
		u_usec = -u_usec;
	} else {
		ntp_pll.ybar += u_usec >> PPS_AVG;
		if (ntp_pll.ybar > ntp_pll.tolerance)
			ntp_pll.ybar = ntp_pll.tolerance;
	}

	/*
	 * Here the calibration interval is adjusted.  If the maximum
	 * time difference is greater than tick/4, reduce the interval
	 * by half.  If this is not the case for four consecutive
	 * intervals, double the interval.
	 */
	if (u_usec << ntp_pll.shift > bigtick >> 2) {
		ntp_pll.intcnt = 0;
		if (ntp_pll.shift > PPS_SHIFT) {
			ntp_pll.shift--;
			pps_dispinc <<= 1;
		}
	} else if (ntp_pll.intcnt >= 4) {
		ntp_pll.intcnt = 0;
		if (ntp_pll.shift < PPS_SHIFTMAX) {
			ntp_pll.shift++;
			pps_dispinc >>= 1;
		}
	} else
		ntp_pll.intcnt++;
}
#endif /* PPS_SYNC */