| 1 | /*- |
| 2 | * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions |
| 7 | * are met: |
| 8 | * 1. Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer in the |
| 12 | * documentation and/or other materials provided with the distribution. |
| 13 | * 3. All advertising materials mentioning features or use of this software |
| 14 | * must display the following acknowledgement: |
| 15 | * This product includes software developed by the University of |
| 16 | * California, Berkeley and its contributors. |
| 17 | * 4. Neither the name of the University nor the names of its contributors |
| 18 | * may be used to endorse or promote products derived from this software |
| 19 | * without specific prior written permission. |
| 20 | * |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 31 | * SUCH DAMAGE. |
| 32 | * |
| 33 | * from: @(#)kern_clock.c 7.16 (Berkeley) 5/9/91 |
| 34 | * $Id: kern_clock.c,v 1.12 1994/03/01 23:21:44 phk Exp $ |
| 35 | */ |
| 36 | |
| 37 | /* Portions of this software are covered by the following: */ |
| 38 | /****************************************************************************** |
| 39 | * * |
| 40 | * Copyright (c) David L. Mills 1993, 1994 * |
| 41 | * * |
| 42 | * Permission to use, copy, modify, and distribute this software and its * |
| 43 | * documentation for any purpose and without fee is hereby granted, provided * |
| 44 | * that the above copyright notice appears in all copies and that both the * |
| 45 | * copyright notice and this permission notice appear in supporting * |
| 46 | * documentation, and that the name University of Delaware not be used in * |
| 47 | * advertising or publicity pertaining to distribution of the software * |
| 48 | * without specific, written prior permission. The University of Delaware * |
| 49 | * makes no representations about the suitability this software for any * |
| 50 | * purpose. It is provided "as is" without express or implied warranty. * |
| 51 | * * |
| 52 | *****************************************************************************/ |
| 53 | |
| 54 | |
| 55 | #include "param.h" |
| 56 | #include "systm.h" |
| 57 | #include "dkstat.h" |
| 58 | #include "callout.h" |
| 59 | #include "kernel.h" |
| 60 | #include "proc.h" |
| 61 | #include "signalvar.h" |
| 62 | #include "resourcevar.h" |
| 63 | #include "timex.h" |
| 64 | |
| 65 | #include "machine/cpu.h" |
| 66 | |
| 67 | #include "resource.h" |
| 68 | #include "vm/vm.h" |
| 69 | |
| 70 | #ifdef GPROF |
| 71 | #include "gprof.h" |
| 72 | #endif |
| 73 | |
| 74 | static void gatherstats(clockframe *); |
| 75 | |
| 76 | /* From callout.h */ |
| 77 | struct callout *callfree, *callout, calltodo; |
| 78 | int ncallout; |
| 79 | |
| 80 | /* |
| 81 | * Clock handling routines. |
| 82 | * |
| 83 | * This code is written to operate with two timers which run |
| 84 | * independently of each other. The main clock, running at hz |
| 85 | * times per second, is used to do scheduling and timeout calculations. |
| 86 | * The second timer does resource utilization estimation statistically |
| 87 | * based on the state of the machine phz times a second. Both functions |
 * can be performed by a single clock (i.e., hz == phz); however, the
| 89 | * statistics will be much more prone to errors. Ideally a machine |
| 90 | * would have separate clocks measuring time spent in user state, system |
| 91 | * state, interrupt state, and idle state. These clocks would allow a non- |
| 92 | * approximate measure of resource utilization. |
| 93 | */ |
| 94 | |
| 95 | /* |
| 96 | * TODO: |
| 97 | * time of day, system/user timing, timeouts, profiling on separate timers |
| 98 | * allocate more timeout table slots when table overflows. |
| 99 | */ |
| 100 | |
| 101 | /* |
| 102 | * Bump a timeval by a small number of usec's. |
| 103 | */ |
| 104 | #define BUMPTIME(t, usec) { \ |
| 105 | register struct timeval *tp = (t); \ |
| 106 | \ |
| 107 | tp->tv_usec += (usec); \ |
| 108 | if (tp->tv_usec >= 1000000) { \ |
| 109 | tp->tv_usec -= 1000000; \ |
| 110 | tp->tv_sec++; \ |
| 111 | } \ |
| 112 | } |
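/*
 * For example, BUMPTIME(&p->p_utime, tick) in hardclock() below charges
 * one clock tick (tick microseconds) to the current process's user time,
 * carrying any overflow of tv_usec into tv_sec.
 */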
| 113 | |
| 114 | /* |
| 115 | * Phase-lock loop (PLL) definitions |
| 116 | * |
| 117 | * The following defines establish the performance envelope of the PLL. |
| 118 | * They specify the maximum phase error (MAXPHASE), maximum frequency |
| 119 | * error (MAXFREQ), minimum interval between updates (MINSEC) and |
| 120 | * maximum interval between updates (MAXSEC). The intent of these bounds |
| 121 | * is to force the PLL to operate within predefined limits in order to |
| 122 | * satisfy correctness assertions. An excursion which exceeds these |
| 123 | * bounds is clamped to the bound and operation proceeds accordingly. In |
| 124 | * practice, this can occur only if something has failed or is operating |
| 125 | * out of tolerance, but otherwise the PLL continues to operate in a |
| 126 | * stable mode. |
| 127 | * |
| 128 | * MAXPHASE must be set greater than or equal to CLOCK.MAX (128 ms), as |
| 129 | * defined in the NTP specification. CLOCK.MAX establishes the maximum |
| 130 | * time offset allowed before the system time is reset, rather than |
| 131 | * incrementally adjusted. Here, the maximum offset is clamped to |
| 132 | * MAXPHASE only in order to prevent overflow errors due to defective |
| 133 | * protocol implementations. |
| 134 | * |
| 135 | * MAXFREQ reflects the manufacturing frequency tolerance of the CPU |
| 136 | * clock oscillator plus the maximum slew rate allowed by the protocol. |
| 137 | * It should be set to at least the frequency tolerance of the |
| 138 | * oscillator plus 100 ppm for vernier frequency adjustments. If the |
| 139 | * kernel frequency discipline code is installed (PPS_SYNC), the CPU |
| 140 | * oscillator frequency is disciplined to an external source, presumably |
| 141 | * with negligible frequency error, and MAXFREQ can be reduced. |
| 142 | */ |
| 143 | #define MAXPHASE 512000L /* max phase error (us) */ |
| 144 | #ifdef PPS_SYNC |
| 145 | #define MAXFREQ (100L << SHIFT_USEC) /* max freq error (scaled ppm) */ |
| 146 | #else |
| 147 | #define MAXFREQ (200L << SHIFT_USEC) /* max freq error (scaled ppm) */ |
| 148 | #endif /* PPS_SYNC */ |
| 149 | #define MINSEC 16L /* min interval between updates (s) */ |
| 150 | #define MAXSEC 1200L /* max interval between updates (s) */ |
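/*
 * Illustrative note on units: frequency quantities such as MAXFREQ and
 * ntp_pll.tolerance are kept in "scaled ppm", i.e. parts per million
 * shifted left by SHIFT_USEC from timex.h.  Assuming the usual value
 * SHIFT_USEC = 16, a 200 ppm bound is stored as 200 << 16 = 13107200,
 * and hardclock() converts back to ppm (us/s) with a right shift by
 * SHIFT_USEC on each second rollover.
 */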
| 151 | |
| 152 | /* |
| 153 | * The following variables are read and set by the ntp_adjtime() system |
| 154 | * call. The ntp_pll.status variable defines the synchronization status of |
| 155 | * the system clock, with codes defined in the timex.h header file. The |
| 156 | * time_offset variable is used by the PLL to adjust the system time in |
| 157 | * small increments. The time_constant variable determines the bandwidth |
| 158 | * or "stiffness" of the PLL. The time_tolerance variable is the maximum |
| 159 | * frequency error or tolerance of the CPU clock oscillator and is a |
| 160 | * property of the architecture; however, in principle it could change |
 * as a result of the presence of external discipline signals, for
| 162 | * instance. The time_precision variable is usually equal to the kernel |
| 163 | * tick variable; however, in cases where a precision clock counter or |
| 164 | * external clock is available, the resolution can be much less than |
| 165 | * this and depend on whether the external clock is working or not. The |
 * time_maxerror variable is initialized by an ntp_adjtime() call and
| 167 | * increased by the kernel once each second to reflect the maximum error |
| 168 | * bound growth. The time_esterror variable is set and read by the |
| 169 | * ntp_adjtime() call, but otherwise not used by the kernel. |
| 170 | */ |
| 171 | /* - use appropriate fields in ntp_pll instead */ |
| 172 | #if 0 |
| 173 | int ntp_pll.status = TIME_BAD; /* clock synchronization status */ |
| 174 | long time_offset = 0; /* time adjustment (us) */ |
| 175 | long time_constant = 0; /* pll time constant */ |
| 176 | long time_tolerance = MAXFREQ; /* frequency tolerance (scaled ppm) */ |
| 177 | long time_precision = 1; /* clock precision (us) */ |
| 178 | long time_maxerror = MAXPHASE; /* maximum error (us) */ |
| 179 | long time_esterror = MAXPHASE; /* estimated error (us) */ |
| 180 | #endif |
| 181 | |
| 182 | /* |
| 183 | * The following variables establish the state of the PLL and the |
| 184 | * residual time and frequency offset of the local clock. The time_phase |
| 185 | * variable is the phase increment and the ntp_pll.frequency variable is the |
| 186 | * frequency increment of the kernel time variable at each tick of the |
| 187 | * clock. The ntp_pll.frequency variable is set via ntp_adjtime() from a value |
| 188 | * stored in a file when the synchronization daemon is first started. |
| 189 | * Its value is retrieved via ntp_adjtime() and written to the file |
| 190 | * about once per hour by the daemon. The time_adj variable is the |
| 191 | * adjustment added to the value of tick at each timer interrupt and is |
 * recomputed once each second. The time_reftime variable is the
 * seconds portion of the system time on the last call to
| 194 | * ntp_adjtime(). It is used to adjust the ntp_pll.frequency variable and to |
| 195 | * increase the time_maxerror as the time since last update increases. |
| 196 | * The scale factors are defined in the timex.h header file. |
| 197 | */ |
| 198 | long time_phase = 0; /* phase offset (scaled us) */ |
| 199 | #if 0 |
| 200 | long ntp_pll.frequency = 0; /* frequency offset (scaled ppm) */ |
| 201 | #endif |
| 202 | long time_adj = 0; /* tick adjust (scaled 1 / hz) */ |
| 203 | long time_reftime; /* time at last adjustment (s) */ |
| 204 | |
| 205 | #ifdef PPS_SYNC |
| 206 | /* |
| 207 | * The following defines and declarations are used only if a pulse-per- |
| 208 | * second (PPS) signal is available and connected via a modem control |
| 209 | * lead, such as produced by the optional ppsclock feature incorporated |
| 210 | * in the asynch driver. They establish the design parameters of the PPS |
| 211 | * frequency-lock loop used to discipline the CPU clock oscillator to |
| 212 | * the PPS signal. PPS_AVG is the averaging factor for the frequency |
| 213 | * loop. PPS_SHIFT and PPS_SHIFTMAX specify the minimum and maximum |
 * intervals, respectively, in seconds as powers of two. PPS_DISPINC is
 * the initial increment added to pps_disp each second.
| 216 | */ |
| 217 | #define PPS_AVG 2 /* pps averaging constant (shift) */ |
| 218 | #define PPS_SHIFT 2 /* min interval duration (s) (shift) */ |
| 219 | #define PPS_SHIFTMAX 8 /* max interval duration (s) (shift) */ |
| 220 | #define PPS_DISPINC 0L /* dispersion increment (us/s) */ |
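/*
 * With the values above, the calibration interval starts at
 * 1 << PPS_SHIFT = 4 seconds, and the interval-adaptation code in
 * hardpps() may stretch it up to 1 << PPS_SHIFTMAX = 256 seconds.
 */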
| 221 | |
| 222 | /* |
| 223 | * The pps_time variable contains the time at each calibration as read |
| 224 | * by microtime(). The pps_usec variable is latched from a high |
| 225 | * resolution counter or external clock at pps_time. Here we want the |
| 226 | * hardware counter contents only, not the contents plus the |
 * time.tv_usec as usual. The pps_ybar variable is the current CPU
| 228 | * oscillator frequency offset estimate relative to the PPS signal. The |
| 229 | * pps_disp variable is the current error estimate, which is increased |
 * by pps_dispinc once each second. Frequency updates are permitted only
 * when pps_disp is below the pps_dispmax threshold. The pps_mf[] array
| 232 | * is used as a median filter for the frequency estimate and to derive |
| 233 | * the error estimate. |
| 234 | */ |
| 235 | struct timeval pps_time; /* kernel time at last interval */ |
| 236 | long pps_usec = 0; /* usec counter at last interval */ |
| 237 | #if 0 |
| 238 | long pps_ybar = 0; /* frequency estimate (scaled ppm) */ |
| 239 | long pps_disp = MAXFREQ; /* dispersion estimate (scaled ppm) */ |
| 240 | #endif |
| 241 | long pps_dispmax = MAXFREQ / 2; /* dispersion threshold */ |
| 242 | long pps_dispinc = PPS_DISPINC; /* pps dispersion increment/sec */ |
| 243 | long pps_mf[] = {0, 0, 0}; /* pps median filter */ |
| 244 | |
| 245 | /* |
| 246 | * The pps_count variable counts the seconds of the calibration |
| 247 | * interval, the duration of which is pps_shift (s) in powers of two. |
| 248 | * The pps_intcnt variable counts the calibration intervals for use in |
| 249 | * the interval-adaptation algorithm. It's just too complicated for |
| 250 | * words. |
| 251 | */ |
| 252 | int pps_count = 0; /* calibration interval counter (s) */ |
| 253 | #if 0 |
| 254 | int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */ |
| 255 | #endif |
| 256 | int pps_intcnt = 0; /* intervals at current duration */ |
| 257 | |
| 258 | /* |
| 259 | * PPS signal quality monitors |
| 260 | */ |
| 261 | #if 0 |
| 262 | long pps_calcnt; /* calibration intervals */ |
| 263 | long pps_jitcnt; /* jitter limit exceeded */ |
| 264 | long pps_discnt; /* dispersion limit exceeded */ |
| 265 | #endif |
| 266 | #endif /* PPS_SYNC */ |
| 267 | |
| 268 | struct timex ntp_pll = { |
| 269 | 0, /* mode */ |
| 270 | 0, /* offset */ |
| 271 | 0, /* frequency */ |
| 272 | MAXPHASE, /* maxerror */ |
| 273 | MAXPHASE, /* esterror */ |
| 274 | TIME_BAD, /* status */ |
| 275 | 0, /* time_constant */ |
| 276 | 1, /* precision */ |
| 277 | MAXFREQ, /* tolerance */ |
| 278 | 0, /* ybar */ |
| 279 | #ifdef PPS_SYNC |
| 280 | MAXFREQ, /* disp */ |
| 281 | PPS_SHIFT, /* shift */ |
| 282 | 0, /* calcnt */ |
| 283 | 0, /* jitcnt */ |
| 284 | 0 /* discnt */ |
| 285 | #endif |
| 286 | }; |
| 287 | |
| 288 | /* |
| 289 | * hardupdate() - local clock update |
| 290 | * |
| 291 | * This routine is called by ntp_adjtime() to update the local clock |
| 292 | * phase and frequency. This is used to implement an adaptive-parameter, |
| 293 | * first-order, type-II phase-lock loop. The code computes the time |
| 294 | * since the last update and clamps to a maximum (for robustness). Then |
| 295 | * it multiplies by the offset (sorry about the ugly multiply), scales |
| 296 | * by the time constant, and adds to the frequency variable. Then, it |
| 297 | * computes the phase variable as the offset scaled by the time |
| 298 | * constant. Note that all shifts are assumed to be positive. Only |
| 299 | * enough error checking is done to prevent bizarre behavior due to |
| 300 | * overflow problems. |
| 301 | * |
| 302 | * For default SHIFT_UPDATE = 12, the offset is limited to +-512 ms, the |
| 303 | * maximum interval between updates is 4096 s and the maximum frequency |
| 304 | * offset is +-31.25 ms/s. |
| 305 | */ |
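/*
 * As an illustrative restatement of the code below, using the timex.h
 * scale factors:
 *
 *	offset'    = clamp(offset, +-MAXPHASE) << SHIFT_UPDATE
 *	frequency' = clamp(frequency + (offset * mtemp) >>
 *	             (2 * time_constant + SHIFT_KF - SHIFT_USEC),
 *	             +-tolerance)
 *
 * where mtemp is the number of seconds since the previous update,
 * zeroed if it exceeds MAXSEC.
 */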
| 306 | void |
| 307 | hardupdate(offset) |
| 308 | long offset; |
| 309 | { |
| 310 | long mtemp; |
| 311 | |
| 312 | if (offset > MAXPHASE) |
| 313 | ntp_pll.offset = MAXPHASE << SHIFT_UPDATE; |
| 314 | else if (offset < -MAXPHASE) |
| 315 | ntp_pll.offset = -(MAXPHASE << SHIFT_UPDATE); |
| 316 | else |
| 317 | ntp_pll.offset = offset << SHIFT_UPDATE; |
| 318 | mtemp = time.tv_sec - time_reftime; |
| 319 | time_reftime = time.tv_sec; |
| 320 | if (mtemp > MAXSEC) |
| 321 | mtemp = 0; |
| 322 | |
| 323 | /* ugly multiply should be replaced */ |
| 324 | if (offset < 0) |
| 325 | ntp_pll.frequency -= |
| 326 | (-offset * mtemp) >> (ntp_pll.time_constant |
| 327 | + ntp_pll.time_constant |
| 328 | + SHIFT_KF |
| 329 | - SHIFT_USEC); |
| 330 | else |
| 331 | ntp_pll.frequency += |
| 332 | (offset * mtemp) >> (ntp_pll.time_constant |
| 333 | + ntp_pll.time_constant |
| 334 | + SHIFT_KF |
| 335 | - SHIFT_USEC); |
| 336 | if (ntp_pll.frequency > ntp_pll.tolerance) |
| 337 | ntp_pll.frequency = ntp_pll.tolerance; |
| 338 | else if (ntp_pll.frequency < -ntp_pll.tolerance) |
| 339 | ntp_pll.frequency = -ntp_pll.tolerance; |
| 340 | if (ntp_pll.status == TIME_BAD) |
| 341 | ntp_pll.status = TIME_OK; |
| 342 | } |
| 343 | |
| 344 | /* |
| 345 | * The hz hardware interval timer. |
| 346 | * We update the events relating to real time. |
| 347 | * If this timer is also being used to gather statistics, |
| 348 | * we run through the statistics gathering routine as well. |
| 349 | */ |
| 350 | void |
| 351 | hardclock(frame) |
| 352 | clockframe frame; |
| 353 | { |
| 354 | register struct callout *p1; |
| 355 | register struct proc *p = curproc; |
| 356 | register struct pstats *pstats = 0; |
| 357 | register struct rusage *ru; |
| 358 | register struct vmspace *vm; |
| 359 | register int s; |
| 360 | int needsoft = 0; |
| 361 | extern int tickdelta; |
| 362 | extern long timedelta; |
| 363 | long ltemp, time_update = 0; |
| 364 | |
| 365 | /* |
| 366 | * Update real-time timeout queue. |
| 367 | * At front of queue are some number of events which are ``due''. |
| 368 | * The time to these is <= 0 and if negative represents the |
| 369 | * number of ticks which have passed since it was supposed to happen. |
| 370 | * The rest of the q elements (times > 0) are events yet to happen, |
| 371 | * where the time for each is given as a delta from the previous. |
| 372 | * Decrementing just the first of these serves to decrement the time |
| 373 | * to all events. |
| 374 | */ |
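	/*
	 * For example, events due in 5, 8, 8 and 12 ticks are stored with
	 * c_time values 5, 3, 0 and 4; decrementing the leading 5 ages
	 * every event in the queue by one tick.
	 */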
| 375 | p1 = calltodo.c_next; |
| 376 | while (p1) { |
| 377 | if (--p1->c_time > 0) |
| 378 | break; |
| 379 | needsoft = 1; |
| 380 | if (p1->c_time == 0) |
| 381 | break; |
| 382 | p1 = p1->c_next; |
| 383 | } |
| 384 | |
| 385 | /* |
| 386 | * Curproc (now in p) is null if no process is running. |
| 387 | * We assume that curproc is set in user mode! |
| 388 | */ |
| 389 | if (p) |
| 390 | pstats = p->p_stats; |
| 391 | /* |
| 392 | * Charge the time out based on the mode the cpu is in. |
| 393 | * Here again we fudge for the lack of proper interval timers |
| 394 | * assuming that the current state has been around at least |
| 395 | * one tick. |
| 396 | */ |
| 397 | if (CLKF_USERMODE(&frame)) { |
| 398 | if (pstats->p_prof.pr_scale) |
| 399 | needsoft = 1; |
| 400 | /* |
| 401 | * CPU was in user state. Increment |
 * user time counter, and process the process-virtual time
| 403 | * interval timer. |
| 404 | */ |
| 405 | BUMPTIME(&p->p_utime, tick); |
| 406 | if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && |
| 407 | itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) |
| 408 | psignal(p, SIGVTALRM); |
| 409 | } else { |
| 410 | /* |
| 411 | * CPU was in system state. |
| 412 | */ |
| 413 | if (p) |
| 414 | BUMPTIME(&p->p_stime, tick); |
| 415 | } |
| 416 | |
| 417 | /* bump the resource usage of integral space use */ |
| 418 | if (p && pstats && (ru = &pstats->p_ru) && (vm = p->p_vmspace)) { |
| 419 | ru->ru_ixrss += vm->vm_tsize * NBPG / 1024; |
| 420 | ru->ru_idrss += vm->vm_dsize * NBPG / 1024; |
| 421 | ru->ru_isrss += vm->vm_ssize * NBPG / 1024; |
| 422 | if ((vm->vm_pmap.pm_stats.resident_count * NBPG / 1024) > |
| 423 | ru->ru_maxrss) { |
| 424 | ru->ru_maxrss = |
| 425 | vm->vm_pmap.pm_stats.resident_count * NBPG / 1024; |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | /* |
| 430 | * If the cpu is currently scheduled to a process, then |
| 431 | * charge it with resource utilization for a tick, updating |
| 432 | * statistics which run in (user+system) virtual time, |
| 433 | * such as the cpu time limit and profiling timers. |
| 434 | * This assumes that the current process has been running |
| 435 | * the entire last tick. |
| 436 | */ |
| 437 | if (p) { |
| 438 | if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) > |
| 439 | p->p_rlimit[RLIMIT_CPU].rlim_cur) { |
| 440 | psignal(p, SIGXCPU); |
| 441 | if (p->p_rlimit[RLIMIT_CPU].rlim_cur < |
| 442 | p->p_rlimit[RLIMIT_CPU].rlim_max) |
| 443 | p->p_rlimit[RLIMIT_CPU].rlim_cur += 5; |
| 444 | } |
| 445 | if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && |
| 446 | itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) |
| 447 | psignal(p, SIGPROF); |
| 448 | |
| 449 | /* |
| 450 | * We adjust the priority of the current process. |
| 451 | * The priority of a process gets worse as it accumulates |
| 452 | * CPU time. The cpu usage estimator (p_cpu) is increased here |
| 453 | * and the formula for computing priorities (in kern_synch.c) |
| 454 | * will compute a different value each time the p_cpu increases |
| 455 | * by 4. The cpu usage estimator ramps up quite quickly when |
| 456 | * the process is running (linearly), and decays away |
 * exponentially, at a rate which is proportionally slower
 * when the system is busy. The basic principle is that the
| 459 | * system will 90% forget that a process used a lot of CPU |
| 460 | * time in 5*loadav seconds. This causes the system to favor |
| 461 | * processes which haven't run much recently, and to |
| 462 | * round-robin among other processes. |
| 463 | */ |
| 464 | p->p_cpticks++; |
| 465 | if (++p->p_cpu == 0) |
| 466 | p->p_cpu--; |
| 467 | if ((p->p_cpu&3) == 0) { |
| 468 | setpri(p); |
| 469 | if (p->p_pri >= PUSER) |
| 470 | p->p_pri = p->p_usrpri; |
| 471 | } |
| 472 | } |
| 473 | |
| 474 | /* |
| 475 | * If the alternate clock has not made itself known then |
| 476 | * we must gather the statistics. |
| 477 | */ |
| 478 | if (phz == 0) |
| 479 | gatherstats(&frame); |
| 480 | |
| 481 | /* |
| 482 | * Increment the time-of-day, and schedule |
| 483 | * processing of the callouts at a very low cpu priority, |
| 484 | * so we don't keep the relatively high clock interrupt |
| 485 | * priority any longer than necessary. |
| 486 | */ |
| 487 | { |
| 488 | int delta; |
| 489 | if (timedelta == 0) { |
| 490 | delta = tick; |
| 491 | } else { |
| 492 | if (timedelta < 0) { |
| 493 | delta = tick - tickdelta; |
| 494 | timedelta += tickdelta; |
| 495 | } else { |
| 496 | delta = tick + tickdelta; |
| 497 | timedelta -= tickdelta; |
| 498 | } |
| 499 | } |
| 500 | /* |
| 501 | * Logic from ``Precision Time and Frequency Synchronization |
| 502 | * Using Modified Kernels'' by David L. Mills, University |
| 503 | * of Delaware. |
| 504 | */ |
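		/*
		 * time_phase accumulates the per-tick adjustment in
		 * microseconds scaled by SHIFT_SCALE; once its magnitude
		 * reaches FINEUSEC, the whole microseconds
		 * (time_phase >> SHIFT_SCALE) are transferred to
		 * time_update and applied to time.tv_usec below.
		 */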
| 505 | time_phase += time_adj; |
		if (time_phase <= -FINEUSEC) {
| 507 | ltemp = -time_phase >> SHIFT_SCALE; |
| 508 | time_phase += ltemp << SHIFT_SCALE; |
| 509 | time_update -= ltemp; |
		} else if (time_phase >= FINEUSEC) {
| 511 | ltemp = time_phase >> SHIFT_SCALE; |
| 512 | time_phase -= ltemp << SHIFT_SCALE; |
| 513 | time_update += ltemp; |
| 514 | } |
| 515 | |
| 516 | time.tv_usec += delta + time_update; |
| 517 | /* |
| 518 | * On rollover of the second the phase adjustment to be used for |
| 519 | * the next second is calculated. Also, the maximum error is |
| 520 | * increased by the tolerance. If the PPS frequency discipline |
| 521 | * code is present, the phase is increased to compensate for the |
| 522 | * CPU clock oscillator frequency error. |
| 523 | * |
| 524 | * With SHIFT_SCALE = 23, the maximum frequency adjustment is |
| 525 | * +-256 us per tick, or 25.6 ms/s at a clock frequency of 100 |
| 526 | * Hz. The time contribution is shifted right a minimum of two |
| 527 | * bits, while the frequency contribution is a right shift. |
| 528 | * Thus, overflow is prevented if the frequency contribution is |
| 529 | * limited to half the maximum or 15.625 ms/s. |
| 530 | */ |
| 531 | if (time.tv_usec >= 1000000) { |
| 532 | time.tv_usec -= 1000000; |
| 533 | time.tv_sec++; |
| 534 | ntp_pll.maxerror += ntp_pll.tolerance >> SHIFT_USEC; |
| 535 | if (ntp_pll.offset < 0) { |
| 536 | ltemp = -ntp_pll.offset >> |
| 537 | (SHIFT_KG + ntp_pll.time_constant); |
| 538 | ntp_pll.offset += ltemp; |
| 539 | time_adj = -ltemp << |
| 540 | (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); |
| 541 | } else { |
| 542 | ltemp = ntp_pll.offset >> |
| 543 | (SHIFT_KG + ntp_pll.time_constant); |
| 544 | ntp_pll.offset -= ltemp; |
| 545 | time_adj = ltemp << |
| 546 | (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); |
| 547 | } |
| 548 | #ifdef PPS_SYNC |
| 549 | /* |
| 550 | * Grow the pps error by pps_dispinc ppm and clamp to |
| 551 | * MAXFREQ. The hardpps() routine will pull it down as |
| 552 | * long as the PPS signal is good. |
| 553 | */ |
| 554 | ntp_pll.disp += pps_dispinc; |
| 555 | if (ntp_pll.disp > MAXFREQ) |
| 556 | ntp_pll.disp = MAXFREQ; |
| 557 | ltemp = ntp_pll.frequency + ntp_pll.ybar; |
| 558 | #else |
| 559 | ltemp = ntp_pll.frequency; |
| 560 | #endif /* PPS_SYNC */ |
| 561 | if (ltemp < 0) |
| 562 | time_adj -= -ltemp >> |
| 563 | (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); |
| 564 | else |
| 565 | time_adj += ltemp >> |
| 566 | (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); |
| 567 | #if 0 |
| 568 | time_adj += fixtick << (SHIFT_SCALE - SHIFT_HZ); |
| 569 | #endif |
| 570 | |
| 571 | /* |
| 572 | * When the CPU clock oscillator frequency is not a |
| 573 | * power of two in Hz, the SHIFT_HZ is only an |
| 574 | * approximate scale factor. In the SunOS kernel, this |
 * results in a PLL gain factor of 1/1.28 = 0.78 times what it
| 576 | * should be. In the following code the overall gain is |
| 577 | * increased by a factor of 1.25, which results in a |
| 578 | * residual error less than 3 percent. |
| 579 | */ |
| 580 | if (hz == 100) { |
| 581 | if (time_adj < 0) |
| 582 | time_adj -= -time_adj >> 2; |
| 583 | else |
| 584 | time_adj += time_adj >> 2; |
| 585 | } |
| 586 | } |
| 587 | } |
| 588 | |
| 589 | if (needsoft) { |
| 590 | #if 0 |
| 591 | /* |
| 592 | * XXX - hardclock runs at splhigh, so the splsoftclock is useless and |
| 593 | * softclock runs at splhigh as well if we do this. It is not much of |
| 594 | * an optimization, since the "software interrupt" is done with a call |
| 595 | * from doreti, and the overhead of checking there is sometimes less |
| 596 | * than checking here. Moreover, the whole %$$%$^ frame is passed by |
| 597 | * value here. |
| 598 | */ |
| 599 | if (CLKF_BASEPRI(&frame)) { |
| 600 | /* |
| 601 | * Save the overhead of a software interrupt; |
| 602 | * it will happen as soon as we return, so do it now. |
| 603 | */ |
| 604 | (void) splsoftclock(); |
| 605 | softclock(frame); |
| 606 | } else |
| 607 | #endif |
| 608 | setsoftclock(); |
| 609 | } |
| 610 | } |
| 611 | |
| 612 | int dk_ndrive = DK_NDRIVE; |
| 613 | /* |
| 614 | * Gather statistics on resource utilization. |
| 615 | * |
| 616 | * We make a gross assumption: that the system has been in the |
| 617 | * state it is in (user state, kernel state, interrupt state, |
| 618 | * or idle state) for the entire last time interval, and |
| 619 | * update statistics accordingly. |
| 620 | */ |
| 621 | void |
| 622 | gatherstats(framep) |
| 623 | clockframe *framep; |
| 624 | { |
| 625 | register int cpstate, s; |
| 626 | |
| 627 | /* |
| 628 | * Determine what state the cpu is in. |
| 629 | */ |
| 630 | if (CLKF_USERMODE(framep)) { |
| 631 | /* |
| 632 | * CPU was in user state. |
| 633 | */ |
| 634 | if (curproc->p_nice > NZERO) |
| 635 | cpstate = CP_NICE; |
| 636 | else |
| 637 | cpstate = CP_USER; |
| 638 | } else { |
| 639 | /* |
| 640 | * CPU was in system state. If profiling kernel |
| 641 | * increment a counter. If no process is running |
| 642 | * then this is a system tick if we were running |
| 643 | * at a non-zero IPL (in a driver). If a process is running, |
| 644 | * then we charge it with system time even if we were |
| 645 | * at a non-zero IPL, since the system often runs |
| 646 | * this way during processing of system calls. |
| 647 | * This is approximate, but the lack of true interval |
| 648 | * timers makes doing anything else difficult. |
| 649 | */ |
| 650 | cpstate = CP_SYS; |
| 651 | if (curproc == NULL && CLKF_BASEPRI(framep)) |
| 652 | cpstate = CP_IDLE; |
| 653 | #ifdef GPROF |
| 654 | s = (u_long) CLKF_PC(framep) - (u_long) s_lowpc; |
| 655 | if (profiling < 2 && s < s_textsize) |
| 656 | kcount[s / (HISTFRACTION * sizeof (*kcount))]++; |
| 657 | #endif |
| 658 | } |
| 659 | /* |
| 660 | * We maintain statistics shown by user-level statistics |
| 661 | * programs: the amount of time in each cpu state, and |
| 662 | * the amount of time each of DK_NDRIVE ``drives'' is busy. |
| 663 | */ |
| 664 | cp_time[cpstate]++; |
| 665 | for (s = 0; s < DK_NDRIVE; s++) |
| 666 | if (dk_busy&(1<<s)) |
| 667 | dk_time[s]++; |
| 668 | } |
| 669 | |
| 670 | /* |
| 671 | * Software priority level clock interrupt. |
| 672 | * Run periodic events from timeout queue. |
| 673 | */ |
| 674 | /*ARGSUSED*/ |
| 675 | void |
| 676 | softclock(frame) |
| 677 | clockframe frame; |
| 678 | { |
| 679 | |
| 680 | for (;;) { |
| 681 | register struct callout *p1; |
| 682 | register caddr_t arg; |
| 683 | register timeout_func_t func; |
| 684 | register int a, s; |
| 685 | |
| 686 | s = splhigh(); |
| 687 | if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) { |
| 688 | splx(s); |
| 689 | break; |
| 690 | } |
| 691 | arg = p1->c_arg; func = p1->c_func; a = p1->c_time; |
| 692 | calltodo.c_next = p1->c_next; |
| 693 | p1->c_next = callfree; |
| 694 | callfree = p1; |
| 695 | splx(s); |
| 696 | (*func)(arg, a); |
| 697 | } |
| 698 | |
| 699 | /* |
| 700 | * If no process to work with, we're finished. |
| 701 | */ |
| 702 | if (curproc == 0) return; |
| 703 | |
| 704 | /* |
| 705 | * If trapped user-mode and profiling, give it |
| 706 | * a profiling tick. |
| 707 | */ |
| 708 | if (CLKF_USERMODE(&frame)) { |
| 709 | register struct proc *p = curproc; |
| 710 | |
| 711 | if (p->p_stats->p_prof.pr_scale) |
| 712 | profile_tick(p, &frame); |
| 713 | /* |
| 714 | * Check to see if process has accumulated |
| 715 | * more than 10 minutes of user time. If so |
| 716 | * reduce priority to give others a chance. |
| 717 | */ |
| 718 | if (p->p_ucred->cr_uid && p->p_nice == NZERO && |
| 719 | p->p_utime.tv_sec > 10 * 60) { |
| 720 | p->p_nice = NZERO + 4; |
| 721 | setpri(p); |
| 722 | p->p_pri = p->p_usrpri; |
| 723 | } |
| 724 | } |
| 725 | } |
| 726 | |
| 727 | /* |
| 728 | * Arrange that (*func)(arg) is called in t/hz seconds. |
| 729 | */ |
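/*
 * Illustrative usage, with a hypothetical driver routine and softc
 * pointer (not part of this file):
 *
 *	timeout(mydev_watchdog, (caddr_t)sc, 2 * hz);
 *
 * arranges for mydev_watchdog(sc, ...) to run about two seconds from
 * now, and
 *
 *	untimeout(mydev_watchdog, (caddr_t)sc);
 *
 * cancels the request if it has not yet fired.
 */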
| 730 | void |
| 731 | timeout(func, arg, t) |
| 732 | timeout_func_t func; |
| 733 | caddr_t arg; |
| 734 | register int t; |
| 735 | { |
| 736 | register struct callout *p1, *p2, *pnew; |
| 737 | register int s = splhigh(); |
| 738 | |
| 739 | if (t <= 0) |
| 740 | t = 1; |
| 741 | pnew = callfree; |
| 742 | if (pnew == NULL) |
| 743 | panic("timeout table overflow"); |
| 744 | callfree = pnew->c_next; |
| 745 | pnew->c_arg = arg; |
| 746 | pnew->c_func = func; |
| 747 | for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) |
| 748 | if (p2->c_time > 0) |
| 749 | t -= p2->c_time; |
| 750 | p1->c_next = pnew; |
| 751 | pnew->c_next = p2; |
| 752 | pnew->c_time = t; |
| 753 | if (p2) |
| 754 | p2->c_time -= t; |
| 755 | splx(s); |
| 756 | } |
| 757 | |
| 758 | /* |
| 759 | * untimeout is called to remove a function timeout call |
| 760 | * from the callout structure. |
| 761 | */ |
| 762 | void |
| 763 | untimeout(func, arg) |
| 764 | timeout_func_t func; |
| 765 | caddr_t arg; |
| 766 | { |
| 767 | register struct callout *p1, *p2; |
| 768 | register int s; |
| 769 | |
| 770 | s = splhigh(); |
| 771 | for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) { |
| 772 | if (p2->c_func == func && p2->c_arg == arg) { |
| 773 | if (p2->c_next && p2->c_time > 0) |
| 774 | p2->c_next->c_time += p2->c_time; |
| 775 | p1->c_next = p2->c_next; |
| 776 | p2->c_next = callfree; |
| 777 | callfree = p2; |
| 778 | break; |
| 779 | } |
| 780 | } |
| 781 | splx(s); |
| 782 | } |
| 783 | |
| 784 | /* |
| 785 | * Compute number of hz until specified time. |
| 786 | * Used to compute third argument to timeout() from an |
| 787 | * absolute time. |
| 788 | */ |
| 789 | |
| 790 | /* XXX clock_t */ |
| 791 | u_long |
| 792 | hzto(tv) |
| 793 | struct timeval *tv; |
| 794 | { |
| 795 | register unsigned long ticks; |
| 796 | register long sec; |
| 797 | register long usec; |
| 798 | int s; |
| 799 | |
| 800 | /* |
| 801 | * If the number of usecs in the whole seconds part of the time |
| 802 | * difference fits in a long, then the total number of usecs will |
| 803 | * fit in an unsigned long. Compute the total and convert it to |
| 804 | * ticks, rounding up and adding 1 to allow for the current tick |
| 805 | * to expire. Rounding also depends on unsigned long arithmetic |
| 806 | * to avoid overflow. |
| 807 | * |
| 808 | * Otherwise, if the number of ticks in the whole seconds part of |
| 809 | * the time difference fits in a long, then convert the parts to |
| 810 | * ticks separately and add, using similar rounding methods and |
| 811 | * overflow avoidance. This method would work in the previous |
| 812 | * case but it is slightly slower and assumes that hz is integral. |
| 813 | * |
| 814 | * Otherwise, round the time difference down to the maximum |
| 815 | * representable value. |
| 816 | * |
| 817 | * Maximum value for any timeout in 10ms ticks is 248 days. |
| 818 | */ |
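	/*
	 * Worked example: with hz = 100 (tick = 10000 us), a target
	 * 1.5 seconds ahead gives sec = 1, usec = 500000, so
	 * ticks = (1500000 + 9999) / 10000 + 1 = 151, i.e. the rounded-up
	 * tick count plus one for the partially elapsed current tick.
	 */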
| 819 | s = splhigh(); |
| 820 | sec = tv->tv_sec - time.tv_sec; |
| 821 | usec = tv->tv_usec - time.tv_usec; |
| 822 | splx(s); |
| 823 | if (usec < 0) { |
| 824 | sec--; |
| 825 | usec += 1000000; |
| 826 | } |
| 827 | if (sec < 0) { |
| 828 | #ifdef DIAGNOSTIC |
| 829 | printf("hzto: negative time difference %ld sec %ld usec\n", |
| 830 | sec, usec); |
| 831 | #endif |
| 832 | ticks = 1; |
| 833 | } else if (sec <= LONG_MAX / 1000000) |
| 834 | ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) |
| 835 | / tick + 1; |
| 836 | else if (sec <= LONG_MAX / hz) |
| 837 | ticks = sec * hz |
| 838 | + ((unsigned long)usec + (tick - 1)) / tick + 1; |
| 839 | else |
| 840 | ticks = LONG_MAX; |
| 841 | #define CLOCK_T_MAX INT_MAX /* XXX should be ULONG_MAX */ |
| 842 | if (ticks > CLOCK_T_MAX) |
| 843 | ticks = CLOCK_T_MAX; |
| 844 | return (ticks); |
| 845 | } |
| 846 | |
| 847 | #ifdef PPS_SYNC |
| 848 | /* |
| 849 | * hardpps() - discipline CPU clock oscillator to external pps signal |
| 850 | * |
| 851 | * This routine is called at each PPS interrupt in order to discipline |
| 852 | * the CPU clock oscillator to the PPS signal. It integrates successive |
| 853 | * phase differences between the two oscillators and calculates the |
| 854 | * frequency offset. This is used in hardclock() to discipline the CPU |
| 855 | * clock oscillator so that intrinsic frequency error is cancelled out. |
| 856 | * The code requires the caller to capture the time and hardware |
| 857 | * counter value at the designated PPS signal transition. |
| 858 | */ |
| 859 | void |
| 860 | hardpps(tvp, usec) |
| 861 | struct timeval *tvp; /* time at PPS */ |
| 862 | long usec; /* hardware counter at PPS */ |
| 863 | { |
| 864 | long u_usec, v_usec, bigtick; |
| 865 | long cal_sec, cal_usec; |
| 866 | |
| 867 | /* |
| 868 | * During the calibration interval adjust the starting time when |
| 869 | * the tick overflows. At the end of the interval compute the |
| 870 | * duration of the interval and the difference of the hardware |
| 871 | * counters at the beginning and end of the interval. This code |
| 872 | * is deliciously complicated by the fact valid differences may |
| 873 | * exceed the value of tick when using long calibration |
| 874 | * intervals and small ticks. Note that the counter can be |
| 875 | * greater than tick if caught at just the wrong instant, but |
| 876 | * the values returned and used here are correct. |
| 877 | */ |
| 878 | bigtick = (long)tick << SHIFT_USEC; |
| 879 | pps_usec -= ntp_pll.ybar; |
| 880 | if (pps_usec >= bigtick) |
| 881 | pps_usec -= bigtick; |
| 882 | if (pps_usec < 0) |
| 883 | pps_usec += bigtick; |
| 884 | pps_time.tv_sec++; |
| 885 | pps_count++; |
	if (pps_count < (1 << ntp_pll.shift))
| 887 | return; |
| 888 | pps_count = 0; |
| 889 | ntp_pll.calcnt++; |
| 890 | u_usec = usec << SHIFT_USEC; |
| 891 | v_usec = pps_usec - u_usec; |
| 892 | if (v_usec >= bigtick >> 1) |
| 893 | v_usec -= bigtick; |
| 894 | if (v_usec < -(bigtick >> 1)) |
| 895 | v_usec += bigtick; |
| 896 | if (v_usec < 0) |
| 897 | v_usec = -(-v_usec >> ntp_pll.shift); |
| 898 | else |
| 899 | v_usec = v_usec >> ntp_pll.shift; |
| 900 | pps_usec = u_usec; |
| 901 | cal_sec = tvp->tv_sec; |
| 902 | cal_usec = tvp->tv_usec; |
| 903 | cal_sec -= pps_time.tv_sec; |
| 904 | cal_usec -= pps_time.tv_usec; |
| 905 | if (cal_usec < 0) { |
| 906 | cal_usec += 1000000; |
| 907 | cal_sec--; |
| 908 | } |
| 909 | pps_time = *tvp; |
| 910 | |
| 911 | /* |
| 912 | * Check for lost interrupts, noise, excessive jitter and |
| 913 | * excessive frequency error. The number of timer ticks during |
| 914 | * the interval may vary +-1 tick. Add to this a margin of one |
| 915 | * tick for the PPS signal jitter and maximum frequency |
| 916 | * deviation. If the limits are exceeded, the calibration |
| 917 | * interval is reset to the minimum and we start over. |
| 918 | */ |
| 919 | u_usec = (long)tick << 1; |
| 920 | if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec)) |
| 921 | || (cal_sec == 0 && cal_usec < u_usec)) |
| 922 | || v_usec > ntp_pll.tolerance || v_usec < -ntp_pll.tolerance) { |
| 923 | ntp_pll.jitcnt++; |
		ntp_pll.shift = PPS_SHIFT;
| 925 | pps_dispinc = PPS_DISPINC; |
| 926 | ntp_pll.intcnt = 0; |
| 927 | return; |
| 928 | } |
| 929 | |
| 930 | /* |
| 931 | * A three-stage median filter is used to help deglitch the pps |
| 932 | * signal. The median sample becomes the offset estimate; the |
| 933 | * difference between the other two samples becomes the |
| 934 | * dispersion estimate. |
| 935 | */ |
| 936 | pps_mf[2] = pps_mf[1]; |
| 937 | pps_mf[1] = pps_mf[0]; |
| 938 | pps_mf[0] = v_usec; |
| 939 | if (pps_mf[0] > pps_mf[1]) { |
| 940 | if (pps_mf[1] > pps_mf[2]) { |
| 941 | u_usec = pps_mf[1]; /* 0 1 2 */ |
| 942 | v_usec = pps_mf[0] - pps_mf[2]; |
| 943 | } else if (pps_mf[2] > pps_mf[0]) { |
| 944 | u_usec = pps_mf[0]; /* 2 0 1 */ |
| 945 | v_usec = pps_mf[2] - pps_mf[1]; |
| 946 | } else { |
| 947 | u_usec = pps_mf[2]; /* 0 2 1 */ |
| 948 | v_usec = pps_mf[0] - pps_mf[1]; |
| 949 | } |
| 950 | } else { |
| 951 | if (pps_mf[1] < pps_mf[2]) { |
| 952 | u_usec = pps_mf[1]; /* 2 1 0 */ |
| 953 | v_usec = pps_mf[2] - pps_mf[0]; |
| 954 | } else if (pps_mf[2] < pps_mf[0]) { |
| 955 | u_usec = pps_mf[0]; /* 1 0 2 */ |
| 956 | v_usec = pps_mf[1] - pps_mf[2]; |
| 957 | } else { |
| 958 | u_usec = pps_mf[2]; /* 1 2 0 */ |
| 959 | v_usec = pps_mf[1] - pps_mf[0]; |
| 960 | } |
| 961 | } |
| 962 | |
| 963 | /* |
| 964 | * Here the dispersion average is updated. If it is less than |
| 965 | * the threshold pps_dispmax, the frequency average is updated |
| 966 | * as well, but clamped to the tolerance. |
| 967 | */ |
| 968 | v_usec = (v_usec >> 1) - ntp_pll.disp; |
| 969 | if (v_usec < 0) |
| 970 | ntp_pll.disp -= -v_usec >> PPS_AVG; |
| 971 | else |
| 972 | ntp_pll.disp += v_usec >> PPS_AVG; |
| 973 | if (ntp_pll.disp > pps_dispmax) { |
| 974 | ntp_pll.discnt++; |
| 975 | return; |
| 976 | } |
| 977 | if (u_usec < 0) { |
| 978 | ntp_pll.ybar -= -u_usec >> PPS_AVG; |
| 979 | if (ntp_pll.ybar < -ntp_pll.tolerance) |
| 980 | ntp_pll.ybar = -ntp_pll.tolerance; |
| 981 | u_usec = -u_usec; |
| 982 | } else { |
| 983 | ntp_pll.ybar += u_usec >> PPS_AVG; |
| 984 | if (ntp_pll.ybar > ntp_pll.tolerance) |
| 985 | ntp_pll.ybar = ntp_pll.tolerance; |
| 986 | } |
| 987 | |
| 988 | /* |
| 989 | * Here the calibration interval is adjusted. If the maximum |
| 990 | * time difference is greater than tick/4, reduce the interval |
| 991 | * by half. If this is not the case for four consecutive |
| 992 | * intervals, double the interval. |
| 993 | */ |
| 994 | if (u_usec << ntp_pll.shift > bigtick >> 2) { |
| 995 | ntp_pll.intcnt = 0; |
		if (ntp_pll.shift > PPS_SHIFT) {
| 997 | ntp_pll.shift--; |
| 998 | pps_dispinc <<= 1; |
| 999 | } |
| 1000 | } else if (ntp_pll.intcnt >= 4) { |
| 1001 | ntp_pll.intcnt = 0; |
		if (ntp_pll.shift < PPS_SHIFTMAX) {
| 1003 | ntp_pll.shift++; |
| 1004 | pps_dispinc >>= 1; |
| 1005 | } |
| 1006 | } else |
| 1007 | ntp_pll.intcnt++; |
| 1008 | } |
| 1009 | #endif /* PPS_SYNC */ |