| 1 | /*- |
| 2 | * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions |
| 7 | * are met: |
| 8 | * 1. Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer in the |
| 12 | * documentation and/or other materials provided with the distribution. |
| 13 | * 3. All advertising materials mentioning features or use of this software |
| 14 | * must display the following acknowledgement: |
| 15 | * This product includes software developed by the University of |
| 16 | * California, Berkeley and its contributors. |
| 17 | * 4. Neither the name of the University nor the names of its contributors |
| 18 | * may be used to endorse or promote products derived from this software |
| 19 | * without specific prior written permission. |
| 20 | * |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 31 | * SUCH DAMAGE. |
| 32 | * |
| 33 | * from: @(#)kern_clock.c 7.16 (Berkeley) 5/9/91 |
| 34 | * $Id: kern_clock.c,v 1.12 1994/03/01 23:21:44 phk Exp $ |
| 35 | */ |
| 36 | |
| 37 | /* Portions of this software are covered by the following: */ |
| 38 | /****************************************************************************** |
| 39 | * * |
| 40 | * Copyright (c) David L. Mills 1993, 1994 * |
| 41 | * * |
| 42 | * Permission to use, copy, modify, and distribute this software and its * |
| 43 | * documentation for any purpose and without fee is hereby granted, provided * |
| 44 | * that the above copyright notice appears in all copies and that both the * |
| 45 | * copyright notice and this permission notice appear in supporting * |
| 46 | * documentation, and that the name University of Delaware not be used in * |
| 47 | * advertising or publicity pertaining to distribution of the software * |
| 48 | * without specific, written prior permission. The University of Delaware * |
| 49 | * makes no representations about the suitability this software for any * |
| 50 | * purpose. It is provided "as is" without express or implied warranty. * |
| 51 | * * |
| 52 | *****************************************************************************/ |
| 53 | |
| 54 | |
| 55 | #include "param.h" |
| 56 | #include "systm.h" |
| 57 | #include "dkstat.h" |
| 58 | #include "callout.h" |
| 59 | #include "kernel.h" |
| 60 | #include "proc.h" |
| 61 | #include "signalvar.h" |
| 62 | #include "resourcevar.h" |
| 63 | #include "timex.h" |
| 64 | |
| 65 | #include "machine/cpu.h" |
| 66 | |
| 67 | #include "resource.h" |
| 68 | #include "vm/vm.h" |
| 69 | |
| 70 | #ifdef GPROF |
| 71 | #include "gprof.h" |
| 72 | #endif |
| 73 | |
| 74 | static void gatherstats(clockframe *); |
| 75 | |
| 76 | /* From callout.h */ |
| 77 | struct callout *callfree, *callout, calltodo; |
| 78 | int ncallout; |
| 79 | |
| 80 | /* |
| 81 | * Clock handling routines. |
| 82 | * |
| 83 | * This code is written to operate with two timers which run |
| 84 | * independently of each other. The main clock, running at hz |
| 85 | * times per second, is used to do scheduling and timeout calculations. |
| 86 | * The second timer does resource utilization estimation statistically |
| 87 | * based on the state of the machine phz times a second. Both functions |
 * can be performed by a single clock (i.e., hz == phz); however, the
| 89 | * statistics will be much more prone to errors. Ideally a machine |
| 90 | * would have separate clocks measuring time spent in user state, system |
| 91 | * state, interrupt state, and idle state. These clocks would allow a non- |
| 92 | * approximate measure of resource utilization. |
| 93 | */ |
| 94 | |
| 95 | /* |
| 96 | * TODO: |
| 97 | * time of day, system/user timing, timeouts, profiling on separate timers |
| 98 | * allocate more timeout table slots when table overflows. |
| 99 | */ |
| 100 | |
| 101 | /* |
| 102 | * Bump a timeval by a small number of usec's. |
| 103 | */ |
| 104 | #define BUMPTIME(t, usec) { \ |
| 105 | register struct timeval *tp = (t); \ |
| 106 | \ |
| 107 | tp->tv_usec += (usec); \ |
| 108 | if (tp->tv_usec >= 1000000) { \ |
| 109 | tp->tv_usec -= 1000000; \ |
| 110 | tp->tv_sec++; \ |
| 111 | } \ |
| 112 | } |
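/*
 * For example, BUMPTIME(&p->p_utime, tick) in hardclock() below charges
 * one clock tick (tick microseconds) to the current process's user time,
 * carrying any overflow of tv_usec into tv_sec.
 */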
| 113 | |
| 114 | /* |
| 115 | * Phase-lock loop (PLL) definitions |
| 116 | * |
| 117 | * The following defines establish the performance envelope of the PLL. |
| 118 | * They specify the maximum phase error (MAXPHASE), maximum frequency |
| 119 | * error (MAXFREQ), minimum interval between updates (MINSEC) and |
| 120 | * maximum interval between updates (MAXSEC). The intent of these bounds |
| 121 | * is to force the PLL to operate within predefined limits in order to |
| 122 | * satisfy correctness assertions. An excursion which exceeds these |
| 123 | * bounds is clamped to the bound and operation proceeds accordingly. In |
| 124 | * practice, this can occur only if something has failed or is operating |
| 125 | * out of tolerance, but otherwise the PLL continues to operate in a |
| 126 | * stable mode. |
| 127 | * |
| 128 | * MAXPHASE must be set greater than or equal to CLOCK.MAX (128 ms), as |
| 129 | * defined in the NTP specification. CLOCK.MAX establishes the maximum |
| 130 | * time offset allowed before the system time is reset, rather than |
| 131 | * incrementally adjusted. Here, the maximum offset is clamped to |
| 132 | * MAXPHASE only in order to prevent overflow errors due to defective |
| 133 | * protocol implementations. |
| 134 | * |
| 135 | * MAXFREQ reflects the manufacturing frequency tolerance of the CPU |
| 136 | * clock oscillator plus the maximum slew rate allowed by the protocol. |
| 137 | * It should be set to at least the frequency tolerance of the |
| 138 | * oscillator plus 100 ppm for vernier frequency adjustments. If the |
| 139 | * kernel frequency discipline code is installed (PPS_SYNC), the CPU |
| 140 | * oscillator frequency is disciplined to an external source, presumably |
| 141 | * with negligible frequency error, and MAXFREQ can be reduced. |
| 142 | */ |
| 143 | #define MAXPHASE 512000L /* max phase error (us) */ |
| 144 | #ifdef PPS_SYNC |
| 145 | #define MAXFREQ (100L << SHIFT_USEC) /* max freq error (scaled ppm) */ |
| 146 | #else |
| 147 | #define MAXFREQ (200L << SHIFT_USEC) /* max freq error (scaled ppm) */ |
| 148 | #endif /* PPS_SYNC */ |
| 149 | #define MINSEC 16L /* min interval between updates (s) */ |
| 150 | #define MAXSEC 1200L /* max interval between updates (s) */ |
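/*
 * Illustrative note on units: frequency quantities such as MAXFREQ and
 * ntp_pll.tolerance are kept in "scaled ppm", i.e. parts per million
 * shifted left by SHIFT_USEC from timex.h.  Assuming the usual value
 * SHIFT_USEC = 16, a 200 ppm bound is stored as 200 << 16 = 13107200,
 * and hardclock() converts back to ppm (us/s) with a right shift by
 * SHIFT_USEC on each second rollover.
 */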
| 151 | |
| 152 | /* |
| 153 | * The following variables are read and set by the ntp_adjtime() system |
| 154 | * call. The ntp_pll.status variable defines the synchronization status of |
| 155 | * the system clock, with codes defined in the timex.h header file. The |
| 156 | * time_offset variable is used by the PLL to adjust the system time in |
| 157 | * small increments. The time_constant variable determines the bandwidth |
| 158 | * or "stiffness" of the PLL. The time_tolerance variable is the maximum |
| 159 | * frequency error or tolerance of the CPU clock oscillator and is a |
| 160 | * property of the architecture; however, in principle it could change |
 * as a result of the presence of external discipline signals, for
| 162 | * instance. The time_precision variable is usually equal to the kernel |
| 163 | * tick variable; however, in cases where a precision clock counter or |
| 164 | * external clock is available, the resolution can be much less than |
| 165 | * this and depend on whether the external clock is working or not. The |
 * time_maxerror variable is initialized by an ntp_adjtime() call and
| 167 | * increased by the kernel once each second to reflect the maximum error |
| 168 | * bound growth. The time_esterror variable is set and read by the |
| 169 | * ntp_adjtime() call, but otherwise not used by the kernel. |
| 170 | */ |
| 171 | /* - use appropriate fields in ntp_pll instead */ |
| 172 | #if 0 |
| 173 | int ntp_pll.status = TIME_BAD; /* clock synchronization status */ |
| 174 | long time_offset = 0; /* time adjustment (us) */ |
| 175 | long time_constant = 0; /* pll time constant */ |
| 176 | long time_tolerance = MAXFREQ; /* frequency tolerance (scaled ppm) */ |
| 177 | long time_precision = 1; /* clock precision (us) */ |
| 178 | long time_maxerror = MAXPHASE; /* maximum error (us) */ |
| 179 | long time_esterror = MAXPHASE; /* estimated error (us) */ |
| 180 | #endif |
| 181 | |
| 182 | /* |
| 183 | * The following variables establish the state of the PLL and the |
| 184 | * residual time and frequency offset of the local clock. The time_phase |
| 185 | * variable is the phase increment and the ntp_pll.frequency variable is the |
| 186 | * frequency increment of the kernel time variable at each tick of the |
| 187 | * clock. The ntp_pll.frequency variable is set via ntp_adjtime() from a value |
| 188 | * stored in a file when the synchronization daemon is first started. |
| 189 | * Its value is retrieved via ntp_adjtime() and written to the file |
| 190 | * about once per hour by the daemon. The time_adj variable is the |
| 191 | * adjustment added to the value of tick at each timer interrupt and is |
 * recomputed once each second. The time_reftime variable is the
 * seconds portion of the system time on the last call to
| 194 | * ntp_adjtime(). It is used to adjust the ntp_pll.frequency variable and to |
| 195 | * increase the time_maxerror as the time since last update increases. |
| 196 | * The scale factors are defined in the timex.h header file. |
| 197 | */ |
| 198 | long time_phase = 0; /* phase offset (scaled us) */ |
| 199 | #if 0 |
| 200 | long ntp_pll.frequency = 0; /* frequency offset (scaled ppm) */ |
| 201 | #endif |
| 202 | long time_adj = 0; /* tick adjust (scaled 1 / hz) */ |
| 203 | long time_reftime; /* time at last adjustment (s) */ |
| 204 | |
| 205 | #ifdef PPS_SYNC |
| 206 | /* |
| 207 | * The following defines and declarations are used only if a pulse-per- |
| 208 | * second (PPS) signal is available and connected via a modem control |
| 209 | * lead, such as produced by the optional ppsclock feature incorporated |
| 210 | * in the asynch driver. They establish the design parameters of the PPS |
| 211 | * frequency-lock loop used to discipline the CPU clock oscillator to |
| 212 | * the PPS signal. PPS_AVG is the averaging factor for the frequency |
| 213 | * loop. PPS_SHIFT and PPS_SHIFTMAX specify the minimum and maximum |
 * intervals, respectively, in seconds as powers of two. PPS_DISPINC is
 * the initial increment added to pps_disp each second.
| 216 | */ |
| 217 | #define PPS_AVG 2 /* pps averaging constant (shift) */ |
| 218 | #define PPS_SHIFT 2 /* min interval duration (s) (shift) */ |
| 219 | #define PPS_SHIFTMAX 8 /* max interval duration (s) (shift) */ |
| 220 | #define PPS_DISPINC 0L /* dispersion increment (us/s) */ |
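/*
 * With the values above, the calibration interval starts at
 * 1 << PPS_SHIFT = 4 seconds, and the interval-adaptation code in
 * hardpps() may stretch it up to 1 << PPS_SHIFTMAX = 256 seconds.
 */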
| 221 | |
| 222 | /* |
| 223 | * The pps_time variable contains the time at each calibration as read |
| 224 | * by microtime(). The pps_usec variable is latched from a high |
| 225 | * resolution counter or external clock at pps_time. Here we want the |
| 226 | * hardware counter contents only, not the contents plus the |
 * time.tv_usec as usual. The pps_ybar variable is the current CPU
| 228 | * oscillator frequency offset estimate relative to the PPS signal. The |
| 229 | * pps_disp variable is the current error estimate, which is increased |
 * by pps_dispinc once each second. Frequency updates are permitted only
 * when pps_disp is below the pps_dispmax threshold. The pps_mf[] array
| 232 | * is used as a median filter for the frequency estimate and to derive |
| 233 | * the error estimate. |
| 234 | */ |
| 235 | struct timeval pps_time; /* kernel time at last interval */ |
| 236 | long pps_usec = 0; /* usec counter at last interval */ |
| 237 | #if 0 |
| 238 | long pps_ybar = 0; /* frequency estimate (scaled ppm) */ |
| 239 | long pps_disp = MAXFREQ; /* dispersion estimate (scaled ppm) */ |
| 240 | #endif |
| 241 | long pps_dispmax = MAXFREQ / 2; /* dispersion threshold */ |
| 242 | long pps_dispinc = PPS_DISPINC; /* pps dispersion increment/sec */ |
| 243 | long pps_mf[] = {0, 0, 0}; /* pps median filter */ |
| 244 | |
| 245 | /* |
| 246 | * The pps_count variable counts the seconds of the calibration |
| 247 | * interval, the duration of which is pps_shift (s) in powers of two. |
| 248 | * The pps_intcnt variable counts the calibration intervals for use in |
| 249 | * the interval-adaptation algorithm. It's just too complicated for |
| 250 | * words. |
| 251 | */ |
| 252 | int pps_count = 0; /* calibration interval counter (s) */ |
| 253 | #if 0 |
| 254 | int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */ |
| 255 | #endif |
| 256 | int pps_intcnt = 0; /* intervals at current duration */ |
| 257 | |
| 258 | /* |
| 259 | * PPS signal quality monitors |
| 260 | */ |
| 261 | #if 0 |
| 262 | long pps_calcnt; /* calibration intervals */ |
| 263 | long pps_jitcnt; /* jitter limit exceeded */ |
| 264 | long pps_discnt; /* dispersion limit exceeded */ |
| 265 | #endif |
| 266 | #endif /* PPS_SYNC */ |
| 267 | |
| 268 | struct timex ntp_pll = { |
| 269 | 0, /* mode */ |
| 270 | 0, /* offset */ |
| 271 | 0, /* frequency */ |
| 272 | MAXPHASE, /* maxerror */ |
| 273 | MAXPHASE, /* esterror */ |
| 274 | TIME_BAD, /* status */ |
| 275 | 0, /* time_constant */ |
| 276 | 1, /* precision */ |
| 277 | MAXFREQ, /* tolerance */ |
| 278 | 0, /* ybar */ |
| 279 | #ifdef PPS_SYNC |
| 280 | MAXFREQ, /* disp */ |
| 281 | PPS_SHIFT, /* shift */ |
| 282 | 0, /* calcnt */ |
| 283 | 0, /* jitcnt */ |
| 284 | 0 /* discnt */ |
| 285 | #endif |
| 286 | }; |
| 287 | |
| 288 | /* |
| 289 | * hardupdate() - local clock update |
| 290 | * |
| 291 | * This routine is called by ntp_adjtime() to update the local clock |
| 292 | * phase and frequency. This is used to implement an adaptive-parameter, |
| 293 | * first-order, type-II phase-lock loop. The code computes the time |
| 294 | * since the last update and clamps to a maximum (for robustness). Then |
| 295 | * it multiplies by the offset (sorry about the ugly multiply), scales |
| 296 | * by the time constant, and adds to the frequency variable. Then, it |
| 297 | * computes the phase variable as the offset scaled by the time |
| 298 | * constant. Note that all shifts are assumed to be positive. Only |
| 299 | * enough error checking is done to prevent bizarre behavior due to |
| 300 | * overflow problems. |
| 301 | * |
| 302 | * For default SHIFT_UPDATE = 12, the offset is limited to +-512 ms, the |
| 303 | * maximum interval between updates is 4096 s and the maximum frequency |
| 304 | * offset is +-31.25 ms/s. |
| 305 | */ |
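/*
 * As an illustrative restatement of the code below, using the timex.h
 * scale factors:
 *
 *	offset'    = clamp(offset, +-MAXPHASE) << SHIFT_UPDATE
 *	frequency' = clamp(frequency + (offset * mtemp) >>
 *	             (2 * time_constant + SHIFT_KF - SHIFT_USEC),
 *	             +-tolerance)
 *
 * where mtemp is the number of seconds since the previous update,
 * zeroed if it exceeds MAXSEC.
 */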
| 306 | void |
| 307 | hardupdate(offset) |
| 308 | long offset; |
| 309 | { |
| 310 | long mtemp; |
| 311 | |
| 312 | if (offset > MAXPHASE) |
| 313 | ntp_pll.offset = MAXPHASE << SHIFT_UPDATE; |
| 314 | else if (offset < -MAXPHASE) |
| 315 | ntp_pll.offset = -(MAXPHASE << SHIFT_UPDATE); |
| 316 | else |
| 317 | ntp_pll.offset = offset << SHIFT_UPDATE; |
| 318 | mtemp = time.tv_sec - time_reftime; |
| 319 | time_reftime = time.tv_sec; |
| 320 | if (mtemp > MAXSEC) |
| 321 | mtemp = 0; |
| 322 | |
| 323 | /* ugly multiply should be replaced */ |
| 324 | if (offset < 0) |
| 325 | ntp_pll.frequency -= |
| 326 | (-offset * mtemp) >> (ntp_pll.time_constant |
| 327 | + ntp_pll.time_constant |
| 328 | + SHIFT_KF |
| 329 | - SHIFT_USEC); |
| 330 | else |
| 331 | ntp_pll.frequency += |
| 332 | (offset * mtemp) >> (ntp_pll.time_constant |
| 333 | + ntp_pll.time_constant |
| 334 | + SHIFT_KF |
| 335 | - SHIFT_USEC); |
| 336 | if (ntp_pll.frequency > ntp_pll.tolerance) |
| 337 | ntp_pll.frequency = ntp_pll.tolerance; |
| 338 | else if (ntp_pll.frequency < -ntp_pll.tolerance) |
| 339 | ntp_pll.frequency = -ntp_pll.tolerance; |
| 340 | if (ntp_pll.status == TIME_BAD) |
| 341 | ntp_pll.status = TIME_OK; |
| 342 | } |
| 343 | |
| 344 | /* |
| 345 | * The hz hardware interval timer. |
| 346 | * We update the events relating to real time. |
| 347 | * If this timer is also being used to gather statistics, |
| 348 | * we run through the statistics gathering routine as well. |
| 349 | */ |
| 350 | void |
| 351 | hardclock(frame) |
| 352 | clockframe frame; |
| 353 | { |
| 354 | register struct callout *p1; |
| 355 | register struct proc *p = curproc; |
| 356 | register struct pstats *pstats = 0; |
| 357 | register struct rusage *ru; |
| 358 | register struct vmspace *vm; |
| 359 | register int s; |
| 360 | int needsoft = 0; |
| 361 | extern int tickdelta; |
| 362 | extern long timedelta; |
| 363 | long ltemp, time_update = 0; |
| 364 | |
| 365 | /* |
| 366 | * Update real-time timeout queue. |
| 367 | * At front of queue are some number of events which are ``due''. |
| 368 | * The time to these is <= 0 and if negative represents the |
| 369 | * number of ticks which have passed since it was supposed to happen. |
| 370 | * The rest of the q elements (times > 0) are events yet to happen, |
| 371 | * where the time for each is given as a delta from the previous. |
| 372 | * Decrementing just the first of these serves to decrement the time |
| 373 | * to all events. |
| 374 | */ |
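	/*
	 * For example, events due in 5, 8, 8 and 12 ticks are stored with
	 * c_time values 5, 3, 0 and 4; decrementing the leading 5 ages
	 * every event in the queue by one tick.
	 */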
| 375 | p1 = calltodo.c_next; |
| 376 | while (p1) { |
| 377 | if (--p1->c_time > 0) |
| 378 | break; |
| 379 | needsoft = 1; |
| 380 | if (p1->c_time == 0) |
| 381 | break; |
| 382 | p1 = p1->c_next; |
| 383 | } |
| 384 | |
| 385 | /* |
| 386 | * Curproc (now in p) is null if no process is running. |
| 387 | * We assume that curproc is set in user mode! |
| 388 | */ |
| 389 | if (p) |
| 390 | pstats = p->p_stats; |
| 391 | /* |
| 392 | * Charge the time out based on the mode the cpu is in. |
| 393 | * Here again we fudge for the lack of proper interval timers |
| 394 | * assuming that the current state has been around at least |
| 395 | * one tick. |
| 396 | */ |
| 397 | if (CLKF_USERMODE(&frame)) { |
| 398 | if (pstats->p_prof.pr_scale) |
| 399 | needsoft = 1; |
| 400 | /* |
| 401 | * CPU was in user state. Increment |
 * user time counter, and process the process-virtual time
| 403 | * interval timer. |
| 404 | */ |
| 405 | BUMPTIME(&p->p_utime, tick); |
| 406 | if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && |
| 407 | itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) |
| 408 | psignal(p, SIGVTALRM); |
| 409 | } else { |
| 410 | /* |
| 411 | * CPU was in system state. |
| 412 | */ |
| 413 | if (p) |
| 414 | BUMPTIME(&p->p_stime, tick); |
| 415 | } |
| 416 | |
| 417 | /* bump the resource usage of integral space use */ |
| 418 | if (p && pstats && (ru = &pstats->p_ru) && (vm = p->p_vmspace)) { |
| 419 | ru->ru_ixrss += vm->vm_tsize * NBPG / 1024; |
| 420 | ru->ru_idrss += vm->vm_dsize * NBPG / 1024; |
| 421 | ru->ru_isrss += vm->vm_ssize * NBPG / 1024; |
| 422 | if ((vm->vm_pmap.pm_stats.resident_count * NBPG / 1024) > |
| 423 | ru->ru_maxrss) { |
| 424 | ru->ru_maxrss = |
| 425 | vm->vm_pmap.pm_stats.resident_count * NBPG / 1024; |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | /* |
| 430 | * If the cpu is currently scheduled to a process, then |
| 431 | * charge it with resource utilization for a tick, updating |
| 432 | * statistics which run in (user+system) virtual time, |
| 433 | * such as the cpu time limit and profiling timers. |
| 434 | * This assumes that the current process has been running |
| 435 | * the entire last tick. |
| 436 | */ |
| 437 | if (p) { |
| 438 | if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) > |
| 439 | p->p_rlimit[RLIMIT_CPU].rlim_cur) { |
| 440 | psignal(p, SIGXCPU); |
| 441 | if (p->p_rlimit[RLIMIT_CPU].rlim_cur < |
| 442 | p->p_rlimit[RLIMIT_CPU].rlim_max) |
| 443 | p->p_rlimit[RLIMIT_CPU].rlim_cur += 5; |
| 444 | } |
| 445 | if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && |
| 446 | itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) |
| 447 | psignal(p, SIGPROF); |
| 448 | |
| 449 | /* |
| 450 | * We adjust the priority of the current process. |
| 451 | * The priority of a process gets worse as it accumulates |
| 452 | * CPU time. The cpu usage estimator (p_cpu) is increased here |
| 453 | * and the formula for computing priorities (in kern_synch.c) |
| 454 | * will compute a different value each time the p_cpu increases |
| 455 | * by 4. The cpu usage estimator ramps up quite quickly when |
| 456 | * the process is running (linearly), and decays away |
 * exponentially, at a rate which is proportionally slower
 * when the system is busy. The basic principle is that the
| 459 | * system will 90% forget that a process used a lot of CPU |
| 460 | * time in 5*loadav seconds. This causes the system to favor |
| 461 | * processes which haven't run much recently, and to |
| 462 | * round-robin among other processes. |
| 463 | */ |
| 464 | p->p_cpticks++; |
| 465 | if (++p->p_cpu == 0) |
| 466 | p->p_cpu--; |
| 467 | if ((p->p_cpu&3) == 0) { |
| 468 | setpri(p); |
| 469 | if (p->p_pri >= PUSER) |
| 470 | p->p_pri = p->p_usrpri; |
| 471 | } |
| 472 | } |
| 473 | |
| 474 | /* |
| 475 | * If the alternate clock has not made itself known then |
| 476 | * we must gather the statistics. |
| 477 | */ |
| 478 | if (phz == 0) |
| 479 | gatherstats(&frame); |
| 480 | |
| 481 | /* |
| 482 | * Increment the time-of-day, and schedule |
| 483 | * processing of the callouts at a very low cpu priority, |
| 484 | * so we don't keep the relatively high clock interrupt |
| 485 | * priority any longer than necessary. |
| 486 | */ |
| 487 | { |
| 488 | int delta; |
| 489 | if (timedelta == 0) { |
| 490 | delta = tick; |
| 491 | } else { |
| 492 | if (timedelta < 0) { |
| 493 | delta = tick - tickdelta; |
| 494 | timedelta += tickdelta; |
| 495 | } else { |
| 496 | delta = tick + tickdelta; |
| 497 | timedelta -= tickdelta; |
| 498 | } |
| 499 | } |
| 500 | /* |
| 501 | * Logic from ``Precision Time and Frequency Synchronization |
| 502 | * Using Modified Kernels'' by David L. Mills, University |
| 503 | * of Delaware. |
| 504 | */ |
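		/*
		 * time_phase accumulates the per-tick adjustment in
		 * microseconds scaled by SHIFT_SCALE; once its magnitude
		 * reaches FINEUSEC, the whole microseconds
		 * (time_phase >> SHIFT_SCALE) are transferred to
		 * time_update and applied to time.tv_usec below.
		 */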
| 505 | time_phase += time_adj; |
		if (time_phase <= -FINEUSEC) {
| 507 | ltemp = -time_phase >> SHIFT_SCALE; |
| 508 | time_phase += ltemp << SHIFT_SCALE; |
| 509 | time_update -= ltemp; |
		} else if (time_phase >= FINEUSEC) {
| 511 | ltemp = time_phase >> SHIFT_SCALE; |
| 512 | time_phase -= ltemp << SHIFT_SCALE; |
| 513 | time_update += ltemp; |
| 514 | } |
| 515 | |
| 516 | time.tv_usec += delta + time_update; |
| 517 | /* |
| 518 | * On rollover of the second the phase adjustment to be used for |
| 519 | * the next second is calculated. Also, the maximum error is |
| 520 | * increased by the tolerance. If the PPS frequency discipline |
| 521 | * code is present, the phase is increased to compensate for the |
| 522 | * CPU clock oscillator frequency error. |
| 523 | * |
| 524 | * With SHIFT_SCALE = 23, the maximum frequency adjustment is |
| 525 | * +-256 us per tick, or 25.6 ms/s at a clock frequency of 100 |
| 526 | * Hz. The time contribution is shifted right a minimum of two |
| 527 | * bits, while the frequency contribution is a right shift. |
| 528 | * Thus, overflow is prevented if the frequency contribution is |
| 529 | * limited to half the maximum or 15.625 ms/s. |
| 530 | */ |
| 531 | if (time.tv_usec >= 1000000) { |
| 532 | time.tv_usec -= 1000000; |
| 533 | time.tv_sec++; |
| 534 | ntp_pll.maxerror += ntp_pll.tolerance >> SHIFT_USEC; |
| 535 | if (ntp_pll.offset < 0) { |
| 536 | ltemp = -ntp_pll.offset >> |
| 537 | (SHIFT_KG + ntp_pll.time_constant); |
| 538 | ntp_pll.offset += ltemp; |
| 539 | time_adj = -ltemp << |
| 540 | (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); |
| 541 | } else { |
| 542 | ltemp = ntp_pll.offset >> |
| 543 | (SHIFT_KG + ntp_pll.time_constant); |
| 544 | ntp_pll.offset -= ltemp; |
| 545 | time_adj = ltemp << |
| 546 | (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); |
| 547 | } |
| 548 | #ifdef PPS_SYNC |
| 549 | /* |
| 550 | * Grow the pps error by pps_dispinc ppm and clamp to |
| 551 | * MAXFREQ. The hardpps() routine will pull it down as |
| 552 | * long as the PPS signal is good. |
| 553 | */ |
| 554 | ntp_pll.disp += pps_dispinc; |
| 555 | if (ntp_pll.disp > MAXFREQ) |
| 556 | ntp_pll.disp = MAXFREQ; |
| 557 | ltemp = ntp_pll.frequency + ntp_pll.ybar; |
| 558 | #else |
| 559 | ltemp = ntp_pll.frequency; |
| 560 | #endif /* PPS_SYNC */ |
| 561 | if (ltemp < 0) |
| 562 | time_adj -= -ltemp >> |
| 563 | (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); |
| 564 | else |
| 565 | time_adj += ltemp >> |
| 566 | (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); |
| 567 | #if 0 |
| 568 | time_adj += fixtick << (SHIFT_SCALE - SHIFT_HZ); |
| 569 | #endif |
| 570 | |
| 571 | /* |
| 572 | * When the CPU clock oscillator frequency is not a |
| 573 | * power of two in Hz, the SHIFT_HZ is only an |
| 574 | * approximate scale factor. In the SunOS kernel, this |
 * results in a PLL gain factor of 1/1.28 = 0.78 times what it
| 576 | * should be. In the following code the overall gain is |
| 577 | * increased by a factor of 1.25, which results in a |
| 578 | * residual error less than 3 percent. |
| 579 | */ |
| 580 | if (hz == 100) { |
| 581 | if (time_adj < 0) |
| 582 | time_adj -= -time_adj >> 2; |
| 583 | else |
| 584 | time_adj += time_adj >> 2; |
| 585 | } |
| 586 | } |
| 587 | } |
| 588 | |
| 589 | if (needsoft) { |
| 590 | #if 0 |
| 591 | /* |
| 592 | * XXX - hardclock runs at splhigh, so the splsoftclock is useless and |
| 593 | * softclock runs at splhigh as well if we do this. It is not much of |
| 594 | * an optimization, since the "software interrupt" is done with a call |
| 595 | * from doreti, and the overhead of checking there is sometimes less |
| 596 | * than checking here. Moreover, the whole %$$%$^ frame is passed by |
| 597 | * value here. |
| 598 | */ |
| 599 | if (CLKF_BASEPRI(&frame)) { |
| 600 | /* |
| 601 | * Save the overhead of a software interrupt; |
| 602 | * it will happen as soon as we return, so do it now. |
| 603 | */ |
| 604 | (void) splsoftclock(); |
| 605 | softclock(frame); |
| 606 | } else |
| 607 | #endif |
| 608 | setsoftclock(); |
| 609 | } |
| 610 | } |
| 611 | |
| 612 | int dk_ndrive = DK_NDRIVE; |
| 613 | /* |
| 614 | * Gather statistics on resource utilization. |
| 615 | * |
| 616 | * We make a gross assumption: that the system has been in the |
| 617 | * state it is in (user state, kernel state, interrupt state, |
| 618 | * or idle state) for the entire last time interval, and |
| 619 | * update statistics accordingly. |
| 620 | */ |
| 621 | void |
| 622 | gatherstats(framep) |
| 623 | clockframe *framep; |
| 624 | { |
| 625 | register int cpstate, s; |
| 626 | |
| 627 | /* |
| 628 | * Determine what state the cpu is in. |
| 629 | */ |
| 630 | if (CLKF_USERMODE(framep)) { |
| 631 | /* |
| 632 | * CPU was in user state. |
| 633 | */ |
| 634 | if (curproc->p_nice > NZERO) |
| 635 | cpstate = CP_NICE; |
| 636 | else |
| 637 | cpstate = CP_USER; |
| 638 | } else { |
| 639 | /* |
| 640 | * CPU was in system state. If profiling kernel |
| 641 | * increment a counter. If no process is running |
| 642 | * then this is a system tick if we were running |
| 643 | * at a non-zero IPL (in a driver). If a process is running, |
| 644 | * then we charge it with system time even if we were |
| 645 | * at a non-zero IPL, since the system often runs |
| 646 | * this way during processing of system calls. |
| 647 | * This is approximate, but the lack of true interval |
| 648 | * timers makes doing anything else difficult. |
| 649 | */ |
| 650 | cpstate = CP_SYS; |
| 651 | if (curproc == NULL && CLKF_BASEPRI(framep)) |
| 652 | cpstate = CP_IDLE; |
| 653 | #ifdef GPROF |
| 654 | s = (u_long) CLKF_PC(framep) - (u_long) s_lowpc; |
| 655 | if (profiling < 2 && s < s_textsize) |
| 656 | kcount[s / (HISTFRACTION * sizeof (*kcount))]++; |
| 657 | #endif |
| 658 | } |
| 659 | /* |
| 660 | * We maintain statistics shown by user-level statistics |
| 661 | * programs: the amount of time in each cpu state, and |
| 662 | * the amount of time each of DK_NDRIVE ``drives'' is busy. |
| 663 | */ |
| 664 | cp_time[cpstate]++; |
| 665 | for (s = 0; s < DK_NDRIVE; s++) |
| 666 | if (dk_busy&(1<<s)) |
| 667 | dk_time[s]++; |
| 668 | } |
| 669 | |
| 670 | /* |
| 671 | * Software priority level clock interrupt. |
| 672 | * Run periodic events from timeout queue. |
| 673 | */ |
| 674 | /*ARGSUSED*/ |
| 675 | void |
| 676 | softclock(frame) |
| 677 | clockframe frame; |
| 678 | { |
| 679 | |
| 680 | for (;;) { |
| 681 | register struct callout *p1; |
| 682 | register caddr_t arg; |
| 683 | register timeout_func_t func; |
| 684 | register int a, s; |
| 685 | |
| 686 | s = splhigh(); |
| 687 | if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) { |
| 688 | splx(s); |
| 689 | break; |
| 690 | } |
| 691 | arg = p1->c_arg; func = p1->c_func; a = p1->c_time; |
| 692 | calltodo.c_next = p1->c_next; |
| 693 | p1->c_next = callfree; |
| 694 | callfree = p1; |
| 695 | splx(s); |
| 696 | (*func)(arg, a); |
| 697 | } |
| 698 | |
| 699 | /* |
| 700 | * If no process to work with, we're finished. |
| 701 | */ |
| 702 | if (curproc == 0) return; |
| 703 | |
| 704 | /* |
| 705 | * If trapped user-mode and profiling, give it |
| 706 | * a profiling tick. |
| 707 | */ |
| 708 | if (CLKF_USERMODE(&frame)) { |
| 709 | register struct proc *p = curproc; |
| 710 | |
| 711 | if (p->p_stats->p_prof.pr_scale) |
| 712 | profile_tick(p, &frame); |
| 713 | /* |
| 714 | * Check to see if process has accumulated |
| 715 | * more than 10 minutes of user time. If so |
| 716 | * reduce priority to give others a chance. |
| 717 | */ |
| 718 | if (p->p_ucred->cr_uid && p->p_nice == NZERO && |
| 719 | p->p_utime.tv_sec > 10 * 60) { |
| 720 | p->p_nice = NZERO + 4; |
| 721 | setpri(p); |
| 722 | p->p_pri = p->p_usrpri; |
| 723 | } |
| 724 | } |
| 725 | } |
| 726 | |
| 727 | /* |
| 728 | * Arrange that (*func)(arg) is called in t/hz seconds. |
| 729 | */ |
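/*
 * Illustrative usage, with a hypothetical driver routine and softc
 * pointer (not part of this file):
 *
 *	timeout(mydev_watchdog, (caddr_t)sc, 2 * hz);
 *
 * arranges for mydev_watchdog(sc, ...) to run about two seconds from
 * now, and
 *
 *	untimeout(mydev_watchdog, (caddr_t)sc);
 *
 * cancels the request if it has not yet fired.
 */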
| 730 | void |
| 731 | timeout(func, arg, t) |
| 732 | timeout_func_t func; |
| 733 | caddr_t arg; |
| 734 | register int t; |
| 735 | { |
| 736 | register struct callout *p1, *p2, *pnew; |
| 737 | register int s = splhigh(); |
| 738 | |
| 739 | if (t <= 0) |
| 740 | t = 1; |
| 741 | pnew = callfree; |
| 742 | if (pnew == NULL) |
| 743 | panic("timeout table overflow"); |
| 744 | callfree = pnew->c_next; |
| 745 | pnew->c_arg = arg; |
| 746 | pnew->c_func = func; |
| 747 | for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) |
| 748 | if (p2->c_time > 0) |
| 749 | t -= p2->c_time; |
| 750 | p1->c_next = pnew; |
| 751 | pnew->c_next = p2; |
| 752 | pnew->c_time = t; |
| 753 | if (p2) |
| 754 | p2->c_time -= t; |
| 755 | splx(s); |
| 756 | } |
| 757 | |
| 758 | /* |
| 759 | * untimeout is called to remove a function timeout call |
| 760 | * from the callout structure. |
| 761 | */ |
| 762 | void |
| 763 | untimeout(func, arg) |
| 764 | timeout_func_t func; |
| 765 | caddr_t arg; |
| 766 | { |
| 767 | register struct callout *p1, *p2; |
| 768 | register int s; |
| 769 | |
| 770 | s = splhigh(); |
| 771 | for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) { |
| 772 | if (p2->c_func == func && p2->c_arg == arg) { |
| 773 | if (p2->c_next && p2->c_time > 0) |
| 774 | p2->c_next->c_time += p2->c_time; |
| 775 | p1->c_next = p2->c_next; |
| 776 | p2->c_next = callfree; |
| 777 | callfree = p2; |
| 778 | break; |
| 779 | } |
| 780 | } |
| 781 | splx(s); |
| 782 | } |
| 783 | |
| 784 | /* |
| 785 | * Compute number of hz until specified time. |
| 786 | * Used to compute third argument to timeout() from an |
| 787 | * absolute time. |
| 788 | */ |
| 789 | |
| 790 | /* XXX clock_t */ |
| 791 | u_long |
| 792 | hzto(tv) |
| 793 | struct timeval *tv; |
| 794 | { |
| 795 | register unsigned long ticks; |
| 796 | register long sec; |
| 797 | register long usec; |
| 798 | int s; |
| 799 | |
| 800 | /* |
| 801 | * If the number of usecs in the whole seconds part of the time |
| 802 | * difference fits in a long, then the total number of usecs will |
| 803 | * fit in an unsigned long. Compute the total and convert it to |
| 804 | * ticks, rounding up and adding 1 to allow for the current tick |
| 805 | * to expire. Rounding also depends on unsigned long arithmetic |
| 806 | * to avoid overflow. |
| 807 | * |
| 808 | * Otherwise, if the number of ticks in the whole seconds part of |
| 809 | * the time difference fits in a long, then convert the parts to |
| 810 | * ticks separately and add, using similar rounding methods and |
| 811 | * overflow avoidance. This method would work in the previous |
| 812 | * case but it is slightly slower and assumes that hz is integral. |
| 813 | * |
| 814 | * Otherwise, round the time difference down to the maximum |
| 815 | * representable value. |
| 816 | * |
| 817 | * Maximum value for any timeout in 10ms ticks is 248 days. |
| 818 | */ |
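	/*
	 * Worked example: with hz = 100 (tick = 10000 us), a target
	 * 1.5 seconds ahead gives sec = 1, usec = 500000, so
	 * ticks = (1500000 + 9999) / 10000 + 1 = 151, i.e. the rounded-up
	 * tick count plus one for the partially elapsed current tick.
	 */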
| 819 | s = splhigh(); |
| 820 | sec = tv->tv_sec - time.tv_sec; |
| 821 | usec = tv->tv_usec - time.tv_usec; |
| 822 | splx(s); |
| 823 | if (usec < 0) { |
| 824 | sec--; |
| 825 | usec += 1000000; |
| 826 | } |
| 827 | if (sec < 0) { |
| 828 | #ifdef DIAGNOSTIC |
| 829 | printf("hzto: negative time difference %ld sec %ld usec\n", |
| 830 | sec, usec); |
| 831 | #endif |
| 832 | ticks = 1; |
| 833 | } else if (sec <= LONG_MAX / 1000000) |
| 834 | ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) |
| 835 | / tick + 1; |
| 836 | else if (sec <= LONG_MAX / hz) |
| 837 | ticks = sec * hz |
| 838 | + ((unsigned long)usec + (tick - 1)) / tick + 1; |
| 839 | else |
| 840 | ticks = LONG_MAX; |
| 841 | #define CLOCK_T_MAX INT_MAX /* XXX should be ULONG_MAX */ |
| 842 | if (ticks > CLOCK_T_MAX) |
| 843 | ticks = CLOCK_T_MAX; |
| 844 | return (ticks); |
| 845 | } |
| 846 | |
| 847 | #ifdef PPS_SYNC |
| 848 | /* |
| 849 | * hardpps() - discipline CPU clock oscillator to external pps signal |
| 850 | * |
| 851 | * This routine is called at each PPS interrupt in order to discipline |
| 852 | * the CPU clock oscillator to the PPS signal. It integrates successive |
| 853 | * phase differences between the two oscillators and calculates the |
| 854 | * frequency offset. This is used in hardclock() to discipline the CPU |
| 855 | * clock oscillator so that intrinsic frequency error is cancelled out. |
| 856 | * The code requires the caller to capture the time and hardware |
| 857 | * counter value at the designated PPS signal transition. |
| 858 | */ |
| 859 | void |
| 860 | hardpps(tvp, usec) |
| 861 | struct timeval *tvp; /* time at PPS */ |
| 862 | long usec; /* hardware counter at PPS */ |
| 863 | { |
| 864 | long u_usec, v_usec, bigtick; |
| 865 | long cal_sec, cal_usec; |
| 866 | |
| 867 | /* |
| 868 | * During the calibration interval adjust the starting time when |
| 869 | * the tick overflows. At the end of the interval compute the |
| 870 | * duration of the interval and the difference of the hardware |
| 871 | * counters at the beginning and end of the interval. This code |
| 872 | * is deliciously complicated by the fact valid differences may |
| 873 | * exceed the value of tick when using long calibration |
| 874 | * intervals and small ticks. Note that the counter can be |
| 875 | * greater than tick if caught at just the wrong instant, but |
| 876 | * the values returned and used here are correct. |
| 877 | */ |
| 878 | bigtick = (long)tick << SHIFT_USEC; |
| 879 | pps_usec -= ntp_pll.ybar; |
| 880 | if (pps_usec >= bigtick) |
| 881 | pps_usec -= bigtick; |
| 882 | if (pps_usec < 0) |
| 883 | pps_usec += bigtick; |
| 884 | pps_time.tv_sec++; |
| 885 | pps_count++; |
	if (pps_count < (1 << ntp_pll.shift))
| 887 | return; |
| 888 | pps_count = 0; |
| 889 | ntp_pll.calcnt++; |
| 890 | u_usec = usec << SHIFT_USEC; |
| 891 | v_usec = pps_usec - u_usec; |
| 892 | if (v_usec >= bigtick >> 1) |
| 893 | v_usec -= bigtick; |
| 894 | if (v_usec < -(bigtick >> 1)) |
| 895 | v_usec += bigtick; |
| 896 | if (v_usec < 0) |
| 897 | v_usec = -(-v_usec >> ntp_pll.shift); |
| 898 | else |
| 899 | v_usec = v_usec >> ntp_pll.shift; |
| 900 | pps_usec = u_usec; |
| 901 | cal_sec = tvp->tv_sec; |
| 902 | cal_usec = tvp->tv_usec; |
| 903 | cal_sec -= pps_time.tv_sec; |
| 904 | cal_usec -= pps_time.tv_usec; |
| 905 | if (cal_usec < 0) { |
| 906 | cal_usec += 1000000; |
| 907 | cal_sec--; |
| 908 | } |
| 909 | pps_time = *tvp; |
| 910 | |
| 911 | /* |
| 912 | * Check for lost interrupts, noise, excessive jitter and |
| 913 | * excessive frequency error. The number of timer ticks during |
| 914 | * the interval may vary +-1 tick. Add to this a margin of one |
| 915 | * tick for the PPS signal jitter and maximum frequency |
| 916 | * deviation. If the limits are exceeded, the calibration |
| 917 | * interval is reset to the minimum and we start over. |
| 918 | */ |
| 919 | u_usec = (long)tick << 1; |
| 920 | if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec)) |
| 921 | || (cal_sec == 0 && cal_usec < u_usec)) |
| 922 | || v_usec > ntp_pll.tolerance || v_usec < -ntp_pll.tolerance) { |
| 923 | ntp_pll.jitcnt++; |
		ntp_pll.shift = PPS_SHIFT;
| 925 | pps_dispinc = PPS_DISPINC; |
| 926 | ntp_pll.intcnt = 0; |
| 927 | return; |
| 928 | } |
| 929 | |
| 930 | /* |
| 931 | * A three-stage median filter is used to help deglitch the pps |
| 932 | * signal. The median sample becomes the offset estimate; the |
| 933 | * difference between the other two samples becomes the |
| 934 | * dispersion estimate. |
| 935 | */ |
| 936 | pps_mf[2] = pps_mf[1]; |
| 937 | pps_mf[1] = pps_mf[0]; |
| 938 | pps_mf[0] = v_usec; |
| 939 | if (pps_mf[0] > pps_mf[1]) { |
| 940 | if (pps_mf[1] > pps_mf[2]) { |
| 941 | u_usec = pps_mf[1]; /* 0 1 2 */ |
| 942 | v_usec = pps_mf[0] - pps_mf[2]; |
| 943 | } else if (pps_mf[2] > pps_mf[0]) { |
| 944 | u_usec = pps_mf[0]; /* 2 0 1 */ |
| 945 | v_usec = pps_mf[2] - pps_mf[1]; |
| 946 | } else { |
| 947 | u_usec = pps_mf[2]; /* 0 2 1 */ |
| 948 | v_usec = pps_mf[0] - pps_mf[1]; |
| 949 | } |
| 950 | } else { |
| 951 | if (pps_mf[1] < pps_mf[2]) { |
| 952 | u_usec = pps_mf[1]; /* 2 1 0 */ |
| 953 | v_usec = pps_mf[2] - pps_mf[0]; |
| 954 | } else if (pps_mf[2] < pps_mf[0]) { |
| 955 | u_usec = pps_mf[0]; /* 1 0 2 */ |
| 956 | v_usec = pps_mf[1] - pps_mf[2]; |
| 957 | } else { |
| 958 | u_usec = pps_mf[2]; /* 1 2 0 */ |
| 959 | v_usec = pps_mf[1] - pps_mf[0]; |
| 960 | } |
| 961 | } |
| 962 | |
| 963 | /* |
| 964 | * Here the dispersion average is updated. If it is less than |
| 965 | * the threshold pps_dispmax, the frequency average is updated |
| 966 | * as well, but clamped to the tolerance. |
| 967 | */ |
| 968 | v_usec = (v_usec >> 1) - ntp_pll.disp; |
| 969 | if (v_usec < 0) |
| 970 | ntp_pll.disp -= -v_usec >> PPS_AVG; |
| 971 | else |
| 972 | ntp_pll.disp += v_usec >> PPS_AVG; |
| 973 | if (ntp_pll.disp > pps_dispmax) { |
| 974 | ntp_pll.discnt++; |
| 975 | return; |
| 976 | } |
| 977 | if (u_usec < 0) { |
| 978 | ntp_pll.ybar -= -u_usec >> PPS_AVG; |
| 979 | if (ntp_pll.ybar < -ntp_pll.tolerance) |
| 980 | ntp_pll.ybar = -ntp_pll.tolerance; |
| 981 | u_usec = -u_usec; |
| 982 | } else { |
| 983 | ntp_pll.ybar += u_usec >> PPS_AVG; |
| 984 | if (ntp_pll.ybar > ntp_pll.tolerance) |
| 985 | ntp_pll.ybar = ntp_pll.tolerance; |
| 986 | } |
| 987 | |
| 988 | /* |
| 989 | * Here the calibration interval is adjusted. If the maximum |
| 990 | * time difference is greater than tick/4, reduce the interval |
| 991 | * by half. If this is not the case for four consecutive |
| 992 | * intervals, double the interval. |
| 993 | */ |
| 994 | if (u_usec << ntp_pll.shift > bigtick >> 2) { |
| 995 | ntp_pll.intcnt = 0; |
		if (ntp_pll.shift > PPS_SHIFT) {
| 997 | ntp_pll.shift--; |
| 998 | pps_dispinc <<= 1; |
| 999 | } |
| 1000 | } else if (ntp_pll.intcnt >= 4) { |
| 1001 | ntp_pll.intcnt = 0; |
		if (ntp_pll.shift < PPS_SHIFTMAX) {
| 1003 | ntp_pll.shift++; |
| 1004 | pps_dispinc >>= 1; |
| 1005 | } |
| 1006 | } else |
| 1007 | ntp_pll.intcnt++; |
| 1008 | } |
| 1009 | #endif /* PPS_SYNC */ |