/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	8.2 (Berkeley) %G%
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif
#define ADJTIME /* For now... */
/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
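/*
 * Illustrative numbers (not requirements of this file): with
 * stathz = 128 and profhz = 1024, psratio = profhz / stathz = 8,
 * so while profiling only every 8th statclock tick is counted
 * toward statistics, while all 1024 ticks per second feed the
 * profile buffers.
 */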
/* TODO: allocate more timeout table slots when table overflows. */
/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
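/*
 * Usage sketch, assuming the caller runs at clock interrupt priority
 * as hardclock() does below:
 *
 *	BUMPTIME(&time, tick);
 *
 * adds one tick's worth of microseconds to the wall clock, carrying
 * into tv_sec when tv_usec reaches 1000000.
 */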
static int	psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct	timeval time;
volatile struct	timeval mono_time;
/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}
/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;
	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}
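	/*
	 * Illustrative queue state (numbers invented): events due at
	 * +2, +5, and +9 ticks are stored as the deltas 2, 3, 4.  After
	 * two hardclock ticks the head delta reaches 0 and the first
	 * event is due; the later deltas are never touched.
	 */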
	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}
	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);
	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
#ifdef ADJTIME
	if (adjtimedelta == 0)
		bumptime(&time, tick);
	else if (adjtimedelta < 0) {
		bumptime(&time, tick - ADJ_TICK);
		adjtimedelta++;
	} else {
		bumptime(&time, tick + ADJ_TICK);
		adjtimedelta--;
	}
#else
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);
#endif
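	/*
	 * Example of the slew (illustrative numbers): with hz = 100 a
	 * normal tick adds 10000 usec.  While an adjtime() correction
	 * is pending, each tick instead adds tick +/- ADJ_TICK usec,
	 * so the clock drifts smoothly toward the requested time
	 * instead of stepping.
	 */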
	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}
/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}
/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that no identification
 *	value is returned from timeout, rather, the original arguments
 *	to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;
	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}
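/*
 * Worked example for the insertion walk above (numbers invented):
 * with queue deltas 2, 3, 4 (events at +2, +5, +9) and ticks = 7,
 * the loop skips the first two entries (7 - 2 - 3 = 2), the new
 * entry is stored with c_time = 2 (absolute +7), and the old +9
 * entry's delta is reduced from 4 to 2.
 */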
void
untimeout(ftn, arg)
	void (*ftn) __P((void *));
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/* Increment next entry's tick count. */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}
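/*
 * Usage sketch (hypothetical driver code, not part of this file):
 *
 *	timeout(mydrain, (void *)sc, hz / 2);	schedule in half a second
 *	...
 *	untimeout(mydrain, (void *)sc);		cancel if still pending
 *
 * Per the comment above timeout(), the (function, argument) pair is
 * the identification; no handle is returned.
 */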
/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representible to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
		    (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}
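/*
 * Checking the limits quoted above (assuming hz = 100, tick = 10000
 * usec): 0x7fffffff / 1000 - 1000 is about 2,146,483 seconds, roughly
 * 24.8 days, the ``25 days'' exact range; 0x7fffffff ticks of 10 ms
 * each is about 248.5 days, the ``250 days'' cap.
 */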
/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & SPROFIL) == 0) {
		p->p_flag |= SPROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}
/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & SPROFIL) {
		p->p_flag &= ~SPROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}
int	dk_ndrive = DK_NDRIVE;
/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & SPROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 */
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
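		/*
		 * Index arithmetic for the histogram above, with
		 * illustrative values: if HISTFRACTION is 2 and kcount
		 * entries are 2 bytes, a pc 0x1000 bytes past g->lowpc
		 * falls in bucket 0x1000 / (2 * 2) = 0x400.
		 */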
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;
	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away
	 * exponentially, at a rate which is proportionally slower
	 * when the system is busy.  The basic principle is that the
	 * system will 90% forget that a process used a lot of CPU
	 * time in 5*loadav seconds.  This causes the system to favor
	 * processes which haven't run much recently, and to
	 * round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu & 3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}
}
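/*
 * The decay itself lives in schedcpu() in kern_synch.c; assuming the
 * standard 4.4BSD filter, p_cpu is scaled roughly once per second by
 * the factor
 *
 *	(2 * loadav) / (2 * loadav + 1)
 *
 * which is what yields the ``90% forgotten in 5*loadav seconds''
 * behavior described above.
 */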
/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo,
	    sizeof(clkinfo)));
}