debugging version
[unix-history] / usr / src / sys / kern / kern_clock.c
CommitLineData
da7c5cc6 1/*
0157085f 2 * Copyright (c) 1982, 1986, 1991 Regents of the University of California.
da7c5cc6
KM
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 *
f7fd221a 6 * @(#)kern_clock.c 7.15 (Berkeley) %G%
da7c5cc6 7 */
961945a8 8
94368568
JB
9#include "param.h"
10#include "systm.h"
fb1db32c 11#include "dkstat.h"
94368568 12#include "callout.h"
94368568
JB
13#include "kernel.h"
14#include "proc.h"
e7837d79 15#include "resourcevar.h"
83be5fac 16
0157085f 17#include "machine/cpu.h"
961945a8 18
8487304f 19#ifdef GPROF
94368568 20#include "gprof.h"
8487304f
KM
21#endif
22
/*
 * Debugging time-adjustment knobs.
 *
 * While ADJTIME is defined, hardclock() ignores timedelta/tickdelta and
 * instead, whenever adjtimedelta is non-zero, passes tick - ADJ_TICK
 * (adjtimedelta negative) or tick + ADJ_TICK (adjtimedelta positive) to
 * bumptime() and moves adjtimedelta one step toward zero.
 */
#define ADJTIME		/* For now... */
#define ADJ_TICK 1000
int	adjtimedelta;
26
/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e. hz == phz); however, the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */
6602c75b 41
/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */
9c5cfb8b 47
/*
 * Bump a timeval by a small number of usec's.
 *
 * t is a pointer to the struct timeval to advance; usec is the number of
 * microseconds to add.  At most one second is carried out of tv_usec, so
 * the result is only normalized when the sum stays below 2000000.
 *
 * The expansion is deliberately a bare brace block rather than a
 * do { } while (0): hardclock() writes `BUMPTIME(&time, tick)' with no
 * trailing semicolon directly in front of an `else'.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}
83be5fac 60
76b2a182 61/*
53a32545
SL
62 * The hz hardware interval timer.
63 * We update the events relating to real time.
64 * If this timer is also being used to gather statistics,
65 * we run through the statistics gathering routine as well.
76b2a182 66 */
d293217c 67hardclock(frame)
0157085f 68 clockframe frame;
83be5fac 69{
0a34b6fd 70 register struct callout *p1;
0157085f 71 register struct proc *p = curproc;
e7837d79 72 register struct pstats *pstats;
0b355a6e 73 register int s;
83be5fac 74
76b2a182
BJ
75 /*
76 * Update real-time timeout queue.
77 * At front of queue are some number of events which are ``due''.
78 * The time to these is <= 0 and if negative represents the
79 * number of ticks which have passed since it was supposed to happen.
80 * The rest of the q elements (times > 0) are events yet to happen,
81 * where the time for each is given as a delta from the previous.
82 * Decrementing just the first of these serves to decrement the time
83 * to all events.
84 */
88a7a62a
SL
85 p1 = calltodo.c_next;
86 while (p1) {
87 if (--p1->c_time > 0)
88 break;
88a7a62a
SL
89 if (p1->c_time == 0)
90 break;
91 p1 = p1->c_next;
92 }
5da67d35 93
e7837d79
MK
94 /*
95 * Curproc (now in p) is null if no process is running.
96 * We assume that curproc is set in user mode!
97 */
98 if (p)
99 pstats = p->p_stats;
76b2a182
BJ
100 /*
101 * Charge the time out based on the mode the cpu is in.
102 * Here again we fudge for the lack of proper interval timers
103 * assuming that the current state has been around at least
104 * one tick.
105 */
76b2a182
BJ
106 /*
107 * CPU was in user state. Increment
108 * user time counter, and process process-virtual time
877ef342 109 * interval timer.
76b2a182 110 */
53fbb3b3 111 BUMPTIME(&p->p_utime, tick);
0157085f
MK
112 if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
113 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
53fbb3b3 114 psignal(p, SIGVTALRM);
83be5fac 115 } else {
76b2a182 116 /*
0b355a6e 117 * CPU was in system state.
76b2a182 118 */
e7837d79 119 if (p)
53fbb3b3 120 BUMPTIME(&p->p_stime, tick);
83be5fac 121 }
27b91f59 122
9fb1a8d0
SL
123 /*
124 * If the cpu is currently scheduled to a process, then
125 * charge it with resource utilization for a tick, updating
126 * statistics which run in (user+system) virtual time,
127 * such as the cpu time limit and profiling timers.
128 * This assumes that the current process has been running
129 * the entire last tick.
130 */
e7837d79 131 if (p) {
53fbb3b3 132 if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) >
0157085f 133 p->p_rlimit[RLIMIT_CPU].rlim_cur) {
53fbb3b3 134 psignal(p, SIGXCPU);
0157085f
MK
135 if (p->p_rlimit[RLIMIT_CPU].rlim_cur <
136 p->p_rlimit[RLIMIT_CPU].rlim_max)
137 p->p_rlimit[RLIMIT_CPU].rlim_cur += 5;
9fb1a8d0 138 }
0157085f
MK
139 if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
140 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
53fbb3b3 141 psignal(p, SIGPROF);
9fb1a8d0 142
0157085f
MK
143 /*
144 * We adjust the priority of the current process.
145 * The priority of a process gets worse as it accumulates
146 * CPU time. The cpu usage estimator (p_cpu) is increased here
147 * and the formula for computing priorities (in kern_synch.c)
148 * will compute a different value each time the p_cpu increases
149 * by 4. The cpu usage estimator ramps up quite quickly when
150 * the process is running (linearly), and decays away
151 * exponentially, * at a rate which is proportionally slower
152 * when the system is busy. The basic principal is that the
153 * system will 90% forget that a process used a lot of CPU
154 * time in 5*loadav seconds. This causes the system to favor
155 * processes which haven't run much recently, and to
156 * round-robin among other processes.
157 */
27b91f59
BJ
158 p->p_cpticks++;
159 if (++p->p_cpu == 0)
160 p->p_cpu--;
76b2a182 161 if ((p->p_cpu&3) == 0) {
0157085f 162 setpri(p);
27b91f59
BJ
163 if (p->p_pri >= PUSER)
164 p->p_pri = p->p_usrpri;
83be5fac
BJ
165 }
166 }
76b2a182 167
53a32545
SL
168 /*
169 * If the alternate clock has not made itself known then
170 * we must gather the statistics.
171 */
172 if (phz == 0)
0157085f 173 gatherstats(&frame);
53a32545 174
76b2a182
BJ
175 /*
176 * Increment the time-of-day, and schedule
177 * processing of the callouts at a very low cpu priority,
178 * so we don't keep the relatively high clock interrupt
179 * priority any longer than necessary.
180 */
45e9acec
MK
181#ifdef ADJTIME
182 if (adjtimedelta == 0)
183 bumptime(&time, tick);
184 else {
185 if (adjtimedelta < 0) {
186 bumptime(&time, tick-ADJ_TICK);
187 adjtimedelta++;
188 } else {
189 bumptime(&time, tick+ADJ_TICK);
190 adjtimedelta--;
191 }
192 }
193#else
4ca0d0d6 194 if (timedelta == 0)
99e47f6b
MK
195 BUMPTIME(&time, tick)
196 else {
197 register delta;
198
4ca0d0d6
MK
199 if (timedelta < 0) {
200 delta = tick - tickdelta;
201 timedelta += tickdelta;
99e47f6b 202 } else {
4ca0d0d6
MK
203 delta = tick + tickdelta;
204 timedelta -= tickdelta;
99e47f6b
MK
205 }
206 BUMPTIME(&time, delta);
207 }
45e9acec 208#endif
ca6b57a4 209 setsoftclock();
f403d99f
BJ
210}
211
d976d466 212int dk_ndrive = DK_NDRIVE;
53a32545
SL
213/*
214 * Gather statistics on resource utilization.
215 *
216 * We make a gross assumption: that the system has been in the
217 * state it is in (user state, kernel state, interrupt state,
218 * or idle state) for the entire last time interval, and
219 * update statistics accordingly.
220 */
0157085f
MK
221gatherstats(framep)
222 clockframe *framep;
53a32545 223{
9c5cfb8b 224 register int cpstate, s;
53a32545
SL
225
226 /*
227 * Determine what state the cpu is in.
228 */
0157085f 229 if (CLKF_USERMODE(framep)) {
53a32545
SL
230 /*
231 * CPU was in user state.
232 */
0157085f 233 if (curproc->p_nice > NZERO)
53a32545
SL
234 cpstate = CP_NICE;
235 else
236 cpstate = CP_USER;
237 } else {
238 /*
239 * CPU was in system state. If profiling kernel
0b355a6e
JB
240 * increment a counter. If no process is running
241 * then this is a system tick if we were running
242 * at a non-zero IPL (in a driver). If a process is running,
243 * then we charge it with system time even if we were
244 * at a non-zero IPL, since the system often runs
245 * this way during processing of system calls.
246 * This is approximate, but the lack of true interval
247 * timers makes doing anything else difficult.
53a32545
SL
248 */
249 cpstate = CP_SYS;
e7837d79 250 if (curproc == NULL && CLKF_BASEPRI(framep))
53a32545
SL
251 cpstate = CP_IDLE;
252#ifdef GPROF
0157085f 253 s = CLKF_PC(framep) - s_lowpc;
53a32545
SL
254 if (profiling < 2 && s < s_textsize)
255 kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
256#endif
257 }
258 /*
259 * We maintain statistics shown by user-level statistics
260 * programs: the amount of time in each cpu state, and
261 * the amount of time each of DK_NDRIVE ``drives'' is busy.
262 */
263 cp_time[cpstate]++;
264 for (s = 0; s < DK_NDRIVE; s++)
fb1db32c 265 if (dk_busy&(1<<s))
53a32545
SL
266 dk_time[s]++;
267}
268
76b2a182
BJ
269/*
270 * Software priority level clock interrupt.
271 * Run periodic events from timeout queue.
272 */
260ea681 273/*ARGSUSED*/
d293217c 274softclock(frame)
0157085f 275 clockframe frame;
f403d99f 276{
f403d99f 277
27b91f59 278 for (;;) {
76b2a182
BJ
279 register struct callout *p1;
280 register caddr_t arg;
281 register int (*func)();
282 register int a, s;
283
9c5cfb8b 284 s = splhigh();
27b91f59
BJ
285 if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
286 splx(s);
287 break;
f403d99f 288 }
76b2a182 289 arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
27b91f59 290 calltodo.c_next = p1->c_next;
27b91f59
BJ
291 p1->c_next = callfree;
292 callfree = p1;
4f083fd7 293 splx(s);
d01b68d6 294 (*func)(arg, a);
f403d99f 295 }
877ef342 296 /*
db1f1262
SL
297 * If trapped user-mode and profiling, give it
298 * a profiling tick.
877ef342 299 */
0157085f
MK
300 if (CLKF_USERMODE(&frame)) {
301 register struct proc *p = curproc;
db1f1262 302
0157085f
MK
303 if (p->p_stats->p_prof.pr_scale)
304 profile_tick(p, &frame);
db1f1262
SL
305 /*
306 * Check to see if process has accumulated
307 * more than 10 minutes of user time. If so
308 * reduce priority to give others a chance.
309 */
0157085f 310 if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
53fbb3b3 311 p->p_utime.tv_sec > 10 * 60) {
0157085f
MK
312 p->p_nice = NZERO + 4;
313 setpri(p);
db1f1262
SL
314 p->p_pri = p->p_usrpri;
315 }
877ef342 316 }
83be5fac
BJ
317}
318
88a7a62a 319/*
0157085f 320 * Arrange that (*func)(arg) is called in t/hz seconds.
83be5fac 321 */
0157085f
MK
322timeout(func, arg, t)
323 int (*func)();
4512b9a4 324 caddr_t arg;
88a7a62a 325 register int t;
83be5fac 326{
c4710996 327 register struct callout *p1, *p2, *pnew;
9c5cfb8b 328 register int s = splhigh();
83be5fac 329
ba96129b 330 if (t <= 0)
88a7a62a 331 t = 1;
c4710996
BJ
332 pnew = callfree;
333 if (pnew == NULL)
334 panic("timeout table overflow");
335 callfree = pnew->c_next;
336 pnew->c_arg = arg;
0157085f 337 pnew->c_func = func;
c4710996 338 for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
d45b61eb
SL
339 if (p2->c_time > 0)
340 t -= p2->c_time;
c4710996
BJ
341 p1->c_next = pnew;
342 pnew->c_next = p2;
343 pnew->c_time = t;
344 if (p2)
345 p2->c_time -= t;
83be5fac
BJ
346 splx(s);
347}
1fa9ff62
SL
348
349/*
350 * untimeout is called to remove a function timeout call
351 * from the callout structure.
352 */
0157085f
MK
353untimeout(func, arg)
354 int (*func)();
1fa9ff62
SL
355 caddr_t arg;
356{
1fa9ff62
SL
357 register struct callout *p1, *p2;
358 register int s;
359
9c5cfb8b 360 s = splhigh();
1fa9ff62 361 for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
0157085f 362 if (p2->c_func == func && p2->c_arg == arg) {
d01b68d6 363 if (p2->c_next && p2->c_time > 0)
1fa9ff62
SL
364 p2->c_next->c_time += p2->c_time;
365 p1->c_next = p2->c_next;
366 p2->c_next = callfree;
367 callfree = p2;
368 break;
369 }
370 }
371 splx(s);
372}
d01b68d6 373
76b2a182
BJ
374/*
375 * Compute number of hz until specified time.
376 * Used to compute third argument to timeout() from an
377 * absolute time.
378 */
d01b68d6
BJ
379hzto(tv)
380 struct timeval *tv;
381{
76b2a182
BJ
382 register long ticks;
383 register long sec;
9c5cfb8b 384 int s = splhigh();
d01b68d6 385
76b2a182
BJ
386 /*
387 * If number of milliseconds will fit in 32 bit arithmetic,
388 * then compute number of milliseconds to time and scale to
389 * ticks. Otherwise just compute number of hz in time, rounding
390 * times greater than representible to maximum value.
391 *
392 * Delta times less than 25 days can be computed ``exactly''.
393 * Maximum value for any timeout in 10ms ticks is 250 days.
394 */
395 sec = tv->tv_sec - time.tv_sec;
396 if (sec <= 0x7fffffff / 1000 - 1000)
397 ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
398 (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
399 else if (sec <= 0x7fffffff / hz)
400 ticks = sec * hz;
401 else
402 ticks = 0x7fffffff;
d01b68d6
BJ
403 splx(s);
404 return (ticks);
405}