/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)kern_clock.c      7.16 (Berkeley) 5/9/91
 * $Id: kern_clock.c,v 1.11 1993/12/19 00:51:20 wollman Exp $
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "kernel.h"
#include "proc.h"
#include "signalvar.h"
#include "resourcevar.h"

#include "machine/cpu.h"

#include "resource.h"
#include "vm/vm.h"

#ifdef GPROF
#include "gprof.h"
#endif

static void gatherstats(clockframe *);

/* From callout.h */
struct callout *callfree, *callout, calltodo;
int ncallout;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a non-
 * approximate measure of resource utilization.
 */

/*
 * TODO:
 *      time of day, system/user timing, timeouts, profiling on separate timers
 *      allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
        register struct timeval *tp = (t); \
 \
        tp->tv_usec += (usec); \
        if (tp->tv_usec >= 1000000) { \
                tp->tv_usec -= 1000000; \
                tp->tv_sec++; \
        } \
}
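
/*
 * Worked example (illustrative, not in the original source): a single
 * overflow check suffices because callers only ever bump by less than
 * one second.  With tick == 10000 (hz == 100):
 *
 *      tv = { 1, 995000 };
 *      BUMPTIME(&tv, tick);            tv becomes { 2, 5000 }
 */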

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
void
hardclock(frame)
        clockframe frame;
{
        register struct callout *p1;
        register struct proc *p = curproc;
        register struct pstats *pstats = 0;
        register struct rusage *ru;
        register struct vmspace *vm;
        register int s;
        int needsoft = 0;
        extern int tickdelta;
        extern long timedelta;

        /*
         * Update real-time timeout queue.
         * At front of queue are some number of events which are ``due''.
         * The time to these is <= 0 and if negative represents the
         * number of ticks which have passed since it was supposed to happen.
         * The rest of the q elements (times > 0) are events yet to happen,
         * where the time for each is given as a delta from the previous.
         * Decrementing just the first of these serves to decrement the time
         * to all events.
         */
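        /*
         * For example (illustrative): events due 5, 8, 8 and 12 ticks
         * from now are kept as the delta list 5, 3, 0, 4; decrementing
         * the head's c_time ages all four events at once.
         */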
        p1 = calltodo.c_next;
        while (p1) {
                if (--p1->c_time > 0)
                        break;
                needsoft = 1;
                if (p1->c_time == 0)
                        break;
                p1 = p1->c_next;
        }

        /*
         * Curproc (now in p) is null if no process is running.
         * We assume that curproc is set in user mode!
         */
        if (p)
                pstats = p->p_stats;
        /*
         * Charge the time out based on the mode the cpu is in.
         * Here again we fudge for the lack of proper interval timers
         * assuming that the current state has been around at least
         * one tick.
         */
        if (CLKF_USERMODE(&frame)) {
                if (pstats->p_prof.pr_scale)
                        needsoft = 1;
                /*
                 * CPU was in user state.  Increment
                 * user time counter, and process process-virtual time
                 * interval timer.
                 */
                BUMPTIME(&p->p_utime, tick);
                if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
                        psignal(p, SIGVTALRM);
        } else {
                /*
                 * CPU was in system state.
                 */
                if (p)
                        BUMPTIME(&p->p_stime, tick);
        }

        /* bump the resource usage of integral space use */
        if (p && pstats && (ru = &pstats->p_ru) && (vm = p->p_vmspace)) {
                ru->ru_ixrss += vm->vm_tsize * NBPG / 1024;
                ru->ru_idrss += vm->vm_dsize * NBPG / 1024;
                ru->ru_isrss += vm->vm_ssize * NBPG / 1024;
                if ((vm->vm_pmap.pm_stats.resident_count * NBPG / 1024) >
                    ru->ru_maxrss) {
                        ru->ru_maxrss =
                            vm->vm_pmap.pm_stats.resident_count * NBPG / 1024;
                }
        }
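        /*
         * Illustrative note (not from the original source): the i*rss
         * fields thus accumulate kilobyte-ticks.  E.g. on a machine with
         * NBPG == 4096, a process with a 16-page data segment adds 64 to
         * ru_idrss each tick; dividing the total by the run time in ticks
         * recovers the average segment size in kilobytes.
         */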

        /*
         * If the cpu is currently scheduled to a process, then
         * charge it with resource utilization for a tick, updating
         * statistics which run in (user+system) virtual time,
         * such as the cpu time limit and profiling timers.
         * This assumes that the current process has been running
         * the entire last tick.
         */
        if (p) {
                if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) >
                    p->p_rlimit[RLIMIT_CPU].rlim_cur) {
                        psignal(p, SIGXCPU);
                        if (p->p_rlimit[RLIMIT_CPU].rlim_cur <
                            p->p_rlimit[RLIMIT_CPU].rlim_max)
                                p->p_rlimit[RLIMIT_CPU].rlim_cur += 5;
                }
                if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
                        psignal(p, SIGPROF);

                /*
                 * We adjust the priority of the current process.
                 * The priority of a process gets worse as it accumulates
                 * CPU time.  The cpu usage estimator (p_cpu) is increased here
                 * and the formula for computing priorities (in kern_synch.c)
                 * will compute a different value each time the p_cpu increases
                 * by 4.  The cpu usage estimator ramps up quite quickly when
                 * the process is running (linearly), and decays away
                 * exponentially, at a rate which is proportionally slower
                 * when the system is busy.  The basic principle is that the
                 * system will 90% forget that a process used a lot of CPU
                 * time in 5*loadav seconds.  This causes the system to favor
                 * processes which haven't run much recently, and to
                 * round-robin among other processes.
                 */
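                /*
                 * For reference (a sketch, not code from this file): the
                 * once-per-second decay applied by schedcpu() in
                 * kern_synch.c of this vintage is approximately
                 *
                 *      p_cpu = ((2 * loadav) / (2 * loadav + 1)) * p_cpu
                 *              + p_nice;
                 *
                 * which with loadav == 1 forgets roughly 90% of the
                 * accumulated p_cpu within 5 seconds.
                 */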
                p->p_cpticks++;
                if (++p->p_cpu == 0)
                        p->p_cpu--;
                if ((p->p_cpu&3) == 0) {
                        setpri(p);
                        if (p->p_pri >= PUSER)
                                p->p_pri = p->p_usrpri;
                }
        }

        /*
         * If the alternate clock has not made itself known then
         * we must gather the statistics.
         */
        if (phz == 0)
                gatherstats(&frame);

        /*
         * Increment the time-of-day, and schedule
         * processing of the callouts at a very low cpu priority,
         * so we don't keep the relatively high clock interrupt
         * priority any longer than necessary.
         */
        if (timedelta == 0)
                BUMPTIME(&time, tick)
        else {
                register int delta;

                if (timedelta < 0) {
                        delta = tick - tickdelta;
                        timedelta += tickdelta;
                } else {
                        delta = tick + tickdelta;
                        timedelta -= tickdelta;
                }
                BUMPTIME(&time, delta);
        }
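        /*
         * Illustrative note (not in the original): this is how adjtime(2)
         * skews the clock.  With hz == 100 (tick == 10000 usec) and
         * tickdelta == 5, each tick advances time by 10005 or 9995 usec
         * until timedelta is consumed, so a 1 msec adjustment is spread
         * over 200 ticks.  (The BUMPTIME above deliberately has no
         * trailing semicolon: the macro expands to a braced block.)
         */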
        if (needsoft) {
#if 0
/*
 * XXX - hardclock runs at splhigh, so the splsoftclock is useless and
 * softclock runs at splhigh as well if we do this.  It is not much of
 * an optimization, since the "software interrupt" is done with a call
 * from doreti, and the overhead of checking there is sometimes less
 * than checking here.  Moreover, the whole %$$%$^ frame is passed by
 * value here.
 */
                if (CLKF_BASEPRI(&frame)) {
                        /*
                         * Save the overhead of a software interrupt;
                         * it will happen as soon as we return, so do it now.
                         */
                        (void) splsoftclock();
                        softclock(frame);
                } else
#endif
                        setsoftclock();
        }
}

int dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
void
gatherstats(framep)
        clockframe *framep;
{
        register int cpstate, s;

        /*
         * Determine what state the cpu is in.
         */
        if (CLKF_USERMODE(framep)) {
                /*
                 * CPU was in user state.
                 */
                if (curproc->p_nice > NZERO)
                        cpstate = CP_NICE;
                else
                        cpstate = CP_USER;
        } else {
                /*
                 * CPU was in system state.  If profiling kernel
                 * increment a counter.  If no process is running
                 * then this is a system tick if we were running
                 * at a non-zero IPL (in a driver).  If a process is running,
                 * then we charge it with system time even if we were
                 * at a non-zero IPL, since the system often runs
                 * this way during processing of system calls.
                 * This is approximate, but the lack of true interval
                 * timers makes doing anything else difficult.
                 */
                cpstate = CP_SYS;
                if (curproc == NULL && CLKF_BASEPRI(framep))
                        cpstate = CP_IDLE;
#ifdef GPROF
                s = (u_long) CLKF_PC(framep) - (u_long) s_lowpc;
                if (profiling < 2 && s < s_textsize)
                        kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
        }
        /*
         * We maintain statistics shown by user-level statistics
         * programs:  the amount of time in each cpu state, and
         * the amount of time each of DK_NDRIVE ``drives'' is busy.
         */
        cp_time[cpstate]++;
        for (s = 0; s < DK_NDRIVE; s++)
                if (dk_busy&(1<<s))
                        dk_time[s]++;
}
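
/*
 * Illustrative note (not in the original): user-level tools such as
 * vmstat(8) and iostat(8) turn these tick counters into percentages by
 * differencing two samples, e.g.
 *
 *      %state = delta(cp_time[state]) / sum over all states of delta(cp_time)
 */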

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock(frame)
        clockframe frame;
{

        for (;;) {
                register struct callout *p1;
                register caddr_t arg;
                register timeout_func_t func;
                register int a, s;

                s = splhigh();
                if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
                        splx(s);
                        break;
                }
                arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
                calltodo.c_next = p1->c_next;
                p1->c_next = callfree;
                callfree = p1;
                splx(s);
                (*func)(arg, a);
        }

        /*
         * If no process to work with, we're finished.
         */
        if (curproc == 0) return;

        /*
         * If trapped user-mode and profiling, give it
         * a profiling tick.
         */
        if (CLKF_USERMODE(&frame)) {
                register struct proc *p = curproc;

                if (p->p_stats->p_prof.pr_scale)
                        profile_tick(p, &frame);
                /*
                 * Check to see if process has accumulated
                 * more than 10 minutes of user time.  If so
                 * reduce priority to give others a chance.
                 */
                if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
                    p->p_utime.tv_sec > 10 * 60) {
                        p->p_nice = NZERO + 4;
                        setpri(p);
                        p->p_pri = p->p_usrpri;
                }
        }
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
void
timeout(func, arg, t)
        timeout_func_t func;
        caddr_t arg;
        register int t;
{
        register struct callout *p1, *p2, *pnew;
        register int s = splhigh();

        if (t <= 0)
                t = 1;
        pnew = callfree;
        if (pnew == NULL)
                panic("timeout table overflow");
        callfree = pnew->c_next;
        pnew->c_arg = arg;
        pnew->c_func = func;
        for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
                if (p2->c_time > 0)
                        t -= p2->c_time;
        p1->c_next = pnew;
        pnew->c_next = p2;
        pnew->c_time = t;
        if (p2)
                p2->c_time -= t;
        splx(s);
}
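
/*
 * Worked example (illustrative): inserting t == 7 into the delta list
 * 5, 3, 0, 4 (events at ticks 5, 8, 8, 12) walks past the first node
 * (t becomes 2), stops before the delta-3 node, and links in a new node
 * with c_time == 2, shrinking the follower to 1; the list becomes
 * 5, 2, 1, 0, 4, i.e. events at ticks 5, 7, 8, 8, 12.
 */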

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
void
untimeout(func, arg)
        timeout_func_t func;
        caddr_t arg;
{
        register struct callout *p1, *p2;
        register int s;

        s = splhigh();
        for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
                if (p2->c_func == func && p2->c_arg == arg) {
                        if (p2->c_next && p2->c_time > 0)
                                p2->c_next->c_time += p2->c_time;
                        p1->c_next = p2->c_next;
                        p2->c_next = callfree;
                        callfree = p2;
                        break;
                }
        }
        splx(s);
}
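
/*
 * Usage sketch (illustrative only; "mydev_poll" and "sc" are hypothetical
 * driver names, not part of this file):
 *
 *      timeout(mydev_poll, (caddr_t)sc, hz / 2);     poll in 0.5 sec
 *      ...
 *      untimeout(mydev_poll, (caddr_t)sc);           cancel if still pending
 *
 * The (func, arg) pair must match exactly for untimeout to find the entry.
 */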

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */

/* XXX clock_t */
u_long
hzto(tv)
        struct timeval *tv;
{
        register unsigned long ticks;
        register long sec;
        register long usec;
        int s;

        /*
         * If the number of usecs in the whole seconds part of the time
         * difference fits in a long, then the total number of usecs will
         * fit in an unsigned long.  Compute the total and convert it to
         * ticks, rounding up and adding 1 to allow for the current tick
         * to expire.  Rounding also depends on unsigned long arithmetic
         * to avoid overflow.
         *
         * Otherwise, if the number of ticks in the whole seconds part of
         * the time difference fits in a long, then convert the parts to
         * ticks separately and add, using similar rounding methods and
         * overflow avoidance.  This method would work in the previous
         * case but it is slightly slower and assumes that hz is integral.
         *
         * Otherwise, round the time difference down to the maximum
         * representable value.
         *
         * Maximum value for any timeout in 10ms ticks is 248 days.
         */
        s = splhigh();
        sec = tv->tv_sec - time.tv_sec;
        usec = tv->tv_usec - time.tv_usec;
        splx(s);
        if (usec < 0) {
                sec--;
                usec += 1000000;
        }
        if (sec < 0) {
#ifdef DIAGNOSTIC
                printf("hzto: negative time difference %ld sec %ld usec\n",
                       sec, usec);
#endif
                ticks = 1;
        } else if (sec <= LONG_MAX / 1000000)
                ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
                        / tick + 1;
        else if (sec <= LONG_MAX / hz)
                ticks = sec * hz
                        + ((unsigned long)usec + (tick - 1)) / tick + 1;
        else
                ticks = LONG_MAX;
#define CLOCK_T_MAX     INT_MAX /* XXX should be ULONG_MAX */
        if (ticks > CLOCK_T_MAX)
                ticks = CLOCK_T_MAX;
        return (ticks);
}
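
/*
 * Worked example (illustrative): with hz == 100 (tick == 10000 usec)
 * and a target time 1.5 seconds away, the first branch gives
 *
 *      ticks = (1500000 + 9999) / 10000 + 1 == 151
 *
 * so timeout(func, arg, hzto(&tv)) fires just after the requested time,
 * never before it.
 */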