X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/20bbf2f51aebc5f4803be6e7e9a69a0a74e2d48a..961945a80c6f995f4567dbce8881af0bbdee211c:/usr/src/sys/kern/kern_clock.c diff --git a/usr/src/sys/kern/kern_clock.c b/usr/src/sys/kern/kern_clock.c index a4e438473c..67698724d1 100644 --- a/usr/src/sys/kern/kern_clock.c +++ b/usr/src/sys/kern/kern_clock.c @@ -1,449 +1,263 @@ -/* kern_clock.c 4.30 81/12/19 */ +/* kern_clock.c 4.48 82/12/17 */ + +#include "../machine/reg.h" +#include "../machine/psl.h" #include "../h/param.h" #include "../h/systm.h" #include "../h/dk.h" #include "../h/callout.h" -#include "../h/seg.h" #include "../h/dir.h" #include "../h/user.h" +#include "../h/kernel.h" #include "../h/proc.h" -#include "../h/reg.h" -#include "../h/psl.h" #include "../h/vm.h" -#include "../h/buf.h" #include "../h/text.h" -#include "../h/vlimit.h" -#include "../h/mtpr.h" -#include "../h/clock.h" -#include "../h/cpu.h" -#include "../h/protosw.h" +#ifdef MUSH +#include "../h/quota.h" +#include "../h/share.h" +#endif -#include "bk.h" -#include "dh.h" -#include "dz.h" +#ifdef vax +#include "../vax/mtpr.h" +#endif +# /* - * Hardclock is called straight from - * the real time clock interrupt. - * We limit the work we do at real clock interrupt time to: - * reloading clock - * decrementing time to callouts - * recording cpu time usage - * modifying priority of current process - * arrange for soft clock interrupt - * kernel pc profiling - * - * At software (softclock) interrupt time we: - * implement callouts - * maintain date - * lightning bolt wakeup (every second) - * alarm clock signals - * jab the scheduler + * Clock handling routines. * - * On the vax softclock interrupts are implemented by - * software interrupts. Note that we may have multiple softclock - * interrupts compressed into one (due to excessive interrupt load), - * but that hardclock interrupts should never be lost. 
+ * This code is written for a machine with only one interval timer, + * and does timing and resource utilization estimation statistically + * based on the state of the machine hz times a second. A machine + * with proper clocks (running separately in user state, system state, + * interrupt state and idle state) as well as a time-of-day clock + * would allow a non-approximate implementation. */ -#ifdef KPROF -int kcounts[20000]; -#endif /* - * Protoslow is like lbolt, but for slow protocol timeouts, counting - * up to (hz/PR_SLOWHZ), then causing a pfslowtimo(). - * Protofast is like lbolt, but for fast protocol timeouts, counting - * up to (hz/PR_FASTHZ), then causing a pffasttimo(). + * TODO: + * * Keep more accurate statistics by simulating good interval timers. + * * Use the time-of-day clock on the VAX to keep more accurate time + * than is possible by repeated use of the interval timer. + * * Allocate more timeout table slots when table overflows. */ -int protoslow; -int protofast; +/* bump a timeval by a small number of usec's */ +#define bumptime(tp, usec) \ + (tp)->tv_usec += usec; \ + if ((tp)->tv_usec >= 1000000) { \ + (tp)->tv_usec -= 1000000; \ + (tp)->tv_sec++; \ + } + +/* + * The (single) hardware interval timer. + * We update the events relating to real time, and then + * make a gross assumption: that the system has been in the + * state it is in (user state, kernel state, interrupt state, + * or idle state) for the entire last time interval, and + * update statistics accordingly. 
+ */ /*ARGSUSED*/ +#ifdef vax hardclock(pc, ps) caddr_t pc; + int ps; +{ +#endif +#ifdef sun +hardclock(regs) + struct regs regs; { + int ps = regs.r_sr; + caddr_t pc = (caddr_t)regs.r_pc; +#endif register struct callout *p1; - register struct proc *pp; + register struct proc *p; register int s, cpstate; +#ifdef sun + if (USERMODE(ps)) /* aston needs ar0 */ + u.u_ar0 = &regs.r_r0; +#endif /* - * reprime clock - */ - clkreld(); - - /* - * update callout times + * Update real-time timeout queue. + * At front of queue are some number of events which are ``due''. + * The time to these is <= 0 and if negative represents the + * number of ticks which have passed since it was supposed to happen. + * The rest of the q elements (times > 0) are events yet to happen, + * where the time for each is given as a delta from the previous. + * Decrementing just the first of these serves to decrement the time + * to all events. */ for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next) - ; + --p1->c_time; if (p1) - p1->c_time--; + --p1->c_time; /* - * Maintain iostat and per-process cpu statistics + * If the cpu is currently scheduled to a process, then + * charge it with resource utilization for a tick, updating + * statistics which run in (user+system) virtual time, + * such as the cpu time limit and profiling timers. + * This assumes that the current process has been running + * the entire last tick.
*/ if (!noproc) { s = u.u_procp->p_rssize; - u.u_vm.vm_idsrss += s; + u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0; /* XXX */ if (u.u_procp->p_textp) { register int xrss = u.u_procp->p_textp->x_rssize; s += xrss; - u.u_vm.vm_ixrss += xrss; + u.u_ru.ru_ixrss += xrss; } - if (s > u.u_vm.vm_maxrss) - u.u_vm.vm_maxrss = s; - if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) { + if (s > u.u_ru.ru_maxrss) + u.u_ru.ru_maxrss = s; + if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) > + u.u_rlimit[RLIMIT_CPU].rlim_cur) { psignal(u.u_procp, SIGXCPU); - if (u.u_limit[LIM_CPU] < INFINITY - 5) - u.u_limit[LIM_CPU] += 5; + if (u.u_rlimit[RLIMIT_CPU].rlim_cur < + u.u_rlimit[RLIMIT_CPU].rlim_max) + u.u_rlimit[RLIMIT_CPU].rlim_cur += 5; } + if (timerisset(&u.u_timer[ITIMER_PROF].it_value) && + itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0) + psignal(u.u_procp, SIGPROF); } + /* - * Update iostat information. + * Charge the time out based on the mode the cpu is in. + * Here again we fudge for the lack of proper interval timers + * assuming that the current state has been around at least + * one tick. */ if (USERMODE(ps)) { - u.u_vm.vm_utime++; - if(u.u_procp->p_nice > NZERO) + /* + * CPU was in user state. Increment + * user time counter, and process process-virtual time + * interval timer. + */ + bumptime(&u.u_ru.ru_utime, tick); + if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0) + psignal(u.u_procp, SIGVTALRM); + if (u.u_procp->p_nice > NZERO) cpstate = CP_NICE; else cpstate = CP_USER; } else { -#ifdef KPROF - int k = ((int)pc & 0x7fffffff) / 8; - if (k < 20000) - kcounts[k]++; + /* + * CPU was in system state. If profiling kernel + * increment a counter. If no process is running + * then this is a system tick if we were running + * at a non-zero IPL (in a driver). 
If a process is running, + * then we charge it with system time even if we were + * at a non-zero IPL, since the system often runs + * this way during processing of system calls. + * This is approximate, but the lack of true interval + * timers makes doing anything else difficult. + */ +#ifdef GPROF + int k = pc - s_lowpc; + if (profiling < 2 && k < s_textsize) + kcount[k / sizeof (*kcount)]++; #endif cpstate = CP_SYS; - if (noproc) - cpstate = CP_IDLE; - else - u.u_vm.vm_stime++; + if (noproc) { + if (BASEPRI(ps)) + cpstate = CP_IDLE; + } else { + bumptime(&u.u_ru.ru_stime, tick); + } } + + /* + * We maintain statistics shown by user-level statistics + * programs: the amount of time in each cpu state, and + * the amount of time each of DK_NDRIVE ``drives'' is busy. + */ cp_time[cpstate]++; for (s = 0; s < DK_NDRIVE; s++) if (dk_busy&(1<p_cpticks++; - if(++pp->p_cpu == 0) - pp->p_cpu--; - if(pp->p_cpu % 4 == 0) { - (void) setpri(pp); - if (pp->p_pri >= PUSER) - pp->p_pri = pp->p_usrpri; + p = u.u_procp; + p->p_cpticks++; + if (++p->p_cpu == 0) + p->p_cpu--; +#ifdef MUSH + p->p_quota->q_cost += (p->p_nice > NZERO ? + (shconsts.sc_tic * ((2*NZERO)-p->p_nice)) / NZERO : + shconsts.sc_tic) * (((int)avenrun[0]+2)/3); +#endif + if ((p->p_cpu&3) == 0) { + (void) setpri(p); + if (p->p_pri >= PUSER) + p->p_pri = p->p_usrpri; } } - /* - * Time moves on. - */ - ++lbolt; /* - * Time moves on for protocols. - */ - --protoslow; --protofast; - -#if VAX780 - /* - * On 780's, impelement a fast UBA watcher, - * to make sure uba's don't get stuck. - */ - if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps)) - unhang(); -#endif - /* - * Schedule a software interrupt for the rest - * of clock activities. + * Increment the time-of-day, and schedule + * processing of the callouts at a very low cpu priority, + * so we don't keep the relatively high clock interrupt + * priority any longer than necessary. 
*/ + bumptime(&time, tick); setsoftclock(); } /* - * The digital decay cpu usage priority assignment is scaled to run in - * time as expanded by the 1 minute load average. Each second we - * multiply the the previous cpu usage estimate by - * nrscale*avenrun[0] - * The following relates the load average to the period over which - * cpu usage is 90% forgotten: - * loadav 1 5 seconds - * loadav 5 24 seconds - * loadav 10 47 seconds - * loadav 20 93 seconds - * This is a great improvement on the previous algorithm which - * decayed the priorities by a constant, and decayed away all knowledge - * of previous activity in about 20 seconds. Under heavy load, - * the previous algorithm degenerated to round-robin with poor response - * time when there was a high load average. - */ -#undef ave -#define ave(a,b) ((int)(((int)(a*b))/(b+1))) -int nrscale = 2; -double avenrun[]; - -/* - * Constant for decay filter for cpu usage field - * in process table (used by ps au). - */ -double ccpu = 0.95122942450071400909; /* exp(-1/20) */ - -/* - * Software clock interrupt. - * This routine runs at lower priority than device interrupts. + * Software priority level clock interrupt. + * Run periodic events from timeout queue. */ /*ARGSUSED*/ +#ifdef vax softclock(pc, ps) caddr_t pc; + int ps; { - register struct callout *p1; - register struct proc *pp; - register int a, s; - caddr_t arg; - int (*func)(); - - /* - * Perform callouts (but not after panic's!) - */ - if (panicstr == 0) { - for (;;) { - s = spl7(); - if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) { - splx(s); - break; - } - calltodo.c_next = p1->c_next; - arg = p1->c_arg; - func = p1->c_func; - p1->c_next = callfree; - callfree = p1; - (void) splx(s); - (*func)(arg); - } - } - - /* - * Drain silos. 
- */ -#if NDH > 0 - s = spl5(); dhtimer(); splx(s); #endif -#if NDZ > 0 - s = spl5(); dztimer(); splx(s); +#ifdef sun +softclock() +{ + int ps = u.u_ar0[PS]; + caddr_t pc = (caddr_t)u.u_ar0[PC]; #endif - /* - * If idling and processes are waiting to swap in, - * check on them. - */ - if (noproc && runin) { - runin = 0; - wakeup((caddr_t)&runin); - } - - /* - * Run paging daemon every 1/4 sec. - */ - if (lbolt % (hz/4) == 0) { - vmpago(); - } - - /* - * Reschedule every 1/10 sec. - */ - if (lbolt % (hz/10) == 0) { - runrun++; - aston(); - } - - /* - * Run network slow and fast timeouts. - */ - if (protofast <= 0) { - protofast = hz / PR_FASTHZ; - pffasttimo(); - } - if (protoslow <= 0) { - protoslow = hz / PR_SLOWHZ; - pfslowtimo(); - } - - /* - * Lightning bolt every second: - * sleep timeouts - * process priority recomputation - * process %cpu averaging - * virtual memory metering - * kick swapper if processes want in - */ - if (lbolt >= hz) { - /* - * This doesn't mean much on VAX since we run at - * software interrupt time... if hardclock() - * calls softclock() directly, it prevents - * this code from running when the priority - * was raised when the clock interrupt occurred. - */ - if (BASEPRI(ps)) - return; + for (;;) { + register struct callout *p1; + register caddr_t arg; + register int (*func)(); + register int a, s; - /* - * If we didn't run a few times because of - * long blockage at high ipl, we don't - * really want to run this code several times, - * so squish out all multiples of hz here. - */ - time += lbolt / hz; - lbolt %= hz; - - /* - * Wakeup lightning bolt sleepers. - * Processes sleep on lbolt to wait - * for short amounts of time (e.g. 1 second). - */ - wakeup((caddr_t)&lbolt); - - /* - * Recompute process priority and process - * sleep() system calls as well as internal - * sleeps with timeouts (tsleep() kernel routine). 
- */ - for (pp = proc; pp < procNPROC; pp++) - if (pp->p_stat && pp->p_stat!=SZOMB) { - /* - * Increase resident time, to max of 127 seconds - * (it is kept in a character.) For - * loaded processes this is time in core; for - * swapped processes, this is time on drum. - */ - if (pp->p_time != 127) - pp->p_time++; - /* - * If process has clock counting down, and it - * expires, set it running (if this is a tsleep()), - * or give it an SIGALRM (if the user process - * is using alarm signals. - */ - if (pp->p_clktim && --pp->p_clktim == 0) - if (pp->p_flag & STIMO) { - s = spl6(); - switch (pp->p_stat) { - - case SSLEEP: - setrun(pp); - break; - - case SSTOP: - unsleep(pp); - break; - } - pp->p_flag &= ~STIMO; - splx(s); - } else - psignal(pp, SIGALRM); - /* - * If process is blocked, increment computed - * time blocked. This is used in swap scheduling. - */ - if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP) - if (pp->p_slptime != 127) - pp->p_slptime++; - /* - * Update digital filter estimation of process - * cpu utilization for loaded processes. - */ - if (pp->p_flag&SLOAD) - pp->p_pctcpu = ccpu * pp->p_pctcpu + - (1.0 - ccpu) * (pp->p_cpticks/(float)hz); - /* - * Recompute process priority. The number p_cpu - * is a weighted estimate of cpu time consumed. - * A process which consumes cpu time has this - * increase regularly. We here decrease it by - * a fraction based on load average giving a digital - * decay filter which damps out in about 5 seconds - * when seconds are measured in time expanded by the - * load average. - * - * If a process is niced, then the nice directly - * affects the new priority. The final priority - * is in the range 0 to 255, to fit in a character. - */ - pp->p_cpticks = 0; - a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) + - pp->p_nice - NZERO; - if (a < 0) - a = 0; - if (a > 255) - a = 255; - pp->p_cpu = a; - (void) setpri(pp); - /* - * Now have computed new process priority - * in p->p_usrpri. Carefully change p->p_pri. 
- * A process is on a run queue associated with - * this priority, so we must block out process - * state changes during the transition. - */ - s = spl6(); - if (pp->p_pri >= PUSER) { - if ((pp != u.u_procp || noproc) && - pp->p_stat == SRUN && - (pp->p_flag & SLOAD) && - pp->p_pri != pp->p_usrpri) { - remrq(pp); - pp->p_pri = pp->p_usrpri; - setrq(pp); - } else - pp->p_pri = pp->p_usrpri; - } + s = spl7(); + if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) { splx(s); + break; } - - /* - * Perform virtual memory metering. - */ - vmmeter(); - - /* - * If the swap process is trying to bring - * a process in, have it look again to see - * if it is possible now. - */ - if (runin!=0) { - runin = 0; - wakeup((caddr_t)&runin); - } - - /* - * If there are pages that have been cleaned, - * jolt the pageout daemon to process them. - * We do this here so that these pages will be - * freed if there is an abundance of memory and the - * daemon would not be awakened otherwise. - */ - if (bclnlist != NULL) - wakeup((caddr_t)&proc[2]); - - /* - * If the trap occurred from usermode, - * then check to see if it has now been - * running more than 10 minutes of user time - * and should thus run with reduced priority - * to give other processes a chance. - */ - if (USERMODE(ps)) { - pp = u.u_procp; - if (pp->p_uid && pp->p_nice == NZERO && - u.u_vm.vm_utime > 600 * hz) - pp->p_nice = NZERO+4; - (void) setpri(pp); - pp->p_pri = pp->p_usrpri; - } + arg = p1->c_arg; func = p1->c_func; a = p1->c_time; + calltodo.c_next = p1->c_next; + p1->c_next = callfree; + callfree = p1; + splx(s); + (*func)(arg, a); } /* * If trapped user-mode, give it a profiling tick. @@ -455,33 +269,17 @@ softclock(pc, ps) } /* - * Timeout is called to arrange that - * fun(arg) is called in tim/hz seconds. - * An entry is linked into the callout - * structure. The time in each structure - * entry is the number of hz's more - * than the previous entry. 
- * In this way, decrementing the - * first entry has the effect of - * updating all entries. - * - * The panic is there because there is nothing - * intelligent to be done if an entry won't fit. + * Arrange that (*fun)(arg) is called in tim/hz seconds. */ timeout(fun, arg, tim) int (*fun)(); caddr_t arg; + int tim; { register struct callout *p1, *p2, *pnew; register int t; int s; -/* DEBUGGING CODE */ - int ttrstrt(); - - if (fun == ttrstrt && arg == 0) - panic("timeout ttrstr arg"); -/* END DEBUGGING CODE */ t = tim; s = spl7(); pnew = callfree; @@ -491,7 +289,8 @@ timeout(fun, arg, tim) pnew->c_arg = arg; pnew->c_func = fun; for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) - t -= p2->c_time; + if (p2->c_time > 0) + t -= p2->c_time; p1->c_next = pnew; pnew->c_next = p2; pnew->c_time = t; @@ -499,3 +298,61 @@ timeout(fun, arg, tim) p2->c_time -= t; splx(s); } + +/* + * untimeout is called to remove a function timeout call + * from the callout structure. + */ +untimeout(fun, arg) + int (*fun)(); + caddr_t arg; +{ + register struct callout *p1, *p2; + register int s; + + s = spl7(); + for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) { + if (p2->c_func == fun && p2->c_arg == arg) { + if (p2->c_next && p2->c_time > 0) + p2->c_next->c_time += p2->c_time; + p1->c_next = p2->c_next; + p2->c_next = callfree; + callfree = p2; + break; + } + } + splx(s); +} + +/* + * Compute number of hz until specified time. + * Used to compute third argument to timeout() from an + * absolute time. + */ +hzto(tv) + struct timeval *tv; +{ + register long ticks; + register long sec; + int s = spl7(); + + /* + * If number of milliseconds will fit in 32 bit arithmetic, + * then compute number of milliseconds to time and scale to + * ticks. Otherwise just compute number of hz in time, rounding + * times greater than representible to maximum value. + * + * Delta times less than 25 days can be computed ``exactly''. 
+ * Maximum value for any timeout in 10ms ticks is 250 days. + */ + sec = tv->tv_sec - time.tv_sec; + if (sec <= 0x7fffffff / 1000 - 1000) + ticks = ((tv->tv_sec - time.tv_sec) * 1000 + + (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); + else if (sec <= 0x7fffffff / hz) + ticks = sec * hz; + else + ticks = 0x7fffffff; + splx(s); + return (ticks); +}