reformat messages
[unix-history] / usr / src / sys / kern / kern_clock.c
index ba31178..a483580 100644 (file)
@@ -1,8 +1,9 @@
-/*     kern_clock.c    3.7     %H%     */
+/*     kern_clock.c    4.15    %G%     */
 
 #include "../h/param.h"
 #include "../h/systm.h"
 
 #include "../h/param.h"
 #include "../h/systm.h"
-#include "../h/callo.h"
+#include "../h/dk.h"
+#include "../h/callout.h"
 #include "../h/seg.h"
 #include "../h/dir.h"
 #include "../h/user.h"
 #include "../h/seg.h"
 #include "../h/dir.h"
 #include "../h/user.h"
 #include "../h/vm.h"
 #include "../h/buf.h"
 #include "../h/text.h"
 #include "../h/vm.h"
 #include "../h/buf.h"
 #include "../h/text.h"
+#include "../h/vlimit.h"
+#include "../h/mtpr.h"
+#include "../h/clock.h"
+#include "../h/cpu.h"
+
+#include "dh.h"
+#include "dz.h"
 
 #define        SCHMAG  9/10
 
 
 /*
 
 #define        SCHMAG  9/10
 
 
 /*
- * clock is called straight from
+ * Hardclock is called straight from
  * the real time clock interrupt.
  * the real time clock interrupt.
+ * We limit the work we do at real clock interrupt time to:
+ *     reloading clock
+ *     decrementing time to callouts
+ *     recording cpu time usage
+ *     modifying priority of current process
+ *     arrange for soft clock interrupt
+ *     kernel pc profiling
  *
  *
- * Functions:
+ * At softclock interrupt time we:
  *     implement callouts
  *     implement callouts
- *     maintain user/system times
  *     maintain date
  *     maintain date
- *     profile
  *     lightning bolt wakeup (every second)
  *     alarm clock signals
  *     jab the scheduler
  *     lightning bolt wakeup (every second)
  *     alarm clock signals
  *     jab the scheduler
+ *
+ * On the vax softclock interrupts are implemented by
+ * software interrupts.  Note that we may have multiple softclock
+ * interrupts compressed into one (due to excessive interrupt load),
+ * but that hardclock interrupts should never be lost.
  */
  */
-#ifdef KPROF
-unsigned short kcount[20000];
-#endif
-
-/*
- * We handle regular calls to the dh and dz silo input processors
- * without using timeouts to save a little time.
- */
-int    rintvl = 0;             /* every 1/60'th of sec check receivers */
-int    rcnt;
 
 
-clock(pc, ps)
-caddr_t pc;
+/*ARGSUSED*/
+hardclock(pc, ps)
+       caddr_t pc;
 {
 {
-       register struct callo *p1, *p2;
+       register struct callout *p1;
        register struct proc *pp;
        register struct proc *pp;
-       register int s;
-       int a;
+       register int s, cpstate;
 
        /*
         * reprime clock
 
        /*
         * reprime clock
@@ -54,65 +62,19 @@ caddr_t pc;
        clkreld();
 
        /*
        clkreld();
 
        /*
-        * callouts
-        * else update first non-zero time
+        * update callout times
         */
         */
-
        if(callout[0].c_func == NULL)
                goto out;
        if(callout[0].c_func == NULL)
                goto out;
-       p2 = &callout[0];
-       while(p2->c_time<=0 && p2->c_func!=NULL)
-               p2++;
-       p2->c_time--;
-
-       /*
-        * if ps is high, just return
-        */
-       if (BASEPRI(ps))
-               goto out;
-
-       /*
-        * callout
-        */
-
-       if(callout[0].c_time <= 0) {
-               p1 = &callout[0];
-               while(p1->c_func != 0 && p1->c_time <= 0) {
-                       (*p1->c_func)(p1->c_arg);
-                       p1++;
-               }
-               p2 = &callout[0];
-               while(p2->c_func = p1->c_func) {
-                       p2->c_time = p1->c_time;
-                       p2->c_arg = p1->c_arg;
-                       p1++;
-                       p2++;
-               }
-       }
-
-       /*
-        * lightning bolt time-out
-        * and time of day
-        */
+       p1 = &callout[0];
+       while(p1->c_time<=0 && p1->c_func!=NULL)
+               p1++;
+       p1->c_time--;
 out:
 
        /*
 out:
 
        /*
-        * In order to not take input character interrupts to use
-        * the input silo on DZ's we have to guarantee to echo
-        * characters regularly.  This means that we have to
-        * call the timer routines predictably.  Since blocking
-        * in these routines is at spl5(), we have to make spl5()
-        * really spl6() blocking off the clock to put this code
-        * here.  Note also that it is critical that we run spl5()
-        * (i.e. really spl6()) in the receiver interrupt routines
-        * so we can't enter them recursively and transpose characters.
+        * Maintain iostat and per-process cpu statistics
         */
         */
-       if (rcnt >= rintvl) {
-               dhtimer();
-               dztimer();
-               rcnt = 0;
-       } else
-               rcnt++;
        if (!noproc) {
                s = u.u_procp->p_rssize;
                u.u_vm.vm_idsrss += s;
        if (!noproc) {
                s = u.u_procp->p_rssize;
                u.u_vm.vm_idsrss += s;
@@ -124,22 +86,32 @@ out:
                }
                if (s > u.u_vm.vm_maxrss)
                        u.u_vm.vm_maxrss = s;
                }
                if (s > u.u_vm.vm_maxrss)
                        u.u_vm.vm_maxrss = s;
+               if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
+                       psignal(u.u_procp, SIGXCPU);
+                       if (u.u_limit[LIM_CPU] < INFINITY - 5)
+                               u.u_limit[LIM_CPU] += 5;
+               }
        }
        }
-       a = dk_busy&07;
        if (USERMODE(ps)) {
                u.u_vm.vm_utime++;
                if(u.u_procp->p_nice > NZERO)
        if (USERMODE(ps)) {
                u.u_vm.vm_utime++;
                if(u.u_procp->p_nice > NZERO)
-                       a += 8;
+                       cpstate = CP_NICE;
+               else
+                       cpstate = CP_USER;
        } else {
        } else {
-               a += 16;
+               cpstate = CP_SYS;
                if (noproc)
                if (noproc)
-                       a += 8;
+                       cpstate = CP_IDLE;
                else
                        u.u_vm.vm_stime++;
        }
                else
                        u.u_vm.vm_stime++;
        }
-       dk_time[a]++;
+       cp_time[cpstate]++;
+       for (s = 0; s < DK_NDRIVE; s++)
+               if (dk_busy&(1<<s))
+                       dk_time[s]++;
        if (!noproc) {
                pp = u.u_procp;
        if (!noproc) {
                pp = u.u_procp;
+               pp->p_cpticks++;
                if(++pp->p_cpu == 0)
                        pp->p_cpu--;
                if(pp->p_cpu % 16 == 0) {
                if(++pp->p_cpu == 0)
                        pp->p_cpu--;
                if(pp->p_cpu % 16 == 0) {
@@ -149,48 +121,193 @@ out:
                }
        }
        ++lbolt;
                }
        }
        ++lbolt;
-       if (lbolt % (HZ/4) == 0) {
+#if VAX780
+       if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
+               unhang();
+#endif
+       setsoftclock();
+}
+
+/*
+ * Constant for decay filter for cpu usage.
+ *
+ * softclock() uses this in its once-a-second (lbolt >= hz) pass,
+ * for each loaded (SLOAD) process:
+ *     p_pctcpu = ccpu * p_pctcpu + (1.0 - ccpu) * (p_cpticks / hz)
+ * so the previous %cpu estimate is scaled by exp(-1/20) on every
+ * update, i.e. it decays by a factor of e over 20 updates.
+ */
+double ccpu = 0.95122942450071400909;          /* exp(-1/20) */
+
+/*
+ * Software clock interrupt.
+ * This routine is blocked by spl1(),
+ * which doesn't block device interrupts!
+ */
+/*ARGSUSED*/
+softclock(pc, ps)
+       caddr_t pc;
+{
+       register struct callout *p1, *p2;
+       register struct proc *pp;
+       register int a, s;
+
+       /*
+        * Perform callouts (but not after panic's!)
+        */
+       if (panicstr == 0 && callout[0].c_time <= 0) {
+               p1 = &callout[0];
+               while (p1->c_func != 0 && p1->c_time <= 0) {
+                       (*p1->c_func)(p1->c_arg);
+                       p1++;
+               }
+               p2 = &callout[0];
+               while (p2->c_func = p1->c_func) {
+                       p2->c_time = p1->c_time;
+                       p2->c_arg = p1->c_arg;
+                       p1++;
+                       p2++;
+               }
+       }
+
+       /*
+        * Drain silos.
+        */
+#if NDH > 0
+       s = spl5(); dhtimer(); splx(s);
+#endif
+#if NDZ > 0
+       s = spl5(); dztimer(); splx(s);
+#endif
+
+       /*
+        * If idling and processes are waiting to swap in,
+        * check on them.
+        */
+       if (noproc && runin) {
+               runin = 0;
+               wakeup((caddr_t)&runin);
+       }
+
+       /*
+        * Run paging daemon and reschedule every 1/4 sec.
+        */
+       if (lbolt % (hz/4) == 0) {
                vmpago();
                runrun++;
                vmpago();
                runrun++;
+               aston();
        }
        }
-       if (lbolt >= HZ) {
+
+       /*
+        * Lightning bolt every second:
+        *      sleep timeouts
+        *      process priority recomputation
+        *      process %cpu averaging
+        *      virtual memory metering
+        *      kick swapper if processes want in
+        */
+       if (lbolt >= hz) {
+               /*
+                * This doesn't mean much since we run at
+                * software interrupt time... if hardclock()
+                * calls softclock() directly, it prevents
+                * this code from running when the priority
+                * was raised when the clock interrupt occurred.
+                */
                if (BASEPRI(ps))
                        return;
                if (BASEPRI(ps))
                        return;
-               lbolt -= HZ;
-               ++time;
-               (void) spl1();
-               runrun++;
+
+               /*
+                * If we didn't run a few times because of
+                * long blockage at high ipl, we don't
+                * really want to run this code several times,
+                * so squish out all multiples of hz here.
+                */
+               time += lbolt / hz;
+               lbolt %= hz;
+
+               /*
+                * Wakeup lightning bolt sleepers.
+                * Processes sleep on lbolt to wait
+                * for short amounts of time (e.g. 1 second).
+                */
                wakeup((caddr_t)&lbolt);
                wakeup((caddr_t)&lbolt);
-               for(pp = &proc[0]; pp < &proc[NPROC]; pp++)
-               if (pp->p_stat && pp->p_stat<SZOMB) {
-                       if(pp->p_time != 127)
+
+               /*
+                * Recompute process priority and process
+                * sleep() system calls as well as internal
+                * sleeps with timeouts (tsleep() kernel routine).
+                */
+               for (pp = proc; pp < procNPROC; pp++)
+               if (pp->p_stat && pp->p_stat!=SZOMB) {
+                       /*
+                        * Increase resident time, to max of 127 seconds
+                        * (it is kept in a character.)  For
+                        * loaded processes this is time in core; for
+                        * swapped processes, this is time on drum.
+                        */
+                       if (pp->p_time != 127)
                                pp->p_time++;
                                pp->p_time++;
-                       if(pp->p_clktim)
-                               if(--pp->p_clktim == 0)
-                                       if (pp->p_flag & STIMO) {
-                                               s = spl6();
-                                               if (pp->p_stat == SSLEEP)
-                                                       setrun(pp);
-                                               pp->p_flag &= ~STIMO;
-                                               splx(s);
-                                       } else
-                                               psignal(pp, SIGCLK);
-                       if(pp->p_stat==SSLEEP||pp->p_stat==SSTOP)
+                       /*
+                        * If process has clock counting down, and it
+                        * expires, set it running (if this is a tsleep()),
+                        * or give it a SIGALRM (if the user process
+                        * is using alarm signals).
+                        */
+                       if (pp->p_clktim && --pp->p_clktim == 0)
+                               if (pp->p_flag & STIMO) {
+                                       s = spl6();
+                                       switch (pp->p_stat) {
+
+                                       case SSLEEP:
+                                               setrun(pp);
+                                               break;
+
+                                       case SSTOP:
+                                               unsleep(pp);
+                                               break;
+                                       }
+                                       pp->p_flag &= ~STIMO;
+                                       splx(s);
+                               } else
+                                       psignal(pp, SIGALRM);
+                       /*
+                        * If process is blocked, increment computed
+                        * time blocked.  This is used in swap scheduling.
+                        */
+                       if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
                                if (pp->p_slptime != 127)
                                        pp->p_slptime++;
                                if (pp->p_slptime != 127)
                                        pp->p_slptime++;
-                       if(pp->p_flag&SLOAD) {
-                               ave(pp->p_aveflt, pp->p_faults, 5);
-                               pp->p_faults = 0;
-                       }
+                       /*
+                        * Update digital filter estimation of process
+                        * cpu utilization for loaded processes.
+                        */
+                       if (pp->p_flag&SLOAD)
+                               pp->p_pctcpu = ccpu * pp->p_pctcpu +
+                                   (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
+                       /*
+                        * Recompute process priority.  The number p_cpu
+                        * is a weighted estimate of cpu time consumed.
+                        * A process which consumes cpu time has this
+                        * increase regularly.  We here decrease it by
+                        * a fraction (SCHMAG is 90%), giving a digital
+                        * decay filter which damps out in about 10 seconds.
+                        *
+                        * If a process is niced, then the nice directly
+                        * affects the new priority.  The final priority
+                        * is in the range 0 to 255, to fit in a character.
+                        */
+                       pp->p_cpticks = 0;
                        a = (pp->p_cpu & 0377)*SCHMAG + pp->p_nice - NZERO;
                        a = (pp->p_cpu & 0377)*SCHMAG + pp->p_nice - NZERO;
-                       if(a < 0)
+                       if (a < 0)
                                a = 0;
                                a = 0;
-                       if(a > 255)
+                       if (a > 255)
                                a = 255;
                        pp->p_cpu = a;
                        (void) setpri(pp);
                                a = 255;
                        pp->p_cpu = a;
                        (void) setpri(pp);
+                       /*
+                        * Now have computed new process priority
+                        * in pp->p_usrpri.  Carefully change pp->p_pri.
+                        * A process is on a run queue associated with
+                        * this priority, so we must block out process
+                        * state changes during the transition.
+                        */
                        s = spl6();
                        s = spl6();
-                       if(pp->p_pri >= PUSER) {
+                       if (pp->p_pri >= PUSER) {
                                if ((pp != u.u_procp || noproc) &&
                                    pp->p_stat == SRUN &&
                                    (pp->p_flag & SLOAD) &&
                                if ((pp != u.u_procp || noproc) &&
                                    pp->p_stat == SRUN &&
                                    (pp->p_flag & SLOAD) &&
@@ -203,11 +320,22 @@ out:
                        }
                        splx(s);
                }
                        }
                        splx(s);
                }
+
+               /*
+                * Perform virtual memory metering.
+                */
                vmmeter();
                vmmeter();
-               if(runin!=0) {
+
+               /*
+                * If the swap process is trying to bring
+                * a process in, have it look again to see
+                * if it is possible now.
+                */
+               if (runin!=0) {
                        runin = 0;
                        wakeup((caddr_t)&runin);
                }
                        runin = 0;
                        wakeup((caddr_t)&runin);
                }
+
                /*
                 * If there are pages that have been cleaned, 
                 * jolt the pageout daemon to process them.
                /*
                 * If there are pages that have been cleaned, 
                 * jolt the pageout daemon to process them.
@@ -217,42 +345,38 @@ out:
                 */
                if (bclnlist != NULL)
                        wakeup((caddr_t)&proc[2]);
                 */
                if (bclnlist != NULL)
                        wakeup((caddr_t)&proc[2]);
-#ifdef ERNIE
+
+               /*
+                * If the trap occurred from usermode,
+                * then check to see if it has now been
+                * running more than 10 minutes of user time
+                * and should thus run with reduced priority
+                * to give other processes a chance.
+                */
                if (USERMODE(ps)) {
                        pp = u.u_procp;
                if (USERMODE(ps)) {
                        pp = u.u_procp;
-                       if (pp->p_uid)
-                               if (pp->p_nice == NZERO && u.u_vm.vm_utime > 600 * HZ)
-                                       pp->p_nice = NZERO+4;
+                       if (pp->p_uid && pp->p_nice == NZERO &&
+                           u.u_vm.vm_utime > 600 * hz)
+                               pp->p_nice = NZERO+4;
                        (void) setpri(pp);
                        pp->p_pri = pp->p_usrpri;
                }
                        (void) setpri(pp);
                        pp->p_pri = pp->p_usrpri;
                }
-#endif
        }
        }
-       if (USERMODE(ps)) {
-               /*
-                * We do this last since it
-                * may block on a page fault in user space.
-                */
-               if (u.u_prof.pr_scale)
-                       addupc(pc, &u.u_prof, 1);
-       }
-#ifdef KPROF
-       else if (!noproc) {
-               register int indx = ((int)pc & 0x7fffffff) / 4;
-
-               if (indx >= 0 && indx < 20000)
-                       if (++kcount[indx] == 0)
-                               --kcount[indx];
+       /*
+        * If trapped user-mode, give it a profiling tick.
+        */
+       if (USERMODE(ps) && u.u_prof.pr_scale) {
+               u.u_procp->p_flag |= SOWEUPC;
+               aston();
        }
        }
-#endif
 }
 
 /*
  * timeout is called to arrange that
 }
 
 /*
  * timeout is called to arrange that
- * fun(arg) is called in tim/HZ seconds.
+ * fun(arg) is called in tim/hz seconds.
  * An entry is sorted into the callout
  * structure. The time in each structure
  * An entry is sorted into the callout
  * structure. The time in each structure
- * entry is the number of HZ's more
+ * entry is the number of hz's more
  * than the previous entry.
  * In this way, decrementing the
  * first entry has the effect of
  * than the previous entry.
  * In this way, decrementing the
  * first entry has the effect of
@@ -262,10 +386,10 @@ out:
  * intelligent to be done if an entry won't fit.
  */
 timeout(fun, arg, tim)
  * intelligent to be done if an entry won't fit.
  */
 timeout(fun, arg, tim)
-int (*fun)();
-caddr_t arg;
+       int (*fun)();
+       caddr_t arg;
 {
 {
-       register struct callo *p1, *p2;
+       register struct callout *p1, *p2, *p3;
        register int t;
        int s;
 
        register int t;
        int s;
 
@@ -276,12 +400,14 @@ caddr_t arg;
                t -= p1->c_time;
                p1++;
        }
                t -= p1->c_time;
                p1++;
        }
-       if (p1 >= &callout[NCALL-1])
-               panic("Timeout table overflow");
        p1->c_time -= t;
        p2 = p1;
        p1->c_time -= t;
        p2 = p1;
-       while(p2->c_func != 0)
+       p3 = callout+(ncallout-2);
+       while(p2->c_func != 0) {
+               if (p2 >= p3)
+                       panic("timeout");
                p2++;
                p2++;
+       }
        while(p2 >= p1) {
                (p2+1)->c_time = p2->c_time;
                (p2+1)->c_func = p2->c_func;
        while(p2 >= p1) {
                (p2+1)->c_time = p2->c_time;
                (p2+1)->c_func = p2->c_func;