cleaned up and re-enable compaction
[unix-history] / usr / src / sys / kern / kern_clock.c
index 84eef57..c94dc3c 100644 (file)
@@ -1,9 +1,9 @@
-/*     %H%     3.20    kern_clock.c    */
+/*     kern_clock.c    4.27    81/11/20        */
 
 #include "../h/param.h"
 #include "../h/systm.h"
 #include "../h/dk.h"
 
 #include "../h/param.h"
 #include "../h/systm.h"
 #include "../h/dk.h"
-#include "../h/callo.h"
+#include "../h/callout.h"
 #include "../h/seg.h"
 #include "../h/dir.h"
 #include "../h/user.h"
 #include "../h/seg.h"
 #include "../h/dir.h"
 #include "../h/user.h"
 #include "../h/vlimit.h"
 #include "../h/mtpr.h"
 #include "../h/clock.h"
 #include "../h/vlimit.h"
 #include "../h/mtpr.h"
 #include "../h/clock.h"
+#include "../h/cpu.h"
 
 
-#define        SCHMAG  9/10
+#include "bk.h"
+#include "dh.h"
+#include "dz.h"
 
 /*
 
 /*
- * Constant for decay filter for cpu usage.
- */
-double ccpu = 0.93550698503161773774;          /* exp(-1/15) */
-
-/*
- * Clock is called straight from
+ * Hardclock is called straight from
  * the real time clock interrupt.
  * the real time clock interrupt.
+ * We limit the work we do at real clock interrupt time to:
+ *     reloading clock
+ *     decrementing time to callouts
+ *     recording cpu time usage
+ *     modifying priority of current process
+ *     arrange for soft clock interrupt
+ *     kernel pc profiling
  *
  *
- * Functions:
+ * At software (softclock) interrupt time we:
  *     implement callouts
  *     implement callouts
- *     maintain user/system times
  *     maintain date
  *     maintain date
- *     profile
  *     lightning bolt wakeup (every second)
  *     alarm clock signals
  *     jab the scheduler
  *     lightning bolt wakeup (every second)
  *     alarm clock signals
  *     jab the scheduler
+ *
+ * On the vax softclock interrupts are implemented by
+ * software interrupts.  Note that we may have multiple softclock
+ * interrupts compressed into one (due to excessive interrupt load),
+ * but that hardclock interrupts should never be lost.
  */
 #ifdef KPROF
  */
 #ifdef KPROF
-unsigned short kcount[20000];
+int    kcounts[20000];
 #endif
 
 #endif
 
-/*
- * We handle regular calls to the dh and dz silo input processors
- * without using timeouts to save a little time.
- */
-int    rintvl = 0;             /* every 1/60'th of sec check receivers */
-int    rcnt;
-
-clock(pc, ps)
-caddr_t pc;
+/*ARGSUSED*/
+hardclock(pc, ps)
+       caddr_t pc;
 {
 {
-       register struct callo *p1, *p2;
+       register struct callout *p1;
        register struct proc *pp;
        register struct proc *pp;
-       register int s;
-       int a, cpstate;
+       register int s, cpstate;
 
        /*
         * reprime clock
 
        /*
         * reprime clock
@@ -62,65 +63,16 @@ caddr_t pc;
        clkreld();
 
        /*
        clkreld();
 
        /*
-        * callouts
-        * else update first non-zero time
+        * update callout times
         */
         */
-
-       if(callout[0].c_func == NULL)
-               goto out;
-       p2 = &callout[0];
-       while(p2->c_time<=0 && p2->c_func!=NULL)
-               p2++;
-       p2->c_time--;
-
-       /*
-        * if ps is high, just return
-        */
-       if (BASEPRI(ps))
-               goto out;
+       for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
+               ;
+       if (p1)
+               p1->c_time--;
 
        /*
 
        /*
-        * callout
+        * Maintain iostat and per-process cpu statistics
         */
         */
-
-       if(callout[0].c_time <= 0) {
-               p1 = &callout[0];
-               while(p1->c_func != 0 && p1->c_time <= 0) {
-                       (*p1->c_func)(p1->c_arg);
-                       p1++;
-               }
-               p2 = &callout[0];
-               while(p2->c_func = p1->c_func) {
-                       p2->c_time = p1->c_time;
-                       p2->c_arg = p1->c_arg;
-                       p1++;
-                       p2++;
-               }
-       }
-
-       /*
-        * lightning bolt time-out
-        * and time of day
-        */
-out:
-
-       /*
-        * In order to not take input character interrupts to use
-        * the input silo on DZ's we have to guarantee to echo
-        * characters regularly.  This means that we have to
-        * call the timer routines predictably.  Since blocking
-        * in these routines is at spl5(), we have to make spl5()
-        * really spl6() blocking off the clock to put this code
-        * here.  Note also that it is critical that we run spl5()
-        * (i.e. really spl6()) in the receiver interrupt routines
-        * so we can't enter them recursively and transpose characters.
-        */
-       if (rcnt >= rintvl) {
-               dhtimer();
-               dztimer();
-               rcnt = 0;
-       } else
-               rcnt++;
        if (!noproc) {
                s = u.u_procp->p_rssize;
                u.u_vm.vm_idsrss += s;
        if (!noproc) {
                s = u.u_procp->p_rssize;
                u.u_vm.vm_idsrss += s;
@@ -132,12 +84,15 @@ out:
                }
                if (s > u.u_vm.vm_maxrss)
                        u.u_vm.vm_maxrss = s;
                }
                if (s > u.u_vm.vm_maxrss)
                        u.u_vm.vm_maxrss = s;
-               if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/HZ > u.u_limit[LIM_CPU]) {
+               if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
                        psignal(u.u_procp, SIGXCPU);
                        if (u.u_limit[LIM_CPU] < INFINITY - 5)
                                u.u_limit[LIM_CPU] += 5;
                }
        }
                        psignal(u.u_procp, SIGXCPU);
                        if (u.u_limit[LIM_CPU] < INFINITY - 5)
                                u.u_limit[LIM_CPU] += 5;
                }
        }
+       /*
+        * Update iostat information.
+        */
        if (USERMODE(ps)) {
                u.u_vm.vm_utime++;
                if(u.u_procp->p_nice > NZERO)
        if (USERMODE(ps)) {
                u.u_vm.vm_utime++;
                if(u.u_procp->p_nice > NZERO)
@@ -145,85 +100,269 @@ out:
                else
                        cpstate = CP_USER;
        } else {
                else
                        cpstate = CP_USER;
        } else {
+#ifdef KPROF
+       int k = ((int)pc & 0x7fffffff) / 8;
+       if (k < 20000)
+               kcounts[k]++;
+#endif
                cpstate = CP_SYS;
                if (noproc)
                        cpstate = CP_IDLE;
                else
                        u.u_vm.vm_stime++;
        }
                cpstate = CP_SYS;
                if (noproc)
                        cpstate = CP_IDLE;
                else
                        u.u_vm.vm_stime++;
        }
-       dk_time[cpstate][dk_busy&(DK_NSTATES-1)]++;
+       cp_time[cpstate]++;
+       for (s = 0; s < DK_NDRIVE; s++)
+               if (dk_busy&(1<<s))
+                       dk_time[s]++;
+       /*
+        * Adjust priority of current process.
+        */
        if (!noproc) {
                pp = u.u_procp;
                pp->p_cpticks++;
                if(++pp->p_cpu == 0)
                        pp->p_cpu--;
        if (!noproc) {
                pp = u.u_procp;
                pp->p_cpticks++;
                if(++pp->p_cpu == 0)
                        pp->p_cpu--;
-               if(pp->p_cpu % 16 == 0) {
+               if(pp->p_cpu % 4 == 0) {
                        (void) setpri(pp);
                        if (pp->p_pri >= PUSER)
                                pp->p_pri = pp->p_usrpri;
                }
        }
                        (void) setpri(pp);
                        if (pp->p_pri >= PUSER)
                                pp->p_pri = pp->p_usrpri;
                }
        }
+       /*
+        * Time moves on.
+        */
        ++lbolt;
        ++lbolt;
-       if (lbolt % (HZ/4) == 0) {
+#if VAX780
+       /*
+        * On 780's, implement a fast UBA watcher,
+        * to make sure uba's don't get stuck.
+        */
+       if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
+               unhang();
+#endif
+       /*
+        * Schedule a software interrupt for the rest
+        * of clock activities.
+        */
+       setsoftclock();
+}
+
+/*
+ * The digital decay cpu usage priority assignment is scaled to run in
+ * time as expanded by the 1 minute load average.  Each second we
+ * multiply the previous cpu usage estimate by b/(b+1), where
+ *             b = nrscale*avenrun[0]
+ * The following relates the load average to the period over which
+ * cpu usage is 90% forgotten:
+ *     loadav 1         5 seconds
+ *     loadav 5        24 seconds
+ *     loadav 10       47 seconds
+ *     loadav 20       93 seconds
+ * This is a great improvement on the previous algorithm which
+ * decayed the priorities by a constant, and decayed away all knowledge
+ * of previous activity in about 20 seconds.  Under heavy load,
+ * the previous algorithm degenerated to round-robin with poor response
+ * time when there was a high load average.
+ */
+#undef ave
+#define        ave(a,b) ((int)(((int)(a*b))/(b+1)))
+int    nrscale = 2;
+double avenrun[];
+
+/*
+ * Constant for decay filter for cpu usage field
+ * in process table (used by ps au).
+ */
+double ccpu = 0.95122942450071400909;          /* exp(-1/20) */
+
+/*
+ * Software clock interrupt.
+ * This routine runs at lower priority than device interrupts.
+ */
+/*ARGSUSED*/
+softclock(pc, ps)
+       caddr_t pc;
+{
+       register struct callout *p1;
+       register struct proc *pp;
+       register int a, s;
+       caddr_t arg;
+       int (*func)();
+
+       /*
+        * Perform callouts (but not after panic's!)
+        */
+       if (panicstr == 0) {
+               for (;;) {
+                       s = spl7();
+                       if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
+                               splx(s);
+                               break;
+                       }
+                       calltodo.c_next = p1->c_next;
+                       arg = p1->c_arg;
+                       func = p1->c_func;
+                       p1->c_next = callfree;
+                       callfree = p1;
+                       (void) splx(s);
+                       (*func)(arg);
+               }
+       }
+
+       /*
+        * Drain silos.
+        */
+#if NDH > 0
+       s = spl5(); dhtimer(); splx(s);
+#endif
+#if NDZ > 0
+       s = spl5(); dztimer(); splx(s);
+#endif
+
+       /*
+        * If idling and processes are waiting to swap in,
+        * check on them.
+        */
+       if (noproc && runin) {
+               runin = 0;
+               wakeup((caddr_t)&runin);
+       }
+
+       /*
+        * Run paging daemon every 1/4 sec.
+        */
+       if (lbolt % (hz/4) == 0) {
                vmpago();
                vmpago();
+       }
+
+       /*
+        * Reschedule every 1/10 sec.
+        */
+       if (lbolt % (hz/10) == 0) {
                runrun++;
                runrun++;
+               aston();
        }
        }
-       if (lbolt >= HZ) {
-               extern int hangcnt;
 
 
+       /*
+        * Lightning bolt every second:
+        *      sleep timeouts
+        *      process priority recomputation
+        *      process %cpu averaging
+        *      virtual memory metering
+        *      kick swapper if processes want in
+        */
+       if (lbolt >= hz) {
+               /*
+                * This doesn't mean much on VAX since we run at
+                * software interrupt time... if hardclock()
+                * calls softclock() directly, it prevents
+                * this code from running when the priority
+                * was raised when the clock interrupt occurred.
+                */
                if (BASEPRI(ps))
                        return;
                if (BASEPRI(ps))
                        return;
-               lbolt -= HZ;
-               ++time;
-               (void) spl1();
+
                /*
                /*
-                * machdep.c:unhang uses hangcnt to make sure uba
-                * doesn't forget to interrupt (this has been observed).
-                * This prevents an accumulation of < 5 second uba failures
-                * from summing to a uba reset.
+                * If we didn't run a few times because of
+                * long blockage at high ipl, we don't
+                * really want to run this code several times,
+                * so squish out all multiples of hz here.
+                */
+               time += lbolt / hz;
+               lbolt %= hz;
+
+               /*
+                * Wakeup lightning bolt sleepers.
+                * Processes sleep on lbolt to wait
+                * for short amounts of time (e.g. 1 second).
                 */
                 */
-               if (hangcnt)
-                       hangcnt--;
-               runrun++;
                wakeup((caddr_t)&lbolt);
                wakeup((caddr_t)&lbolt);
-               for(pp = &proc[0]; pp < &proc[NPROC]; pp++)
+
+               /*
+                * Recompute process priority and process
+                * sleep() system calls as well as internal
+                * sleeps with timeouts (tsleep() kernel routine).
+                */
+               for (pp = proc; pp < procNPROC; pp++)
                if (pp->p_stat && pp->p_stat!=SZOMB) {
                if (pp->p_stat && pp->p_stat!=SZOMB) {
-                       if(pp->p_time != 127)
+                       /*
+                        * Increase resident time, to max of 127 seconds
+                        * (it is kept in a character.)  For
+                        * loaded processes this is time in core; for
+                        * swapped processes, this is time on drum.
+                        */
+                       if (pp->p_time != 127)
                                pp->p_time++;
                                pp->p_time++;
-                       if(pp->p_clktim)
-                               if(--pp->p_clktim == 0)
-                                       if (pp->p_flag & STIMO) {
-                                               s = spl6();
-                                               switch (pp->p_stat) {
+                       /*
+                        * If process has clock counting down, and it
+                        * expires, set it running (if this is a tsleep()),
+                        * or give it a SIGALRM (if the user process
+                        * is using alarm signals).
+                        */
+                       if (pp->p_clktim && --pp->p_clktim == 0)
+                               if (pp->p_flag & STIMO) {
+                                       s = spl6();
+                                       switch (pp->p_stat) {
 
 
-                                               case SSLEEP:
-                                                       setrun(pp);
-                                                       break;
+                                       case SSLEEP:
+                                               setrun(pp);
+                                               break;
 
 
-                                               case SSTOP:
-                                                       unsleep(pp);
-                                                       break;
-                                               }
-                                               pp->p_flag &= ~STIMO;
-                                               splx(s);
-                                       } else
-                                               psignal(pp, SIGALRM);
-                       if(pp->p_stat==SSLEEP||pp->p_stat==SSTOP)
+                                       case SSTOP:
+                                               unsleep(pp);
+                                               break;
+                                       }
+                                       pp->p_flag &= ~STIMO;
+                                       splx(s);
+                               } else
+                                       psignal(pp, SIGALRM);
+                       /*
+                        * If process is blocked, increment computed
+                        * time blocked.  This is used in swap scheduling.
+                        */
+                       if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
                                if (pp->p_slptime != 127)
                                        pp->p_slptime++;
                                if (pp->p_slptime != 127)
                                        pp->p_slptime++;
+                       /*
+                        * Update digital filter estimation of process
+                        * cpu utilization for loaded processes.
+                        */
                        if (pp->p_flag&SLOAD)
                                pp->p_pctcpu = ccpu * pp->p_pctcpu +
                        if (pp->p_flag&SLOAD)
                                pp->p_pctcpu = ccpu * pp->p_pctcpu +
-                                   (1.0 - ccpu) * (pp->p_cpticks/(float)HZ);
+                                   (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
+                       /*
+                        * Recompute process priority.  The number p_cpu
+                        * is a weighted estimate of cpu time consumed.
+                        * A process which consumes cpu time has this
+                        * increase regularly.  We here decrease it by
+                        * a fraction based on load average giving a digital
+                        * decay filter which damps out in about 5 seconds
+                        * when seconds are measured in time expanded by the
+                        * load average.
+                        *
+                        * If a process is niced, then the nice directly
+                        * affects the new priority.  The final priority
+                        * is in the range 0 to 255, to fit in a character.
+                        */
                        pp->p_cpticks = 0;
                        pp->p_cpticks = 0;
-                       a = (pp->p_cpu & 0377)*SCHMAG + pp->p_nice - NZERO;
-                       if(a < 0)
+                       a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
+                            pp->p_nice - NZERO;
+                       if (a < 0)
                                a = 0;
                                a = 0;
-                       if(a > 255)
+                       if (a > 255)
                                a = 255;
                        pp->p_cpu = a;
                        (void) setpri(pp);
                                a = 255;
                        pp->p_cpu = a;
                        (void) setpri(pp);
+                       /*
+                        * Now have computed new process priority
+                        * in pp->p_usrpri.  Carefully change pp->p_pri.
+                        * A process is on a run queue associated with
+                        * this priority, so we must block out process
+                        * state changes during the transition.
+                        */
                        s = spl6();
                        s = spl6();
-                       if(pp->p_pri >= PUSER) {
+                       if (pp->p_pri >= PUSER) {
                                if ((pp != u.u_procp || noproc) &&
                                    pp->p_stat == SRUN &&
                                    (pp->p_flag & SLOAD) &&
                                if ((pp != u.u_procp || noproc) &&
                                    pp->p_stat == SRUN &&
                                    (pp->p_flag & SLOAD) &&
@@ -236,11 +375,22 @@ out:
                        }
                        splx(s);
                }
                        }
                        splx(s);
                }
+
+               /*
+                * Perform virtual memory metering.
+                */
                vmmeter();
                vmmeter();
-               if(runin!=0) {
+
+               /*
+                * If the swap process is trying to bring
+                * a process in, have it look again to see
+                * if it is possible now.
+                */
+               if (runin!=0) {
                        runin = 0;
                        wakeup((caddr_t)&runin);
                }
                        runin = 0;
                        wakeup((caddr_t)&runin);
                }
+
                /*
                 * If there are pages that have been cleaned, 
                 * jolt the pageout daemon to process them.
                /*
                 * If there are pages that have been cleaned, 
                 * jolt the pageout daemon to process them.
@@ -250,44 +400,38 @@ out:
                 */
                if (bclnlist != NULL)
                        wakeup((caddr_t)&proc[2]);
                 */
                if (bclnlist != NULL)
                        wakeup((caddr_t)&proc[2]);
+
+               /*
+                * If the trap occurred from usermode,
+                * then check to see if it has now been
+                * running more than 10 minutes of user time
+                * and should thus run with reduced priority
+                * to give other processes a chance.
+                */
                if (USERMODE(ps)) {
                        pp = u.u_procp;
                if (USERMODE(ps)) {
                        pp = u.u_procp;
-#ifdef ERNIE
-                       if (pp->p_uid)
-                               if (pp->p_nice == NZERO && u.u_vm.vm_utime > 600 * HZ)
-                                       pp->p_nice = NZERO+4;
+                       if (pp->p_uid && pp->p_nice == NZERO &&
+                           u.u_vm.vm_utime > 600 * hz)
+                               pp->p_nice = NZERO+4;
                        (void) setpri(pp);
                        pp->p_pri = pp->p_usrpri;
                        (void) setpri(pp);
                        pp->p_pri = pp->p_usrpri;
-#endif
                }
        }
                }
        }
-       if (!BASEPRI(ps))
-               unhang();
-       if (USERMODE(ps)) {
-               /*
-                * We do this last since it
-                * may block on a page fault in user space.
-                */
-               if (u.u_prof.pr_scale)
-                       addupc(pc, &u.u_prof, 1);
-       }
-#ifdef KPROF
-       else if (!noproc) {
-               register int indx = ((int)pc & 0x7fffffff) / 4;
-
-               if (indx >= 0 && indx < 20000)
-                       if (++kcount[indx] == 0)
-                               --kcount[indx];
+       /*
+        * If trapped user-mode, give it a profiling tick.
+        */
+       if (USERMODE(ps) && u.u_prof.pr_scale) {
+               u.u_procp->p_flag |= SOWEUPC;
+               aston();
        }
        }
-#endif
 }
 
 /*
 }
 
 /*
- * timeout is called to arrange that
- * fun(arg) is called in tim/HZ seconds.
- * An entry is sorted into the callout
- * structure. The time in each structure
- * entry is the number of HZ's more
+ * Timeout is called to arrange that
+ * fun(arg) is called in tim/hz seconds.
+ * An entry is linked into the callout
+ * structure.  The time in each structure
+ * entry is the number of hz's more
  * than the previous entry.
  * In this way, decrementing the
  * first entry has the effect of
  * than the previous entry.
  * In this way, decrementing the
  * first entry has the effect of
@@ -297,34 +441,33 @@ out:
  * intelligent to be done if an entry won't fit.
  */
 timeout(fun, arg, tim)
  * intelligent to be done if an entry won't fit.
  */
 timeout(fun, arg, tim)
-int (*fun)();
-caddr_t arg;
+       int (*fun)();
+       caddr_t arg;
 {
 {
-       register struct callo *p1, *p2;
+       register struct callout *p1, *p2, *pnew;
        register int t;
        int s;
 
        register int t;
        int s;
 
+/* DEBUGGING CODE */
+       int ttrstrt();
+
+       if (fun == ttrstrt && arg == 0)
+               panic("timeout ttrstr arg");
+/* END DEBUGGING CODE */
        t = tim;
        t = tim;
-       p1 = &callout[0];
        s = spl7();
        s = spl7();
-       while(p1->c_func != 0 && p1->c_time <= t) {
-               t -= p1->c_time;
-               p1++;
-       }
-       if (p1 >= &callout[NCALL-1])
-               panic("Timeout table overflow");
-       p1->c_time -= t;
-       p2 = p1;
-       while(p2->c_func != 0)
-               p2++;
-       while(p2 >= p1) {
-               (p2+1)->c_time = p2->c_time;
-               (p2+1)->c_func = p2->c_func;
-               (p2+1)->c_arg = p2->c_arg;
-               p2--;
-       }
-       p1->c_time = t;
-       p1->c_func = fun;
-       p1->c_arg = arg;
+       pnew = callfree;
+       if (pnew == NULL)
+               panic("timeout table overflow");
+       callfree = pnew->c_next;
+       pnew->c_arg = arg;
+       pnew->c_func = fun;
+       for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
+               t -= p2->c_time;
+       p1->c_next = pnew;
+       pnew->c_next = p2;
+       pnew->c_time = t;
+       if (p2)
+               p2->c_time -= t;
        splx(s);
 }
        splx(s);
 }