cleaned up and re-enable compaction
[unix-history] / usr / src / sys / kern / kern_clock.c
index c54911d..c94dc3c 100644 (file)
@@ -1,9 +1,9 @@
-/*     kern_clock.c    4.9     %G%     */
+/*     kern_clock.c    4.27    81/11/20        */
 
 #include "../h/param.h"
 #include "../h/systm.h"
 #include "../h/dk.h"
 
 #include "../h/param.h"
 #include "../h/systm.h"
 #include "../h/dk.h"
-#include "../h/callo.h"
+#include "../h/callout.h"
 #include "../h/seg.h"
 #include "../h/dir.h"
 #include "../h/user.h"
 #include "../h/seg.h"
 #include "../h/dir.h"
 #include "../h/user.h"
 #include "../h/vlimit.h"
 #include "../h/mtpr.h"
 #include "../h/clock.h"
 #include "../h/vlimit.h"
 #include "../h/mtpr.h"
 #include "../h/clock.h"
+#include "../h/cpu.h"
 
 
+#include "bk.h"
 #include "dh.h"
 #include "dz.h"
 
 #include "dh.h"
 #include "dz.h"
 
-#define        SCHMAG  9/10
-
-
 /*
  * Hardclock is called straight from
  * the real time clock interrupt.
 /*
  * Hardclock is called straight from
  * the real time clock interrupt.
@@ -34,7 +33,7 @@
  *     arrange for soft clock interrupt
  *     kernel pc profiling
  *
  *     arrange for soft clock interrupt
  *     kernel pc profiling
  *
- * At softclock interrupt time we:
+ * At software (softclock) interrupt time we:
  *     implement callouts
  *     maintain date
  *     lightning bolt wakeup (every second)
  *     implement callouts
  *     maintain date
  *     lightning bolt wakeup (every second)
  * interrupts compressed into one (due to excessive interrupt load),
  * but that hardclock interrupts should never be lost.
  */
  * interrupts compressed into one (due to excessive interrupt load),
  * but that hardclock interrupts should never be lost.
  */
+#ifdef KPROF
+int    kcounts[20000];
+#endif
 
 /*ARGSUSED*/
 hardclock(pc, ps)
        caddr_t pc;
 {
 
 /*ARGSUSED*/
 hardclock(pc, ps)
        caddr_t pc;
 {
-       register struct callo *p1;
+       register struct callout *p1;
        register struct proc *pp;
        register int s, cpstate;
 
        register struct proc *pp;
        register int s, cpstate;
 
@@ -63,13 +65,10 @@ hardclock(pc, ps)
        /*
         * update callout times
         */
        /*
         * update callout times
         */
-       if(callout[0].c_func == NULL)
-               goto out;
-       p1 = &callout[0];
-       while(p1->c_time<=0 && p1->c_func!=NULL)
-               p1++;
-       p1->c_time--;
-out:
+       for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
+               ;
+       if (p1)
+               p1->c_time--;
 
        /*
         * Maintain iostat and per-process cpu statistics
 
        /*
         * Maintain iostat and per-process cpu statistics
@@ -85,12 +84,15 @@ out:
                }
                if (s > u.u_vm.vm_maxrss)
                        u.u_vm.vm_maxrss = s;
                }
                if (s > u.u_vm.vm_maxrss)
                        u.u_vm.vm_maxrss = s;
-               if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/HZ > u.u_limit[LIM_CPU]) {
+               if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
                        psignal(u.u_procp, SIGXCPU);
                        if (u.u_limit[LIM_CPU] < INFINITY - 5)
                                u.u_limit[LIM_CPU] += 5;
                }
        }
                        psignal(u.u_procp, SIGXCPU);
                        if (u.u_limit[LIM_CPU] < INFINITY - 5)
                                u.u_limit[LIM_CPU] += 5;
                }
        }
+       /*
+        * Update iostat information.
+        */
        if (USERMODE(ps)) {
                u.u_vm.vm_utime++;
                if(u.u_procp->p_nice > NZERO)
        if (USERMODE(ps)) {
                u.u_vm.vm_utime++;
                if(u.u_procp->p_nice > NZERO)
@@ -98,6 +100,11 @@ out:
                else
                        cpstate = CP_USER;
        } else {
                else
                        cpstate = CP_USER;
        } else {
+#ifdef KPROF
+       int k = ((int)pc & 0x7fffffff) / 8;
+       if (k < 20000)
+               kcounts[k]++;
+#endif
                cpstate = CP_SYS;
                if (noproc)
                        cpstate = CP_IDLE;
                cpstate = CP_SYS;
                if (noproc)
                        cpstate = CP_IDLE;
@@ -108,68 +115,108 @@ out:
        for (s = 0; s < DK_NDRIVE; s++)
                if (dk_busy&(1<<s))
                        dk_time[s]++;
        for (s = 0; s < DK_NDRIVE; s++)
                if (dk_busy&(1<<s))
                        dk_time[s]++;
+       /*
+        * Adjust priority of current process.
+        */
        if (!noproc) {
                pp = u.u_procp;
                pp->p_cpticks++;
                if(++pp->p_cpu == 0)
                        pp->p_cpu--;
        if (!noproc) {
                pp = u.u_procp;
                pp->p_cpticks++;
                if(++pp->p_cpu == 0)
                        pp->p_cpu--;
-               if(pp->p_cpu % 16 == 0) {
+               if(pp->p_cpu % 4 == 0) {
                        (void) setpri(pp);
                        if (pp->p_pri >= PUSER)
                                pp->p_pri = pp->p_usrpri;
                }
        }
                        (void) setpri(pp);
                        if (pp->p_pri >= PUSER)
                                pp->p_pri = pp->p_usrpri;
                }
        }
+       /*
+        * Time moves on.
+        */
        ++lbolt;
        ++lbolt;
-#if VAX==780
-       if (!BASEPRI(ps))
+#if VAX780
+       /*
+        * On 780's, implement a fast UBA watcher,
+        * to make sure uba's don't get stuck.
+        */
+       if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
                unhang();
 #endif
                unhang();
 #endif
+       /*
+        * Schedule a software interrupt for the rest
+        * of clock activities.
+        */
        setsoftclock();
 }
 
 /*
        setsoftclock();
 }
 
 /*
- * Constant for decay filter for cpu usage.
+ * The digital decay cpu usage priority assignment is scaled to run in
+ * time as expanded by the 1 minute load average.  Each second we
+ * multiply the previous cpu usage estimate by
+ *             nrscale*avenrun[0] / (nrscale*avenrun[0] + 1)
+ * The following relates the load average to the period over which
+ * cpu usage is 90% forgotten:
+ *     loadav 1         5 seconds
+ *     loadav 5        24 seconds
+ *     loadav 10       47 seconds
+ *     loadav 20       93 seconds
+ * This is a great improvement on the previous algorithm which
+ * decayed the priorities by a constant, and decayed away all knowledge
+ * of previous activity in about 20 seconds.  Under heavy load,
+ * the previous algorithm degenerated to round-robin with poor response
+ * time when there was a high load average.
+ */
+#undef ave
+#define        ave(a,b) ((int)(((int)(a*b))/(b+1)))
+int    nrscale = 2;
+double avenrun[];
+
+/*
+ * Constant for decay filter for cpu usage field
+ * in process table (used by ps au).
  */
 double ccpu = 0.95122942450071400909;          /* exp(-1/20) */
 
 /*
  * Software clock interrupt.
  */
 double ccpu = 0.95122942450071400909;          /* exp(-1/20) */
 
 /*
  * Software clock interrupt.
- * This routine is blocked by spl1(),
- * which doesn't block device interrupts!
+ * This routine runs at lower priority than device interrupts.
  */
 /*ARGSUSED*/
 softclock(pc, ps)
        caddr_t pc;
 {
  */
 /*ARGSUSED*/
 softclock(pc, ps)
        caddr_t pc;
 {
-       register struct callo *p1, *p2;
+       register struct callout *p1;
        register struct proc *pp;
        register int a, s;
        register struct proc *pp;
        register int a, s;
+       caddr_t arg;
+       int (*func)();
 
        /*
 
        /*
-        * callout
+        * Perform callouts (but not after panic's!)
         */
         */
-       if(callout[0].c_time <= 0) {
-               p1 = &callout[0];
-               while(p1->c_func != 0 && p1->c_time <= 0) {
-                       (*p1->c_func)(p1->c_arg);
-                       p1++;
-               }
-               p2 = &callout[0];
-               while(p2->c_func = p1->c_func) {
-                       p2->c_time = p1->c_time;
-                       p2->c_arg = p1->c_arg;
-                       p1++;
-                       p2++;
+       if (panicstr == 0) {
+               for (;;) {
+                       s = spl7();
+                       if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
+                               splx(s);
+                               break;
+                       }
+                       calltodo.c_next = p1->c_next;
+                       arg = p1->c_arg;
+                       func = p1->c_func;
+                       p1->c_next = callfree;
+                       callfree = p1;
+                       (void) splx(s);
+                       (*func)(arg);
                }
        }
 
        /*
         * Drain silos.
         */
                }
        }
 
        /*
         * Drain silos.
         */
-#if NDH11 > 0
+#if NDH > 0
        s = spl5(); dhtimer(); splx(s);
 #endif
        s = spl5(); dhtimer(); splx(s);
 #endif
-#if NDZ11 > 0
+#if NDZ > 0
        s = spl5(); dztimer(); splx(s);
 #endif
 
        s = spl5(); dztimer(); splx(s);
 #endif
 
@@ -183,10 +230,16 @@ softclock(pc, ps)
        }
 
        /*
        }
 
        /*
-        * Run paging daemon and reschedule every 1/4 sec.
+        * Run paging daemon every 1/4 sec.
         */
         */
-       if (lbolt % (HZ/4) == 0) {
+       if (lbolt % (hz/4) == 0) {
                vmpago();
                vmpago();
+       }
+
+       /*
+        * Reschedule every 1/10 sec.
+        */
+       if (lbolt % (hz/10) == 0) {
                runrun++;
                aston();
        }
                runrun++;
                aston();
        }
@@ -199,50 +252,117 @@ softclock(pc, ps)
         *      virtual memory metering
         *      kick swapper if processes want in
         */
         *      virtual memory metering
         *      kick swapper if processes want in
         */
-       if (lbolt >= HZ) {
+       if (lbolt >= hz) {
+               /*
+                * This doesn't mean much on VAX since we run at
+                * software interrupt time... if hardclock()
+                * calls softclock() directly, it prevents
+                * this code from running when the priority
+                * was raised when the clock interrupt occurred.
+                */
                if (BASEPRI(ps))
                        return;
                if (BASEPRI(ps))
                        return;
-               lbolt -= HZ;
-               ++time;
+
+               /*
+                * If we didn't run a few times because of
+                * long blockage at high ipl, we don't
+                * really want to run this code several times,
+                * so squish out all multiples of hz here.
+                */
+               time += lbolt / hz;
+               lbolt %= hz;
+
+               /*
+                * Wakeup lightning bolt sleepers.
+                * Processes sleep on lbolt to wait
+                * for short amounts of time (e.g. 1 second).
+                */
                wakeup((caddr_t)&lbolt);
                wakeup((caddr_t)&lbolt);
-               for(pp = &proc[0]; pp < &proc[NPROC]; pp++)
+
+               /*
+                * Recompute process priority and process
+                * sleep() system calls as well as internal
+                * sleeps with timeouts (tsleep() kernel routine).
+                */
+               for (pp = proc; pp < procNPROC; pp++)
                if (pp->p_stat && pp->p_stat!=SZOMB) {
                if (pp->p_stat && pp->p_stat!=SZOMB) {
-                       if(pp->p_time != 127)
+                       /*
+                        * Increase resident time, to max of 127 seconds
+                        * (it is kept in a character.)  For
+                        * loaded processes this is time in core; for
+                        * swapped processes, this is time on drum.
+                        */
+                       if (pp->p_time != 127)
                                pp->p_time++;
                                pp->p_time++;
-                       if(pp->p_clktim)
-                               if(--pp->p_clktim == 0)
-                                       if (pp->p_flag & STIMO) {
-                                               s = spl6();
-                                               switch (pp->p_stat) {
+                       /*
+                        * If process has clock counting down, and it
+                        * expires, set it running (if this is a tsleep()),
+                        * or give it an SIGALRM (if the user process
+                        * is using alarm signals).
+                        */
+                       if (pp->p_clktim && --pp->p_clktim == 0)
+                               if (pp->p_flag & STIMO) {
+                                       s = spl6();
+                                       switch (pp->p_stat) {
 
 
-                                               case SSLEEP:
-                                                       setrun(pp);
-                                                       break;
+                                       case SSLEEP:
+                                               setrun(pp);
+                                               break;
 
 
-                                               case SSTOP:
-                                                       unsleep(pp);
-                                                       break;
-                                               }
-                                               pp->p_flag &= ~STIMO;
-                                               splx(s);
-                                       } else
-                                               psignal(pp, SIGALRM);
-                       if(pp->p_stat==SSLEEP||pp->p_stat==SSTOP)
+                                       case SSTOP:
+                                               unsleep(pp);
+                                               break;
+                                       }
+                                       pp->p_flag &= ~STIMO;
+                                       splx(s);
+                               } else
+                                       psignal(pp, SIGALRM);
+                       /*
+                        * If process is blocked, increment computed
+                        * time blocked.  This is used in swap scheduling.
+                        */
+                       if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
                                if (pp->p_slptime != 127)
                                        pp->p_slptime++;
                                if (pp->p_slptime != 127)
                                        pp->p_slptime++;
+                       /*
+                        * Update digital filter estimation of process
+                        * cpu utilization for loaded processes.
+                        */
                        if (pp->p_flag&SLOAD)
                                pp->p_pctcpu = ccpu * pp->p_pctcpu +
                        if (pp->p_flag&SLOAD)
                                pp->p_pctcpu = ccpu * pp->p_pctcpu +
-                                   (1.0 - ccpu) * (pp->p_cpticks/(float)HZ);
+                                   (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
+                       /*
+                        * Recompute process priority.  The number p_cpu
+                        * is a weighted estimate of cpu time consumed.
+                        * A process which consumes cpu time has this
+                        * increase regularly.  We here decrease it by
+                        * a fraction based on load average giving a digital
+                        * decay filter which damps out in about 5 seconds
+                        * when seconds are measured in time expanded by the
+                        * load average.
+                        *
+                        * If a process is niced, then the nice directly
+                        * affects the new priority.  The final priority
+                        * is in the range 0 to 255, to fit in a character.
+                        */
                        pp->p_cpticks = 0;
                        pp->p_cpticks = 0;
-                       a = (pp->p_cpu & 0377)*SCHMAG + pp->p_nice - NZERO;
-                       if(a < 0)
+                       a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
+                            pp->p_nice - NZERO;
+                       if (a < 0)
                                a = 0;
                                a = 0;
-                       if(a > 255)
+                       if (a > 255)
                                a = 255;
                        pp->p_cpu = a;
                        (void) setpri(pp);
                                a = 255;
                        pp->p_cpu = a;
                        (void) setpri(pp);
+                       /*
+                        * Now have computed new process priority
+                        * in p->p_usrpri.  Carefully change p->p_pri.
+                        * A process is on a run queue associated with
+                        * this priority, so we must block out process
+                        * state changes during the transition.
+                        */
                        s = spl6();
                        s = spl6();
-                       if(pp->p_pri >= PUSER) {
+                       if (pp->p_pri >= PUSER) {
                                if ((pp != u.u_procp || noproc) &&
                                    pp->p_stat == SRUN &&
                                    (pp->p_flag & SLOAD) &&
                                if ((pp != u.u_procp || noproc) &&
                                    pp->p_stat == SRUN &&
                                    (pp->p_flag & SLOAD) &&
@@ -255,11 +375,22 @@ softclock(pc, ps)
                        }
                        splx(s);
                }
                        }
                        splx(s);
                }
+
+               /*
+                * Perform virtual memory metering.
+                */
                vmmeter();
                vmmeter();
-               if(runin!=0) {
+
+               /*
+                * If the swap process is trying to bring
+                * a process in, have it look again to see
+                * if it is possible now.
+                */
+               if (runin!=0) {
                        runin = 0;
                        wakeup((caddr_t)&runin);
                }
                        runin = 0;
                        wakeup((caddr_t)&runin);
                }
+
                /*
                 * If there are pages that have been cleaned, 
                 * jolt the pageout daemon to process them.
                /*
                 * If there are pages that have been cleaned, 
                 * jolt the pageout daemon to process them.
@@ -269,15 +400,26 @@ softclock(pc, ps)
                 */
                if (bclnlist != NULL)
                        wakeup((caddr_t)&proc[2]);
                 */
                if (bclnlist != NULL)
                        wakeup((caddr_t)&proc[2]);
+
+               /*
+                * If the trap occurred from usermode,
+                * then check to see if it has now been
+                * running more than 10 minutes of user time
+                * and should thus run with reduced priority
+                * to give other processes a chance.
+                */
                if (USERMODE(ps)) {
                        pp = u.u_procp;
                if (USERMODE(ps)) {
                        pp = u.u_procp;
-                       if (pp->p_uid)
-                               if (pp->p_nice == NZERO && u.u_vm.vm_utime > 600 * HZ)
-                                       pp->p_nice = NZERO+4;
+                       if (pp->p_uid && pp->p_nice == NZERO &&
+                           u.u_vm.vm_utime > 600 * hz)
+                               pp->p_nice = NZERO+4;
                        (void) setpri(pp);
                        pp->p_pri = pp->p_usrpri;
                }
        }
                        (void) setpri(pp);
                        pp->p_pri = pp->p_usrpri;
                }
        }
+       /*
+        * If trapped user-mode, give it a profiling tick.
+        */
        if (USERMODE(ps) && u.u_prof.pr_scale) {
                u.u_procp->p_flag |= SOWEUPC;
                aston();
        if (USERMODE(ps) && u.u_prof.pr_scale) {
                u.u_procp->p_flag |= SOWEUPC;
                aston();
@@ -285,11 +427,11 @@ softclock(pc, ps)
 }
 
 /*
 }
 
 /*
- * timeout is called to arrange that
- * fun(arg) is called in tim/HZ seconds.
- * An entry is sorted into the callout
- * structure. The time in each structure
- * entry is the number of HZ's more
+ * Timeout is called to arrange that
+ * fun(arg) is called in tim/hz seconds.
+ * An entry is linked into the callout
+ * structure.  The time in each structure
+ * entry is the number of hz's more
  * than the previous entry.
  * In this way, decrementing the
  * first entry has the effect of
  * than the previous entry.
  * In this way, decrementing the
  * first entry has the effect of
@@ -302,33 +444,30 @@ timeout(fun, arg, tim)
        int (*fun)();
        caddr_t arg;
 {
        int (*fun)();
        caddr_t arg;
 {
-       register struct callo *p1, *p2, *p3;
+       register struct callout *p1, *p2, *pnew;
        register int t;
        int s;
 
        register int t;
        int s;
 
+/* DEBUGGING CODE */
+       int ttrstrt();
+
+       if (fun == ttrstrt && arg == 0)
+               panic("timeout ttrstr arg");
+/* END DEBUGGING CODE */
        t = tim;
        t = tim;
-       p1 = &callout[0];
        s = spl7();
        s = spl7();
-       while(p1->c_func != 0 && p1->c_time <= t) {
-               t -= p1->c_time;
-               p1++;
-       }
-       p1->c_time -= t;
-       p2 = p1;
-       p3 = &callout[NCALL-2];
-       while(p2->c_func != 0) {
-               if (p2 >= p3)
-                       panic("timeout");
-               p2++;
-       }
-       while(p2 >= p1) {
-               (p2+1)->c_time = p2->c_time;
-               (p2+1)->c_func = p2->c_func;
-               (p2+1)->c_arg = p2->c_arg;
-               p2--;
-       }
-       p1->c_time = t;
-       p1->c_func = fun;
-       p1->c_arg = arg;
+       pnew = callfree;
+       if (pnew == NULL)
+               panic("timeout table overflow");
+       callfree = pnew->c_next;
+       pnew->c_arg = arg;
+       pnew->c_func = fun;
+       for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
+               t -= p2->c_time;
+       p1->c_next = pnew;
+       pnew->c_next = p2;
+       pnew->c_time = t;
+       if (p2)
+               p2->c_time -= t;
        splx(s);
 }
        splx(s);
 }