fix to allow booting on drives other than 0 (from ps@lbl-csam)
[unix-history] / usr / src / sys / vm / vm_meter.c
CommitLineData
299bcbab 1/* vm_meter.c 4.22 83/06/14 */
e3bf9f41
BJ
2
3#include "../h/param.h"
4#include "../h/systm.h"
5#include "../h/seg.h"
6#include "../h/dir.h"
7#include "../h/user.h"
8#include "../h/proc.h"
9#include "../h/text.h"
10#include "../h/vm.h"
11#include "../h/cmap.h"
c4206698 12#include "../h/kernel.h"
e3bf9f41 13
e3bf9f41 14int maxslp = MAXSLP;
e3bf9f41 15int saferss = SAFERSS;
27b135c8
BJ
16
17/*
18 * The following parameters control operation of the page replacement
19 * algorithm. They are initialized to 0, and then computed at boot time
20 * based on the size of the system. If they are patched non-zero in
21 * a loaded vmunix they are left alone and may thus be changed per system
22 * using adb on the loaded system.
23 */
24int maxpgio = 0;
25int minfree = 0;
26int desfree = 0;
27int lotsfree = 0;
28int slowscan = 0;
29int fastscan = 0;
e3bf9f41 30int klin = KLIN;
27b135c8 31int klseql = KLSEQL;
4643e8eb 32int klsdist = KLSDIST;
27b135c8 33int kltxt = KLTXT;
e3bf9f41
BJ
34int klout = KLOUT;
35int multprog = -1; /* so we don't count process 2 */
36
37double avenrun[3]; /* load average, of runnable procs */
38
27b135c8
BJ
39/*
40 * Setup the paging constants for the clock algorithm.
41 * Called after the system is initialized and the amount of memory
42 * and number of paging devices is known.
27772922
SL
43 *
44 * Threshold constants are defined in ../machine/vmparam.h.
27b135c8
BJ
45 */
46setupclock()
47{
27b135c8
BJ
48
49 /*
50 * Setup thresholds for paging:
51 * lotsfree is threshold where paging daemon turns on
52 * desfree is amount of memory desired free. if less
53 * than this for extended period, do swapping
54 * minfree is minimal amount of free memory which is
55 * tolerable.
f3d75589 56 */
f3d75589 57 if (lotsfree == 0)
27772922 58 lotsfree = LOOPPAGES / LOTSFREEFRACT;
f3d75589 59 if (desfree == 0) {
27772922
SL
60 desfree = DESFREE / NBPG;
61 if (desfree > LOOPPAGES / DESFREEFRACT)
62 desfree = LOOPPAGES / DESFREEFRACT;
f3d75589
SL
63 }
64 if (minfree == 0) {
27772922
SL
65 minfree = MINFREE / NBPG;
66 if (minfree > desfree / MINFREEFRACT)
67 minfree = desfree / MINFREEFRACT;
f3d75589 68 }
27b135c8
BJ
69 /*
70 * Maxpgio thresholds how much paging is acceptable.
71 * This figures that 2/3 busy on an arm is all that is
72 * tolerable for paging. We assume one operation per disk rev.
73 */
74 if (maxpgio == 0)
75 maxpgio = (DISKRPM * 2) / 3;
76
77 /*
55908f17
BJ
78 * Clock to scan using max of ~~10% of processor time for sampling,
79 * this estimated to allow maximum of 200 samples per second.
abc92597 80 * This yields a ``fastscan'' of roughly (with CLSIZE=2):
55908f17
BJ
81 * <=1m 2m 3m 4m 8m
82 * 5s 10s 15s 20s 40s
27b135c8 83 */
f3d75589
SL
84 if (nswdev == 1 && physmem*NBPG > LOTSOFMEM*1024*(1024-16))
85 printf("WARNING: should run interleaved swap with >= %dMb\n",
86 LOTSOFMEM);
abc92597 87 if (fastscan == 0)
55908f17 88 fastscan = (LOOPPAGES/CLSIZE) / 200;
abc92597
BJ
89 if (fastscan < 5)
90 fastscan = 5;
898c2303 91 if (nswdev >= 2)
abc92597 92 maxpgio = (maxpgio * 3) / 2;
27b135c8
BJ
93
94 /*
55908f17 95 * Set slow scan time to 1/2 the fast scan time.
27b135c8
BJ
96 */
97 if (slowscan == 0)
55908f17 98 slowscan = 2 * fastscan;
27b135c8
BJ
99}
100
/*
 * The main loop of the scheduling (swapping) process.
 *
 * The basic idea is:
 *	see if anyone wants to be swapped in;
 *	swap out processes until there is room;
 *	swap him in;
 *	repeat.
 * If the paging rate is too high, or the average free memory
 * is very low, then we do not consider swapping anyone in,
 * but rather look for someone to swap out.
 *
 * The runout flag is set whenever someone is swapped out.
 * Sched sleeps on it awaiting work.
 *
 * Sched sleeps on runin whenever it cannot find enough
 * core (by swapping out or otherwise) to fit the
 * selected swapped process.  It is awakened when the
 * core situation changes and in any case once per second.
 *
 * sched DOESN'T ACCOUNT FOR PAGE TABLE SIZE IN CALCULATIONS.
 */

/*
 * A process is swappable when SLOAD is set and none of the other
 * listed flag bits are.
 */
#define	swappable(p) \
	(((p)->p_flag&(SSYS|SLOCK|SULOCK|SLOAD|SPAGE|SKEEP|SWEXIT|SPHYSIO))==SLOAD)

/*
 * insure non-zero: yields x, or 1 when x is zero (guards divisors).
 * The argument is parenthesized so that low-precedence expressions such
 * as (a & b) are tested as a whole; x is evaluated twice, so it must
 * have no side effects.
 */
#define	nz(x)	((x) != 0 ? (x) : 1)

#define	NBIG	4
#define	MAXNBIG	10
int	nbig = NBIG;		/* clamped to 1..MAXNBIG by sched() */

/*
 * Candidate list used by sched() when choosing a process to swap out:
 * entries are taken from bigp[] and chained through bp_link in order of
 * increasing bp_pri, with bplist serving as the list head.
 */
struct bigp {
	struct	proc *bp_proc;
	int	bp_pri;
	struct	bigp *bp_link;
} bigp[MAXNBIG], bplist;

140sched()
141{
142 register struct proc *rp, *p, *inp;
143 int outpri, inpri, rppri;
97afa637 144 int sleeper, desperate, deservin, needs, divisor;
e3bf9f41
BJ
145 register struct bigp *bp, *nbp;
146 int biggot, gives;
147
e3bf9f41 148loop:
7eb2e67e 149 wantin = 0;
e3bf9f41
BJ
150 deservin = 0;
151 sleeper = 0;
152 p = 0;
27b135c8 153 /*
37324218 154 * See if paging system is overloaded; if so swap someone out.
27b135c8
BJ
155 * Conditions for hard outswap are:
156 * if need kernel map (mix it up).
157 * or
158 * 1. if there are at least 2 runnable processes (on the average)
159 * and 2. the paging rate is excessive or memory is now VERY low.
160 * and 3. the short (5-second) and longer (30-second) average
161 * memory is less than desirable.
162 */
f3d75589
SL
163 if (kmapwnt ||
164 (avenrun[0] >= 2 && imax(avefree, avefree30) < desfree &&
e3bf9f41 165 (rate.v_pgin + rate.v_pgout > maxpgio || avefree < minfree))) {
97afa637 166 desperate = 1;
e3bf9f41
BJ
167 goto hardswap;
168 }
97afa637 169 desperate = 0;
e3bf9f41 170 /*
97afa637 171 * Not desperate for core,
e3bf9f41
BJ
172 * look for someone who deserves to be brought in.
173 */
174 outpri = -20000;
86fd527f 175 for (rp = proc; rp < procNPROC; rp++) switch(rp->p_stat) {
e3bf9f41
BJ
176
177 case SRUN:
178 if ((rp->p_flag&SLOAD) == 0) {
27b135c8
BJ
179 rppri = rp->p_time -
180 rp->p_swrss / nz((maxpgio/2) * (klin * CLSIZE)) +
293c7069 181 rp->p_slptime - (rp->p_nice-NZERO)*8;
e3bf9f41
BJ
182 if (rppri > outpri) {
183 if (rp->p_poip)
184 continue;
185 if (rp->p_textp && rp->p_textp->x_poip)
186 continue;
187 p = rp;
188 outpri = rppri;
189 }
190 }
191 continue;
192
193 case SSLEEP:
194 case SSTOP:
195 if ((freemem < desfree || rp->p_rssize == 0) &&
196 rp->p_slptime > maxslp &&
197 (!rp->p_textp || (rp->p_textp->x_flag&XLOCK)==0) &&
198 swappable(rp)) {
199 /*
200 * Kick out deadwood.
e3bf9f41 201 */
7eb2e67e 202 (void) spl6();
e3bf9f41
BJ
203 rp->p_flag &= ~SLOAD;
204 if (rp->p_stat == SRUN)
205 remrq(rp);
7eb2e67e 206 (void) spl0();
e650efcf 207 (void) swapout(rp, rp->p_dsize, rp->p_ssize);
e3bf9f41
BJ
208 goto loop;
209 }
210 continue;
211 }
212
213 /*
214 * No one wants in, so nothing to do.
215 */
216 if (outpri == -20000) {
7eb2e67e
BJ
217 (void) spl6();
218 if (wantin) {
219 wantin = 0;
220 sleep((caddr_t)&lbolt, PSWP);
221 } else {
222 runout++;
223 sleep((caddr_t)&runout, PSWP);
224 }
225 (void) spl0();
e3bf9f41
BJ
226 goto loop;
227 }
e3bf9f41
BJ
228 /*
229 * Decide how deserving this guy is. If he is deserving
230 * we will be willing to work harder to bring him in.
231 * Needs is an estimate of how much core he will need.
232 * If he has been out for a while, then we will
233 * bring him in with 1/2 the core he will need, otherwise
234 * we are conservative.
235 */
236 deservin = 0;
237 divisor = 1;
238 if (outpri > maxslp/2) {
239 deservin = 1;
240 divisor = 2;
241 }
242 needs = p->p_swrss;
243 if (p->p_textp && p->p_textp->x_ccount == 0)
244 needs += p->p_textp->x_swrss;
27b135c8 245 needs = imin(needs, lotsfree);
e3bf9f41
BJ
246 if (freemem - deficit > needs / divisor) {
247 deficit += needs;
248 if (swapin(p))
249 goto loop;
250 deficit -= imin(needs, deficit);
251 }
252
253hardswap:
254 /*
255 * Need resources (kernel map or memory), swap someone out.
256 * Select the nbig largest jobs, then the oldest of these
257 * is ``most likely to get booted.''
258 */
e3bf9f41
BJ
259 inp = p;
260 sleeper = 0;
261 if (nbig > MAXNBIG)
262 nbig = MAXNBIG;
263 if (nbig < 1)
264 nbig = 1;
265 biggot = 0;
266 bplist.bp_link = 0;
86fd527f 267 for (rp = proc; rp < procNPROC; rp++) {
e3bf9f41
BJ
268 if (!swappable(rp))
269 continue;
270 if (rp->p_stat==SZOMB)
271 continue;
272 if (rp == inp)
273 continue;
274 if (rp->p_textp && rp->p_textp->x_flag&XLOCK)
275 continue;
276 if (rp->p_slptime > maxslp &&
89b899a0 277 (rp->p_stat==SSLEEP&&rp->p_pri>PZERO||rp->p_stat==SSTOP)) {
e3bf9f41
BJ
278 if (sleeper < rp->p_slptime) {
279 p = rp;
280 sleeper = rp->p_slptime;
281 }
282 } else if (!sleeper && (rp->p_stat==SRUN||rp->p_stat==SSLEEP)) {
283 rppri = rp->p_rssize;
284 if (rp->p_textp)
293c7069 285 rppri += rp->p_textp->x_rssize/rp->p_textp->x_ccount;
e3bf9f41
BJ
286 if (biggot < nbig)
287 nbp = &bigp[biggot++];
288 else {
289 nbp = bplist.bp_link;
290 if (nbp->bp_pri > rppri)
291 continue;
292 bplist.bp_link = nbp->bp_link;
293 }
294 for (bp = &bplist; bp->bp_link; bp = bp->bp_link)
295 if (rppri < bp->bp_link->bp_pri)
296 break;
297 nbp->bp_link = bp->bp_link;
298 bp->bp_link = nbp;
299 nbp->bp_pri = rppri;
300 nbp->bp_proc = rp;
301 }
302 }
303 if (!sleeper) {
304 p = NULL;
305 inpri = -1000;
306 for (bp = bplist.bp_link; bp; bp = bp->bp_link) {
307 rp = bp->bp_proc;
308 rppri = rp->p_time+rp->p_nice-NZERO;
309 if (rppri >= inpri) {
310 p = rp;
311 inpri = rppri;
312 }
313 }
314 }
315 /*
97afa637 316 * If we found a long-time sleeper, or we are desperate and
e3bf9f41
BJ
317 * found anyone to swap out, or if someone deserves to come
318 * in and we didn't find a sleeper, but found someone who
319 * has been in core for a reasonable length of time, then
320 * we kick the poor luser out.
321 */
97afa637 322 if (sleeper || desperate && p || deservin && inpri > maxslp) {
7eb2e67e 323 (void) spl6();
e3bf9f41
BJ
324 p->p_flag &= ~SLOAD;
325 if (p->p_stat == SRUN)
326 remrq(p);
7eb2e67e 327 (void) spl0();
97afa637 328 if (desperate) {
e3bf9f41
BJ
329 /*
330 * Want to give this space to the rest of
331 * the processes in core so give them a chance
332 * by increasing the deficit.
333 */
334 gives = p->p_rssize;
335 if (p->p_textp)
336 gives += p->p_textp->x_rssize / p->p_textp->x_ccount;
9eec1cb0 337 gives = imin(gives, lotsfree);
e3bf9f41
BJ
338 deficit += gives;
339 } else
340 gives = 0; /* someone else taketh away */
341 if (swapout(p, p->p_dsize, p->p_ssize) == 0)
342 deficit -= imin(gives, deficit);
343 goto loop;
344 }
345 /*
346 * Want to swap someone in, but can't
347 * so wait on runin.
348 */
e650efcf 349 (void) spl6();
e3bf9f41
BJ
350 runin++;
351 sleep((caddr_t)&runin, PSWP);
7eb2e67e 352 (void) spl0();
e3bf9f41
BJ
353 goto loop;
354}
355
356vmmeter()
357{
358 register unsigned *cp, *rp, *sp;
359
27b135c8
BJ
360 deficit -= imin(deficit,
361 imax(deficit / 10, ((klin * CLSIZE) / 2) * maxpgio / 2));
e3bf9f41 362 ave(avefree, freemem, 5);
27b135c8 363 ave(avefree30, freemem, 30);
e3bf9f41
BJ
364 /* v_pgin is maintained by clock.c */
365 cp = &cnt.v_first; rp = &rate.v_first; sp = &sum.v_first;
366 while (cp <= &cnt.v_last) {
367 ave(*rp, *cp, 5);
368 *sp += *cp;
369 *cp = 0;
370 rp++, cp++, sp++;
371 }
c4206698 372 if (time.tv_sec % 5 == 0) {
e3bf9f41
BJ
373 vmtotal();
374 rate.v_swpin = cnt.v_swpin;
375 sum.v_swpin += cnt.v_swpin;
376 cnt.v_swpin = 0;
377 rate.v_swpout = cnt.v_swpout;
378 sum.v_swpout += cnt.v_swpout;
379 cnt.v_swpout = 0;
380 }
381 if (avefree < minfree && runout || proc[0].p_slptime > maxslp/2) {
382 runout = 0;
383 runin = 0;
384 wakeup((caddr_t)&runin);
385 wakeup((caddr_t)&runout);
386 }
387}
388
c4206698
BJ
389#define RATETOSCHEDPAGING 4 /* hz that is */
390
391/*
392 * Schedule rate for paging.
393 * Rate is linear interpolation between
394 * slowscan with lotsfree and fastscan when out of memory.
395 */
396schedpaging()
e3bf9f41 397{
c4206698 398 register int vavail, scanrate;
e3bf9f41 399
e3bf9f41
BJ
400 nscan = desscan = 0;
401 vavail = freemem - deficit;
402 if (vavail < 0)
403 vavail = 0;
e4d539fa
SL
404 if (freemem < lotsfree) {
405 scanrate =
406 (slowscan * vavail + fastscan * (lotsfree - vavail)) /
407 nz(lotsfree);
408 desscan = ((LOOPPAGES / CLSIZE) / nz(scanrate)) /
409 RATETOSCHEDPAGING;
410 wakeup((caddr_t)&proc[2]);
411 }
b32450f4 412 timeout(schedpaging, (caddr_t)0, hz / RATETOSCHEDPAGING);
e3bf9f41
BJ
413}
414
415vmtotal()
416{
417 register struct proc *p;
418 register struct text *xp;
419 int nrun = 0;
420
421 total.t_vmtxt = 0;
422 total.t_avmtxt = 0;
423 total.t_rmtxt = 0;
424 total.t_armtxt = 0;
fcb2d36b 425 for (xp = text; xp < textNTEXT; xp++)
e3bf9f41
BJ
426 if (xp->x_iptr) {
427 total.t_vmtxt += xp->x_size;
428 total.t_rmtxt += xp->x_rssize;
429 for (p = xp->x_caddr; p; p = p->p_xlink)
430 switch (p->p_stat) {
431
432 case SSTOP:
433 case SSLEEP:
434 if (p->p_slptime >= maxslp)
435 continue;
436 /* fall into... */
437
438 case SRUN:
439 case SIDL:
440 total.t_avmtxt += xp->x_size;
441 total.t_armtxt += xp->x_rssize;
442 goto next;
443 }
444next:
445 ;
446 }
447 total.t_vm = 0;
448 total.t_avm = 0;
449 total.t_rm = 0;
450 total.t_arm = 0;
451 total.t_rq = 0;
452 total.t_dw = 0;
453 total.t_pw = 0;
454 total.t_sl = 0;
455 total.t_sw = 0;
86fd527f 456 for (p = proc; p < procNPROC; p++) {
e3bf9f41
BJ
457 if (p->p_flag & SSYS)
458 continue;
459 if (p->p_stat) {
460 total.t_vm += p->p_dsize + p->p_ssize;
461 total.t_rm += p->p_rssize;
462 switch (p->p_stat) {
463
464 case SSLEEP:
465 case SSTOP:
89b899a0 466 if (p->p_pri <= PZERO)
e3bf9f41
BJ
467 nrun++;
468 if (p->p_flag & SPAGE)
469 total.t_pw++;
470 else if (p->p_flag & SLOAD) {
89b899a0 471 if (p->p_pri <= PZERO)
e3bf9f41
BJ
472 total.t_dw++;
473 else if (p->p_slptime < maxslp)
474 total.t_sl++;
475 } else if (p->p_slptime < maxslp)
476 total.t_sw++;
477 if (p->p_slptime < maxslp)
478 goto active;
479 break;
480
481 case SRUN:
482 case SIDL:
483 nrun++;
484 if (p->p_flag & SLOAD)
485 total.t_rq++;
486 else
487 total.t_sw++;
488active:
489 total.t_avm += p->p_dsize + p->p_ssize;
490 total.t_arm += p->p_rssize;
491 break;
492 }
493 }
494 }
495 total.t_vm += total.t_vmtxt;
496 total.t_avm += total.t_avmtxt;
497 total.t_rm += total.t_rmtxt;
498 total.t_arm += total.t_armtxt;
499 total.t_free = avefree;
500 loadav(avenrun, nrun);
501}
502
/*
 * Constants for averages over 1, 5, and 15 minutes
 * when sampling at 5 second intervals.
 */
double	cexp[3] = {
	0.9200444146293232,	/* exp(-1/12) */
	0.9834714538216174,	/* exp(-1/60) */
	0.9944598480048967,	/* exp(-1/180) */
};

/*
 * Compute a tenex style load average of a quantity on
 * 1, 5 and 15 minute intervals.
 *
 * avg points to three running averages, updated in place as
 *	avg[i] = cexp[i] * avg[i] + n * (1 - cexp[i])
 * and n is the new sample.  No meaningful value is returned; the
 * return type is spelled out only because implicit int is no longer
 * accepted by current compilers.
 */
int
loadav(avg, n)
	register double *avg;
	int n;
{
	register int i;

	for (i = 0; i < 3; i++)
		avg[i] = cexp[i] * avg[i] + n * (1.0 - cexp[i]);
}