From 77c6e1506b80d29b91c91d554fbe91e9a9b33f0d Mon Sep 17 00:00:00 2001 From: Ozalp Babaoglu Date: Wed, 13 Feb 1980 18:49:24 -0800 Subject: [PATCH] BSD 3 development Work on file usr/src/sys/sys/vmpage.c Work on file usr/src/sys/sys/vmpt.c Work on file usr/src/sys/sys/vmsched.c Work on file usr/src/sys/sys/vmswap.c Co-Authored-By: Bill Joy Co-Authored-By: Juan Porcar Synthesized-from: 3bsd --- usr/src/sys/sys/vmpage.c | 577 ++++++++++++++++++++++++++++++++++++++ usr/src/sys/sys/vmpt.c | 355 +++++++++++++++++++++++ usr/src/sys/sys/vmsched.c | 360 ++++++++++++++++++++++++ usr/src/sys/sys/vmswap.c | 207 ++++++++++++++ 4 files changed, 1499 insertions(+) create mode 100644 usr/src/sys/sys/vmpage.c create mode 100644 usr/src/sys/sys/vmpt.c create mode 100644 usr/src/sys/sys/vmsched.c create mode 100644 usr/src/sys/sys/vmswap.c diff --git a/usr/src/sys/sys/vmpage.c b/usr/src/sys/sys/vmpage.c new file mode 100644 index 0000000000..2aa5b55cc0 --- /dev/null +++ b/usr/src/sys/sys/vmpage.c @@ -0,0 +1,577 @@ +/* vmpage.c 2.2 2/10/80 */ + +#include "../h/param.h" +#include "../h/systm.h" +#include "../h/inode.h" +#include "../h/dir.h" +#include "../h/user.h" +#include "../h/proc.h" +#include "../h/reg.h" +#include "../h/pte.h" +#include "../h/buf.h" +#include "../h/text.h" +#include "../h/mtpr.h" +#include "../h/cmap.h" +#include "../h/vm.h" +#include "../h/vmmon.h" +#include "../h/file.h" + +/* + * Handle a page fault. + */ +pagein(virtaddr) + unsigned virtaddr; +{ + register struct proc *p; + register struct pte *pte; + register struct inode *ip; + register unsigned v; + unsigned pf; + int type, fileno, prot; + struct pte opte; + struct buf *bp; + daddr_t daddr; + dev_t dev; + int i; +#ifdef ERNIE + int otime, olbolt, oicr, a, s; + + s = spl6(); + otime = time, olbolt = lbolt, oicr = mfpr(ICR); +#endif + cnt.v_faults++; + v = clbase(btop(virtaddr)); + p = u.u_procp; + if (isatsv(p, v)) + type = MTEXT; + else if (isassv(p, v)) + type = MSTACK; + else + type = MDATA; + pte = vtopte(p, v); + if (pte->pg_v) + panic("pagein"); +/* + if (pte->pg_v || (pte+1)->pg_v) + panic("pagein pg_v"); + if (pte->pg_fod) { + if ((pte+1)->pg_fod == 0) + panic("pagein pg_fod"); + if (((struct fpte *)pte)->pg_blkno != ((struct fpte *)(pte+1))->pg_blkno) + panic("pagein pg_blkno"); + } else { + if ((pte+1)->pg_fod) + panic("pagein v+fod"); + if (pte->pg_pfnum) { + if (pte->pg_pfnum+1 != (pte+1)->pg_pfnum) + panic("pagein <> pfnum"); + } else if ((pte+1)->pg_pfnum) + panic("pagein +1pfnum <> 0"); + } +*/ + + /* + * If page is reclaimable, reclaim it. + * If page is text and intransit, sleep while it is intransit, + * If it is valid after the sleep, we are done. + * Otherwise we have to start checking again, since page could + * even be reclaimable now (we may have swapped for a long time). 
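+ *
+ * In outline, a page is reclaimable below when
+ *	pte->pg_fod == 0 && pte->pg_pfnum != 0
+ * i.e. it still names a page frame but is no longer valid.
+ * A frame found on the free list (MFREE) is unlinked and
+ * counted back into the resident set; either way the pte is
+ * revalidated at the cost of only a minor fault.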
+ */ +restart: + if (pte->pg_fod == 0 && pte->pg_pfnum) { + if (type == MTEXT && cmap[pgtocm(pte->pg_pfnum)].c_intrans) { + sleep((caddr_t)p->p_textp, PSWP+1); + pte = vtopte(p, v); + if (pte->pg_v) { +valid: + if (p->p_flag & SDLYU) + mlock(pte->pg_pfnum); + tbiscl(v); + cnt.v_intrans++; + return; + } + goto restart; + } + if (cmap[pgtocm(pte->pg_pfnum)].c_flag & MFREE) { + munlink(pte->pg_pfnum); + cnt.v_pgfrec++; + if (type == MTEXT) + p->p_textp->x_rssize += CLSIZE; + else + p->p_rssize += CLSIZE; + } + pte->pg_v = 1; + if (anycl(pte, pg_m)) + pte->pg_m = 1; + distcl(pte); + if (type == MTEXT) + distpte(p->p_textp, vtotp(p, v), pte); + u.u_minorflt++; + cnt.v_pgrec++; + if (p->p_flag & SDLYU) + mlock(pte->pg_pfnum); + tbiscl(v); +#ifdef ERNIE + a = vmtime(otime, olbolt, oicr); + rectime += a; + if (a >= 0) + vmfltmon(rmon, a, rmonmin, rres, NRMON); + splx(s); +#endif + return; + } +#ifdef ERNIE + splx(s); +#endif + + /* + * Now prepare to bring the page in. + * We allocate the page before locking so we will + * be swappable if there is no free memory. + */ + if (freemem < CLSIZE) { + while (freemem < CLSIZE) + sleep((caddr_t)&freemem, PSWP+2); + pte = vtopte(p, v); + if (pte->pg_v) + goto valid; + goto restart; + } + + /* + * Now committed to bringing in the page. + * Lock this process, get a page, + * construct the new pte, and increment + * the (process or text) resident set size. + */ + p->p_flag |= SPAGE; + opte = *pte; + VOID memall(pte, CLSIZE, p, type); + pte->pg_prot = opte.pg_prot; + pf = pte->pg_pfnum; + cmap[pgtocm(pf)].c_intrans = 1; + distcl(pte); + if (type == MTEXT) { + p->p_textp->x_rssize += CLSIZE; + distpte(p->p_textp, vtotp(p, v), pte); + } else + p->p_rssize += CLSIZE; + + if (opte.pg_fod) { + pte->pg_swapm = 1; + fileno = ((struct fpte *)&opte)->pg_fileno; + if (fileno > PG_FMAX) + panic("pagein pg_fileno"); + if (fileno == PG_FZERO) { + for (i = 0; i < CLSIZE; i++) + clearseg(pf+i); + if (type != MTEXT) + cnt.v_zfod += CLSIZE; + pte->pg_v = 1; + distcl(pte); + goto out; + } + if (fileno == PG_FTEXT) { + if (p->p_textp == 0) + panic("pagein PG_FTEXT"); + dev = p->p_textp->x_iptr->i_dev; + cnt.v_exfod += CLSIZE; + } else { + if (u.u_ofile[fileno] == NULL) + panic("pagein u.u_ofile"); + ip = u.u_ofile[fileno]->f_inode; + if ((u.u_vrpages[fileno] -= CLSIZE) <= 0) { + if (u.u_vrpages[fileno] < 0) + panic("pagein u.u_vrpages"); + if (--ip->i_vfdcnt < 0) + panic("pagein i_vfdcnt"); + } + dev = ip->i_dev; + cnt.v_vrfod += CLSIZE; + } + daddr = fsbtodb(((struct fpte *)&opte)->pg_blkno); + if (bp = baddr(dev, dbtofsb(daddr))) { + pte->pg_v = 1; + prot = *(int *)pte & PG_PROT; + pte->pg_prot = 0; + *(int *)pte |= PG_UW; + distcl(pte); + tbiscl(v); + /* THIS ASSUMES THAT CLSIZE*NBPG==BSIZE */ + bcopy(bp->b_un.b_addr, ptob(v), BSIZE); + brelse(bp); + pte->pg_prot = 0; + *(int *)pte |= prot; + distcl(pte); + if (type == MTEXT) + distpte(p->p_textp, vtotp(p, v), pte); + goto out; + } + } else { + if (opte.pg_pfnum) + panic("pagein pfnum"); + daddr = vtod(p, v, &u.u_dmap, &u.u_smap); + dev = swapdev; + pte->pg_vreadm = opte.pg_vreadm; + } + + distcl(pte); + swap(p, daddr, ptob(v), ctob(CLSIZE), B_READ, B_PGIN, dev); + + /* + * Fix page table entries. + */ + pte->pg_v = 1; + distcl(pte); + if (type == MTEXT) { + distpte(p->p_textp, vtotp(p, v), pte); + if (opte.pg_fod) + p->p_textp->x_flag |= XWRIT; + wakeup((caddr_t)p->p_textp); + } + + /* + * Instrumentation. 
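+ * Charge the fault to the process (p_faults), to the user
+ * structure (u_majorflt), and to the system-wide paging counts.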
+ */ + p->p_faults++; + u.u_majorflt++; + cnt.v_pgin++; +#ifdef ERNIE + a = vmtime(otime, olbolt, oicr) / 100; + pgintime += a; + if (a >= 0) + vmfltmon(pmon, a, pmonmin, pres, NPMON); +#endif +out: + /* + * Memall returned page locked. Unless + * this page is to be used in a raw transfer, + * we should unlock the page. + */ + cmap[pgtocm(pf)].c_intrans = 0; + if ((p->p_flag & SDLYU) == 0) + munlock(pte->pg_pfnum); + + /* + * All done. + */ + p->p_flag &= ~SPAGE; + tbiscl(v); /* conservative */ +} + +/* + * The page out daemon, which runs as process 2. + * + * As long as there are at least lotsfree pages, + * this process is not run. When the number of free + * pages stays in the range desfree to lotsfree, + * this daemon runs through the pages in the loop + * at a rate determined in vmsched(), simulating the missing + * hardware reference bit, and cleaning pages and transferring + * them to the free list. + */ +pageout() +{ + register struct proc *rp; + register struct text *xp; + register struct cmap *c; + register struct pte *pte; + int count, pushes; + swblk_t daddr; + unsigned v; + int maxhand = pgtocm(maxfree); + +loop: + /* + * Before sleeping, look to see if there are any swap I/O headers + * in the ``cleaned'' list that correspond to dirty + * pages that have been pushed asynchronously. If so, + * empty the list by calling cleanup(). + * + * N.B.: We guarantee never to block while the cleaned list is nonempty. + */ + VOID spl6(); + if (bclnlist != NULL) + cleanup(); + sleep((caddr_t)&proc[2], PSWP+1); + VOID spl0(); + count = 0; + pushes = 0; + while (nscan < desscan && freemem < lotsfree) { +top: + /* + * An iteration of the clock pointer (hand) around the loop. + * Look at the page at hand. If it is a + * locked (for physical i/o e.g.), system (u., page table) + * or free, then leave it alone. + * Otherwise, find a process and text pointer for the + * page, and a virtual page number in either the + * process or the text image. + */ + c = &cmap[hand]; + if (c->c_flag & (MLOCK|MSYS|MFREE)) + goto skip; + if (c->c_flag & MTEXT) { + xp = &text[c->c_ndx]; + rp = xp->x_caddr; + v = tptov(rp, c->c_page); + pte = tptopte(rp, c->c_page); + } else { + rp = &proc[c->c_ndx]; + while (rp->p_flag & SNOVM) + rp = rp->p_xlink; + xp = rp->p_textp; + if (c->c_flag & MDATA) { + v = dptov(rp, c->c_page); + pte = dptopte(rp, c->c_page); + } else { + v = sptov(rp, c->c_page); + pte = sptopte(rp, c->c_page); + } + } + + if (pte->pg_pfnum != cmtopg(hand)) + panic("bad c_page"); + + /* + * If page is valid; now it is invalid, but reclaimable. + * If this pte is not valid, then it must be reclaimable + * and we can add it to the free list. + */ + if (pte->pg_v) { + pte->pg_v = 0; + if (anycl(pte, pg_m)) + pte->pg_m = 1; + distcl(pte); + if (c->c_flag & MTEXT) + distpte(xp, vtotp(rp, v), pte); + } else { + /* + * This check guarantees a minimal investment in + * swapped in processes, by protecting about small + * amount of data space from replacement. This + * prevents very large jobs from dragging everything + * into the ground when they are exhibiting atypical + * behaviour (e.g. LISP garbage collections.) + * + * Note that this is a rather flimsy replacement + * for working set size estimation. We expect + * most systems to have a reasonable amount of main + * memory, and thus this check will rarely have + * any effect. + * + * SHOULD DO SOMETHING SIMILAR FOR TEXT SEGMENTS. 
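+ *
+ * Concretely, a data or stack page is passed over when
+ *	rp->p_rssize < saferss - rp->p_slptime
+ * so a process keeps roughly its last saferss pages resident,
+ * a protection which decays away as it sleeps longer.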
+ */ + if ((c->c_flag & MTEXT) == 0) { + if (rp->p_rssize < saferss - rp->p_slptime) + goto skip; + } + + /* + * If the page is currently dirty, we + * have to arrange to have it cleaned before it + * can be freed. We mark it clean immediately. + * If it is reclaimed while being pushed, then modified + * again, we are assured of the correct order of + * writes because we lock the page during the write. + * This guarantees that a swap() of this process (and + * thus this page), initiated in parallel, will, + * in fact, push the page after us. + * + * The most general worst case here would be for + * a reclaim, a modify and a swapout to occur + * all before the single page transfer completes. + */ + if (dirtycl(pte)) { + if (pushes > MAXPGIO / 2) + goto skip; + pushes++; + /* + * If the process is being swapped out + * or about to exit, do not bother with its + * dirty pages + */ + if (rp->p_flag & (SLOCK|SWEXIT)) + goto skip; + + /* + * Now carefully make sure that there will + * be a header available for the push so that + * we will not block waiting for a header in + * swap(). The reason this is important is + * that we (proc[2]) are the one who cleans + * dirty swap headers and we could otherwise + * deadlock waiting for ourselves to clean + * swap headers. The sleep here on &proc[2] + * is actually (effectively) a sleep on both + * ourselves and &bswlist, and this is known + * to iodone and swap in bio.c. That is, + * &proc[2] will be awakened both when dirty + * headers show up and also to get the pageout + * daemon moving. + */ + VOID spl6(); + if (bclnlist != NULL) + cleanup(); + if (bswlist.av_forw == NULL) { + bswlist.b_flags |= B_WANTED; + sleep((caddr_t)&proc[2], PSWP+2); + VOID spl0(); + /* + * Page disposition may have changed + * since process may have exec'ed, + * forked, exited or just about + * anything else... try this page + * frame again, from the top. + */ + goto top; + } + VOID spl0(); + + uaccess(rp, Pushmap, &pushutl); + if (swpexpand(rp->p_dsize, rp->p_ssize, + &pushutl.u_dmap, &pushutl.u_smap) == 0) { + swkill(rp); + goto skip; + } + daddr = vtod(rp, v, &pushutl.u_dmap, &pushutl.u_smap); + /* + * Now committed to pushing the page... + */ + mlock((unsigned)cmtopg(hand)); + if (anycl(pte, pg_m)) { + pte->pg_vreadm = 1; + pte->pg_m = 0; + } + pte->pg_swapm = 0; + distcl(pte); + if (c->c_flag & MTEXT) { + xp->x_poip++; + distpte(xp, vtotp(rp, v), pte); + } else + rp->p_poip++; + swap(rp, daddr, ptob(v), ctob(CLSIZE), B_WRITE, B_DIRTY, swapdev); + /* + * The cleaning of this page will be + * completed later, in cleanup() called + * (synchronously) by us (proc[2]). In + * the meantime, the page frame is locked + * so no havoc can result. + */ + goto skip; + + } + /* + * Decrement the resident set size of the current + * text object/process, and put the page in the + * free list. Note that we don't give memfree the + * pte as its argument, since we don't want to destroy + * the pte. If it hasn't already been discarded + * it may yet have a chance to be reclaimed from + * the free list. + */ + if ((c->c_flag & MGONE) == 0) + if (c->c_flag & MTEXT) + xp->x_rssize -= CLSIZE; + else + rp->p_rssize -= CLSIZE; + memfree(pte, CLSIZE, 0); + cnt.v_dfree += CLSIZE; + + /* + * We managed to add a page to the free list, + * so we give ourselves another couple of trips + * around the loop. 
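+ * (Resetting count is what keeps the scan going: if successive
+ * revolutions of the hand free nothing at all, the test at the
+ * bottom of the loop gives up until the next clock wakeup.)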
+ */ + count = 0; + } +skip: + cnt.v_scan++; + nscan++; + if (++hand >= maxhand) { + hand = 0; + cnt.v_rev++; + if (count > 2) { + /* + * Extremely unlikely, but we went around + * the loop twice and didn't get anywhere. + * Don't cycle, stop till the next clock tick. + */ + goto loop; + } + count++; + } + } + goto loop; +} + +/* + * Process the ``cleaned'' list. + * + * Scan through the linked list of swap I/O headers + * and free the corresponding pages that have been + * cleaned by being written back to the paging area. + * If the page has been reclaimed during this time, + * we do not free the page. As they are processed, + * the swap I/O headers are removed from the cleaned + * list and inserted into the free list. + */ +cleanup() +{ + register struct buf *bp; + register struct proc *rp; + register struct text *xp; + register struct cmap *c; + register struct pte *pte; + unsigned pf; + int s; + + s = spl6(); + while (bp = bclnlist) { + bclnlist = bp->av_forw; + pte = dptopte(&proc[2], btop(bp->b_un.b_addr)); + pf = pte->pg_pfnum; + munlock(pf); + c = &cmap[pgtocm(pf)]; + if (c->c_flag & MTEXT) { + xp = &text[c->c_ndx]; + xp->x_poip--; + if (xp->x_poip == 0) + wakeup((caddr_t)&xp->x_poip); + } else { + rp = &proc[c->c_ndx]; + while (rp->p_flag & SNOVM) + rp = rp->p_xlink; + rp->p_poip--; + if (rp->p_poip == 0) + wakeup((caddr_t)&rp->p_poip); + } + if ((c->c_flag & MGONE) == 0) { + if (c->c_flag & MTEXT) + pte = tptopte(xp->x_caddr, c->c_page); + else { + if (c->c_flag & MDATA) + pte = dptopte(rp, c->c_page); + else + pte = sptopte(rp, c->c_page); + } + if (pte->pg_v) + goto skip; + if (c->c_flag & MTEXT) + xp->x_rssize -= CLSIZE; + else + rp->p_rssize -= CLSIZE; + } + memfree(pte, CLSIZE, 0); + cnt.v_dfree += CLSIZE; +skip: + bp->b_flags = 0; + bp->av_forw = bswlist.av_forw; + bswlist.av_forw = bp; + if (bswlist.b_flags & B_WANTED) { + bswlist.b_flags &= ~B_WANTED; + wakeup((caddr_t)&bswlist); + } + } + splx(s); +} diff --git a/usr/src/sys/sys/vmpt.c b/usr/src/sys/sys/vmpt.c new file mode 100644 index 0000000000..b8a08b4aac --- /dev/null +++ b/usr/src/sys/sys/vmpt.c @@ -0,0 +1,355 @@ +/* vmpt.c 2.5 2/14/80 */ + +#include "../h/param.h" +#include "../h/systm.h" +#include "../h/dir.h" +#include "../h/user.h" +#include "../h/proc.h" +#include "../h/map.h" +#include "../h/mtpr.h" +#include "../h/pte.h" +#include "../h/cmap.h" +#include "../h/vm.h" +#include "../h/buf.h" +#include "../h/text.h" + +/* + * Get page tables for process p. Allocator + * for memory is argument; process must be locked + * from swapping if vmemall is used; if memall is + * used, caller must be prepared for an error return. + */ +vgetpt(p, pmemall) + register struct proc *p; + int (*pmemall)(); +{ + register int a; + register int i; + + if (p->p_szpt == 0) + return (1); + a = malloc(kernelmap, p->p_szpt); + if (a == 0) + return (0); + if ((*pmemall)(&Usrptmap[a], p->p_szpt, p, MSYS) == 0) { + mfree(kernelmap, p->p_szpt, a); + return (0); + } + p->p_p0br = kmxtob(a); + vmaccess(&Usrptmap[a], (caddr_t)p->p_p0br, p->p_szpt); + for (i = 0; i < p->p_szpt; i++) + clearseg(Usrptmap[a + i].pg_pfnum); + return (1); +} + +/* + * Initialize text portion of page table. 
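+ * If another process is already linked to the text its ptes are
+ * simply copied.  Otherwise read-only (PG_URKR) prototypes are
+ * built, fill-on-demand zero if the text is still being loaded;
+ * for a paged text (XPAGI) the ptes are then either initialized
+ * to fill from the text file or read back from the swap area.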
+ */ +vinitpt(p) + struct proc *p; +{ + register struct text *xp; + register struct proc *q; + register struct pte *pte; + register int i; + struct pte proto; + + xp = p->p_textp; + if (xp == 0) + return; + pte = tptopte(p, 0); + if (q = xp->x_caddr) { + bcopy((caddr_t)tptopte(q, 0), (caddr_t)pte, + (unsigned) (sizeof(struct pte) * xp->x_size)); + return; + } + *(int *)&proto = PG_URKR; + if (xp->x_flag & XLOAD) { + proto.pg_fod = 1; + ((struct fpte *)&proto)->pg_fileno = PG_FZERO; + } + for (i = 0; i < xp->x_size; i++) + *pte++ = proto; + if ((xp->x_flag & XPAGI) == 0) + return; + if (xp->x_flag & XLOAD) + vinifod(tptopte(p, 0), PG_FTEXT, xp->x_iptr, 1, xp->x_size); + else + swap(p, xp->x_daddr + ctod(xp->x_size), (caddr_t)tptopte(p, 0), + xp->x_size * sizeof (struct pte), B_READ, B_PAGET, swapdev); +} + +/* + * Update the page tables of all processes linked + * to a particular text segment, by distributing + * dpte to the the text page at virtual frame v. + */ +distpte(xp, tp, dpte) + struct text *xp; + register size_t tp; + register struct pte *dpte; +{ + register struct proc *p; + register struct pte *pte; + register int i; + + for (p = xp->x_caddr; p; p = p->p_xlink) { + pte = tptopte(p, tp); + if (pte != dpte) + for (i = 0; i < CLSIZE; i++) + pte[i] = dpte[i]; + } +} + +/* + * Release page tables of process p. + */ +vrelpt(p) + register struct proc *p; +{ + register int a; + + if (p->p_szpt == 0) + return; + a = btokmx(p->p_p0br); + VOID vmemfree(&Usrptmap[a], p->p_szpt); + mfree(kernelmap, p->p_szpt, a); +} + +/* + * Pass the page tables of process p to process q. + * Used during vfork(). + */ +vpasspt(p, q, up, uq) + register struct proc *p, *q; + register struct user *up, *uq; +{ + + uq->u_pcb.pcb_p0br = q->p_p0br = p->p_p0br; + uq->u_pcb.pcb_p1br = up->u_pcb.pcb_p1br; + up->u_pcb.pcb_p1br = up->u_pcb.pcb_p0br - P1TOP; + uq->u_pcb.pcb_p0lr = up->u_pcb.pcb_p0lr; + up->u_pcb.pcb_p0lr = AST; + uq->u_pcb.pcb_p1lr = up->u_pcb.pcb_p1lr; + up->u_pcb.pcb_p1lr = P1TOP; + uq->u_pcb.pcb_szpt = q->p_szpt = p->p_szpt; + up->u_pcb.pcb_szpt = p->p_szpt = 0; + mtpr(P0BR, u.u_pcb.pcb_p0br); + mtpr(P1BR, u.u_pcb.pcb_p1br); + mtpr(P0LR, u.u_pcb.pcb_p0lr &~ AST); + mtpr(P1LR, u.u_pcb.pcb_p1lr); + mtpr(TBIA, 1); +} + +/* + * Compute number of pages to be allocated to the u. area + * and initialized data page tables. + */ +/*ARGSUSED*/ +vusize(p, utl) + register struct proc *p; + struct user *utl; +{ + register int tsz = p->p_tsize / NPTEPG; + + return (clrnd(UPAGES + clrnd(ctopt(p->p_tsize + p->p_dsize + p->p_ssize)) - tsz)); +} + +/* + * Get u area for process p. If a old u area is given, + * then copy the new area from the old, else + * swap in as specified in the proc structure. + * + * Since argument map/newu is potentially shared, + * we have to be careful not to block after beginning + * to use them. 
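+ * Hence the frames are first collected in the local temp array
+ * and only then entered into the shared map and made
+ * addressable via vmaccess().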
+ */ +vgetu(p, palloc, map, newu, oldu) + register struct proc *p; + int (*palloc)(); + register struct pte *map; + register struct user *newu; + struct user *oldu; +{ + register int i; + struct pte temp[clrnd(UPAGES)]; + + if ((*palloc)(temp, clrnd(UPAGES), p, MSYS) == 0) + return (0); + for (i = 0; i < UPAGES; i++) { + map[i] = temp[i]; + p->p_addr[i] = map[i].pg_pfnum; + } + vmaccess(map, (caddr_t)newu, UPAGES); + if (oldu) { + bcopy((caddr_t)oldu, (caddr_t)newu, UPAGES * NBPG); + newu->u_procp = p; + } else { + swap(p, p->p_swaddr, (caddr_t)0, ctob(UPAGES), B_READ, B_UAREA, swapdev); + newu->u_pcb.pcb_szpt = p->p_szpt; + } + newu->u_pcb.pcb_p0br = p->p_p0br; + newu->u_pcb.pcb_p1br = p->p_p0br + p->p_szpt * NPTEPG - P1TOP; + return (1); +} + +vrelswu(p, utl) + struct proc *p; + struct user *utl; +{ + + mfree(swapmap, ctod(vusize(p, utl)), p->p_swaddr); + /* p->p_swaddr = 0; */ /* leave for post-mortems */ +} + +vgetswu(p, utl) + struct proc *p; + struct user *utl; +{ + + p->p_swaddr = malloc(swapmap, ctod(vusize(p, utl))); + return (p->p_swaddr); +} + +/* + * Release u. area. + * + * Note: we run on the mapping established by Umap for a while after + * the call to vmemfree... hence we (should!) run spl6() until we + * we swtch()... currently we are safe only since interrupt code + * doesn't allocate/free memory. + */ +vrelu(p, swapu) + register struct proc *p; +{ + register int i; + struct pte uu[UPAGES]; + register struct pte *up; + + if (swapu) + swap(p, p->p_swaddr, (caddr_t)0, ctob(UPAGES), B_WRITE, B_UAREA, swapdev); + for (i = 0; i < UPAGES; i++) { + up = &uu[i]; + *(int *)up = 0; + up->pg_pfnum = p->p_addr[i]; + up->pg_v = 1; + } + VOID vmemfree(uu, clrnd(UPAGES)); +} + +/* + * Expand a page table. + */ +ptexpand(change) + register int change; +{ + register struct pte *p1, *p2; + register int i; + register int spages, ss = P1TOP - mfpr(P1LR); + register int kold = btokmx((struct pte *)mfpr(P0BR)); + int knew, tdpages; + int szpt = u.u_pcb.pcb_szpt; + + if (change <= 0 || change % CLSIZE) + panic("ptexpand"); +top: + if ((knew=malloc(kernelmap, szpt+change)) == 0) + goto bad; + spages = ss/NPTEPG; + tdpages = szpt - spages; + if (memall(&Usrptmap[knew+tdpages], change, u.u_procp, MSYS) == 0) { + mfree(kernelmap, szpt+change, knew); + goto bad; + } + + kmcopy(knew, kold, tdpages); + kmcopy(knew+tdpages+change, kold+tdpages, spages); + + i = knew + tdpages; + p1 = &Usrptmap[i]; + p2 = p1 + change; + while (p1 < p2) { + *(int *)p1 |= PG_V | PG_KW; + mtpr(TBIS, kmxtob(i)); + clearseg(p1->pg_pfnum); + p1++; + i++; + } + + /* copy stack entries to new page */ + p1 = (struct pte *)mfpr(P1BR) + mfpr(P1LR); + p2 = kmxtob(knew+szpt+change) - ss; + for (i = ss - NPTEPG*spages; i != 0; i--) { + *p2++ = *p1; + *(int *)p1++ = 0; + } + + /* update u area and proc entries */ + u.u_procp->p_p0br = kmxtob(knew); + u.u_pcb.pcb_p0br = kmxtob(knew); + u.u_pcb.pcb_p1br = kmxtob(knew+szpt+change) - P1TOP; + u.u_pcb.pcb_szpt += change; + u.u_procp->p_szpt += change; + mtpr(P0BR, u.u_procp->p_p0br); + mtpr(P1BR, u.u_pcb.pcb_p1br); + if (szpt) + mfree(kernelmap, szpt, kold); + mtpr(TBIA, 1); + return; + /* + * Swap out the process so that the unavailable + * resource will be allocated upon swapin. + * + * When resume is executed for the process, + * here is where it will resume. + */ +bad: + if (save(u.u_ssav)) + return; + if (swapout(u.u_procp, (size_t)(mfpr(P0LR) - u.u_tsize), ss) == 0) { + /* + * No space to swap... 
it is inconvenient to try + * to exit, so just wait a bit and hope something + * turns up. Could deadlock here. + * + * SOMEDAY REFLECT ERROR BACK THROUGH expand TO CALLERS + * (grow, sbreak) SO CAN'T DEADLOCK HERE. + */ + sleep((caddr_t)&lbolt, PRIBIO); + goto top; + } + u.u_procp->p_flag |= SSWAP; + qswtch(); + /* no return */ +} + +kmcopy(to, from, count) + register int to; + int from; + register int count; +{ + register struct pte *tp = &Usrptmap[to]; + register struct pte *fp = &Usrptmap[from]; + + while (count != 0) { + *tp++ = *fp++; + mtpr(TBIS, kmxtob(to)); + to++; + count--; + } +} + +/* + * Change protection codes of text segment. + */ +chgprot(tprot) + long tprot; +{ + register int *ptaddr, i; + + ptaddr = (int *)mfpr(P0BR); + for (i = 0; i < u.u_tsize; i++) { + ptaddr[i] &= ~PG_PROT; + ptaddr[i] |= tprot; + } +} diff --git a/usr/src/sys/sys/vmsched.c b/usr/src/sys/sys/vmsched.c new file mode 100644 index 0000000000..b9034e1210 --- /dev/null +++ b/usr/src/sys/sys/vmsched.c @@ -0,0 +1,360 @@ +/* vmsched.c 2.2 2/10/80 */ + +#include "../h/param.h" +#include "../h/systm.h" +#include "../h/seg.h" +#include "../h/dir.h" +#include "../h/user.h" +#include "../h/proc.h" +#include "../h/text.h" +#include "../h/vm.h" +#include "../h/cmap.h" + +int maxpgio = MAXPGIO; +int maxslp = MAXSLP; +int minfree = MINFREE; +int desfree = DESFREE; +/* In main.c since LOTSFREE is variable */ +/* int lotsfree = LOTSFREE; */ +int saferss = SAFERSS; +int slowscan = SLOWSCAN; +int fastscan = FASTSCAN; +int multprog = -1; /* so we don't count process 2 */ + +double avenrun[3]; /* load average, of runnable procs */ + +/* + * The main loop of the scheduling (swapping) process. + * + * The basic idea is: + * see if anyone wants to be swapped in; + * swap out processes until there is room; + * swap him in; + * repeat. + * If the paging rate is too high, or the average free memory + * is very low, then we do not consider swapping anyone in, + * but rather look for someone to swap out. + * + * The runout flag is set whenever someone is swapped out. + * Sched sleeps on it awaiting work. + * + * Sched sleeps on runin whenever it cannot find enough + * core (by swapping out or otherwise) to fit the + * selected swapped process. It is awakened when the + * core situation changes and in any case once per second. + */ + +#define swappable(p) \ + (((p)->p_flag&(SSYS|SLOCK|SULOCK|SLOAD|SPAGE|SKEEP|SWEXIT))==SLOAD) + +/* insure non-zero */ +#define nz(x) (x != 0 ? x : 1) + +sched() +{ + register struct proc *rp, *p, *inp; + register int outpri, inpri, rppri; + int smax; + + /* + * Check if paging rate is too high, or average of + * free list very low and if so, adjust multiprogramming + * load by swapping someone out. + * + * Avoid glitches: don't swap out only process to do this, + * and don't swap based on paging rate if there is a reasonable + * amount of free memory. + */ +loop: + VOID spl6(); + if (kmapwnt || (multprog > 1 && avefree < desfree && + (rate.v_pgin + rate.v_pgout > maxpgio || avefree < minfree))) { + outpri = 10000; + p = 0; + } else { + /* + * Number of pages available and paging rate seem + * reasonable, consider increasing multiprogramming + * by swapping in process which has been out longest. + * If you went out with a lot of pages, then you are + * lower priority to come in... but are not brought in + * until there is a reasonable fraction of the memory + * you are expected to need available. 
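+ * (Roughly: bring p in only if
+ *	freemem > imin(deficit, lotsfree) + imin(p_swrss/2, 2*maxpgio)
+ * or p went out small and freemem exceeds desfree.)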
The system will + * also protect memory for you to some extent in this + * case by computing the expected ``deficit'' (pages + * ``owed'' to you) and not giving them away via further + * swapins of process which want many pages. + */ + outpri = -20000; + for (rp = &proc[0]; rp < &proc[NPROC]; rp++) { + rppri = rp->p_time - (rp->p_nice-NZERO)*8; + if (rp->p_time < MAXSLP) + rppri -= rp->p_swrss / nz(maxpgio / 2); + if (rp->p_stat==SRUN && (rp->p_flag&SLOAD)==0 && + rp->p_poip==0 && + (rp->p_textp==0||rp->p_textp->x_poip==0) && + rppri > outpri) { + p = rp; + outpri = rppri; + } else if ((rp->p_stat==SSLEEP||rp->p_stat==SSTOP) && + (freemem < desfree || rp->p_rssize == 0) && + rp->p_slptime > maxslp && + (!rp->p_textp || (rp->p_textp->x_flag&XLOCK)==0) && + swappable(rp)) { + /* + * We found a process which has been blocked + * in core for a long time, and memory is + * not as free as we would prefer. + * Swap it out to free its u. and page table + * pages, then start over. We do this here + * because we want to get rid of this guy + * even if noone wants to come in. + */ + rp->p_flag &= ~SLOAD; + VOID swapout(rp, rp->p_dsize, rp->p_ssize); + goto loop; + } + } + /* + * If there is no one there, wait. + */ + if (outpri == -20000) { + runout++; + sleep((caddr_t)&runout, PSWP); + goto loop; + } + VOID spl0(); + + /* + * If there are resources (kernel map, memory), swap p in. + * If the process was swapped out while it still had pages, + * don't bring it back unless there is a reasonable amount + * of memory for it to work with. + */ + if (freemem > imin(deficit, lotsfree) + imin(p->p_swrss / 2, 2 * maxpgio) || + p->p_swrss < 2 * maxpgio && freemem > desfree) { + if (swapin(p)) + goto loop; + } + } + + /* + * Need resources (kernel map or memory), swap someone out. + * Select the person who has been sleeping longest + * at bad priority; if none, select the oldest. + */ + VOID spl6(); + inp = p; + p = NULL; + smax = -1; + inpri = -1; + for (rp = &proc[0]; rp < &proc[NPROC]; rp++) { + if (rp->p_stat==SZOMB) + continue; + if (rp == inp) + continue; + if (!swappable(rp)) + continue; + if (rp->p_textp && rp->p_textp->x_flag&XLOCK) + continue; + if ((rp->p_stat==SSLEEP&&rp->p_pri>=PZERO || rp->p_stat==SSTOP) + && rp->p_slptime > maxslp) { + if (smax < rp->p_slptime) { + p = rp; + smax = rp->p_slptime; + } + } else if (smax<0 && (rp->p_stat==SRUN||rp->p_stat==SSLEEP)) { + rppri = rp->p_time+rp->p_nice-NZERO; + if (rp->p_time < maxslp) + rppri -= imin(rp->p_swrss / nz(maxpgio), maxslp / 2); + if (rppri > inpri) { + p = rp; + inpri = rppri; + } + } + } + /* + * Swap found user out if sleeping at bad pri for maxslp seconds, + * or if he has spent at least 5 seconds in core and + * the swapped-out process has spent at least 5 seconds out. + * Otherwise wait a bit and try again. + * (Note these are not really ``times'' but priorities. 
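+ * A process sleeping at bad priority for maxslp seconds goes
+ * first; failing that, candidates are aged by
+ *	p_time + p_nice - NZERO
+ * discounted, while recently swapped in, by the cost of paging
+ * the process back in, p_swrss / maxpgio.)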
+ */ + if (smax>=0 || (outpri>=5 && inpri>=5)) { + p->p_flag &= ~SLOAD; + VOID swapout(p, p->p_dsize, p->p_ssize); + goto loop; + } + VOID spl6(); + runin++; + sleep((caddr_t)&runin, PSWP); + goto loop; +} + +#define vave(field, time) \ + ave(rate.field, cnt.field, time); sum.field += cnt.field; cnt.field = 0 + +vmmeter() +{ + register int scanrate; + register int vavail; + + deficit -= imin(deficit, imax(deficit / 10, maxpgio / 2)); + ave(avefree, freemem, 5); + /* v_pgin is maintained by clock.c */ + vave(v_pgout, 5); + vave(v_intrans, 5); + vave(v_pgrec, 5); + vave(v_exfod, 5); + vave(v_zfod, 5); + vave(v_vrfod, 5); + vave(v_nexfod, 5); + vave(v_nzfod, 5); + vave(v_nvrfod, 5); + vave(v_pgfrec, 5); + vave(v_faults, 5); + vave(v_scan, 5); + vave(v_rev, 5); + vave(v_dfree, 5); + vave(v_swtch, 5); + if (time % 5 == 0) + vmtotal(); + if (time % 10 == 0) { + vave(v_swpin, 2); + vave(v_swpout, 2); + } + if (avefree < minfree && runout || proc[0].p_slptime > 5) { + runout = 0; + runin = 0; + wakeup((caddr_t)&runin); + wakeup((caddr_t)&runout); + } + + /* + * Compute new rate for clock; if + * nonzero, restart clock. + * Rate ranges linearly from one rev per + * slowscan seconds when there is lotsfree memory + * available to one rev per fastscan seconds when + * there is no memory available. + */ + nscan = desscan = 0; + vavail = freemem - deficit; + if (freemem >= lotsfree) + return; + scanrate = (slowscan * vavail + fastscan * (lotsfree - vavail)) / nz(lotsfree); + desscan = LOOPSIZ / nz(scanrate); + wakeup((caddr_t)&proc[2]); +} + +vmtotal() +{ + register struct proc *p; + register struct text *xp; + int nrun = 0; + + total.t_vmtxt = 0; + total.t_avmtxt = 0; + total.t_rmtxt = 0; + total.t_armtxt = 0; + for (xp = &text[0]; xp < &text[NTEXT]; xp++) + if (xp->x_iptr) { + total.t_vmtxt += xp->x_size; + total.t_rmtxt += xp->x_rssize; + for (p = xp->x_caddr; p; p = p->p_xlink) + switch (p->p_stat) { + + case SSTOP: + case SSLEEP: + if (p->p_slptime >= maxslp) + continue; + /* fall into... */ + + case SRUN: + case SIDL: + total.t_avmtxt += xp->x_size; + total.t_armtxt += xp->x_rssize; + goto next; + } +next: + ; + } + total.t_vm = 0; + total.t_avm = 0; + total.t_rm = 0; + total.t_arm = 0; + total.t_rq = 0; + total.t_dw = 0; + total.t_pw = 0; + total.t_sl = 0; + total.t_sw = 0; + for (p = &proc[0]; p < &proc[NPROC]; p++) { + if (p->p_flag & SSYS) + continue; + if (p->p_stat) { + total.t_vm += p->p_dsize + p->p_ssize; + total.t_rm += p->p_rssize; + switch (p->p_stat) { + + case SSLEEP: + case SSTOP: + if (p->p_pri < PZERO) + nrun++; + if (p->p_flag & SPAGE) + total.t_pw++; + else if (p->p_flag & SLOAD) { + if (p->p_pri < PZERO) + total.t_dw++; + else if (p->p_slptime < maxslp) + total.t_sl++; + } else if (p->p_slptime < maxslp) + total.t_sw++; + if (p->p_slptime < maxslp) + goto active; + break; + + case SRUN: + case SIDL: + nrun++; + if (p->p_flag & SLOAD) + total.t_rq++; + else + total.t_sw++; +active: + total.t_avm += p->p_dsize + p->p_ssize; + total.t_arm += p->p_rssize; + break; + } + } + } + total.t_vm += total.t_vmtxt; + total.t_avm += total.t_avmtxt; + total.t_rm += total.t_rmtxt; + total.t_arm += total.t_armtxt; + total.t_free = avefree; + loadav(avenrun, nrun); +} + +/* + * Constants for averages over 1, 5, and 15 minutes + * when sampling at 5 second intervals. 
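+ * Each is exp(-t/T) with t = 5 seconds:
+ *	exp(-5/60)  = exp(-1/12),  the 1 minute average,
+ *	exp(-5/300) = exp(-1/60),  the 5 minute average,
+ *	exp(-5/900) = exp(-1/180), the 15 minute average,
+ * so each sample leaves avg = cexp[i]*avg + n*(1 - cexp[i]).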
+ */ +double cexp[3] = { + 0.9200444146293232, /* exp(-1/12) */ + 0.9834714538216174, /* exp(-1/60) */ + 0.9944598480048967, /* exp(-1/180) */ +}; + +/* + * Compute a tenex style load average of a quantity on + * 1, 5 and 15 minute intervals. + */ +loadav(avg, n) + register double *avg; + int n; +{ + register int i; + + for (i = 0; i < 3; i++) + avg[i] = cexp[i] * avg[i] + n * (1.0 - cexp[i]); +} diff --git a/usr/src/sys/sys/vmswap.c b/usr/src/sys/sys/vmswap.c new file mode 100644 index 0000000000..b79e2d15b4 --- /dev/null +++ b/usr/src/sys/sys/vmswap.c @@ -0,0 +1,207 @@ +/* vmswap.c 2.4 2/14/80 */ + +#include "../h/param.h" +#include "../h/systm.h" +#include "../h/dir.h" +#include "../h/user.h" +#include "../h/proc.h" +#include "../h/text.h" +#include "../h/map.h" +#include "../h/buf.h" +#include "../h/pte.h" +#include "../h/mtpr.h" +#include "../h/cmap.h" +#include "../h/vm.h" + +/* + * Swap a process in. + */ +swapin(p) + register struct proc *p; +{ + register struct text *xp; + + if (xp = p->p_textp) + xlock(xp); + p->p_szpt = clrnd(ctopt(p->p_ssize + p->p_dsize + p->p_tsize)); + if (vgetpt(p, memall) == 0) + goto nomem; + if (vgetu(p, memall, Swapmap, &swaputl, (struct user *)0) == 0) { + vrelpt(p); + goto nomem; + } + + deficit += p->p_swrss; + swdspt(p, &swaputl, B_READ); + vrelswu(p, &swaputl); + if (xp) { + if (xp->x_ccount == 0) + deficit += xp->x_swrss; + xlink(p); + xunlock(xp); + } + + p->p_rssize = 0; + p->p_flag |= SLOAD; + rate.v_pgin += p->p_aveflt; + p->p_time = 0; + multprog++; + cnt.v_swpin++; + return (1); + +nomem: + if (xp) + xunlock(xp); + return (0); +} + +int xswapwant, xswaplock; +/* + * Swap out process p. + * ds and ss are the old data size and the stack size + * of the process, and are supplied during page table + * expansion swaps. + */ +swapout(p, ds, ss) + register struct proc *p; + size_t ds, ss; +{ + register struct pte *map; + register struct user *utl; + register int a; + int s; + int rc = 1; + + s = 1; + map = Xswapmap; + utl = &xswaputl; + if (xswaplock & s) + if ((xswaplock & 2) == 0) { + s = 2; + map = Xswap2map; + utl = &xswap2utl; + } + a = spl6(); + while (xswaplock & s) { + xswapwant |= s; + sleep((caddr_t)map, PSWP); + } + xswaplock |= s; + splx(a); + uaccess(p, map, utl); + if (swpexpand(p->p_dsize, p->p_ssize, &utl->u_dmap, &utl->u_smap) == 0 + || vgetswu(p, utl) == 0) { + swkill(p); + rc = 0; + goto out; + } + utl->u_odsize = ds; + utl->u_ossize = ss; + p->p_flag |= SLOCK; + if (p->p_textp) { + if (p->p_textp->x_ccount == 1) + p->p_textp->x_swrss = p->p_textp->x_rssize; + xccdec(p->p_textp, p); + } + p->p_swrss = p->p_rssize; + vsswap(p, dptopte(p, 0), MDATA, 0, ds, &utl->u_dmap); + vsswap(p, sptopte(p, CLSIZE-1), MSTACK, 0, ss, &utl->u_smap); + if (p->p_rssize != 0) + panic("swapout rssize"); + + swdspt(p, utl, B_WRITE); + vrelu(p, 1); + p->p_flag &= ~SLOAD; + rate.v_pgin -= p->p_aveflt; + vrelpt(p); + p->p_flag &= ~SLOCK; + p->p_time = 0; + + multprog--; + cnt.v_swpout++; + + if(runout) { + runout = 0; + wakeup((caddr_t)&runout); + } +out: + xswaplock &= ~s; + if (xswapwant & s) { + xswapwant &= ~s; + wakeup((caddr_t)map); + } + return (rc); +} + +/* + * Swap the data and stack page tables in or out. + * Only hard thing is swapping out when new pt size is different than old. + * If we are growing new pt pages, then we must spread pages with 2 swaps. + * If we are shrinking pt pages, then we must merge stack pte's into last + * data page so as not to lose them (and also do two swaps). 
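+ * The merge arises when the tail of the text+data ptes and the
+ * tail of the stack ptes together fit in a single page; the
+ * stack ptes are then copied up against the end of the last
+ * data page and the stack portion of the swap is shortened by
+ * one page.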
+ */ +swdspt(p, utl, rdwri) + register struct proc *p; + register struct user *utl; +{ + register int szpt, tsz, ssz; + int tdlast, slast, tdsz; + register struct pte *pte; + register int i; + + szpt = clrnd(ctopt(p->p_tsize + p->p_dsize + p->p_ssize)); + tsz = p->p_tsize / NPTEPG; + if (szpt == p->p_szpt) { + swptstat.pteasy++; + swpt(p, 0, tsz, p->p_szpt - tsz, rdwri); + return; + } + if (szpt < p->p_szpt) + swptstat.ptshrink++; + else + swptstat.ptexpand++; + ssz = ctopt(utl->u_ossize); + if (szpt < p->p_szpt && utl->u_odsize && utl->u_ossize) { + /* + * Page tables shrinking... see if last text+data and + * last stack page must be merged... if so, copy + * stack pte's from last stack page to end of last + * data page, and decrease size of stack pt to be swapped. + */ + tdlast = (p->p_tsize + utl->u_odsize) % (NPTEPG * CLSIZE); + slast = utl->u_ossize % (NPTEPG * CLSIZE); + if (tdlast && slast && tdlast + slast <= (NPTEPG * CLSIZE)) { + swptstat.ptpack++; + tdsz = clrnd(ctopt(p->p_tsize + utl->u_odsize)); + bcopy((caddr_t)sptopte(p, utl->u_ossize - 1), + (caddr_t)&p->p_p0br[tdsz * NPTEPG - slast], + (unsigned)(slast * sizeof (struct pte))); + ssz--; + } + } + if (ssz) + swpt(p, szpt - ssz - tsz, p->p_szpt - ssz, ssz, rdwri); + if (utl->u_odsize) + swpt(p, 0, tsz, clrnd(ctopt(p->p_tsize + utl->u_odsize)) - tsz, rdwri); + for (i = 0; i < utl->u_odsize; i++) { + pte = dptopte(p, i); + if (pte->pg_v || pte->pg_fod == 0 && (pte->pg_pfnum||pte->pg_m)) + panic("swdspt"); + } + for (i = 0; i < utl->u_ossize; i++) { + pte = sptopte(p, i); + if (pte->pg_v || pte->pg_fod == 0 && (pte->pg_pfnum||pte->pg_m)) + panic("swdspt"); + } +} + +swpt(p, doff, a, n, rdwri) + struct proc *p; + int doff, a, n, rdwri; +{ + + if (n == 0) + return; + swap(p, p->p_swaddr + ctod(UPAGES) + doff, + (caddr_t)&p->p_p0br[a * NPTEPG], n * NBPG, rdwri, B_PAGET, swapdev); +} -- 2.20.1