+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the University of Utah, and William Jolitz.
+ *
+ * %sccs.include.386.c%
+ *
+ * @(#)vm_machdep.c 5.1 (Berkeley) %G%
+ */
+
+#include "pte.h"
+
+#include "param.h"
+#include "systm.h"
+#include "dir.h"
+#include "user.h"
+#include "proc.h"
+#include "cmap.h"
+#include "mount.h"
+#include "vm.h"
+#include "text.h"
+
+#include "buf.h"
+
+#include "dbg.h"
+/*
+ * Set a red zone in the kernel stack after the u. area.
+ */
+setredzone(pte, vaddr)
+ u_short *pte;
+ caddr_t vaddr;
+{
+/*
+ * Eventually do this by setting up an expand-down stack segment
+ * for the ss0: selector, allowing stack access down to the top of
+ * the u. area.  This means, though, that protection violations
+ * need to be handled through a double fault exception that must do
+ * an integral task switch to a known good context, within which a
+ * dump can be taken.  A sensible scheme might be to save the
+ * initial context used by sched (which has physical memory mapped
+ * 1:1 at the bottom) and take the dump while still in mapped mode.
+ */
+}
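+
+/*
+ * Pending the expand-down segment scheme described above, a simpler
+ * red zone can be had by invalidating the PTE of the page just below
+ * the kernel stack, as other ports do.  A minimal sketch only, not
+ * compiled in; it assumes `pte' addresses that guard page's PTE and
+ * would replace the stub above:
+ */
+#ifdef notdef
+setredzone(pte, vaddr)
+	u_short *pte;
+	caddr_t vaddr;
+{
+
+	/* clear the valid bit so a stack overrun faults immediately */
+	*pte &= ~PG_V;
+	load_cr3(_cr3());	/* flush the stale translation */
+}
+#endif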
+
+/*
+ * Check for valid program size
+ * NB - Check data and data growth separately as they may overflow
+ * when summed together.
+ */
+chksize(ts, ids, uds, ss)
+ unsigned ts, ids, uds, ss;
+{
+ extern unsigned maxtsize;
+
+ if (ctob(ts) > maxtsize ||
+ ctob(ids) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
+ ctob(uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
+ ctob(ids + uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
+ ctob(ss) > u.u_rlimit[RLIMIT_STACK].rlim_cur) {
+ u.u_error = ENOMEM;
+ return (1);
+ }
+ return (0);
+}
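+
+/*
+ * Worked example of the overflow noted above, assuming 4K pages
+ * (ctob(x) == x << 12) and 32-bit unsigneds: with ids == uds ==
+ * 0x90000 clicks, ctob(ids + uds) is 0x120000000, which truncates
+ * to 0x20000000 and could wrongly pass the limit test.  Since the
+ * || chain checks ids and uds individually first, the summed test
+ * is only reached when each is below rlim_cur (< 2^31), i.e. below
+ * 2^19 clicks, so ctob(ids + uds) cannot wrap there.
+ */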
+
+/*
+ * Flush translations for `size' new PTEs starting at `pte' (user
+ * virtual page `v').  On the 386 the only primitive available is a
+ * full TLB flush via reloading cr3, so the arguments go unused.
+ */
+/*ARGSUSED*/
+newptes(pte, v, size)
+	struct pte *pte;
+	u_int v;
+	int size;
+{
+
+#ifdef lint
+	pte = pte;
+#endif
+	load_cr3(_cr3());
+}
+
+/*
+ * Change protection codes of the text segment.
+ * Have to flush the translation buffer since this
+ * affects the virtual memory mapping of the current process.
+ */
+chgprot(addr, tprot)
+ caddr_t addr;
+ long tprot;
+{
+ unsigned v;
+ int tp;
+ register struct pte *pte;
+ register struct cmap *c;
+
+ v = clbase(btop(addr));
+ if (!isatsv(u.u_procp, v)) {
+ u.u_error = EFAULT;
+ return (0);
+ }
+ tp = vtotp(u.u_procp, v);
+ pte = tptopte(u.u_procp, tp);
+ if (pte->pg_fod == 0 && pte->pg_pfnum) {
+ c = &cmap[pgtocm(pte->pg_pfnum)];
+ if (c->c_blkno && c->c_mdev != MSWAPX)
+ munhash(mount[c->c_mdev].m_dev,
+ (daddr_t)(u_long)c->c_blkno);
+ }
+ *(u_int *)pte &= ~PG_PROT;
+ *(u_int *)pte |= tprot;
+ load_cr3(_cr3());
+ return (1);
+}
+
+/*
+ * Change protection codes of the entire text segment; as with
+ * chgprot(), the translation buffer must be flushed afterwards.
+ */
+settprot(tprot)
+ long tprot;
+{
+ register u_int *ptaddr, i;
+
+ ptaddr = (u_int *)u.u_procp->p_p0br;
+ for (i = 0; i < u.u_tsize; i++) {
+ ptaddr[i] &= ~PG_PROT;
+ ptaddr[i] |= tprot;
+ }
+ load_cr3(_cr3());
+}
+
+/*
+ * Simulate effect of VAX region length registers.
+ * The one case where we must do anything is if a region has shrunk.
+ * In that case we must invalidate all the PTEs for the no longer valid VAs.
+ */
+setptlr(region, nlen)
+	int region, nlen;
+{
+ register struct pte *pte;
+ register int change;
+ int olen;
+
+ if (region == 0) {
+ olen = u.u_pcb.pcb_p0lr;
+ u.u_pcb.pcb_p0lr = nlen;
+ } else {
+ olen = P1PAGES - u.u_pcb.pcb_p1lr;
+ u.u_pcb.pcb_p1lr = nlen;
+ nlen = P1PAGES - nlen;
+ }
+/*pg("setptlr(%x,%x), was %d",region, nlen, olen);*/
+ if ((change = olen - nlen) <= 0)
+ return;
+ if (region == 0)
+ pte = u.u_pcb.pcb_p0br + u.u_pcb.pcb_p0lr;
+ else
+ pte = u.u_pcb.pcb_p1br + u.u_pcb.pcb_p1lr - change;
+/*printf("p0b %x p0l %x", u.u_pcb.pcb_p0br, u.u_pcb.pcb_p0lr);
+printf("p1b %x p1l %x pte %x", u.u_pcb.pcb_p1br, u.u_pcb.pcb_p1lr, pte);*/
+ do {
+ *(u_int *)pte++ = 0;
+ } while (--change);
+ /* short cut newptes */
+ load_cr3(_cr3());
+}
+
+/*
+ * Map `size' bytes of physical memory starting at `paddr' into
+ * kernel VA space using PTEs starting at `pte'. Read/write and
+ * cache-inhibit status are specified by `prot'.
+ */
+physaccess(pte, paddr, size, prot)
+	register struct pte *pte;
+	caddr_t paddr;
+	register int size;
+	int prot;
+{
+ register u_int page;
+
+ page = (u_int)paddr & PG_FRAME;
+ for (size = btoc(size); size; size--) {
+ *(int *)pte = PG_V | prot | page;
+ page += NBPG;
+ pte++;
+ }
+ load_cr3(_cr3());
+}
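+
+/*
+ * A hypothetical use, for illustration only: a driver mapping the
+ * 64K of display memory at physical 0xa0000 through a previously
+ * allocated run of kernel PTEs (`dpte' is an assumed name, not
+ * allocated here):
+ *
+ *	physaccess(dpte, (caddr_t)0xa0000, 64*1024, PG_KW);
+ *
+ * Note the 386 itself has no cache-inhibit PTE bit, so `prot' here
+ * is just the protection bits.
+ */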
+
+/*
+ * Move pages from one kernel virtual address to another.
+ * Both addresses are assumed to reside in the Sysmap,
+ * and size must be a multiple of CLBYTES.
+ */
+pagemove(from, to, size)
+ register caddr_t from, to;
+ int size;
+{
+ register struct pte *fpte, *tpte;
+
+ if (size % CLBYTES)
+ panic("pagemove");
+ fpte = &Sysmap[btop(from)];
+ tpte = &Sysmap[btop(to)];
+ while (size > 0) {
+ *tpte++ = *fpte;
+ *(int *)fpte++ = 0;
+ from += NBPG;
+ to += NBPG;
+ size -= NBPG;
+ }
+ load_cr3(_cr3());
+}
+
+/*
+ * The probe[rw] routines should probably be redone in assembler
+ * for efficiency.
+ */
+prober(addr)
+ register u_int addr;
+{
+ register int page;
+ register struct proc *p;
+
+ if (addr >= USRSTACK)
+ return(0);
+ p = u.u_procp;
+ page = btop(addr);
+ if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
+ return(1);
+ return(0);
+}
+
+probew(addr)
+ register u_int addr;
+{
+ register int page;
+ register struct proc *p;
+
+ if (addr >= USRSTACK)
+ return(0);
+ p = u.u_procp;
+ page = btop(addr);
+	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize)) {
+		dprintf(DPHYS, "vtopte %x %x\n",
+		    vtopte(p, page), *(int *)vtopte(p, page));
+		return ((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
+	}
+	return (0);
+}
+
+/*
+ * NB: assumes a physically contiguous kernel page table
+ * (makes life a LOT simpler).
+ */
+kernacc(addr, count, rw)
+ register u_int addr;
+ int count, rw;
+{
+ register struct pde *pde;
+ register struct pte *pte;
+ register int ix, cnt;
+ extern long Syssize;
+
+	dprintf(DPHYS, "kernacc %x count %d rw %d", addr, count, rw);
+ if (count <= 0)
+ return(0);
+ pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
+ ix = (addr & PD_MASK) >> PD_SHIFT;
+ cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
+ cnt -= ix;
+	for (pde += ix; cnt; cnt--, pde++)
+		if (pde->pd_v == 0) {
+			dprintf(DPHYS, "nope pde %x, idx %x\n", pde, ix);
+			return (0);
+		}
+	ix = btop(addr - 0xfe000000);
+	cnt = btop(addr - 0xfe000000 + count + NBPG - 1);
+	if (cnt > (int)&Syssize) {
+		dprintf(DPHYS, "nope cnt %x\n", cnt);
+		return (0);
+	}
+ cnt -= ix;
+	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
+		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/) {
+			dprintf(DPHYS, "nope pte %x %x, idx %x\n",
+			    pte, *(int *)pte, ix);
+			return (0);
+		}
+	dprintf(DPHYS, "yup\n");
+	return (1);
+}
+
+useracc(addr, count, rw)
+ register u_int addr;
+ int count, rw;
+{
+ register int (*func)();
+ register u_int addr2;
+ extern int prober(), probew();
+
+	dprintf(DPHYS, "useracc %x count %d rw %d", addr, count, rw);
+ if (count <= 0)
+ return(0);
+ addr2 = addr;
+ addr += count;
+ func = (rw == B_READ) ? prober : probew;
+	do {
+		if ((*func)(addr2) == 0) {
+			dprintf(DPHYS, "nope %x\n", addr2);
+			return (0);
+		}
+		addr2 = (addr2 + NBPG) & ~PGOFSET;
+	} while (addr2 < addr);
+	dprintf(DPHYS, "yup\n");
+	return (1);
+}
+
+/*
+ * Convert kernel VA to physical address
+ */
+kvtop(addr)
+ register u_int addr;
+{
+ register int pf;
+
+ pf = Sysmap[btop(addr-0xfe000000)].pg_pfnum;
+ if (pf == 0)
+ panic("kvtop: zero page frame");
+ return((u_int)ptob(pf) + (addr & PGOFSET));
+}
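+
+/*
+ * Worked example, assuming the kernel mapped at VA 0xfe000000 and
+ * 4K pages: for addr == 0xfe123456 the Sysmap index is
+ * btop(0x123456) == 0x123; if that PTE's pg_pfnum is 0x789, the
+ * result is ptob(0x789) + 0x456 == 0x789456.
+ */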
+
+/*
+ * Return a pointer to the page directory entry covering virtual
+ * address va in process p; the directory lives in the page just
+ * past the process page tables.
+ */
+struct pde *
+vtopde(p, va)
+ register struct proc *p;
+ register u_int va;
+{
+ register struct pde *pde;
+
+ pde = (struct pde *)((u_int)p->p_p0br + p->p_szpt * NBPG);
+ return(pde + ((va & PD_MASK) >> PD_SHIFT));
+}
+
+
+/*
+ * Return the page frame number of the page directory for process p,
+ * suitable for constructing its cr3.  The directory occupies the
+ * page just past the process page tables (Usrptmap index
+ * btokmx(p_p0br) + p_szpt).
+ */
+initcr3(p)
+ register struct proc *p;
+{
+ return((int)Usrptmap[btokmx(p->p_p0br) + p->p_szpt].pg_pfnum);
+}
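+
+/*
+ * Conceptually, switching to p would load ptob(initcr3(p)) into
+ * cr3, since cr3 holds the physical base of the page directory;
+ * the actual switch path is in assembler, so this is illustration
+ * only.
+ */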
+
+/*
+ * Initialize the page directory table to reflect the PTEs in Usrptmap.
+ * The page directory table address is given by the Usrptmap index at
+ * p_szpt.
+ * [used by vgetpt for kernel mode entries, and ptexpand for user mode
+ * entries]
+ */
+initpdt(p, usr)
+	register struct proc *p;
+	int usr;
+{
+ register int i, k, sz;
+ register struct pde *pde, *toppde;
+ extern struct pde *vtopde();
+ extern Sysbase;
+
+/*pg("initpdt");*/
+	if (!usr) {
+		/* clear entire map */
+		pde = vtopde(p, 0);
+		bzero(pde, NBPG);
+		/* map kernel */
+		pde = vtopde(p, &Sysbase);
+		for (i = 0; i < 5; i++, pde++) {
+			*(int *)pde = PG_UW | PG_V;
+			pde->pd_pfnum = btoc((unsigned)Sysmap & ~0xfe000000) + i;
+		}
+		/* map u dot */
+		pde = vtopde(p, &u);
+		*(int *)pde = PG_UW | PG_V;
+		pde->pd_pfnum = Usrptmap[btokmx(p->p_addr)].pg_pfnum;
+/*printf("%d.u. pde %x pfnum %x virt %x\n", p->p_pid, pde, pde->pd_pfnum,
+p->p_addr);*/
+		return;
+	}
+
+ /* otherwise, fill in user map */
+ k = btokmx(p->p_p0br);
+ pde = vtopde(p, 0);
+ toppde = vtopde(p, &u);
+
+ /* text and data */
+ sz = ctopt(p->p_tsize + p->p_dsize);
+	dprintf(DEXPAND, "textdata 0 to %d\n", sz - 1);
+ for (i = 0; i < sz; i++, pde++) {
+ *(int *)pde = PG_UW | PG_V;
+ pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
+/*dprintf(DEXPAND,"%d.pde %x pf %x\n", p->p_pid, pde, *(int *)pde);*/
+ }
+ /*
+ * Bogus! The kernelmap may map unused PT pages
+ * (since we don't shrink PTs) so we need to skip over
+ * those PDEs. We should really free the unused PT
+ * pages in expand().
+ */
+ sz += ctopt(p->p_ssize+UPAGES);
+ if (sz < p->p_szpt)
+ k += p->p_szpt - sz;
+	/* hole */
+	sz = NPTEPG - ctopt(p->p_ssize + UPAGES + btoc(&Sysbase));
+	dprintf(DEXPAND, "zero %d upto %d\n", i, sz - 1);
+	/*
+	 * Definite bug here... does not hit all entries, but the point
+	 * is moot due to the bzero above XXX.
+	 */
+	for ( ; i < sz; i++, pde++) {
+		*(int *)pde = 0;
+/*pg("pde %x pf %x", pde, *(int *)pde);*/
+	}
+	/* stack and u-area */
+	sz = NPTEPG - ctopt(UPAGES + btoc(&Sysbase));
+	dprintf(DEXPAND, "stack %d upto %d\n", i, sz - 1);
+	for ( ; i < sz; i++, pde++) {
+		*(int *)pde = PG_UW | PG_V;
+		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
+/*pg("pde %x pf %x", pde, *(int *)pde);*/
+	}
+}
+
+#ifdef notdef
+/*
+ * Allocate wired-down, non-paged, cache-inhibited pages in kernel
+ * virtual memory and clear them
+ */
+caddr_t
+cimemall(n)
+ int n;
+{
+ register int npg, a;
+ register struct pte *pte;
+ extern struct map *kernelmap;
+
+ npg = clrnd(btoc(n));
+ a = rmalloc(kernelmap, (long)npg);
+ if (a == 0)
+ return ((caddr_t)0);
+ pte = &Usrptmap[a];
+ (void) vmemall(pte, npg, &proc[0], CSYS);
+ while (--npg >= 0) {
+ *(int *)pte |= (PG_V|PG_KW|PG_CI);
+ clearseg((unsigned)pte->pg_pfnum);
+ pte++;
+ }
+ TBIAS();
+ return ((caddr_t)kmxtob(a));
+}
+#endif
+
+extern char usrio[];
+extern struct pte Usriomap[];
+struct map *useriomap;
+int usriowanted;
+
+/*
+ * Map an IO request into kernel virtual address space.  Requests fall into
+ * one of five categories:
+ *
+ * B_PHYS|B_UAREA: User u-area swap.
+ * Address is relative to start of u-area (p_addr).
+ * B_PHYS|B_PAGET: User page table swap.
+ * Address is a kernel VA in usrpt (Usrptmap).
+ * B_PHYS|B_DIRTY: Dirty page push.
+ * Address is a VA in proc2's address space.
+ * B_PHYS|B_PGIN: Kernel pagein of user pages.
+ * Address is VA in user's address space.
+ * B_PHYS: User "raw" IO request.
+ * Address is VA in user's address space.
+ *
+ * All requests are (re)mapped into kernel VA space via the useriomap
+ * (a name with only slightly more meaning than "kernelmap").
+ */
+vmapbuf(bp)
+ register struct buf *bp;
+{
+ register int npf, a;
+ register caddr_t addr;
+ register struct pte *pte, *iopte;
+ register long flags = bp->b_flags;
+ struct proc *p;
+ int off, s;
+
+ if ((flags & B_PHYS) == 0)
+ panic("vmapbuf");
+ /*
+ * Find PTEs for the area to be mapped
+ */
+ p = flags&B_DIRTY ? &proc[2] : bp->b_proc;
+ addr = bp->b_un.b_addr;
+ if (flags & B_UAREA)
+ pte = &p->p_addr[btop(addr)];
+ else if (flags & B_PAGET)
+ pte = &Usrptmap[btokmx((struct pte *)addr)];
+ else
+ pte = vtopte(p, btop(addr));
+ /*
+ * Allocate some kernel PTEs and load them
+ */
+ off = (int)addr & PGOFSET;
+ npf = btoc(bp->b_bcount + off);
+ s = splbio();
+ while ((a = rmalloc(useriomap, npf)) == 0) {
+ usriowanted = 1;
+ sleep((caddr_t)useriomap, PSWP);
+ }
+ splx(s);
+ iopte = &Usriomap[a];
+ addr = bp->b_un.b_addr = (caddr_t)(usrio + (a << PGSHIFT)) + off;
+ a = btop(addr);
+ while (npf--) {
+ mapin(iopte, a, pte->pg_pfnum, PG_V);
+ iopte++, pte++;
+ a++;
+ }
+}
+
+/*
+ * Free the io map PTEs associated with this IO operation.
+ * We also invalidate the TLB entries.
+ */
+vunmapbuf(bp)
+ register struct buf *bp;
+{
+ register int a, npf;
+ register caddr_t addr = bp->b_un.b_addr;
+ register struct pte *pte;
+ int s;
+
+ if ((bp->b_flags & B_PHYS) == 0)
+ panic("vunmapbuf");
+ a = (int)(addr - usrio) >> PGSHIFT;
+ npf = btoc(bp->b_bcount + ((int)addr & PGOFSET));
+ s = splbio();
+ rmfree(useriomap, npf, a);
+ if (usriowanted) {
+ usriowanted = 0;
+ wakeup((caddr_t)useriomap);
+ }
+ splx(s);
+ pte = &Usriomap[a];
+ while (npf--) {
+ *(int *)pte = 0;
+ addr += NBPG;
+ pte++;
+ }
+ /*
+ * If we just completed a dirty page push, we must reconstruct
+ * the original b_addr since cleanup() needs it.
+ */
+ if (bp->b_flags & B_DIRTY) {
+ a = ((bp - swbuf) * CLSIZE) * KLMAX;
+ bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], a));
+ }
+ load_cr3(_cr3());
+}
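+
+/*
+ * One plausible pairing, sketched for illustration (the xx* names
+ * are hypothetical): a driver's raw-IO path remaps the locked user
+ * pages before starting the transfer and releases the mapping from
+ * its completion routine.
+ */
+#ifdef notdef
+xxstrategy(bp)
+	register struct buf *bp;
+{
+
+	vmapbuf(bp);		/* transfer uses bp->b_un.b_addr in usrio */
+	xxstart(bp);
+}
+
+xxdone(bp)
+	register struct buf *bp;
+{
+
+	vunmapbuf(bp);		/* free usrio PTEs and flush the TLB */
+	iodone(bp);
+}
+#endif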