* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* @(#)pmap.c 8.1 (Berkeley) 6/11/93
* from: $Header: pmap.c,v 1.39 93/04/20 11:17:12 torek Exp $
* SPARC physical map management code.
* Does not function on multiprocessors (yet).
#include <machine/autoconf.h>
#include <machine/bsd_openprom.h>
#include <machine/ctlreg.h>
#include <sparc/sparc/asm.h>
#include <sparc/sparc/cache.h>
#define PTE_BITS "\20\40V\37W\36S\35NC\33IO\32U\31M"
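/*
 * Illustrative note (not part of the original source): PTE_BITS is a
 * kernel printf %b format string.  The leading \20 (octal 020 = 16)
 * selects hex output and each following octal byte names a bit position
 * (1-origin), so a call such as
 *	printf("pte=%b\n", pte, PTE_BITS);
 * prints the raw PTE value followed by the names of whichever bits are
 * set, e.g. "<V,W,S>" for a valid, writable, supervisor-only page.
 */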
extern struct promvec *promvec;
* The SPARCstation offers us the following challenges:
* 1. A virtual address cache. This is, strictly speaking, not
* part of the architecture, but the code below assumes one.
* This is a write-through cache on the 4c and a write-back cache
* 2. An MMU that acts like a cache. There is not enough space
* in the MMU to map everything all the time. Instead, we need
* to load the MMU with the `working set' of translations for each
* process.
* 3. Segmented virtual and physical spaces. The upper 12 bits of
* a virtual address (the virtual segment) index a segment table,
* giving a physical segment. The physical segment selects a
* `Page Map Entry Group' (PMEG) and the virtual page number---the
* next 5 or 6 bits of the virtual address---select the particular
* `Page Map Entry' for the page. We call the latter a PTE and
* call each Page Map Entry Group a pmeg (for want of a better name).
* Since there are no valid bits in the segment table, the only way
* to have an invalid segment is to make one full pmeg of invalid PTEs.
* We use the last one (since the ROM does as well).
* 4. Discontiguous physical pages. The Mach VM expects physical pages
* to be in one sequential lump.
* 5. The MMU is always on: it is not possible to disable it. This is
* mainly a startup hassle.
struct pmap_stats {
	int ps_unlink_pvfirst;		/* # of pv_unlinks on head */
	int ps_unlink_pvsearch;		/* # of pv_unlink searches */
	int ps_changeprots;		/* # of calls to changeprot */
	int ps_useless_changeprots;	/* # of changeprots for wiring */
	int ps_enter_firstpv;		/* pv heads entered */
	int ps_enter_secondpv;		/* pv nonheads entered */
	int ps_useless_changewire;	/* useless wiring changes */
	int ps_npg_prot_all;		/* # of active pages protected */
	int ps_npg_prot_actual;		/* # pages actually affected */
} pmap_stats;
#define PDB_CREATE 0x0001
#define PDB_DESTROY 0x0002
#define PDB_REMOVE 0x0004
#define PDB_CHANGEPROT 0x0008
#define PDB_MMU_ALLOC 0x0100
#define PDB_MMU_STEAL 0x0200
#define PDB_CTX_ALLOC 0x0400
#define PDB_CTX_STEAL 0x0800
#define splpmap() splbio()
* First and last managed physical addresses.
vm_offset_t vm_first_phys, vm_last_phys;
#define managed(pa) ((pa) >= vm_first_phys && (pa) < vm_last_phys)
vm_offset_t vm_first_phys, vm_num_phys;
#define managed(pa) ((unsigned)((pa) - vm_first_phys) < vm_num_phys)
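/*
 * Illustrative note (not in the original): the unsigned subtraction in
 * the second form performs both range checks with one comparison.  If
 * pa < vm_first_phys the difference wraps to a huge unsigned value and
 * the test fails; e.g. with vm_first_phys = 0x4000 and
 * vm_num_phys = 0x1000, managed(0x4800) is true while managed(0x3000)
 * and managed(0x6000) are both false.
 */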
* For each managed physical page, there is a list of all currently
* valid virtual mappings of that page. Since there is usually one
* (or zero) mapping per page, the table begins with an initial entry,
* rather than a pointer; this head entry is empty iff its pv_pmap
* field is NULL.
* Note that these are per machine independent page (so there may be
* only one for every two hardware pages, e.g.). Since the virtual
* address is aligned on a page boundary, the low order bits are free
* for storing flags. Only the head of each list has flags.
* THIS SHOULD BE PART OF THE CORE MAP
struct pvlist {
	struct pvlist *pv_next;		/* next pvlist, if any */
	struct pmap *pv_pmap;		/* pmap of this va */
	int pv_va;			/* virtual address */
	int pv_flags;			/* flags (below) */
};
* Flags in pv_flags. Note that PV_MOD must be 1 and PV_REF must be 2
* since they must line up with the bits in the hardware PTEs (see pte.h).
#define PV_MOD 1 /* page modified */
#define PV_REF 2 /* page referenced */
#define PV_NC 4 /* page cannot be cached */
/*efine PV_ALLF 7 ** all of the above */
struct pvlist *pv_table;	/* array of entries, one per physical page */

#define pvhead(pa) (&pv_table[atop((pa) - vm_first_phys)])
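/*
 * Illustrative sketch (not part of the original): walking the mappings of
 * a managed physical page.  This assumes the pvlist layout above;
 * pvhead() is only meaningful when managed(pa) is true, and an empty
 * list is marked by a NULL pv_pmap in the head entry.
 */
#ifdef notdef
void
pv_print(pa)
	register vm_offset_t pa;
{
	register struct pvlist *pv;

	if (!managed(pa))
		return;
	pv = pvhead(pa);
	if (pv->pv_pmap == NULL)
		return;			/* no mappings at all */
	for (; pv != NULL; pv = pv->pv_next)
		printf("pa %x -> va %x in pmap %x\n", pa, pv->pv_va, pv->pv_pmap);
}
#endif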
* Each virtual segment within each pmap is either valid or invalid.
* It is valid if pm_npte[VA_VSEG(va)] is not 0. This does not mean
* it is in the MMU, however; that is true iff pm_segmap[VA_VSEG(va)]
* does not point to the invalid PMEG.
* If a virtual segment is valid and loaded, the correct PTEs appear
* in the MMU only. If it is valid and unloaded, the correct PTEs appear
* in the pm_pte[VA_VSEG(va)] only. However, some effort is made to keep
* the software copies consistent enough with the MMU so that libkvm can
* do user address translations. In particular, pv_changepte() and
* pmap_enu() maintain consistency, while less critical changes are
* not maintained. pm_pte[VA_VSEG(va)] always points to space for those
* PTEs, unless this is the kernel pmap, in which case pm_pte[x] is not
* used.
* Each PMEG in the MMU is either free or contains PTEs corresponding to
* some pmap and virtual segment. If it contains some PTEs, it also contains
* reference and modify bits that belong in the pv_table. If we need
* to steal a PMEG from some process (if we need one and none are free)
* we must copy the ref and mod bits, and update pm_segmap in the other
* pmap to show that its virtual segment is no longer in the MMU.
* There are 128 PMEGs in a small Sun-4, of which only a few dozen are
* tied down permanently, leaving `about' 100 to be spread among
* running processes. These are managed as an LRU cache. Before
* calling the VM paging code for a user page fault, the fault handler
* calls mmu_load(pmap, va) to try to get a set of PTEs put into the
* MMU. mmu_load will check the validity of the segment and tell whether
* Since I hate the name PMEG I call this data structure an `mmu entry'.
* Each mmuentry is on exactly one of three `usage' lists: free, LRU,
* or locked. The LRU list is for user processes; the locked list is
* for kernel entries; both are doubly linked queues headed by `mmuhd's.
* The free list is a simple list, headed by a free list pointer.
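/*
 * Illustrative note (not in the original): the LRU and locked lists are
 * circular doubly linked queues.  A struct mmuhd is laid out like the
 * first two members of struct mmuentry (hence the MUST BE FIRST comments
 * below), so the insque()/remque() style queue primitives work on both;
 * for example, appending an entry to the LRU tail is
 *	insque(me, me_lru.mh_prev);
 * and an empty queue is one whose head points back to itself.
 */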
struct mmuhd {
	struct mmuentry *mh_next;
	struct mmuentry *mh_prev;
};
struct mmuentry {
	struct mmuentry *me_next;	/* queue (MUST BE FIRST) or next free */
	struct mmuentry *me_prev;	/* queue (MUST BE FIRST) */
	struct pmap *me_pmap;		/* pmap, if in use */
	struct mmuentry *me_pmforw;	/* pmap pmeg chain */
	struct mmuentry **me_pmback;	/* pmap pmeg chain */
	u_short me_vseg;		/* virtual segment number in pmap */
	pmeg_t me_pmeg;			/* hardware PMEG number */
};
struct mmuentry *mmuentry;	/* allocated in pmap_bootstrap */
struct mmuentry *me_freelist;	/* free list (not a queue) */
struct mmuhd me_lru = {		/* LRU (user) entries */
	(struct mmuentry *)&me_lru, (struct mmuentry *)&me_lru
};
struct mmuhd me_locked = {	/* locked (kernel) entries */
	(struct mmuentry *)&me_locked, (struct mmuentry *)&me_locked
};
int seginval;			/* the invalid segment number */
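/*
 * Illustrative note (not in the original): since the hardware segment map
 * has no valid bit, "no translation" is represented by pointing a virtual
 * segment at the all-invalid PMEG, e.g.
 *	setsegmap(VSTOVA(vseg), seginval);
 * after which any access within that virtual segment faults.
 */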
* A context is simply a small number that dictates which set of 4096
* segment map entries the MMU uses. The Sun 4c has eight such sets.
* These are allotted in an `almost MRU' fashion.
* Each context is either free or attached to a pmap.
* Since the virtual address cache is tagged by context, when we steal
* a context we have to flush (that part of) the cache.
union ctxinfo {
	union ctxinfo *c_nextfree;	/* free list (if free) */
	struct pmap *c_pmap;		/* pmap (if busy) */
};
union ctxinfo *ctxinfo;		/* allocated in pmap_bootstrap */
union ctxinfo *ctx_freelist;	/* context free list */
int ctx_kick;			/* allocation rover when none free */
int ctx_kickdir;		/* ctx_kick roves both directions */

/* XXX need per-cpu vpage[]s (and vmempage, unless we lock in /dev/mem) */
caddr_t vpage[2];		/* two reserved MD virtual pages */
caddr_t vmempage;		/* one reserved MI vpage for /dev/mem */
caddr_t vdumppages;		/* 32KB worth of reserved dump pages */

struct kpmap kernel_pmap_store;	/* the kernel's pmap */
* We need to know real physical memory ranges (for /dev/mem).
#define MA_SIZE 32 /* size of memory descriptor arrays */
struct memarr pmemarr[MA_SIZE];	/* physical memory regions */
int npmemarr;			/* number of entries in pmemarr */

* The following four global variables are set in pmap_bootstrap
* for the vm code to find. This is Wrong.
vm_offset_t avail_start;	/* first free physical page number */
vm_offset_t avail_end;		/* last free physical page number */
vm_offset_t virtual_avail;	/* first free virtual page number */
vm_offset_t virtual_end;	/* last free virtual page number */
* pseudo-functions for mnemonic value
* NB: setsegmap should be stba for 4c, but stha works and makes the
* code right for the Sun-4 as well.
#define getcontext() lduba(AC_CONTEXT, ASI_CONTROL)
#define setcontext(c) stba(AC_CONTEXT, ASI_CONTROL, c)
#define getsegmap(va) lduha(va, ASI_SEGMAP)
#define setsegmap(va, pmeg) stha(va, ASI_SEGMAP, pmeg)
#define getsegmap(va) lduba(va, ASI_SEGMAP)
#define setsegmap(va, pmeg) stba(va, ASI_SEGMAP, pmeg)
#define getpte(va) lda(va, ASI_PTE)
#define setpte(va, pte) sta(va, ASI_PTE, pte)
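/*
 * Illustrative idiom (not in the original): because getpte()/setpte() and
 * getsegmap()/setsegmap() operate through the MMU of the *current*
 * context, routines below that touch another pmap's translations bracket
 * the accesses with
 *	ctx = getcontext();
 *	setcontext(pm->pm_ctxnum);
 *	... getpte()/setpte() on that pmap's virtual addresses ...
 *	setcontext(ctx);
 * writing the user register windows first when the context actually
 * changes (see CHANGE_CONTEXTS below).
 */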
/*----------------------------------------------------------------*/
* Translations from dense (contiguous) pseudo physical addresses
* (fed to the VM code, to keep it happy) to sparse (real, hardware)
* physical addresses. We call the former `software' page frame
* numbers and the latter `hardware' page frame numbers. The
* translation is done on a `per bank' basis.
* The HWTOSW and SWTOHW macros handle the actual translation.
* They are defined as no-ops on Sun-4s.
* SHOULD DO atop AND ptoa DIRECTLY IN THESE MACROS SINCE ALL CALLERS
* ALWAYS NEED THAT ANYWAY ... CAN JUST PRECOOK THE TABLES (TODO)
* Since we cannot use the memory allocated to the ROM monitor, and
* this happens to be just under 64K, I have chosen a bank size of
* 64K. This is necessary since all banks must be completely full.
* I have also chosen a physical memory limit of 128 MB. The 4c is
* architecturally limited to 256 MB, but 128 MB is more than will
* fit on present hardware.
* XXX FIX THIS: just make all of each bank available and then
* take out the pages reserved to the monitor!!
#define MAXMEM (128 * 1024 * 1024) /* no more than 128 MB phys mem */
#define NPGBANK 16 /* 2^4 pages per bank (64K / bank) */
#define BSHIFT 4 /* log2(NPGBANK) */
#define BOFFSET (NPGBANK - 1)
#define BTSIZE (MAXMEM / NBPG / NPGBANK)
int pmap_dtos[BTSIZE];		/* dense to sparse */
int pmap_stod[BTSIZE];		/* sparse to dense */
#define HWTOSW(pg) (pmap_stod[(pg) >> BSHIFT] | ((pg) & BOFFSET))
#define SWTOHW(pg) (pmap_dtos[(pg) >> BSHIFT] | ((pg) & BOFFSET))
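/*
 * Worked example (illustrative, not from the original): with NBPG = 4096,
 * NPGBANK = 16 pages per 64KB bank, and MAXMEM = 128MB, BTSIZE is
 * 128MB / 4KB / 16 = 2048 table entries.  Suppose init_translations()
 * paired hardware bank 0x123 with software (dense) bank 0x045, i.e.
 * pmap_stod[0x123] == 0x045 << BSHIFT == 0x450.  Then
 *	HWTOSW(0x1234) == pmap_stod[0x123] | (0x1234 & BOFFSET)
 *	               == 0x450 | 0x4 == 0x454
 * and SWTOHW(0x454) applies the inverse table pmap_dtos to recover
 * 0x1234.  The page offset within the bank passes through unchanged.
 */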
struct memarr pmap_ama[MA_SIZE];
* init_translations sets up pmap_dtos[] and pmap_stod[], and
* returns the number of usable physical pages.
int
init_translations()
{
	register struct memarr *mp;
	register u_int vbank = 0, pbank, v, a;
	register u_int pages = 0, lost = 0;
	register int nmem, n;
	struct memarr ama[MA_SIZE];	/* available memory array */

	nmem = makememarr(ama, MA_SIZE, MEMARR_AVAILPHYS);
	for (mp = ama; --nmem >= 0; mp++) {
		if ((n = a & BOFFSET) != 0) {
			/* round up to next bank */
			if (v < n) {		/* not a whole bank: skip it */
				lost += n;	/* lose n pages from front */
		n = v >> BSHIFT;		/* calculate number of banks */
		pbank = a >> BSHIFT;		/* and the bank itself */
		pages += n;			/* off by a factor of 2^BSHIFT */
		lost += v - (n << BSHIFT);
		pmap_dtos[vbank] = pbank << BSHIFT;
		pmap_stod[pbank] = vbank << BSHIFT;
	printf("note: lost %d pages in translation\n", lost);
* Pages are physically contiguous, and hardware PFN == software PFN.
* XXX assumes PAGE_SIZE == NBPG (???)
/* update pv_flags given a valid pte */
#define MR(pte) (((pte) >> PG_M_SHIFT) & (PV_MOD | PV_REF))
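/*
 * Illustrative note (not in the original): MR() relies on the layout
 * promised above -- PV_MOD == 1 and PV_REF == 2 line up with the hardware
 * modified/referenced PTE bits once the PTE is shifted right by
 * PG_M_SHIFT, so a PTE with the hardware M bit set yields PV_MOD and one
 * with the U (used/referenced) bit set yields PV_REF, as in
 *	pvhead(pa)->pv_flags |= MR(tpte);
 */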
/*----------------------------------------------------------------*/
* Agree with the monitor ROM as to how many MMU entries are
* to be reserved, and map all of its segments into all contexts.
* Unfortunately, while the Version 0 PROM had a nice linked list of
* taken virtual memory, the Version 2 PROM provides instead a convoluted
* description of *free* virtual memory. Rather than invert this, we
* resort to two magic constants from the PROM vector description file.
	va = OPENPROM_STARTVADDR;
	for (i = ncontext; --i > 0;)
		(*promvec->pv_setctxt)(i, (caddr_t)va, mmuseg);
	if (mmuseg == seginval) {
		/* PROM maps its memory user-accessible: fix it. */
		for (i = NPTESG; --i >= 0; va += NBPG)
			setpte(va, getpte(va) | PG_S);
* TODO: agree with the ROM on physical pages by taking them away
* from the page list, rather than having a dinky BTSIZE above.
/*----------------------------------------------------------------*/
* Change contexts. We need the old context number as well as the new
* one. If the context is changing, we must write all user windows
* first, lest an interrupt cause them to be written to the (other)
* user whose context we set here.
#define CHANGE_CONTEXTS(old, new) \
	if ((old) != (new)) { \
		write_user_windows(); \
		setcontext(new); \
	}
* Allocate an MMU entry (i.e., a PMEG).
* If necessary, steal one from someone else.
* Put it on the tail of the given queue
* (which is either the LRU list or the locked list).
* The locked list is not actually ordered, but this is easiest.
* Also put it on the given (new) pmap's chain,
* enter its pmeg number into that pmap's segmap,
* and store the pmeg's new virtual segment number (me->me_vseg).
* This routine is large and complicated, but it must be fast
* since it implements the dynamic allocation of MMU entries.
struct mmuentry *
me_alloc(mh, newpm, newvseg)
	register struct mmuhd *mh;
	register struct pmap *newpm;
	register int newvseg;
{
	register struct mmuentry *me;
	register struct pmap *pm;
	register int i, va, pa, *pte, tpte;
/* try free list first */
	if ((me = me_freelist) != NULL) {
		me_freelist = me->me_next;
panic("me_alloc: freelist entry has pmap");
		if (pmapdebug & PDB_MMU_ALLOC)
			printf("me_alloc: got pmeg %x\n", me->me_pmeg);
		insque(me, mh->mh_prev);	/* onto end of queue */
		/* onto pmap chain; pmap is already locked, if needed */
		me->me_pmback = newpm->pm_mmuback;
		newpm->pm_mmuback = &me->me_pmforw;

		/* into pmap segment table, with backpointers */
		newpm->pm_segmap[newvseg] = me->me_pmeg;
/* no luck, take head of LRU list */
	if ((me = me_lru.mh_next) == (struct mmuentry *)&me_lru)
		panic("me_alloc: all pmegs gone");
		panic("me_alloc: LRU entry has no pmap");
		panic("me_alloc: stealing from kernel");
	pte = pm->pm_pte[me->me_vseg];
		panic("me_alloc: LRU entry's pmap has no ptes");
	if (pmapdebug & (PDB_MMU_ALLOC | PDB_MMU_STEAL))
		printf("me_alloc: stealing pmeg %x from pmap %x\n",
* Remove from LRU list, and insert at end of new list
* (probably the LRU list again, but so what?).
* The PMEG must be mapped into some context so that we can
* read its PTEs. Use its current context if it has one;
* if not, and since context 0 is reserved for the kernel,
* the simplest method is to switch to 0 and map the PMEG
* to virtual address 0---which, being a user space address,
* is by definition not in use.
* XXX for ncpus>1 must use per-cpu VA?
* XXX do not have to flush cache immediately
	CHANGE_CONTEXTS(ctx, pm->pm_ctxnum);
	cache_flush_segment(me->me_vseg);
	va = VSTOVA(me->me_vseg);
	setsegmap(0, me->me_pmeg);
* No cache flush needed: it happened earlier when
* the old context was taken.
* Record reference and modify bits for each page,
* and copy PTEs into kernel memory so that they can
		pa = ptoa(HWTOSW(tpte & PG_PFNUM));
		pvhead(pa)->pv_flags |= MR(tpte);
		*pte++ = tpte & ~(PG_U|PG_M);
/* update segment tables */
	simple_lock(&pm->pm_lock);	/* what if other cpu takes mmuentry ?? */
	setsegmap(VSTOVA(me->me_vseg), seginval);
	pm->pm_segmap[me->me_vseg] = seginval;
	if ((*me->me_pmback = me->me_pmforw) != NULL) {
		me->me_pmforw->me_pmback = me->me_pmback;
		pm->pm_mmuback = me->me_pmback;
	simple_unlock(&pm->pm_lock);
	setcontext(ctx);		/* done with old context */
/* onto new pmap chain; new pmap is already locked, if needed */
/* me->me_pmforw = NULL; */ /* done earlier */
	me->me_pmback = newpm->pm_mmuback;
	newpm->pm_mmuback = &me->me_pmforw;

	/* into new segment table, with backpointers */
	newpm->pm_segmap[newvseg] = me->me_pmeg;
* Assumes the corresponding pmap is already locked.
* Does NOT flush cache, but does record ref and mod bits.
* The rest of each PTE is discarded.
* CALLER MUST SET CONTEXT to pm->pm_ctxnum (if pmap has
* a context) or to 0 (if not). Caller must also update
* pm->pm_segmap and (possibly) the hardware.
	register struct pmap *pm;
	register struct mmuentry *me = &mmuentry[pmeg];
	register int i, va, pa, tpte;
	if (pmapdebug & PDB_MMU_ALLOC)
printf("me_free: freeing pmeg %x from pmap %x\n",
panic("me_free: wrong mmuentry");
panic("me_free: pm != me_pmap");
/* just like me_alloc, but no cache flush, and context already set */
	va = VSTOVA(me->me_vseg);
	setsegmap(0, me->me_pmeg);
	pa = ptoa(HWTOSW(tpte & PG_PFNUM));
	pvhead(pa)->pv_flags |= MR(tpte);
/* take mmu entry off pmap chain */
	*me->me_pmback = me->me_pmforw;
	if ((*me->me_pmback = me->me_pmforw) != NULL)
		me->me_pmforw->me_pmback = me->me_pmback;
		pm->pm_mmuback = me->me_pmback;
/* ... and remove from segment map */
	pm->pm_segmap[me->me_vseg] = seginval;
/* off LRU or lock chain */
/* no associated pmap; on free list */
	me->me_next = me_freelist;
* `Page in' (load or inspect) an MMU entry; called on page faults.
* Returns 1 if we reloaded the segment, -1 if the segment was
* already loaded and the page was marked valid (in which case the
* fault must be a bus error or something), or 0 (segment loaded but
* PTE not valid, or segment not loaded at all).
	register struct pmap *pm;
	register struct mmuentry *me;
	register int vseg = VA_VSEG(va), pmeg, i, s;
/* return 0 if we have no PTEs to load */
	if ((pte = pm->pm_pte[vseg]) == NULL)
/* return -1 if the fault is `hard', 0 if not */
	if (pm->pm_segmap[vseg] != seginval)
		return (bits && (getpte(va) & bits) == bits ? -1 : 0);
/* reload segment: write PTEs into a new LRU entry */
	va = VA_ROUNDDOWNTOSEG(va);
	s = splpmap();			/* paranoid */
	pmeg = me_alloc(&me_lru, pm, vseg)->me_pmeg;
* Allocate a context. If necessary, steal one from someone else.
* Changes hardware context number and loads segment map.
* This routine is only ever called from locore.s just after it has
* saved away the previous process, so there are no active user windows.
	register struct pmap *pm;
	register union ctxinfo *c;
	register int cnum, i, va;
panic("ctx_alloc pm_ctx");
	if (pmapdebug & PDB_CTX_ALLOC)
		printf("ctx_alloc(%x)\n", pm);
	if ((c = ctx_freelist) != NULL) {
		ctx_freelist = c->c_nextfree;
		if ((ctx_kick += ctx_kickdir) >= ncontext) {
		} else if (ctx_kick < 1) {
		c = &ctxinfo[cnum = ctx_kick];
panic("ctx_alloc cu_pmap");
		if (pmapdebug & (PDB_CTX_ALLOC | PDB_CTX_STEAL))
			printf("ctx_alloc: steal context %x from %x\n",
		c->c_pmap->pm_ctx = NULL;
* XXX loop below makes 3584 iterations ... could reduce
* by remembering valid ranges per context: two ranges
* should suffice (for text/data/bss and for stack).
		for (va = 0, i = NUSEG; --i >= 0; va += NBPSG)
* Give away a context. Flushes cache and sets current context to 0.
	register union ctxinfo *c;

	if ((c = pm->pm_ctx) == NULL)
	if (vactype != VAC_NONE) {
		CHANGE_CONTEXTS(oldc, newc);
	CHANGE_CONTEXTS(oldc, 0);
	c->c_nextfree = ctx_freelist;
/*----------------------------------------------------------------*/
* Walk the given pv list, and for each PTE, set or clear some bits
* As a special case, this never clears PG_W on `pager' pages.
* These, being kernel addresses, are always in hardware and have
* This routine flushes the cache for any page whose PTE changes,
* as long as the process has a context; this is overly conservative.
* It also copies ref and mod bits to the pvlist, on the theory that
* this might save work later. (XXX should test this theory)
pv_changepte(pv0, bis, bic)
	register struct pvlist *pv0;
	register struct pvlist *pv;
	register struct pmap *pm;
	register int va, vseg, pmeg, i, flags;
write_user_windows(); /* paranoid? */
s
= splpmap(); /* paranoid? */
	if (pv0->pv_pmap == NULL) {
	for (pv = pv0; pv != NULL; pv = pv->pv_next) {
		if (pm == NULL)
			panic("pv_changepte 1");
if ((pmeg
= pm
->pm_segmap
[vseg
]) != seginval
) {
/* in hardware: fix hardware copy */
extern vm_offset_t pager_sva
, pager_eva
;
va
>= pager_sva
&& va
< pager_eva
)
setcontext(pm
->pm_ctxnum
);
/* XXX should flush only when necessary */
flags
|= (tpte
>> PG_M_SHIFT
) &
tpte
= (tpte
| bis
) & ~bic
;
if (pte
!= NULL
) /* update software copy */
/* not in hardware: just fix software copy */
*pte
= (*pte
| bis
) & ~bic
;
* Sync ref and mod bits in pvlist (turns off same in hardware PTEs).
* This is just like pv_changepte, but we never add or remove bits,
* hence never need to adjust software copies.
	register struct pvlist *pv0;
	register struct pvlist *pv;
	register struct pmap *pm;
	register int tpte, va, vseg, pmeg, i, flags;
write_user_windows(); /* paranoid? */
s
= splpmap(); /* paranoid? */
if (pv0
->pv_pmap
== NULL
) { /* paranoid */
for (pv
= pv0
; pv
!= NULL
; pv
= pv
->pv_next
) {
if ((pmeg
= pm
->pm_segmap
[vseg
]) == seginval
)
setcontext(pm
->pm_ctxnum
);
/* XXX should flush only when necessary */
if (tpte
& (PG_M
|PG_U
) && tpte
& PG_V
) {
flags
|= (tpte
>> PG_M_SHIFT
) &
* pv_unlink is a helper function for pmap_remove.
* It takes a pointer to the pv_table head for some physical address
* and removes the appropriate (pmap, va) entry.
* Once the entry is removed, if the pv_table head has the cache
* inhibit bit set, see if we can turn that off; if so, walk the
* pvlist and turn off PG_NC in each PTE. (The pvlist is by
* definition nonempty, since it must have at least two elements
* in it to have PV_NC set, and we only remove one here.)
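/*
 * Illustrative note (not in the original): BADALIAS(va1, va2), presumably
 * defined alongside the cache constants in cache.h, is true when two
 * virtual addresses that map the same physical page would be indexed to
 * different lines of the virtually addressed cache (i.e. they differ in
 * the address bits the cache uses as an index).  The hardware cannot keep
 * such aliases coherent, so the whole pvlist is made uncacheable (PG_NC).
 */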
	register struct pvlist *pv;
	register struct pmap *pm;
	register struct pvlist *npv;
* First entry is special (sigh).
if (pv
->pv_pmap
== pm
&& pv
->pv_va
== va
) {
pmap_stats
.ps_unlink_pvfirst
++;
pv
->pv_next
= npv
->pv_next
;
pv
->pv_pmap
= npv
->pv_pmap
;
free((caddr_t
)npv
, M_VMPVENT
);
register struct pvlist
*prev
;
for (prev
= pv
;; prev
= npv
, npv
= npv
->pv_next
) {
pmap_stats
.ps_unlink_pvsearch
++;
if (npv
->pv_pmap
== pm
&& npv
->pv_va
== va
)
prev
->pv_next
= npv
->pv_next
;
free((caddr_t
)npv
, M_VMPVENT
);
if (pv
->pv_flags
& PV_NC
) {
* Not cached: check to see if we can fix that now.
for (npv
= pv
->pv_next
; npv
!= NULL
; npv
= npv
->pv_next
)
if (BADALIAS(va
, npv
->pv_va
))
pv_changepte(pv
, 0, PG_NC
);
* pv_link is the inverse of pv_unlink, and is used in pmap_enter.
* It returns PG_NC if the (new) pvlist says that the address cannot
* be cached.
	register struct pvlist *pv;
	register struct pmap *pm;
	register struct pvlist *npv;
if (pv
->pv_pmap
== NULL
) {
/* no pvlist entries yet */
pmap_stats
.ps_enter_firstpv
++;
* Before entering the new mapping, see if
* it will cause old mappings to become aliased
* and thus need to be `discached'.
pmap_stats
.ps_enter_secondpv
++;
if (pv
->pv_flags
& PV_NC
) {
/* already uncached, just stay that way */
/* MAY NEED TO DISCACHE ANYWAY IF va IS IN DVMA SPACE? */
for (npv
= pv
; npv
!= NULL
; npv
= npv
->pv_next
) {
if (BADALIAS(va
, npv
->pv_va
)) {
pv_changepte(pv
, ret
= PG_NC
, 0);
npv
= (struct pvlist
*)malloc(sizeof *npv
, M_VMPVENT
, M_WAITOK
);
npv
->pv_next
= pv
->pv_next
;
* Walk the given list and flush the cache for each (MI) page that is
* potentially in the cache.
	register struct pvlist *pv;
	register struct pmap *pm;
write_user_windows(); /* paranoia? */
	s = splpmap();			/* XXX extreme paranoia */
	if ((pm = pv->pv_pmap) != NULL) {
		setcontext(pm->pm_ctxnum);
		cache_flush_page(pv->pv_va);
/*----------------------------------------------------------------*/
* Bootstrap the system enough to run with VM enabled.
* nmmu is the number of mmu entries (``PMEGs'');
* nctx is the number of contexts.
pmap_bootstrap(nmmu, nctx)
	register union ctxinfo *ci;
	register struct mmuentry *me;
	register int i, j, n, z, vs;
	register void (*rom_setmap)(int ctx, caddr_t va, int pmeg);
	extern caddr_t reserve_dumppages(caddr_t);
* Last segment is the `invalid' one (one PMEG of pte's with !pg_v).
* It will never be used for anything else.
* Preserve the monitor ROM's reserved VM region, so that
* we can use L1-A or the monitor's debugger. As a side
* effect we map the ROM's reserved VM into all contexts
* (otherwise L1-A crashes the machine!).
	nmmu = mmu_reservemon(nmmu);

	/*
	 * Allocate and clear mmu entry and context structures.
	 */
	mmuentry = me = (struct mmuentry *)p;
	ctxinfo = ci = (union ctxinfo *)p;
* Set up the `constants' for the call to vm_init()
* in main(). All pages beginning at p (rounded up to
* the next whole page) and continuing through the number
* of available pages are free, but they start at a higher
* virtual address. This gives us two mappable MD pages
* for pmap_zero_page and pmap_copy_page, and one MI page
* for /dev/mem, all with no associated physical memory.
	p = (caddr_t)(((u_int)p + NBPG - 1) & ~PGOFSET);
	avail_start = (int)p - KERNBASE;
	avail_end = init_translations() << PGSHIFT;
	p = reserve_dumppages(p);
	virtual_avail = (vm_offset_t)p;
	virtual_end = VM_MAX_KERNEL_ADDRESS;
	p = (caddr_t)i;			/* retract to first free phys */
* Initialize the kernel pmap.
	register struct kpmap *k = &kernel_pmap_store;

	/* kernel_pmap = (struct pmap *)k; */
	simple_lock_init(&k->pm_lock);
	k->pm_mmuback = &k->pm_mmuforw;
	k->pm_segmap = &k->pm_rsegmap[-NUSEG];
	k->pm_pte = &k->pm_rpte[-NUSEG];
	k->pm_npte = &k->pm_rnpte[-NUSEG];
	for (i = NKSEG; --i >= 0;)
		k->pm_rsegmap[i] = seginval;
* All contexts are free except the kernel's.
* XXX sun4c could use context 0 for users?
ci
->c_pmap
= kernel_pmap
;
for (i
= 1; i
< ncontext
; i
++) {
/* me_freelist = NULL; */ /* already NULL */
* Init mmu entries that map the kernel physical addresses.
* If the page bits in p are 0, we filled the last segment
* exactly (now how did that happen?); if not, it is
* the last page filled in the last segment.
* All the other MMU entries are free.
* THIS ASSUMES SEGMENT i IS MAPPED BY MMU ENTRY i DURING THE
z
= ((((u_int
)p
+ NBPSG
- 1) & ~SGOFSET
) - KERNBASE
) >> SGSHIFT
;
p
= (caddr_t
)KERNBASE
; /* first va */
vs
= VA_VSEG(KERNBASE
); /* first virtual segment */
rom_setmap
= promvec
->pv_setctxt
;
* Distribute each kernel segment into all contexts.
* This is done through the monitor ROM, rather than
* directly here: if we do a setcontext we will fault,
* as we are not (yet) mapped in any other context.
for (j
= 1; j
< nctx
; j
++)
/* set up the mmu entry */
		insque(me, me_locked.mh_prev);
		/* me->me_pmforw = NULL; */
		me->me_pmback = kernel_pmap->pm_mmuback;
		*kernel_pmap->pm_mmuback = me;
		kernel_pmap->pm_mmuback = &me->me_pmforw;
		me->me_pmap = kernel_pmap;
		kernel_pmap->pm_segmap[vs] = i;
		n = ++i < z ? NPTESG : lastpage;
		kernel_pmap->pm_npte[vs] = n;
* Unmap the pages, if any, that are not part of
for (p
+= n
* NBPG
; j
< NPTESG
; j
++, p
+= NBPG
)
for (; i
< nmmu
; i
++, me
++) {
me
->me_next
= me_freelist
;
/* me->me_pmap = NULL; */
* write protect & encache kernel text;
* set red zone at kernel base; enable cache on message buffer.
extern char etext
[], trapbase
[];
register int mask
= ~PG_NC
; /* XXX chgkprot is busted */
register int mask
= ~(PG_W
| PG_NC
);
for (p
= trapbase
; p
< etext
; p
+= NBPG
)
setpte(p
, getpte(p
) & mask
);
setpte(p
, getpte(p
) & ~PG_NC
);
* Grab physical memory list (for /dev/mem).
npmemarr
= makememarr(pmemarr
, MA_SIZE
, MEMARR_TOTALPHYS
);
* Bootstrap memory allocator. This function allows for early dynamic
* memory allocation until the virtual memory system has been bootstrapped.
* After that point, either kmem_alloc or malloc should be used. This
* function works by stealing pages from the (to be) managed page pool,
* stealing virtual address space, then mapping the pages and zeroing them.
* It should be used from pmap_bootstrap till vm_page_startup, afterwards
* it cannot be used, and will generate a panic if tried. Note that this
* memory will never be freed, and in essence it is wired down.
pmap_bootstrap_alloc(size)
	extern int vm_page_startup_initialized;

	if (vm_page_startup_initialized)
		panic("pmap_bootstrap_alloc: called after startup initialized");
	mem = (void *)virtual_avail;
	virtual_avail = pmap_map(virtual_avail, avail_start,
	    avail_start + size, VM_PROT_READ|VM_PROT_WRITE);
	bzero((void *)mem, size);
* Initialize the pmap module.
pmap_init(phys_start, phys_end)
	register vm_offset_t phys_start, phys_end;
		panic("pmap_init: CLSIZE!=1");
	/*
	 * Allocate and clear memory for the pv_table.
	 */
	s = sizeof(struct pvlist) * atop(phys_end - phys_start);
	pv_table = (struct pvlist *)kmem_alloc(kernel_map, s);
	bzero((caddr_t)pv_table, s);
	vm_first_phys = phys_start;
	vm_num_phys = phys_end - phys_start;
* Map physical addresses into kernel VM.
pmap_map(va
, pa
, endpa
, prot
)
register vm_offset_t va
, pa
, endpa
;
register int pgsize
= PAGE_SIZE
;
pmap_enter(kernel_pmap
, va
, pa
, prot
, 1);
* Create and return a physical map.
* If size is nonzero, the map is useless. (ick)
	register struct pmap *pm;

	pm = (struct pmap *)malloc(sizeof *pm, M_VMPMAP, M_WAITOK);
	if (pmapdebug & PDB_CREATE)
		printf("pmap_create: created %x\n", pm);
	bzero((caddr_t)pm, sizeof *pm);
* Initialize a preallocated and zeroed pmap structure,
* such as one in a vmspace structure.
	register struct pmap *pm;

	if (pmapdebug & PDB_CREATE)
		printf("pmap_pinit(%x)\n", pm);
	simple_lock_init(&pm->pm_lock);
	/* pm->pm_mmuforw = NULL; */
	pm->pm_mmuback = &pm->pm_mmuforw;
	pm->pm_segmap = pm->pm_rsegmap;
	pm->pm_pte = pm->pm_rpte;
	pm->pm_npte = pm->pm_rnpte;
	for (i = NUSEG; --i >= 0;)
		pm->pm_rsegmap[i] = seginval;
/* bzero((caddr_t)pm->pm_rpte, sizeof pm->pm_rpte); */
/* bzero((caddr_t)pm->pm_rnpte, sizeof pm->pm_rnpte); */
* Retire the given pmap from service.
* Should only be called if the map contains no valid mappings.
	register struct pmap *pm;

	if (pmapdebug & PDB_DESTROY)
		printf("pmap_destroy(%x)\n", pm);
	simple_lock(&pm->pm_lock);
	count = --pm->pm_refcount;
	simple_unlock(&pm->pm_lock);
	free((caddr_t)pm, M_VMPMAP);
* Release any resources held by the given physical map.
* Called when a pmap initialized by pmap_pinit is being released.
register struct pmap
*pm
;
register union ctxinfo
*c
;
register int s
= splpmap(); /* paranoia */
if (pmapdebug
& PDB_DESTROY
)
printf("pmap_release(%x)\n", pm
);
panic("pmap_release mmuforw");
if ((c
= pm
->pm_ctx
) != NULL
) {
panic("pmap_release: releasing kernel");
* Add a reference to the given pmap.
simple_lock(&pm
->pm_lock
);
simple_unlock(&pm
->pm_lock
);
static int pmap_rmk(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);
static int pmap_rmu(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);
* Remove the given range of mapping entries.
* The starting and ending addresses are already rounded to pages.
* Sheer lunacy: pmap_remove is often asked to remove nonexistent
pmap_remove(pm
, va
, endva
)
register struct pmap
*pm
;
register vm_offset_t va
, endva
;
register vm_offset_t nva
;
register int vseg
, nleft
, s
, ctx
;
register int (*rm
)(struct pmap
*, vm_offset_t
, vm_offset_t
,
if (pmapdebug
& PDB_REMOVE
)
printf("pmap_remove(%x, %x, %x)\n", pm
, va
, endva
);
* Removing from kernel address space.
* Removing from user address space.
s
= splpmap(); /* XXX conservative */
simple_lock(&pm
->pm_lock
);
for (; va
< endva
; va
= nva
) {
/* do one virtual segment at a time */
if (nva
== 0 || nva
> endva
)
if ((nleft
= pm
->pm_npte
[vseg
]) != 0)
pm
->pm_npte
[vseg
] = (*rm
)(pm
, va
, nva
,
vseg
, nleft
, pm
->pm_segmap
[vseg
]);
simple_unlock(&pm
->pm_lock
);
/* counters, one per possible length */
int rmk_vlen
[NPTESG
+1]; /* virtual length per rmk() call */
int rmk_npg
[NPTESG
+1]; /* n valid pages per rmk() call */
int rmk_vlendiff
; /* # times npg != vlen */
* The following magic number was chosen because:
* 1. It is the same amount of work to cache_flush_page 4 pages
* as to cache_flush_segment 1 segment (so at 4 the cost of
* 2. Flushing extra pages is bad (causes cache not to work).
* 3. The current code, which malloc()s 5 pages for each process
* for a user vmspace/pmap, almost never touches all 5 of those
* pages.
#define PMAP_RMK_MAGIC 5 /* if > magic, use cache_flush_segment */
* Remove a range contained within a single segment.
* These are egregiously complicated routines.
/* remove from kernel, return new nleft */
pmap_rmk(pm
, va
, endva
, vseg
, nleft
, pmeg
)
register struct pmap
*pm
;
register vm_offset_t va
, endva
;
register int vseg
, nleft
, pmeg
;
register int i
, tpte
, perpage
, npg
;
register struct pvlist
*pv
;
panic("pmap_rmk: not loaded");
panic("pmap_rmk: lost context");
/* decide how to flush cache */
npg
= (endva
- va
) >> PGSHIFT
;
if (npg
> PMAP_RMK_MAGIC
) {
/* flush the whole segment */
cache_flush_segment(vseg
);
/* flush each page individually; some never need flushing */
if ((tpte
& PG_V
) == 0) {
/* if cacheable, flush page as needed */
if ((tpte
& PG_NC
) == 0) {
if ((tpte
& PG_TYPE
) == PG_OBMEM
) {
i
= ptoa(HWTOSW(tpte
& PG_PFNUM
));
pv
->pv_flags
|= MR(tpte
);
* If the segment is all gone, remove it from everyone and
va
= VSTOVA(vseg
); /* retract */
for (i
= ncontext
; --i
> 0;) {
/* as before but for pmap_rmu */
int rmu_vlen
[NPTESG
+1]; /* virtual length per rmu() call */
int rmu_npg
[NPTESG
+1]; /* n valid pages per rmu() call */
int rmu_vlendiff
; /* # times npg != vlen */
int rmu_noflush
; /* # times rmu does not need to flush at all */
* Just like pmap_rmk_magic, but we have a different threshold.
* Note that this may well deserve further tuning work.
#define PMAP_RMU_MAGIC 4 /* if > magic, use cache_flush_segment */
pmap_rmu(pm
, va
, endva
, vseg
, nleft
, pmeg
)
register struct pmap
*pm
;
register vm_offset_t va
, endva
;
register int vseg
, nleft
, pmeg
;
register int *pte0
, i
, pteva
, tpte
, perpage
, npg
;
register struct pvlist
*pv
;
register int doflush
, nvalid
;
register int *pte
= pte0
+ VA_VPG(va
);
* PTEs are not in MMU. Just invalidate software copies.
for (; va
< endva
; pte
++, va
+= PAGE_SIZE
) {
if ((tpte
& PG_V
) == 0) {
/* nothing to remove (braindead VM layer) */
if ((tpte
& PG_TYPE
) == PG_OBMEM
) {
i
= ptoa(HWTOSW(tpte
& PG_PFNUM
));
pv_unlink(pvhead(i
), pm
, va
);
free((caddr_t
)pte0
, M_VMPMAP
);
* PTEs are in MMU. Invalidate in hardware, update ref &
* mod bits, and flush cache if required.
/* process has a context, must flush cache */
npg
= (endva
- va
) >> PGSHIFT
;
setcontext(pm
->pm_ctxnum
);
if (npg
> PMAP_RMU_MAGIC
) {
perpage
= 0; /* flush the whole segment */
cache_flush_segment(vseg
);
/* no context, use context 0; cache flush unnecessary */
/* XXX use per-cpu pteva? */
pteva
= VA_VPG(va
) * NBPG
;
for (; va
< endva
; pteva
+= PAGE_SIZE
, va
+= PAGE_SIZE
) {
/* if cacheable, flush page as needed */
if (doflush
&& (tpte
& PG_NC
) == 0) {
if ((tpte
& PG_TYPE
) == PG_OBMEM
) {
i
= ptoa(HWTOSW(tpte
& PG_PFNUM
));
pv
->pv_flags
|= MR(tpte
);
* If the segment is all gone, and the context is loaded, give
if (nleft
== 0 && pm
->pm_ctx
!= NULL
) {
va
= VSTOVA(vseg
); /* retract */
free((caddr_t
)pte0
, M_VMPMAP
);
* Lower (make more strict) the protection on the specified
* There are only two cases: either the protection is going to 0
* (in which case we do the dirty work here), or it is going from
* read/write to read-only (in which case pv_changepte does the trick).
pmap_page_protect(pa
, prot
)
register struct pvlist
*pv
, *pv0
, *npv
;
register struct pmap
*pm
;
register int va
, vseg
, pteva
, tpte
;
register int flags
, nleft
, i
, pmeg
, s
, ctx
, doflush
;
if ((pmapdebug
& PDB_CHANGEPROT
) ||
(pmapdebug
& PDB_REMOVE
&& prot
== VM_PROT_NONE
))
printf("pmap_page_protect(%x, %x)\n", pa
, prot
);
* Skip unmanaged pages, or operations that do not take
* away write permission.
	if (!managed(pa) || prot & VM_PROT_WRITE)
write_user_windows(); /* paranoia */
if (prot
& VM_PROT_READ
) {
pv_changepte(pvhead(pa
), 0, PG_W
);
* Remove all access to all people talking to this page.
* Walk down PV list, removing all mappings.
* The logic is much like that for pmap_remove,
* but we know we are removing exactly one page.
if ((pm
= pv
->pv_pmap
) == NULL
) {
flags
= pv
->pv_flags
& ~PV_NC
;
for (;; pm
= pv
->pv_pmap
) {
if ((nleft
= pm
->pm_npte
[vseg
]) == 0)
panic("pmap_remove_all: empty vseg");
pm
->pm_npte
[vseg
] = nleft
;
pmeg
= pm
->pm_segmap
[vseg
];
free((caddr_t
)pte
, M_VMPMAP
);
setcontext(pm
->pm_ctxnum
);
doflush
= vactype
!= VAC_NONE
;
/* XXX use per-cpu pteva? */
pteva
= VA_VPG(va
) * NBPG
;
panic("pmap_page_protect !PG_V 1");
panic("pmap_page_protect !PG_V 2");
for (i
= ncontext
; --i
> 0;) {
free((caddr_t
)pte
, M_VMPMAP
);
free((caddr_t
)pv
, M_VMPVENT
);
* Lower (make more strict) the protection on the specified
* There are only two cases: either the protection is going to 0
* (in which case we call pmap_remove to do the dirty work), or
* it is going from read/write to read-only. The latter is
pmap_protect(pm
, sva
, eva
, prot
)
register struct pmap
*pm
;
register int va
, nva
, vseg
, pteva
, pmeg
;
if (pm
== NULL
|| prot
& VM_PROT_WRITE
)
if ((prot
& VM_PROT_READ
) == 0) {
pmap_remove(pm
, sva
, eva
);
simple_lock(&pm
->pm_lock
);
for (va
= sva
; va
< eva
;) {
if (nva
== 0) panic("pmap_protect: last segment"); /* cannot happen */
if (pm
->pm_npte
[vseg
] == 0) {
pmeg
= pm
->pm_segmap
[vseg
];
register int *pte
= &pm
->pm_pte
[vseg
][VA_VPG(va
)];
/* not in MMU; just clear PG_W from core copies */
for (; va
< nva
; va
+= NBPG
)
/* in MMU: take away write bits from MMU PTEs */
* Flush cache so that any existing cache
* tags are updated. This is really only
* needed for PTEs that lose PG_W.
setcontext(pm
->pm_ctxnum
);
for (; va
< nva
; va
+= NBPG
) {
pmap_stats
.ps_npg_prot_all
++;
pmap_stats
.ps_npg_prot_actual
++;
setpte(va
, tpte
& ~PG_W
);
* No context, hence not cached;
/* XXX use per-cpu pteva? */
pteva
= VA_VPG(va
) * NBPG
;
for (; va
< nva
; pteva
+= NBPG
, va
+= NBPG
)
setpte(pteva
, getpte(pteva
) & ~PG_W
);
simple_unlock(&pm
->pm_lock
);
* Change the protection and/or wired status of the given (MI) virtual page.
* XXX: should have separate function (or flag) telling whether only wiring
pmap_changeprot(pm
, va
, prot
, wired
)
register struct pmap
*pm
;
register int vseg
, tpte
, newprot
, pmeg
, ctx
, i
, s
;
if (pmapdebug
& PDB_CHANGEPROT
)
printf("pmap_changeprot(%x, %x, %x, %x)\n",
write_user_windows(); /* paranoia */
	newprot = prot & VM_PROT_WRITE ? PG_S|PG_W : PG_S;
	newprot = prot & VM_PROT_WRITE ? PG_W : 0;
s
= splpmap(); /* conservative */
pmap_stats
.ps_changeprots
++;
/* update PTEs in software or hardware */
if ((pmeg
= pm
->pm_segmap
[vseg
]) == seginval
) {
register int *pte
= &pm
->pm_pte
[vseg
][VA_VPG(va
)];
if ((*pte
& PG_PROT
) == newprot
)
*pte
= (*pte
& ~PG_PROT
) | newprot
;
/* use current context; flush writeback cache */
setcontext(pm
->pm_ctxnum
);
if ((tpte
& PG_PROT
) == newprot
)
if (vactype
== VAC_WRITEBACK
&&
(tpte
& (PG_W
| PG_NC
)) == PG_W
)
cache_flush_page((int)va
);
/* XXX use per-cpu va? */
if ((tpte
& PG_PROT
) == newprot
)
tpte
= (tpte
& ~PG_PROT
) | newprot
;
/* only wiring changed, and we ignore wiring */
pmap_stats
.ps_useless_changeprots
++;
* Insert (MI) physical page pa at virtual address va in the given pmap.
* NB: the pa parameter includes type bits PMAP_OBIO, PMAP_NC as necessary.
* If pa is not in the `managed' range it will not be `bank mapped'.
* This works during bootstrap only because the first 4MB happens to
* There may already be something else there, or we might just be
* changing protections and/or wiring on an existing mapping.
* XXX should have different entry points for changing!
pmap_enter(pm
, va
, pa
, prot
, wired
)
register struct pmap
*pm
;
register struct pvlist
*pv
;
register int pteproto
, ctx
;
if (pmapdebug
& PDB_ENTER
)
printf("pmap_enter(%x, %x, %x, %x, %x)\n",
pm
, va
, pa
, prot
, wired
);
pteproto
= PG_V
| ((pa
& PMAP_TNC
) << PG_TNC_SHIFT
);
* Set up prototype for new PTE. Cannot set PG_NC from PV_NC yet
* since the pvlist no-cache bit might change as a result of the
* new mapping.
pteproto
|= SWTOHW(atop(pa
));
pteproto
|= atop(pa
) & PG_PFNUM
;
if (prot
& VM_PROT_WRITE
)
pmap_enk(pm
, va
, prot
, wired
, pv
, pteproto
| PG_S
);
pmap_enu(pm
, va
, prot
, wired
, pv
, pteproto
);
/* enter new (or change existing) kernel mapping */
pmap_enk(pm
, va
, prot
, wired
, pv
, pteproto
)
register struct pmap
*pm
;
register struct pvlist
*pv
;
register int vseg
, tpte
, pmeg
, i
, s
;
s
= splpmap(); /* XXX way too conservative */
if (pm
->pm_segmap
[vseg
] != seginval
&&
(tpte
= getpte(va
)) & PG_V
) {
register int addr
= tpte
& PG_PFNUM
;
if (addr
== (pteproto
& PG_PFNUM
)) {
/* just changing protection and/or wiring */
pmap_changeprot(pm
, va
, prot
, wired
);
/*printf("pmap_enk: changing existing va=>pa entry\n");*/
* Switcheroo: changing pa for this va.
* If old pa was managed, remove from pvlist.
* If old page was cached, flush cache.
addr
= ptoa(HWTOSW(addr
));
pv_unlink(pvhead(addr
), pm
, va
);
cache_flush_page((int)va
);
* If the new mapping is for a managed PA, enter into pvlist.
* Note that the mapping for a malloc page will always be
* unique (hence will never cause a second call to malloc).
pteproto
|= pv_link(pv
, pm
, va
);
pmeg
= pm
->pm_segmap
[vseg
];
* Allocate an MMU entry now (on locked list),
* and map it into every context. Set all its
* PTEs invalid (we will then overwrite one, but
* this is more efficient than looping twice).
if (pm
->pm_ctx
== NULL
|| pm
->pm_ctxnum
!= 0)
panic("pmap_enk: kern seg but no kern ctx");
pmeg
= me_alloc(&me_locked
, pm
, vseg
)->me_pmeg
;
pm
->pm_segmap
[vseg
] = pmeg
;
/* set all PTEs to invalid, then overwrite one PTE below */
tva
= VA_ROUNDDOWNTOSEG(va
);
/* ptes kept in hardware only */
/* enter new (or change existing) user mapping */
pmap_enu(pm
, va
, prot
, wired
, pv
, pteproto
)
register struct pmap
*pm
;
register struct pvlist
*pv
;
register int vseg
, *pte
, tpte
, pmeg
, i
, s
, doflush
;
write_user_windows(); /* XXX conservative */
s
= splpmap(); /* XXX conservative */
* If there is no space in which the PTEs can be written
* while they are not in the hardware, this must be a new
* virtual segment. Get PTE space and count the segment.
* TO SPEED UP CTX ALLOC, PUT SEGMENT BOUNDS STUFF HERE
/* definitely a new mapping */
register int size
= NPTESG
* sizeof *pte
;
pte
= (int *)malloc((u_long
)size
, M_VMPMAP
, M_WAITOK
);
if (pm
->pm_pte
[vseg
] != NULL
) {
printf("pmap_enter: pte filled during sleep\n"); /* can this happen? */
free((caddr_t
)pte
, M_VMPMAP
);
if (pm
->pm_segmap
[vseg
] != seginval
)
panic("pmap_enter: new ptes, but not seginval");
bzero((caddr_t
)pte
, size
);
/* might be a change: fetch old pte */
if ((pmeg
= pm
->pm_segmap
[vseg
]) == seginval
)
tpte
= pte
[VA_VPG(va
)]; /* software pte */
if (pm
->pm_ctx
) { /* hardware pte */
setcontext(pm
->pm_ctxnum
);
/* XXX use per-cpu pteva? */
tpte
= getpte(VA_VPG(va
) * NBPG
);
register int addr
= tpte
& PG_PFNUM
;
if (addr
== (pteproto
& PG_PFNUM
)) {
/* just changing prot and/or wiring */
/* caller should call this directly: */
pmap_changeprot(pm
, va
, prot
, wired
);
* Switcheroo: changing pa for this va.
* If old pa was managed, remove from pvlist.
* If old page was cached, flush cache.
/*printf("%s[%d]: pmap_enu: changing existing va(%x)=>pa entry\n",
curproc->p_comm, curproc->p_pid, va);*/
addr
= ptoa(HWTOSW(addr
));
pv_unlink(pvhead(addr
), pm
, va
);
doflush
&& (tpte
& PG_NC
) == 0)
cache_flush_page((int)va
);
pteproto
|= pv_link(pv
, pm
, va
);
* Update hardware or software PTEs (whichever are active).
	if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
		/* ptes are in hardware */
		setcontext(pm->pm_ctxnum);
/* XXX use per-cpu pteva? */
/* update software copy */
* Change the wiring attribute for a map/virtual-address pair.
pmap_change_wiring(pm
, va
, wired
)
pmap_stats
.ps_useless_changewire
++;
* Extract the physical page address associated
* with the given map/virtual_address pair.
* GRR, the vm code knows; we should not have to do this!
register struct pmap
*pm
;
printf("pmap_extract: null pmap\n");
if (pm
->pm_segmap
[vseg
] != seginval
) {
register int ctx
= getcontext();
setcontext(pm
->pm_ctxnum
);
tpte
= getpte(VA_VPG(va
) * NBPG
);
register int *pte
= pm
->pm_pte
[vseg
];
printf("pmap_extract: invalid vseg\n");
if ((tpte
& PG_V
) == 0) {
printf("pmap_extract: invalid pte\n");
return ((tpte
<< PGSHIFT
) | (va
& PGOFSET
));
* Copy the range specified by src_addr/len
* from the source map to the range dst_addr/len
* in the destination map.
* This routine is only advisory and need not do anything.
pmap_copy(dst_pmap
, src_pmap
, dst_addr
, len
, src_addr
)
struct pmap
*dst_pmap
, *src_pmap
;
* Require that all active physical maps contain no
* incorrect entries NOW. [This update includes
* forcing updates of any address map caching.]
* Garbage collects the physical map system for
* pages which are no longer used.
* Success need not be guaranteed -- that is, there
* may well be pages which are not referenced, but
* others may be collected.
* Called by the pageout daemon when pages are scarce.
* Clear the modify bit for the given physical page.
register struct pvlist
*pv
;
* Tell whether the given physical page has been modified.
	register struct pvlist *pv;

	if (pv->pv_flags & PV_MOD || pv_syncflags(pv) & PV_MOD)
* Clear the reference bit for the given physical page.
register struct pvlist
*pv
;
* Tell whether the given physical page has been referenced.
	register struct pvlist *pv;

	if (pv->pv_flags & PV_REF || pv_syncflags(pv) & PV_REF)
* Make the specified pages (by pmap, offset) pageable (or not) as requested.
* A page which is not pageable may not take a fault; therefore, its page
* table entry must remain valid for the duration (or at least, the trap
* handler must not call vm_fault).
* This routine is merely advisory; pmap_enter will specify that these pages
* are to be wired down (or not) as appropriate.
pmap_pageable(pm
, start
, end
, pageable
)
* Fill the given MI physical page with zero bytes.
* We avoid stomping on the cache.
* XXX might be faster to use destination's context and allow cache to fill?
* The following might not be necessary since the page
* is being cleared because it is about to be allocated,
* i.e., is in use by no one.
pv_flushcache(pvhead(pa
));
	pte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(pa));
	pte = PG_V | PG_S | PG_W | PG_NC | (atop(pa) & PG_PFNUM);
* Copy the given MI physical source page to its destination.
* We avoid stomping on the cache as above (with same `XXX' note).
* We must first flush any write-back cache for the source page.
* We go ahead and stomp on the kernel's virtual cache for the
* source page, since the cache can read memory MUCH faster than
	register caddr_t sva, dva;

	if (vactype == VAC_WRITEBACK)
		pv_flushcache(pvhead(src));
	spte = PG_V | PG_S | SWTOHW(atop(src));
	spte = PG_V | PG_S | (atop(src) & PG_PFNUM);
/* similar `might not be necessary' comment applies */
		pv_flushcache(pvhead(dst));
	dpte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(dst));
	dpte = PG_V | PG_S | PG_W | PG_NC | (atop(dst) & PG_PFNUM);
	qcopy(sva, dva, NBPG);		/* loads cache, so we must ... */
	cache_flush_page((int)sva);
* Turn a cdevsw d_mmap value into a byte address for pmap_enter.
* XXX this should almost certainly be done differently, and
* elsewhere, or even not at all
* Turn off cache for a given (va, number of pages).
* We just assert PG_NC for each PTE; the addresses must reside
* in locked kernel space. A cache flush is also done.
	for (; --npages >= 0; va += NBPG) {
			panic("kvm_uncache !pg_v");
		cache_flush_page((int)va);
pmap_enter_hw(pm, va, pa, prot, wired)
	register struct pmap *pm;
	register struct memarr *ma;

	if (pa >= MAXMEM)			/* ??? */
	for (ma = pmemarr, n = npmemarr; --n >= 0; ma++) {
		t = (u_int)pa - ma->addr;
	pa = (HWTOSW(atop(pa)) << PGSHIFT) | (pa & PGOFSET);
	if (pa >= vm_first_phys + vm_num_phys)	/* ??? */
	pmap_enter(pm, va, pa, prot, wired);