* Copyright (c) 1991 Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* %sccs.include.redist.c%
* @(#)pmap.c 7.6 (Berkeley) %G%
* HP9000/300 series physical map management code.
* For 68020/68030 machines with HP, 68551, or 68030 MMUs
* (models 320,350,318,319,330,340,360,370,345,375)
* Don't even pay lip service to multiprocessor support.
* XXX will only work for PAGE_SIZE == NBPG (hppagesperpage == 1)
* right now because of the assumed one-to-one relationship of PT
* pages to STEs.
* Manages physical address maps.
* In addition to hardware address maps, this
* module is called upon to provide software-use-only
* maps which may or may not be stored in the same
* form as hardware maps. These pseudo-maps are
* used to store intermediate results from copy
* operations to and from address spaces.
* Since the information managed by this module is
* also stored by the logical address mapping module,
* this module may throw away valid virtual-to-physical
* mappings at almost any time. However, invalidations
* of virtual-to-physical mappings must be done as
* requested.
* In order to cope with hardware architectures which
* make virtual-to-physical map invalidates expensive,
* this module may delay invalidate or reduced protection
* operations until such time as they are actually
* necessary. This module is given full information as
* to which processors are currently using which maps,
* and to when physical maps must be made correct.
#include "../include/cpu.h"
* Allocate various and sundry SYSMAPs used in the days of old VM
* and not yet converted. XXX.
/*
 * Event counters; the trailing comment on each line says what is counted.
 * NOTE(review): the enclosing declaration is not visible in this excerpt --
 * presumably these are members of a pmap_enter() statistics structure
 * (compare remove_stats and kpt_stats used further down); confirm.
 */
int kernel
; /* entering kernel mapping */
int user
; /* entering user mapping */
int ptpneeded
; /* needed to allocate a PT page */
int pwchange
; /* no mapping change, just wiring or protection */
int wchange
; /* no mapping change, just wiring */
int mchange
; /* was mapped but mapping to different page */
int managed
; /* a managed page */
int firstpv
; /* first mapping for this PA */
int secondpv
; /* second mapping for this PA */
int ci
; /* cache inhibited */
int unmanaged
; /* not a managed page */
int flushes
; /* cache flushes */
/*
 * Debug selectors.  Each flag is a single bit that is tested against
 * pmapdebug to enable the matching diagnostic printf()s or checks.
 */
#define PDB_FOLLOW	(1 << 0)	/* trace entry to the pmap routines */
#define PDB_REMOVE	(1 << 3)	/* pmap_remove() activity */
#define PDB_CREATE	(1 << 4)	/* pmap_create()/pmap_pinit() */
#define PDB_PTPAGE	(1 << 5)	/* page table page handling */
#define PDB_COLLECT	(1 << 8)	/* pmap_collect() activity */
#define PDB_PROTECT	(1 << 9)	/* protection changes */
#define PDB_SEGTAB	(1 << 10)	/* segment table handling */
#define PDB_PARANOIA	(1 << 13)	/* extra consistency checks */
#define PDB_WIRING	(1 << 14)	/* run pmap_check_wiring() */
#define PDB_PVDUMP	(1 << 15)	/* tested together with PDB_CACHE */
/*
 * Defined elsewhere.
 * NOTE(review): not referenced in the visible part of this file --
 * presumably the start/end of a pager-owned kernel VA range; confirm.
 */
extern vm_offset_t pager_sva
, pager_eva
;
* Get STEs and PTEs for user/kernel address space
/*
 * Segment/page table accessors.
 *
 * pmap_ste(m, v) / pmap_pte(m, v) yield a pointer to the segment table
 * entry / page table entry for virtual address v in map m, indexing
 * pm_stab by (v >> SG_ISHIFT) and pm_ptab by (v >> PG_SHIFT).
 * pmap_pte_pa() masks the raw PTE word with PG_FRAME.
 * The remaining macros read or write individual fields of an entry.
 */
#define pmap_ste(m, v)	((m)->pm_stab + ((vm_offset_t)(v) >> SG_ISHIFT))
#define pmap_pte(m, v)	((m)->pm_ptab + ((vm_offset_t)(v) >> PG_SHIFT))
#define pmap_pte_pa(pte)	(*(int *)(pte) & PG_FRAME)
#define pmap_ste_v(pte)	((*(pte)).sg_v)
#define pmap_pte_w(pte)	((*(pte)).pg_w)
#define pmap_pte_ci(pte)	((*(pte)).pg_ci)
#define pmap_pte_m(pte)	((*(pte)).pg_m)
#define pmap_pte_u(pte)	((*(pte)).pg_u)
#define pmap_pte_v(pte)	((*(pte)).pg_v)
#define pmap_pte_set_w(pte, v)	((*(pte)).pg_w = (v))
#define pmap_pte_set_prot(pte, v)	((*(pte)).pg_prot = (v))
* Given a map and a machine independent protection code,
* convert to the corresponding HP300 PTE protection code.
/*
 * pte_prot(m, p): look up entry p of protection_codes[].
 * The map argument m is accepted but not used (and not evaluated).
 */
#define pte_prot(m, p)	(*(protection_codes + (p)))
* Kernel page table page management.
/*
 * Bookkeeping for one kernel page table page.
 * NOTE(review): the braces of the structure definition are not visible in
 * this excerpt; the three fields below are presumably its members.
 */
struct kpt_page
*kpt_next
; /* link on either used or free list */
vm_offset_t kpt_va
; /* always valid kernel VA */
vm_offset_t kpt_pa
; /* PA of this page (for speed) */
/*
 * kpt_free_list is built by pmap_init(); kpt_used_list is the list
 * pmap_collect() searches when returning a page to the free list.
 */
struct kpt_page
*kpt_free_list
, *kpt_used_list
;
/* Set up by pmap_init() while it links the kpt_page entries together. */
struct kpt_page
*kpt_pages
;
* Kernel segment/page table and page table map.
* The page table map gives us a level of indirection we need to dynamically
* expand the page table. It is essentially a copy of the segment table
* with PTEs instead of STEs. All are initialized in locore at boot time.
* Sysmap will initially contain VM_KERNEL_PT_PAGES pages of PTEs.
* Segtabzero is an empty segment table which all processes share until they
* reference something.
/* Sysmap becomes kernel_pmap->pm_ptab in pmap_bootstrap(). */
pt_entry_t
*Sysmap
, *Sysptmap
;
/*
 * NOTE(review): "4 / NPTEPG" is integer division, so it contributes 0
 * whenever NPTEPG > 4 -- confirm what rounding was intended here.
 */
vm_size_t Sysptsize
= VM_KERNEL_PT_PAGES
+ 4 / NPTEPG
;
/*
 * NOTE(review): second definition of Sysptsize; presumably the two are
 * alternatives selected by a preprocessor conditional that is not
 * visible in this excerpt -- confirm.
 */
vm_size_t Sysptsize
= VM_KERNEL_PT_PAGES
;
/* Statically allocated kernel pmap; pmap_bootstrap() points kernel_pmap at it. */
struct pmap kernel_pmap_store
;
/*
 * Global state of the pmap module.  avail_end, mem_size, virtual_avail,
 * virtual_end, hppagesperpage and pmap_aliasmask are assigned in
 * pmap_bootstrap(); vm_first_phys and pmap_attributes in pmap_init().
 */
vm_offset_t avail_start
; /* PA of first available physical page */
vm_offset_t avail_end
; /* PA of last available physical page */
vm_size_t mem_size
; /* memory size in bytes */
vm_offset_t virtual_avail
; /* VA of first avail page (after kernel bss)*/
vm_offset_t virtual_end
; /* VA of last avail page (end of kernel AS) */
vm_offset_t vm_first_phys
; /* PA of first managed page */
vm_offset_t vm_last_phys
; /* PA just past last managed page */
int hppagesperpage
; /* PAGE_SIZE / HP_PAGE_SIZE */
boolean_t pmap_initialized
= FALSE
; /* Has pmap_init completed? */
int pmap_aliasmask
; /* separation at which VA aliasing ok */
char *pmap_attributes
; /* reference and modify bits */
/* Forward declarations of routines used before their definitions. */
boolean_t
pmap_testbit();
void pmap_enter_ptpage();
* All those kernel PT submaps that BSD is so fond of
/*
 * PTE pointers for the kernel submaps; each is paired with the virtual
 * address below (CMAP1/CADDR1, CMAP2/CADDR2, mmap/vmmap).  The pairs are
 * filled in by the SYSMAP() invocations in pmap_bootstrap().
 */
struct pte
*CMAP1
, *CMAP2
, *mmap
;
/* Kernel virtual addresses mapped by the PTEs above. */
caddr_t CADDR1
, CADDR2
, vmmap
;
* Bootstrap the system enough to run with virtual memory.
* Map the kernel's code and data, and allocate the system page table.
* On the HP this is called after mapping has already been enabled
* and just syncs the pmap module with what has already been done.
* [We can't call it easily with mapping off since the kernel is not
* mapped with PA == VA, hence we would have to relocate every address
* from the linked base (virtual) address 0 to the actual (physical)
* address of 0xFFxxxxxx.]
pmap_bootstrap(firstaddr
, loadaddr
)
extern vm_offset_t maxmem
, physmem
;
avail_end
= maxmem
<< PGSHIFT
;
/* XXX: allow for msgbuf */
avail_end
-= hp300_round_page(sizeof(struct msgbuf
));
mem_size
= physmem
<< PGSHIFT
;
virtual_avail
= VM_MIN_KERNEL_ADDRESS
+ (firstaddr
- loadaddr
);
virtual_end
= VM_MAX_KERNEL_ADDRESS
;
hppagesperpage
= PAGE_SIZE
/ HP_PAGE_SIZE
;
* Determine VA aliasing distance if any
pmap_aliasmask
= 0x3fff; /* 16k */
pmap_aliasmask
= 0x7fff; /* 32k */
* Initialize protection array.
* The kernel's pmap is statically allocated so we don't
* have to use pmap_create, which is unlikely to work
* correctly at this part of the boot sequence.
kernel_pmap
= &kernel_pmap_store
;
* Kernel page/segment table allocated in locore,
* just initialize pointers.
kernel_pmap
->pm_stab
= Sysseg
;
kernel_pmap
->pm_ptab
= Sysmap
;
simple_lock_init(&kernel_pmap
->pm_lock
);
kernel_pmap
->pm_count
= 1;
* Allocate all the submaps we need
/*
 * SYSMAP(c, p, v, n): carve n HP pages out of the kernel address space.
 * Records the current `pte' in p and the current `va' (cast to type c)
 * in v, then advances pte by n entries and va by n * HP_PAGE_SIZE.
 * Expands to bare statements that use the caller's `va' and `pte'
 * variables, so it is invoked without a trailing semicolon.
 */
#define SYSMAP(c, p, v, n) \
	p = pte; pte += (n); v = (c)va; va += ((n)*HP_PAGE_SIZE);
pte
= pmap_pte(kernel_pmap
, va
);
SYSMAP(caddr_t
,CMAP1
,CADDR1
,1 )
SYSMAP(caddr_t
,CMAP2
,CADDR2
,1 )
SYSMAP(caddr_t
,mmap
,vmmap
,1 )
SYSMAP(struct msgbuf
* ,msgbufmap
,msgbufp
,1 )
* Bootstrap memory allocator. This function allows for early dynamic
* memory allocation until the virtual memory system has been bootstrapped.
* After that point, either kmem_alloc or malloc should be used. This
* function works by stealing pages from the (to be) managed page pool,
* stealing virtual address space, then mapping the pages and zeroing them.
* It should be used from pmap_bootstrap till vm_page_startup, afterwards
* it cannot be used, and will generate a panic if tried. Note that this
* memory will never be freed, and in essence it is wired down.
pmap_bootstrap_alloc(size
) {
extern boolean_t vm_page_startup_initialized
;
if (vm_page_startup_initialized
)
panic("pmap_bootstrap_alloc: called after startup initialized");
virtual_avail
= pmap_map(virtual_avail
, avail_start
,
avail_start
+ size
, VM_PROT_READ
|VM_PROT_WRITE
);
blkclr ((caddr_t
) val
, size
);
* Initialize the pmap module.
* Called by vm_init, to initialize any structures that the pmap
* system needs to map virtual memory.
pmap_init(phys_start
, phys_end
)
vm_offset_t phys_start
, phys_end
;
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_init(%x, %x)\n", phys_start
, phys_end
);
* Now that kernel map has been allocated, we can mark as
* unavailable regions which we have mapped in locore.
addr
= (vm_offset_t
) intiobase
;
(void) vm_map_find(kernel_map
, NULL
, (vm_offset_t
) 0,
&addr
, hp300_ptob(IIOMAPSIZE
+EIOMAPSIZE
), FALSE
);
if (addr
!= (vm_offset_t
)intiobase
)
addr
= (vm_offset_t
) Sysmap
;
vm_object_reference(kernel_object
);
(void) vm_map_find(kernel_map
, kernel_object
, addr
,
&addr
, HP_MAX_PTSIZE
, FALSE
);
* If this fails it is probably because the static portion of
* the kernel page table isn't big enough and we overran the
* page table map. Need to adjust pmap_size() in hp300_init.c.
if (addr
!= (vm_offset_t
)Sysmap
)
addr
= (vm_offset_t
) kstack
;
vm_object_reference(kernel_object
);
(void) vm_map_find(kernel_map
, kernel_object
, addr
,
&addr
, hp300_ptob(UPAGES
), FALSE
);
if (addr
!= (vm_offset_t
)kstack
)
panic("pmap_init: bogons in the VM system!\n");
if (pmapdebug
& PDB_INIT
) {
printf("pmap_init: Sysseg %x, Sysmap %x, Sysptmap %x\n",
Sysseg
, Sysmap
, Sysptmap
);
printf(" pstart %x, pend %x, vstart %x, vend %x\n",
avail_start
, avail_end
, virtual_avail
, virtual_end
);
* Allocate memory for random pmap data structures. Includes the
* initial segment table, pv_head_table and pmap_attributes.
npg
= atop(phys_end
- phys_start
);
s
= (vm_size_t
) (HP_STSIZE
+ sizeof(struct pv_entry
) * npg
+ npg
);
addr
= (vm_offset_t
) kmem_alloc(kernel_map
, s
);
Segtabzero
= (st_entry_t
*) addr
;
pv_table
= (pv_entry_t
) addr
;
addr
+= sizeof(struct pv_entry
) * npg
;
pmap_attributes
= (char *) addr
;
if (pmapdebug
& PDB_INIT
)
printf("pmap_init: %x bytes (%x pgs): seg %x tbl %x attr %x\n",
s
, npg
, Segtabzero
, pv_table
, pmap_attributes
);
* Allocate physical memory for kernel PT pages and their management.
* We need 1 PT page per possible task plus some slop.
npg
= min(atop(HP_MAX_KPTSIZE
), maxproc
+16);
s
= ptoa(npg
) + round_page(npg
* sizeof(struct kpt_page
));
* Verify that space will be allocated in region for which
* we already have kernel PT pages.
rv
= vm_map_find(kernel_map
, NULL
, 0, &addr
, s
, TRUE
);
if (rv
!= KERN_SUCCESS
|| addr
+ s
>= (vm_offset_t
)Sysmap
)
panic("pmap_init: kernel PT too small");
vm_map_remove(kernel_map
, addr
, addr
+ s
);
* Now allocate the space and link the pages together to
* form the KPT free list.
addr
= (vm_offset_t
) kmem_alloc(kernel_map
, s
);
kpt_pages
= &((struct kpt_page
*)addr2
)[npg
];
kpt_free_list
= (struct kpt_page
*) 0;
(--kpt_pages
)->kpt_next
= kpt_free_list
;
kpt_free_list
= kpt_pages
;
kpt_pages
->kpt_va
= addr2
;
kpt_pages
->kpt_pa
= pmap_extract(kernel_pmap
, addr2
);
kpt_stats
.kpttotal
= atop(s
);
if (pmapdebug
& PDB_INIT
)
printf("pmap_init: KPT: %d pages from %x to %x\n",
atop(s
), addr
, addr
+ s
);
* Slightly modified version of kmem_suballoc() to get page table
* map where we want it.
s
= min(HP_PTMAXSIZE
, maxproc
*HP_MAX_PTSIZE
);
rv
= vm_map_find(kernel_map
, NULL
, 0, &addr
, s
, TRUE
);
panic("pmap_init: cannot allocate space for PT map");
pmap_reference(vm_map_pmap(kernel_map
));
pt_map
= vm_map_create(vm_map_pmap(kernel_map
), addr
, addr2
, TRUE
);
panic("pmap_init: cannot create pt_map");
rv
= vm_map_submap(kernel_map
, addr
, addr2
, pt_map
);
panic("pmap_init: cannot map range to pt_map");
if (pmapdebug
& PDB_INIT
)
printf("pmap_init: pt_map [%x - %x)\n", addr
, addr2
);
* Now it is safe to enable pv_table recording.
vm_first_phys
= phys_start
;
* Used to map a range of physical addresses into kernel
* virtual address space.
* For now, VM is already on, we only need to map the
* specified memory.
pmap_map(virt
, start
, end
, prot
)
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_map(%x, %x, %x, %x)\n", virt
, start
, end
, prot
);
pmap_enter(kernel_pmap
, virt
, start
, prot
, FALSE
);
* Create and return a physical map.
* If the size specified for the map
* is zero, the map is an actual physical
* map, and may be referenced by the
* hardware.
* If the size specified is non-zero,
* the map will be used in software only, and
* is bounded by that size.
if (pmapdebug
& (PDB_FOLLOW
|PDB_CREATE
))
printf("pmap_create(%x)\n", size
);
* Software use map does not need a pmap
/* XXX: is it ok to wait here? */
pmap
= (pmap_t
) malloc(sizeof *pmap
, M_VMPMAP
, M_WAITOK
);
panic("pmap_create: cannot allocate a pmap");
bzero(pmap
, sizeof(*pmap
));
* Initialize a preallocated and zeroed pmap structure,
* such as one in a vmspace structure.
register struct pmap
*pmap
;
if (pmapdebug
& (PDB_FOLLOW
|PDB_CREATE
))
printf("pmap_pinit(%x)\n", pmap
);
* No need to allocate page table space yet but we do need a
* valid segment table. Initially, we point everyone at the
* "null" segment table. On the first pmap_enter, a real
* segment table will be allocated.
pmap
->pm_stab
= Segtabzero
;
pmap
->pm_stchanged
= TRUE
;
simple_lock_init(&pmap
->pm_lock
);
* Retire the given physical map from service.
* Should only be called if the map contains
* no valid mappings.
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_destroy(%x)\n", pmap
);
simple_lock(&pmap
->pm_lock
);
count
= --pmap
->pm_count
;
simple_unlock(&pmap
->pm_lock
);
free((caddr_t
)pmap
, M_VMPMAP
);
* Release any resources held by the given physical map.
* Called when a pmap initialized by pmap_pinit is being released.
* Should only be called if the map contains no valid mappings.
register struct pmap
*pmap
;
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_release(%x)\n", pmap
);
#ifdef notdef /* DIAGNOSTIC */
/* count would be 0 from pmap_destroy... */
simple_lock(&pmap
->pm_lock
);
panic("pmap_release count");
kmem_free_wakeup(pt_map
, (vm_offset_t
)pmap
->pm_ptab
,
if (pmap
->pm_stab
!= Segtabzero
)
kmem_free(kernel_map
, (vm_offset_t
)pmap
->pm_stab
, HP_STSIZE
);
* Add a reference to the specified pmap.
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_reference(%x)\n", pmap
);
simple_lock(&pmap
->pm_lock
);
simple_unlock(&pmap
->pm_lock
);
* Remove the given range of addresses from the specified map.
* It is assumed that the start and end are properly
* rounded to the page size.
pmap_remove(pmap
, sva
, eva
)
register vm_offset_t pa
, va
;
register pt_entry_t
*pte
;
register pv_entry_t pv
, npv
;
boolean_t firstpage
= TRUE
;
boolean_t flushcache
= FALSE
;
if (pmapdebug
& (PDB_FOLLOW
|PDB_REMOVE
|PDB_PROTECT
))
printf("pmap_remove(%x, %x, %x)\n", pmap
, sva
, eva
);
for (va
= sva
; va
< eva
; va
+= PAGE_SIZE
) {
* Weed out invalid mappings.
* Note: we assume that the segment table is always allocated.
if (!pmap_ste_v(pmap_ste(pmap
, va
))) {
/* XXX: avoid address wrap around */
if (va
>= hp300_trunc_seg((vm_offset_t
)-1))
va
= hp300_round_seg(va
+ PAGE_SIZE
) - PAGE_SIZE
;
pte
= pmap_pte(pmap
, va
);
* Invalidating a non-CI page, must flush external VAC
* unless it is a supervisor mapping and we have already
* flushed the supervisor side.
if (pmap_aliasmask
&& !pmap_pte_ci(pte
) &&
!(pmap
== kernel_pmap
&& firstpage
))
pmap
->pm_stats
.wired_count
--;
pmap
->pm_stats
.resident_count
--;
* XXX: should cluster them up and invalidate as many
* as possible at once.
if (pmapdebug
& PDB_REMOVE
)
printf("remove: invalidating %x ptes at %x\n",
* Flush VAC to ensure we get the correct state of any
* hardware maintained bits.
if (firstpage
&& pmap_aliasmask
) {
bits
|= *(int *)pte
& (PG_U
|PG_M
);
TBIS(va
+ ix
* HP_PAGE_SIZE
);
} while (++ix
!= hppagesperpage
);
* For user mappings decrement the wiring count on
* the PT page. We do this after the PTE has been
* invalidated because vm_map_pageable winds up in
* pmap_pageable which clears the modify bit for the
* PT page.
if (pmap
!= kernel_pmap
) {
pte
= pmap_pte(pmap
, va
);
vm_map_pageable(pt_map
, trunc_page(pte
),
round_page(pte
+1), TRUE
);
if (pmapdebug
& PDB_WIRING
)
pmap_check_wiring("remove", trunc_page(pte
));
* Remove from the PV table (raise IPL since we
* may be called at interrupt time).
if (pa
< vm_first_phys
|| pa
>= vm_last_phys
)
* If it is the first entry on the list, it is actually
* in the header and we must copy the following entry up
* to the header. Otherwise we must search the list for
* the entry. In either case we free the now unused entry.
if (pmap
== pv
->pv_pmap
&& va
== pv
->pv_va
) {
ste
= (int *)pv
->pv_ptste
;
free((caddr_t
)npv
, M_VMPVENT
);
for (npv
= pv
->pv_next
; npv
; npv
= npv
->pv_next
) {
if (pmap
== npv
->pv_pmap
&& va
== npv
->pv_va
)
panic("pmap_remove: PA not in pv_tab");
ste
= (int *)npv
->pv_ptste
;
pv
->pv_next
= npv
->pv_next
;
free((caddr_t
)npv
, M_VMPVENT
);
* If only one mapping left we no longer need to cache inhibit
pv
->pv_next
== NULL
&& (pv
->pv_flags
& PV_CI
)) {
if (pmapdebug
& PDB_CACHE
)
printf("remove: clearing CI for pa %x\n", pa
);
pmap_changebit(pa
, PG_CI
, FALSE
);
if ((pmapdebug
& (PDB_CACHE
|PDB_PVDUMP
)) ==
* If this was a PT page we must also remove the
* mapping from the associated segment table.
remove_stats
.ptinvalid
++;
if (pmapdebug
& (PDB_REMOVE
|PDB_PTPAGE
)) {
printf("remove: ste was %x@%x pte was %x@%x\n",
*(int *)&opte
, pmap_pte(pmap
, va
));
* If it was a user PT page, we decrement the
* reference count on the segment table as well,
* freeing it if it is now empty.
if (ptpmap
!= kernel_pmap
) {
if (pmapdebug
& (PDB_REMOVE
|PDB_SEGTAB
))
printf("remove: stab %x, refcnt %d\n",
if ((pmapdebug
& PDB_PARANOIA
) &&
ptpmap
->pm_stab
!= (st_entry_t
*)trunc_page(ste
))
panic("remove: bogus ste");
if (--(ptpmap
->pm_sref
) == 0) {
if (pmapdebug
&(PDB_REMOVE
|PDB_SEGTAB
))
printf("remove: free stab %x\n",
(vm_offset_t
)ptpmap
->pm_stab
,
ptpmap
->pm_stab
= Segtabzero
;
ptpmap
->pm_stchanged
= TRUE
;
* XXX may have changed segment table
* pointer for current process so
* update now to reload hardware.
if (ptpmap
== curproc
->p_vmspace
->vm_map
.pmap
)
(struct pcb
*)curproc
->p_addr
, 1);
if (ptpmap
== kernel_pmap
)
pv
->pv_flags
&= ~PV_PTPAGE
;
* Update saved attributes for managed page
pmap_attributes
[pa_index(pa
)] |= bits
;
if (pmapvacflush
& PVF_REMOVE
) {
if (pmapvacflush
& PVF_TOTAL
)
else if (pmap
== kernel_pmap
)
if (pmap
== kernel_pmap
) {
* Lower the permission for all mappings to a given page.
pmap_page_protect(pa
, prot
)
if ((pmapdebug
& (PDB_FOLLOW
|PDB_PROTECT
)) ||
prot
== VM_PROT_NONE
&& (pmapdebug
& PDB_REMOVE
))
printf("pmap_page_protect(%x, %x)\n", pa
, prot
);
if (pa
< vm_first_phys
|| pa
>= vm_last_phys
)
case VM_PROT_READ
|VM_PROT_EXECUTE
:
pmap_changebit(pa
, PG_RO
, TRUE
);
while (pv
->pv_pmap
!= NULL
) {
if (!pmap_ste_v(pmap_ste(pv
->pv_pmap
,pv
->pv_va
)) ||
pmap_pte_pa(pmap_pte(pv
->pv_pmap
,pv
->pv_va
)) != pa
)
panic("pmap_page_protect: bad mapping");
pmap_remove(pv
->pv_pmap
, pv
->pv_va
,
* Set the physical protection on the
* specified range of this map as requested.
pmap_protect(pmap
, sva
, eva
, prot
)
register pt_entry_t
*pte
;
boolean_t firstpage
= TRUE
;
if (pmapdebug
& (PDB_FOLLOW
|PDB_PROTECT
))
printf("pmap_protect(%x, %x, %x, %x)\n", pmap
, sva
, eva
, prot
);
if ((prot
& VM_PROT_READ
) == VM_PROT_NONE
) {
pmap_remove(pmap
, sva
, eva
);
if (prot
& VM_PROT_WRITE
)
pte
= pmap_pte(pmap
, sva
);
hpprot
= pte_prot(pmap
, prot
) == PG_RO
? 1 : 0;
for (va
= sva
; va
< eva
; va
+= PAGE_SIZE
) {
* Page table page is not allocated.
* Skip it, we don't want to force allocation
* of unnecessary PTE pages just to set the protection.
if (!pmap_ste_v(pmap_ste(pmap
, va
))) {
/* XXX: avoid address wrap around */
if (va
>= hp300_trunc_seg((vm_offset_t
)-1))
va
= hp300_round_seg(va
+ PAGE_SIZE
) - PAGE_SIZE
;
pte
= pmap_pte(pmap
, va
);
* Page not valid. Again, skip it.
* Should we do this? Or set protection anyway?
* Flush VAC to ensure we get correct state of HW bits
* so we don't clobber them.
if (firstpage
&& pmap_aliasmask
) {
/* clear VAC here if PG_RO? */
pmap_pte_set_prot(pte
++, hpprot
);
TBIS(va
+ ix
* HP_PAGE_SIZE
);
} while (++ix
!= hppagesperpage
);
if (hpprot
&& (pmapvacflush
& PVF_PROTECT
)) {
if (pmapvacflush
& PVF_TOTAL
)
else if (pmap
== kernel_pmap
)
* Insert the given physical page (p) at
* the specified virtual address (v) in the
* target physical map with the protection requested.
* If specified, the page will be wired down, meaning
* that the related pte can not be reclaimed.
* NB: This is the only routine which MAY NOT lazy-evaluate
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
pmap_enter(pmap
, va
, pa
, prot
, wired
)
register pt_entry_t
*pte
;
boolean_t cacheable
= TRUE
;
boolean_t checkpv
= TRUE
;
if (pmapdebug
& (PDB_FOLLOW
|PDB_ENTER
))
printf("pmap_enter(%x, %x, %x, %x, %x)\n",
pmap
, va
, pa
, prot
, wired
);
* For user mapping, allocate kernel VM resources if necessary.
if (pmap
->pm_ptab
== NULL
)
pmap
->pm_ptab
= (pt_entry_t
*)
kmem_alloc_wait(pt_map
, HP_MAX_PTSIZE
);
* Segment table entry not valid, we need a new PT page
if (!pmap_ste_v(pmap_ste(pmap
, va
)))
pmap_enter_ptpage(pmap
, va
);
pte
= pmap_pte(pmap
, va
);
if (pmapdebug
& PDB_ENTER
)
printf("enter: pte %x, *pte %x\n", pte
, *(int *)pte
);
* Mapping has not changed, must be protection or wiring change.
* Wiring change, just update stats.
* We don't worry about wiring PT pages as they remain
* resident as long as there are valid mappings in them.
* Hence, if a user page is wired, the PT page will be also.
if (wired
&& !pmap_pte_w(pte
) || !wired
&& pmap_pte_w(pte
)) {
if (pmapdebug
& PDB_ENTER
)
printf("enter: wiring change -> %x\n", wired
);
pmap
->pm_stats
.wired_count
++;
pmap
->pm_stats
.wired_count
--;
* Retain cache inhibition status
* Mapping has changed, invalidate old range and fall through to
* handle validating new mapping.
if (pmapdebug
& PDB_ENTER
)
printf("enter: removing old mapping %x\n", va
);
pmap_remove(pmap
, va
, va
+ PAGE_SIZE
);
* If this is a new user mapping, increment the wiring count
* on this PT page. PT pages are wired down as long as there
* is a valid mapping in the page.
vm_map_pageable(pt_map
, trunc_page(pte
),
round_page(pte
+1), FALSE
);
* Enter on the PV list if part of our managed memory
* Note that we raise IPL while manipulating pv_table
* since pmap_enter can be called at interrupt time.
if (pa
>= vm_first_phys
&& pa
< vm_last_phys
) {
register pv_entry_t pv
, npv
;
if (pmapdebug
& PDB_ENTER
)
printf("enter: pv at %x: %x/%x/%x\n",
pv
, pv
->pv_va
, pv
->pv_pmap
, pv
->pv_next
);
* No entries yet, use header as the first entry
if (pv
->pv_pmap
== NULL
) {
* There is at least one other VA mapping this page.
* Place this entry after the header.
for (npv
= pv
; npv
; npv
= npv
->pv_next
)
if (pmap
== npv
->pv_pmap
&& va
== npv
->pv_va
)
panic("pmap_enter: already in pv_tab");
malloc(sizeof *npv
, M_VMPVENT
, M_NOWAIT
);
npv
->pv_next
= pv
->pv_next
;
* Since there is another logical mapping for the
* same page we may need to cache-inhibit the
* descriptors on those CPUs with external VACs.
* We don't need to CI if:
* - No two mappings belong to the same user pmaps.
* Since the cache is flushed on context switches
* there is no problem between user processes.
* - Mappings within a single pmap are a certain
* magic distance apart. VAs at these appropriate
* boundaries map to the same cache entries or
* otherwise don't conflict.
* To keep it simple, we only check for these special
* cases if there are only two mappings, otherwise we
* punt and always CI.
* Note that there are no aliasing problems with the
* on-chip data-cache when the WA bit is set.
if (pv
->pv_flags
& PV_CI
) {
if (pmapdebug
& PDB_CACHE
)
printf("enter: pa %x already CI'ed\n",
checkpv
= cacheable
= FALSE
;
} else if (npv
->pv_next
||
pv
->pv_pmap
== kernel_pmap
) &&
((pv
->pv_va
& pmap_aliasmask
) !=
(va
& pmap_aliasmask
)))) {
if (pmapdebug
& PDB_CACHE
)
printf("enter: pa %x CI'ing all\n",
* Assumption: if it is not part of our managed memory
* then it must be device memory which may be volatile.
else if (pmap_initialized
) {
checkpv
= cacheable
= FALSE
;
pmap
->pm_stats
.resident_count
++;
pmap
->pm_stats
.wired_count
++;
* Flush VAC to ensure we get correct state of HW bits
* so we don't clobber them.
* Now validate mapping with desired protection/wiring.
* Assume uniform modified and referenced status for all
* HP pages in a MACH page.
npte
= (pa
& PG_FRAME
) | pte_prot(pmap
, prot
) | PG_V
;
npte
|= (*(int *)pte
& (PG_M
|PG_U
));
if (!checkpv
&& !cacheable
)
if (pmapdebug
& PDB_ENTER
)
printf("enter: new pte value %x\n", npte
);
} while (++ix
!= hppagesperpage
);
* The following is executed if we are entering a second
* (or greater) mapping for a physical page and the mappings
* may create an aliasing problem. In this case we must
* cache inhibit the descriptors involved and flush any
* external VAC.
if (checkpv
&& !cacheable
) {
pmap_changebit(pa
, PG_CI
, TRUE
);
if ((pmapdebug
& (PDB_CACHE
|PDB_PVDUMP
)) ==
else if (pmapvacflush
& PVF_ENTER
) {
if (pmapvacflush
& PVF_TOTAL
)
else if (pmap
== kernel_pmap
)
if ((pmapdebug
& PDB_WIRING
) && pmap
!= kernel_pmap
) {
pmap_check_wiring("enter", trunc_page(pmap_pte(pmap
, va
)));
* Routine: pmap_change_wiring
* Function: Change the wiring attribute for a map/virtual-address
* pair.
* The mapping must already exist in the pmap.
pmap_change_wiring(pmap
, va
, wired
)
register pt_entry_t
*pte
;
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_change_wiring(%x, %x, %x)\n", pmap
, va
, wired
);
pte
= pmap_pte(pmap
, va
);
* Page table page is not allocated.
* Should this ever happen? Ignore it for now,
* we don't want to force allocation of unnecessary PTE pages.
if (!pmap_ste_v(pmap_ste(pmap
, va
))) {
if (pmapdebug
& PDB_PARANOIA
)
printf("pmap_change_wiring: invalid STE for %x\n", va
);
* Page not valid. Should this ever happen?
* Just continue and change wiring anyway.
if (pmapdebug
& PDB_PARANOIA
)
printf("pmap_change_wiring: invalid PTE for %x\n", va
);
if (wired
&& !pmap_pte_w(pte
) || !wired
&& pmap_pte_w(pte
)) {
pmap
->pm_stats
.wired_count
++;
pmap
->pm_stats
.wired_count
--;
* Wiring is not a hardware characteristic so there is no need
* to invalidate the TLB.
pmap_pte_set_w(pte
++, wired
);
} while (++ix
!= hppagesperpage
);
* Extract the physical page address associated
* with the given map/virtual_address pair.
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_extract(%x, %x) -> ", pmap
, va
);
if (pmap
&& pmap_ste_v(pmap_ste(pmap
, va
)))
pa
= *(int *)pmap_pte(pmap
, va
);
pa
= (pa
& PG_FRAME
) | (va
& ~PG_FRAME
);
if (pmapdebug
& PDB_FOLLOW
)
* Copy the range specified by src_addr/len
* from the source map to the range dst_addr/len
* in the destination map.
* This routine is only advisory and need not do anything.
void pmap_copy(dst_pmap
, src_pmap
, dst_addr
, len
, src_addr
)
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_copy(%x, %x, %x, %x, %x)\n",
dst_pmap
, src_pmap
, dst_addr
, len
, src_addr
);
* Require that all active physical maps contain no
* incorrect entries NOW. [This update includes
* forcing updates of any address map caching.]
* Generally used to insure that a thread about
* to run will see a semantically correct world.
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_update()\n");
* Garbage collects the physical map system for
* pages which are no longer used.
* Success need not be guaranteed -- that is, there
* may well be pages which are not referenced, but
* others may be collected.
* Called by the pageout daemon when pages are scarce.
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_collect(%x)\n", pmap
);
kpt_stats
.collectscans
++;
for (pa
= vm_first_phys
; pa
< vm_last_phys
; pa
+= PAGE_SIZE
) {
register struct kpt_page
*kpt
, **pkpt
;
* Locate physical pages which are being used as kernel
* page table pages.
if (pv
->pv_pmap
!= kernel_pmap
|| !(pv
->pv_flags
& PV_PTPAGE
))
if (pv
->pv_ptste
&& pv
->pv_ptpmap
== kernel_pmap
)
} while (pv
= pv
->pv_next
);
if (pv
->pv_va
< (vm_offset_t
)Sysmap
||
pv
->pv_va
>= (vm_offset_t
)Sysmap
+ HP_MAX_PTSIZE
)
printf("collect: kernel PT VA out of range\n");
pte
= (int *)(pv
->pv_va
+ HP_PAGE_SIZE
);
while (--pte
>= (int *)pv
->pv_va
&& *pte
== PG_NV
)
if (pte
>= (int *)pv
->pv_va
)
if (pmapdebug
& (PDB_PTPAGE
|PDB_COLLECT
)) {
printf("collect: freeing KPT page at %x (ste %x@%x)\n",
pv
->pv_va
, *(int *)pv
->pv_ptste
, pv
->pv_ptste
);
ste
= (int *)pv
->pv_ptste
;
* If all entries were invalid we can remove the page.
* We call pmap_remove to take care of invalidating ST
* and Sysptmap entries.
kpa
= pmap_extract(pmap
, pv
->pv_va
);
pmap_remove(pmap
, pv
->pv_va
, pv
->pv_va
+ HP_PAGE_SIZE
);
* Use the physical address to locate the original
* (kmem_alloc assigned) address for the page and put
* that page back on the free list.
for (pkpt
= &kpt_used_list
, kpt
= *pkpt
;
kpt
!= (struct kpt_page
*)0;
pkpt
= &kpt
->kpt_next
, kpt
= *pkpt
)
if (kpt
== (struct kpt_page
*)0)
panic("pmap_collect: lost a KPT page");
if (pmapdebug
& (PDB_PTPAGE
|PDB_COLLECT
))
printf("collect: %x (%x) to free list\n",
kpt
->kpt_next
= kpt_free_list
;
kpt_stats
.collectpages
++;
if (pmapdebug
& (PDB_PTPAGE
|PDB_COLLECT
))
printf("collect: kernel STE at %x still valid (%x)\n",
ste
= (int *)&Sysptmap
[(st_entry_t
*)ste
-pmap_ste(kernel_pmap
, 0)];
printf("collect: kernel PTmap at %x still valid (%x)\n",
pmap_activate(pmap
, pcbp
)
if (pmapdebug
& (PDB_FOLLOW
|PDB_SEGTAB
))
printf("pmap_activate(%x, %x)\n", pmap
, pcbp
);
PMAP_ACTIVATE(pmap
, pcbp
, pmap
== curproc
->p_vmspace
->vm_map
.pmap
);
* pmap_zero_page zeros the specified (machine independent)
* page by mapping the page into virtual memory and using
* bzero to clear its contents, one machine dependent page
* at a time.
register vm_offset_t phys
;
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_zero_page(%x)\n", phys
);
} while (++ix
!= hppagesperpage
);
* pmap_copy_page copies the specified (machine independent)
* page by mapping the page into virtual memory and using
* bcopy to copy the page, one machine dependent page at a
* time.
register vm_offset_t src
, dst
;
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_copy_page(%x, %x)\n", src
, dst
);
physcopyseg(src
++, dst
++);
} while (++ix
!= hppagesperpage
);
* Make the specified pages (by pmap, offset)
* pageable (or not) as requested.
* A page which is not pageable may not take
* a fault; therefore, its page table entry
* must remain valid for the duration.
* This routine is merely advisory; pmap_enter
* will specify that these pages are to be wired
* down (or not) as appropriate.
pmap_pageable(pmap
, sva
, eva
, pageable
)
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_pageable(%x, %x, %x, %x)\n",
pmap
, sva
, eva
, pageable
);
* If we are making a PT page pageable then all valid
* mappings must be gone from that page. Hence it should
* be all zeros and there is no need to clean it.
* - we are called with only one page at a time
* - PT pages have only one pv_table entry
if (pmap
== kernel_pmap
&& pageable
&& sva
+ PAGE_SIZE
== eva
) {
if ((pmapdebug
& (PDB_FOLLOW
|PDB_PTPAGE
)) == PDB_PTPAGE
)
printf("pmap_pageable(%x, %x, %x, %x)\n",
pmap
, sva
, eva
, pageable
);
if (!pmap_ste_v(pmap_ste(pmap
, sva
)))
pa
= pmap_pte_pa(pmap_pte(pmap
, sva
));
if (pa
< vm_first_phys
|| pa
>= vm_last_phys
)
if (pv
->pv_ptste
== NULL
)
if (pv
->pv_va
!= sva
|| pv
->pv_next
) {
printf("pmap_pageable: bad PT page va %x next %x\n",
* Mark it unmodified to avoid pageout
pmap_changebit(pa
, PG_M
, FALSE
);
if (pmapdebug
& PDB_PTPAGE
)
printf("pmap_pageable: PT page %x(%x) unmodified\n",
sva
, *(int *)pmap_pte(pmap
, sva
));
if (pmapdebug
& PDB_WIRING
)
pmap_check_wiring("pageable", sva
);
* Clear the modify bits on the specified physical page.
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_clear_modify(%x)\n", pa
);
pmap_changebit(pa
, PG_M
, FALSE
);
/*
 * pmap_clear_reference(pa)
 *
 * Traces the call under PDB_FOLLOW, then calls
 * pmap_changebit(pa, PG_U, FALSE) to clear PG_U for physical page pa.
 * Returns nothing (declared void).
 * NOTE(review): the declaration of pa and the enclosing braces are not
 * present in this text.
 */
* Clear the reference bit on the specified physical page.
void pmap_clear_reference(pa
)
if (pmapdebug
& PDB_FOLLOW
)
printf("pmap_clear_reference(%x)\n", pa
);
pmap_changebit(pa
, PG_U
, FALSE
);
/*
 * Body fragment of pmap_is_referenced (name taken from the trace message
 * below).  Returns the result of pmap_testbit(pa, PG_U).
 * NOTE(review): the function header is not present in this text, and
 * the debug block opened below is never visibly closed.
 */
* Return whether or not the specified physical page is referenced
/*
 * Debug path: evaluate the test once and print it as 'T' or 'F'
 * ("FT"[rv] selects 'F' for 0 and 'T' for 1).
 */
if (pmapdebug
& PDB_FOLLOW
) {
boolean_t rv
= pmap_testbit(pa
, PG_U
);
printf("pmap_is_referenced(%x) -> %c\n", pa
, "FT"[rv
]);
/* Normal path: report the state of PG_U for pa. */
return(pmap_testbit(pa
, PG_U
));
/*
 * Body fragment of pmap_is_modified (name taken from the trace message
 * below).  Returns the result of pmap_testbit(pa, PG_M).
 * NOTE(review): the function header is not present in this text, and
 * the debug block opened below is never visibly closed.
 */
* Return whether or not the specified physical page is modified
/*
 * Debug path: evaluate the test once and print it as 'T' or 'F'
 * ("FT"[rv] selects 'F' for 0 and 'T' for 1).
 */
if (pmapdebug
& PDB_FOLLOW
) {
boolean_t rv
= pmap_testbit(pa
, PG_M
);
printf("pmap_is_modified(%x) -> %c\n", pa
, "FT"[rv
]);
/* Normal path: report the state of PG_M for pa. */
return(pmap_testbit(pa
, PG_M
));
* Miscellaneous support routines follow
/*
 * Loop over the eight values 0..7 of prot.  The eight case labels that
 * follow each name one combination of the READ, WRITE and EXECUTE
 * protection flags (VM_PROT_NONE stands in for an absent flag).
 * NOTE(review): the enclosing function header (presumably the
 * protection-code initialisation routine), the switch statement itself
 * and the statements executed for each group of cases are not present
 * in this text, so what each combination maps to cannot be stated here.
 */
for (prot
= 0; prot
< 8; prot
++) {
/* No access. */
case VM_PROT_NONE
| VM_PROT_NONE
| VM_PROT_NONE
:
/* Combinations without VM_PROT_WRITE. */
case VM_PROT_READ
| VM_PROT_NONE
| VM_PROT_NONE
:
case VM_PROT_READ
| VM_PROT_NONE
| VM_PROT_EXECUTE
:
case VM_PROT_NONE
| VM_PROT_NONE
| VM_PROT_EXECUTE
:
/* Combinations that include VM_PROT_WRITE. */
case VM_PROT_NONE
| VM_PROT_WRITE
| VM_PROT_NONE
:
case VM_PROT_NONE
| VM_PROT_WRITE
| VM_PROT_EXECUTE
:
case VM_PROT_READ
| VM_PROT_WRITE
| VM_PROT_NONE
:
case VM_PROT_READ
| VM_PROT_WRITE
| VM_PROT_EXECUTE
:
/*
 * Body fragment of a routine that tests "bit" for physical page pa
 * (presumably pmap_testbit, which the pmap_is_* routines call; the
 * header is not present in this text).
 *
 * Visible steps, in order:
 *   1. range test of pa against [vm_first_phys, vm_last_phys);
 *   2. test of the saved attributes in pmap_attributes[pa_index(pa)];
 *   3. a condition guarding a VAC flush when pmap_aliasmask is non-zero
 *      and bit includes PG_U or PG_M;
 *   4. a walk of the pv list, fetching the PTE of every mapping.
 * NOTE(review): the bodies of these conditions, the return statements,
 * the assignment of pv and the "do {" matching the final "while" are
 * missing here.
 */
if (pa
< vm_first_phys
|| pa
>= vm_last_phys
)
/* Saved attribute bits for this page already contain the bit. */
if (pmap_attributes
[pa_index(pa
)] & bit
) {
* Flush VAC to get correct state of any hardware maintained bits.
if (pmap_aliasmask
&& (bit
& (PG_U
|PG_M
)))
* Not found, check current mappings returning
/* A NULL pv_pmap in the first entry means there are no mappings to walk. */
if (pv
->pv_pmap
!= NULL
) {
for (; pv
; pv
= pv
->pv_next
) {
/* PTE for this mapping, viewed as an int pointer. */
pte
= (int *) pmap_pte(pv
->pv_pmap
, pv
->pv_va
);
/* Repeats until ix has counted hppagesperpage iterations. */
} while (++ix
!= hppagesperpage
);
/*
 * pmap_changebit(pa, bit, setem)
 *
 * Sets (setem true) or clears (setem false) "bit" for physical page pa;
 * the trace message below prints "set" or "clear" accordingly.
 *
 * Visible steps, in order:
 *   1. range test of pa against [vm_first_phys, vm_last_phys);
 *   2. clearing of "bit" in the saved attributes
 *      pmap_attributes[pa_index(pa)];
 *   3. a walk of the pv list, fetching the PTE of each mapping and
 *      recording in toflush which kinds of pmap were seen;
 *   4. a final condition on PG_RO / pmapvacflush guarding VAC flushing.
 * NOTE(review): the parameter declarations, the assignments of pv, va
 * and npte, the PTE update itself, the bodies of several conditions and
 * the "do {" matching the "while" are missing from this text.
 */
pmap_changebit(pa
, bit
, setem
)
register int *pte
, npte
, ix
;
boolean_t firstpage
= TRUE
;
/* Trace the request when PDB_BITS is enabled. */
if (pmapdebug
& PDB_BITS
)
printf("pmap_changebit(%x, %x, %s)\n",
pa
, bit
, setem
? "set" : "clear");
if (pa
< vm_first_phys
|| pa
>= vm_last_phys
)
* Clear saved attributes (modify, reference)
/* As written here, this clear is not conditional on setem. */
pmap_attributes
[pa_index(pa
)] &= ~bit
;
* Loop over all current mappings setting/clearing as appropos
* If setting RO do we need to clear the VAC?
if (pv
->pv_pmap
!= NULL
) {
for (; pv
; pv
= pv
->pv_next
) {
/*
 * Accumulate 2 for a kernel_pmap mapping and 1 for any other pmap;
 * toflush == 3 therefore means both kinds were encountered.
 */
toflush
|= (pv
->pv_pmap
== kernel_pmap
) ? 2 : 1;
* XXX don't write protect pager mappings
extern vm_offset_t pager_sva
, pager_eva
;
/* Test whether va lies in [pager_sva, pager_eva) (body not visible). */
if (va
>= pager_sva
&& va
< pager_eva
)
pte
= (int *) pmap_pte(pv
->pv_pmap
, va
);
* Flush VAC to ensure we get correct state of HW bits
* so we don't clobber them.
/* Guarded by firstpage, which starts out TRUE. */
if (firstpage
&& pmap_aliasmask
) {
} while (++ix
!= hppagesperpage
);
/* Only when PG_RO was just set and PVF_PROTECT is enabled in pmapvacflush. */
if (setem
&& bit
== PG_RO
&& (pmapvacflush
& PVF_PROTECT
)) {
/* PVF_TOTAL requested, or both kernel and non-kernel mappings were seen. */
if ((pmapvacflush
& PVF_TOTAL
) || toflush
== 3)
/*
 * pmap_enter_ptpage(pmap, va)
 *
 * Provides a page-table (PT) page for address va in pmap and installs
 * it in the segment table.  Visible steps, in order:
 *   1. if pmap still uses the shared Segtabzero, allocate a private
 *      segment table from kernel_map and, when pmap belongs to curproc,
 *      re-activate it;
 *   2. locate the segment table entry (ste) for va, then replace va by
 *      the page-truncated address of its PTE;
 *   3. kernel_pmap: take a kpt_page from kpt_free_list (calling
 *      pmap_collect() once if the list is empty, panicking if it is
 *      still empty), zero it and map it with pmap_enter();
 *      other pmaps: fault the page in through pt_map with vm_fault()
 *      and obtain its physical address with pmap_extract();
 *   4. search the pv list for the kernel mapping of the PT page and flag
 *      it PV_PTPAGE;
 *   5. write the segment table entry.
 * NOTE(review): parameter declarations, several braces, the "else"
 * separating the kernel and user paths, the assignments of ptpa and pv,
 * and the bodies of some conditions are missing from this text.
 */
pmap_enter_ptpage(pmap
, va
)
register vm_offset_t ptpa
;
if (pmapdebug
& (PDB_FOLLOW
|PDB_ENTER
|PDB_PTPAGE
))
printf("pmap_enter_ptpage: pmap %x, va %x\n", pmap
, va
);
* Allocate a segment table if necessary. Note that it is allocated
* from kernel_map and not pt_map. This keeps user page tables
* aligned on segment boundaries in the kernel address space.
* The segment table is wired down. It will be freed whenever the
* reference count drops to zero.
/* pm_stab == Segtabzero means no private segment table exists yet. */
if (pmap
->pm_stab
== Segtabzero
) {
/* NOTE(review): no check of the kmem_alloc() result is visible here. */
pmap
->pm_stab
= (st_entry_t
*)
kmem_alloc(kernel_map
, HP_STSIZE
);
pmap
->pm_stchanged
= TRUE
;
* XXX may have changed segment table pointer for current
* process so update now to reload hardware.
if (pmap
== curproc
->p_vmspace
->vm_map
.pmap
)
PMAP_ACTIVATE(pmap
, (struct pcb
*)curproc
->p_addr
, 1);
if (pmapdebug
& (PDB_ENTER
|PDB_PTPAGE
|PDB_SEGTAB
))
printf("enter: pmap %x stab %x\n",
/* Segment table entry for the original va. */
ste
= pmap_ste(pmap
, va
);
/* From here on va is the page-truncated address of the PTE for the original va. */
va
= trunc_page((vm_offset_t
)pmap_pte(pmap
, va
));
* In the kernel we allocate a page from the kernel PT page
* free list and map it into the kernel page table map (via
if (pmap
== kernel_pmap
) {
register struct kpt_page
*kpt
;
/* Free list empty: try pmap_collect() once before giving up. */
if ((kpt
= kpt_free_list
) == (struct kpt_page
*)0) {
* Try once to free up unused ones.
if (pmapdebug
& PDB_COLLECT
)
printf("enter: no KPT pages, collecting...\n");
pmap_collect(kernel_pmap
);
if ((kpt
= kpt_free_list
) == (struct kpt_page
*)0)
panic("pmap_enter_ptpage: can't get KPT page");
/* Count the page as in use and keep kptmaxuse as the high-water mark. */
if (++kpt_stats
.kptinuse
> kpt_stats
.kptmaxuse
)
kpt_stats
.kptmaxuse
= kpt_stats
.kptinuse
;
/* Unlink kpt from the free list and chain it in front of the used list. */
kpt_free_list
= kpt
->kpt_next
;
kpt
->kpt_next
= kpt_used_list
;
/* Zero the new PT page before it is mapped. */
bzero(kpt
->kpt_va
, HP_PAGE_SIZE
);
/* Map the PT page at va in pmap; the final argument is TRUE. */
pmap_enter(pmap
, va
, ptpa
, VM_PROT_DEFAULT
, TRUE
);
if (pmapdebug
& (PDB_ENTER
|PDB_PTPAGE
))
printf("enter: add &Sysptmap[%d]: %x (KPT page %x)\n",
*(int *)&Sysptmap
[ste
- pmap_ste(pmap
, 0)],
* For user processes we just simulate a fault on that location
* letting the VM system allocate a zero-filled page.
if (pmapdebug
& (PDB_ENTER
|PDB_PTPAGE
))
printf("enter: about to fault UPT pg at %x\n", va
);
/*
 * NOTE(review): the parentheses of this condition are unbalanced as it
 * stands; a comparison of the vm_fault() result appears to be missing.
 * The panic text says "pmap_enter" although this is pmap_enter_ptpage.
 */
if (vm_fault(pt_map
, va
, VM_PROT_READ
|VM_PROT_WRITE
, FALSE
)
panic("pmap_enter: vm_fault failed");
/* Physical address of the page that was just faulted in at va. */
ptpa
= pmap_extract(kernel_pmap
, va
);
PHYS_TO_VM_PAGE(ptpa
)->ptpage
= TRUE
;
* Locate the PV entry in the kernel for this PT page and
* record the STE address. This is so that we can invalidate
* the STE when we remove the mapping for the page.
pv
->pv_flags
|= PV_PTPAGE
;
/* Looking for the kernel_pmap mapping of the PT page at va. */
if (pv
->pv_pmap
== kernel_pmap
&& pv
->pv_va
== va
)
/* Assignment in the condition is intentional: advance, stop at list end. */
} while (pv
= pv
->pv_next
);
panic("pmap_enter_ptpage: PT page not entered");
if (pmapdebug
& (PDB_ENTER
|PDB_PTPAGE
))
printf("enter: new PT page at PA %x, ste at %x\n", ptpa
, ste
);
* Map the new PT page into the segment table.
* Also increment the reference count on the segment table if this
* was a user page table page. Note that we don't use vm_map_pageable
* to keep the count like we do for PT pages, this is mostly because
* it would be difficult to identify ST pages in pmap_pageable to
* release them. We also avoid the overhead of vm_map_pageable.
/* STE = SG_FRAME bits of the PT page's physical address, plus SG_RW and SG_V. */
*(int *)ste
= (ptpa
& SG_FRAME
) | SG_RW
| SG_V
;
/*
 * NOTE(review): only the debug print of pm_sref is visible for
 * non-kernel pmaps; the increment mentioned above is not in this text.
 */
if (pmap
!= kernel_pmap
) {
if (pmapdebug
& (PDB_ENTER
|PDB_PTPAGE
|PDB_SEGTAB
))
printf("enter: stab %x refcnt %d\n",
pmap
->pm_stab
, pmap
->pm_sref
);
/*
 * Body fragment of a debugging dump routine (presumably pmap_pvdump;
 * the header is not present in this text).  Walks the pv list that
 * pa_to_pvh(pa) returns and prints the pv_pmap, pv_va, pv_ptste and
 * pv_ptpmap fields of every entry.
 * NOTE(review): the format string also expects a "flags" value, but the
 * printf argument list is cut off after pv_ptpmap.
 */
for (pv
= pa_to_pvh(pa
); pv
; pv
= pv
->pv_next
)
printf(" -> pmap %x, va %x, ptste %x, ptpmap %x, flags %x",
pv
->pv_pmap
, pv
->pv_va
, pv
->pv_ptste
, pv
->pv_ptpmap
,
/*
 * pmap_check_wiring(str, va)
 *
 * Debugging aid: compares the wired_count of the pt_map entry that
 * contains va with a locally computed count, and prints a line tagged
 * with str when they differ ("w" = wired_count, "a" = count).
 *
 * NOTE(review): the parameter declarations, the declaration of entry,
 * the initialisation of count, the body of the scanning loop and the
 * bodies of the early-exit conditions are missing from this text.
 */
pmap_check_wiring(str
, va
)
register int count
, *pte
;
/* Test for va lacking a valid STE or a valid PTE in kernel_pmap (body not visible). */
if (!pmap_ste_v(pmap_ste(kernel_pmap
, va
)) ||
!pmap_pte_v(pmap_pte(kernel_pmap
, va
)))
/* Look up the pt_map entry containing va; complain if there is none. */
if (!vm_map_lookup_entry(pt_map
, va
, &entry
)) {
printf("wired_check: entry for %x not found\n", va
);
/* Visit every int-sized slot in the page [va, va + PAGE_SIZE). */
for (pte
= (int *)va
; pte
< (int *)(va
+PAGE_SIZE
); pte
++)
/* Report a mismatch between the map's wired count and count. */
if (entry
->wired_count
!= count
)
printf("*%s*: %x: w%d/a%d\n",
str
, va
, entry
->wired_count
, count
);