PV i386 patch
Alan Cox
alc at rice.edu
Wed Dec 21 20:47:19 UTC 2011
Can you please try the attached patch? I'm trying to reduce the number
of differences between the native and Xen pmap implementations.
Alan
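
In case it helps while reading the diff: the structural difference the patch
deliberately leaves alone is that the native pmap writes page-table entries
directly, whereas the Xen PV pmap has to queue updates by machine address and
let the hypervisor apply them (the xen_queue_pt_update(), PT_SET_MA(), and
HYPERVISOR_multicall() calls in the hunks below). The sketch that follows only
illustrates that pattern; the types and the queue/flush helpers are stand-ins,
not the real FreeBSD or Xen interfaces.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t pt_entry_t;    /* stand-in for the kernel's pt_entry_t */
typedef uint64_t vm_paddr_t;    /* stand-in for vm_paddr_t */

/* Native i386: a PTE is ordinary writable memory, so just store to it. */
static void
native_pte_store(pt_entry_t *pte, pt_entry_t newpte)
{
        *pte = newpte;          /* direct write; TLB invalidation is separate */
}

/*
 * Xen PV: PTE pages are read-only to the guest.  Updates are queued by
 * machine address and applied by the hypervisor in one batched call.
 * queue_pt_update()/flush_queue() are printf stubs standing in for
 * xen_queue_pt_update() and HYPERVISOR_multicall().
 */
#define QUEUE_MAX       16
static struct { vm_paddr_t ma; pt_entry_t val; } upd_queue[QUEUE_MAX];
static int upd_count;

static void
flush_queue(void)
{
        if (upd_count == 0)
                return;
        printf("hypercall: apply %d queued page-table updates\n", upd_count);
        upd_count = 0;
}

static void
queue_pt_update(vm_paddr_t pte_ma, pt_entry_t newval)
{
        upd_queue[upd_count].ma = pte_ma;
        upd_queue[upd_count].val = newval;
        if (++upd_count == QUEUE_MAX)   /* batch is full, flush it now */
                flush_queue();
}

int
main(void)
{
        pt_entry_t fake_pte = 0;

        native_pte_store(&fake_pte, 0x1000 | 0x1);      /* direct store */
        queue_pt_update(0x200000, 0x1000 | 0x1);        /* queued for the HV */
        flush_queue();          /* analogue of PT_UPDATES_FLUSH() */
        return (0);
}

In the real code the explicit flush corresponds to PT_UPDATES_FLUSH(), and the
batch size of 16 mirrors the multicall_entry_t mcl[16] array that pmap_qenter()
fills and hands to HYPERVISOR_multicall() in the diff below.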
-------------- next part --------------
--- i386/i386/pmap.c 2011-12-21 13:04:17.000000000 -0600
+++ i386/xen/pmap.c 2011-12-21 13:04:48.000000000 -0600
@@ -5,7 +5,7 @@
* All rights reserved.
* Copyright (c) 1994 David Greenman
* All rights reserved.
- * Copyright (c) 2005-2010 Alan L. Cox <alc at cs.rice.edu>
+ * Copyright (c) 2005 Alan L. Cox <alc at cs.rice.edu>
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -75,7 +75,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/i386/i386/pmap.c 228513 2011-12-14 23:57:47Z alc $");
+__FBSDID("$FreeBSD: head/sys/i386/xen/pmap.c 228746 2011-12-20 20:16:12Z alc $");
/*
* Manages physical address maps.
@@ -138,7 +138,6 @@
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
-#include <vm/vm_reserv.h>
#include <vm/uma.h>
#include <machine/cpu.h>
@@ -154,6 +153,12 @@
#include <machine/xbox.h>
#endif
+#include <xen/interface/xen.h>
+#include <xen/hypervisor.h>
+#include <machine/xen/hypercall.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenfunc.h>
+
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
@@ -162,6 +167,8 @@
#define PMAP_SHPGPERPROC 200
#endif
+#define DIAGNOSTIC
+
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
@@ -179,9 +186,6 @@
#define PV_STAT(x) do { } while (0)
#endif
-#define pa_index(pa) ((pa) >> PDRSHIFT)
-#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
-
/*
* Get PDEs and PTEs for user/kernel address space
*/
@@ -194,10 +198,13 @@
#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0)
#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0)
-#define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
- atomic_clear_int((u_int *)(pte), PG_W))
#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
+#define HAMFISTED_LOCKING
+#ifdef HAMFISTED_LOCKING
+static struct mtx createdelete_lock;
+#endif
+
struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
@@ -208,34 +215,22 @@
int pgeflag = 0; /* PG_G or-in */
int pseflag = 0; /* PG_PS or-in */
-static int nkpt = NKPT;
-vm_offset_t kernel_vm_end = KERNBASE + NKPT * NBPDR;
+int nkpt;
+vm_offset_t kernel_vm_end;
extern u_int32_t KERNend;
-extern u_int32_t KPTphys;
#ifdef PAE
pt_entry_t pg_nx;
-static uma_zone_t pdptzone;
#endif
static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
-static int pat_works = 1;
-SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1,
- "Is page attribute table fully functional?");
-
-static int pg_ps_enabled = 1;
-SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
- "Are large page mappings enabled?");
-
-#define PAT_INDEX_SIZE 8
-static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */
+static int pat_works; /* Is page attribute table sane? */
/*
* Data for the pv entry allocation mechanism
*/
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
-static struct md_page *pv_table;
static int shpgperproc = PMAP_SHPGPERPROC;
struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */
@@ -253,10 +248,8 @@
caddr_t CADDR2;
};
static struct sysmaps sysmaps_pcpu[MAXCPU];
-pt_entry_t *CMAP1 = 0;
static pt_entry_t *CMAP3;
-static pd_entry_t *KPTD;
-caddr_t CADDR1 = 0, ptvmmap = 0;
+caddr_t ptvmmap = 0;
static caddr_t CADDR3;
struct msgbuf *msgbufp = 0;
@@ -286,47 +279,22 @@
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
-static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
-static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
-static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
vm_offset_t va);
-static int pmap_pvh_wired_mappings(struct md_page *pvh, int count);
-static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
-static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot);
-static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
+static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_flush_page(vm_page_t m);
-static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
-static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
-static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
-static boolean_t pmap_is_referenced_pvh(struct md_page *pvh);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
-static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
-static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
-static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
-static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
-static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
- vm_prot_t prot);
-static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
-static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- vm_page_t *free);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
vm_page_t *free);
-static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
vm_page_t *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
vm_offset_t va);
-static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
vm_page_t m);
-static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
- pd_entry_t newpde);
-static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
@@ -335,10 +303,7 @@
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
-#ifdef PAE
-static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
-#endif
-static void pmap_set_pg(void);
+static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
static __inline void pagezero(void *page);
@@ -352,6 +317,36 @@
*/
CTASSERT(KERNBASE % (1 << 24) == 0);
+void
+pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
+{
+ vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]);
+
+ switch (type) {
+ case SH_PD_SET_VA:
+#if 0
+ xen_queue_pt_update(shadow_pdir_ma,
+ xpmap_ptom(val & ~(PG_RW)));
+#endif
+ xen_queue_pt_update(pdir_ma,
+ xpmap_ptom(val));
+ break;
+ case SH_PD_SET_VA_MA:
+#if 0
+ xen_queue_pt_update(shadow_pdir_ma,
+ val & ~(PG_RW));
+#endif
+ xen_queue_pt_update(pdir_ma, val);
+ break;
+ case SH_PD_SET_VA_CLEAR:
+#if 0
+ xen_queue_pt_update(shadow_pdir_ma, 0);
+#endif
+ xen_queue_pt_update(pdir_ma, 0);
+ break;
+ }
+}
+
/*
* Bootstrap the system enough to run with virtual memory.
*
@@ -389,21 +384,15 @@
#ifdef PAE
kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
- kernel_pmap->pm_root = NULL;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
LIST_INIT(&allpmaps);
-
- /*
- * Request a spin mutex so that changes to allpmaps cannot be
- * preempted by smp_rendezvous_cpus(). Otherwise,
- * pmap_update_pde_kernel() could access allpmaps while it is
- * being changed.
- */
mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
mtx_lock_spin(&allpmaps_lock);
LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
mtx_unlock_spin(&allpmaps_lock);
+ if (nkpt == 0)
+ nkpt = NKPT;
/*
* Reserve some special page table entries/VA space for temporary
@@ -424,9 +413,11 @@
mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
+ PT_SET_MA(sysmaps->CADDR1, 0);
+ PT_SET_MA(sysmaps->CADDR2, 0);
}
- SYSMAP(caddr_t, CMAP1, CADDR1, 1)
SYSMAP(caddr_t, CMAP3, CADDR3, 1)
+ PT_SET_MA(CADDR3, 0);
/*
* Crashdump maps.
@@ -444,25 +435,6 @@
SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize)))
/*
- * KPTmap is used by pmap_kextract().
- *
- * KPTmap is first initialized by locore. However, that initial
- * KPTmap can only support NKPT page table pages. Here, a larger
- * KPTmap is created that can support KVA_PAGES page table pages.
- */
- SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES)
-
- for (i = 0; i < NKPT; i++)
- KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V;
-
- /*
- * Adjust the start of the KPTD and KPTmap so that the implementation
- * of pmap_kextract() and pmap_growkernel() can be made simpler.
- */
- KPTD -= KPTDI;
- KPTmap -= i386_btop(KPTDI << PDRSHIFT);
-
- /*
* ptemap is used for pmap_pte_quick
*/
SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
@@ -477,6 +449,10 @@
* physical memory region that is used by the ACPI wakeup code. This
* mapping must not have PG_G set.
*/
+#ifndef XEN
+ /*
+ * leave here deliberately to show that this is not supported
+ */
#ifdef XBOX
/* FIXME: This is gross, but needed for the XBOX. Since we are in such
* an early stadium, we cannot yet neatly map video memory ... :-(
@@ -491,6 +467,11 @@
/* Turn on PG_G on kernel page(s) */
pmap_set_pg();
+#endif
+
+#ifdef HAMFISTED_LOCKING
+ mtx_init(&createdelete_lock, "pmap create/delete", NULL, MTX_DEF);
+#endif
}
/*
@@ -499,132 +480,43 @@
void
pmap_init_pat(void)
{
- int pat_table[PAT_INDEX_SIZE];
uint64_t pat_msr;
- u_long cr0, cr4;
- int i;
-
- /* Set default PAT index table. */
- for (i = 0; i < PAT_INDEX_SIZE; i++)
- pat_table[i] = -1;
- pat_table[PAT_WRITE_BACK] = 0;
- pat_table[PAT_WRITE_THROUGH] = 1;
- pat_table[PAT_UNCACHEABLE] = 3;
- pat_table[PAT_WRITE_COMBINING] = 3;
- pat_table[PAT_WRITE_PROTECTED] = 3;
- pat_table[PAT_UNCACHED] = 3;
/* Bail if this CPU doesn't implement PAT. */
- if ((cpu_feature & CPUID_PAT) == 0) {
- for (i = 0; i < PAT_INDEX_SIZE; i++)
- pat_index[i] = pat_table[i];
- pat_works = 0;
+ if (!(cpu_feature & CPUID_PAT))
return;
- }
-
- /*
- * Due to some Intel errata, we can only safely use the lower 4
- * PAT entries.
- *
- * Intel Pentium III Processor Specification Update
- * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
- * or Mode C Paging)
- *
- * Intel Pentium IV Processor Specification Update
- * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
- */
- if (cpu_vendor_id == CPU_VENDOR_INTEL &&
- !(CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe))
- pat_works = 0;
- /* Initialize default PAT entries. */
- pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) |
- PAT_VALUE(1, PAT_WRITE_THROUGH) |
- PAT_VALUE(2, PAT_UNCACHED) |
- PAT_VALUE(3, PAT_UNCACHEABLE) |
- PAT_VALUE(4, PAT_WRITE_BACK) |
- PAT_VALUE(5, PAT_WRITE_THROUGH) |
- PAT_VALUE(6, PAT_UNCACHED) |
- PAT_VALUE(7, PAT_UNCACHEABLE);
-
- if (pat_works) {
+ if (cpu_vendor_id != CPU_VENDOR_INTEL ||
+ (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) {
/*
- * Leave the indices 0-3 at the default of WB, WT, UC-, and UC.
- * Program 5 and 6 as WP and WC.
- * Leave 4 and 7 as WB and UC.
+ * Leave the indices 0-3 at the default of WB, WT, UC, and UC-.
+ * Program 4 and 5 as WP and WC.
+ * Leave 6 and 7 as UC and UC-.
*/
- pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6));
- pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) |
- PAT_VALUE(6, PAT_WRITE_COMBINING);
- pat_table[PAT_UNCACHED] = 2;
- pat_table[PAT_WRITE_PROTECTED] = 5;
- pat_table[PAT_WRITE_COMBINING] = 6;
+ pat_msr = rdmsr(MSR_PAT);
+ pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
+ pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
+ PAT_VALUE(5, PAT_WRITE_COMBINING);
+ pat_works = 1;
} else {
/*
- * Just replace PAT Index 2 with WC instead of UC-.
+ * Due to some Intel errata, we can only safely use the lower 4
+ * PAT entries. Thus, just replace PAT Index 2 with WC instead
+ * of UC-.
+ *
+ * Intel Pentium III Processor Specification Update
+ * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
+ * or Mode C Paging)
+ *
+ * Intel Pentium IV Processor Specification Update
+ * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
*/
+ pat_msr = rdmsr(MSR_PAT);
pat_msr &= ~PAT_MASK(2);
pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
- pat_table[PAT_WRITE_COMBINING] = 2;
+ pat_works = 0;
}
-
- /* Disable PGE. */
- cr4 = rcr4();
- load_cr4(cr4 & ~CR4_PGE);
-
- /* Disable caches (CD = 1, NW = 0). */
- cr0 = rcr0();
- load_cr0((cr0 & ~CR0_NW) | CR0_CD);
-
- /* Flushes caches and TLBs. */
- wbinvd();
- invltlb();
-
- /* Update PAT and index table. */
wrmsr(MSR_PAT, pat_msr);
- for (i = 0; i < PAT_INDEX_SIZE; i++)
- pat_index[i] = pat_table[i];
-
- /* Flush caches and TLBs again. */
- wbinvd();
- invltlb();
-
- /* Restore caches and PGE. */
- load_cr0(cr0);
- load_cr4(cr4);
-}
-
-/*
- * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on.
- */
-static void
-pmap_set_pg(void)
-{
- pt_entry_t *pte;
- vm_offset_t va, endva;
-
- if (pgeflag == 0)
- return;
-
- endva = KERNBASE + KERNend;
-
- if (pseflag) {
- va = KERNBASE + KERNLOAD;
- while (va < endva) {
- pdir_pde(PTD, va) |= pgeflag;
- invltlb(); /* Play it safe, invltlb() every time */
- va += NBPDR;
- }
- } else {
- va = (vm_offset_t)btext;
- while (va < endva) {
- pte = vtopte(va);
- if (*pte)
- *pte |= pgeflag;
- invltlb(); /* Play it safe, invltlb() every time */
- va += PAGE_SIZE;
- }
- }
}
/*
@@ -638,18 +530,6 @@
m->md.pat_mode = PAT_WRITE_BACK;
}
-#ifdef PAE
-static void *
-pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
-{
-
- /* Inform UMA that this allocator uses kernel_map/object. */
- *flags = UMA_SLAB_KERNEL;
- return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL,
- 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT));
-}
-#endif
-
/*
* ABuse the pte nodes for unmapped kva to thread a kva freelist through.
* Requirements:
@@ -662,43 +542,48 @@
* - Assumes a vm_offset_t will fit in a pte (true for i386).
* Because PG_V is never set, there can be no mappings to invalidate.
*/
+static int ptelist_count = 0;
static vm_offset_t
pmap_ptelist_alloc(vm_offset_t *head)
{
- pt_entry_t *pte;
vm_offset_t va;
-
- va = *head;
- if (va == 0)
- return (va); /* Out of memory */
- pte = vtopte(va);
- *head = *pte;
- if (*head & PG_V)
- panic("pmap_ptelist_alloc: va with PG_V set!");
- *pte = 0;
+ vm_offset_t *phead = (vm_offset_t *)*head;
+
+ if (ptelist_count == 0) {
+ printf("out of memory!!!!!!\n");
+ return (0); /* Out of memory */
+ }
+ ptelist_count--;
+ va = phead[ptelist_count];
return (va);
}
static void
pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
{
- pt_entry_t *pte;
+ vm_offset_t *phead = (vm_offset_t *)*head;
- if (va & PG_V)
- panic("pmap_ptelist_free: freeing va with PG_V set!");
- pte = vtopte(va);
- *pte = *head; /* virtual! PG_V is 0 though */
- *head = va;
+ phead[ptelist_count++] = va;
}
static void
pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
{
- int i;
+ int i, nstackpages;
vm_offset_t va;
+ vm_page_t m;
+
+ nstackpages = (npages + PAGE_SIZE/sizeof(vm_offset_t) - 1)/ (PAGE_SIZE/sizeof(vm_offset_t));
+ for (i = 0; i < nstackpages; i++) {
+ va = (vm_offset_t)base + i * PAGE_SIZE;
+ m = vm_page_alloc(NULL, i,
+ VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+ VM_ALLOC_ZERO);
+ pmap_qenter(va, &m, 1);
+ }
- *head = 0;
- for (i = npages - 1; i >= 0; i--) {
+ *head = (vm_offset_t)base;
+ for (i = npages - 1; i >= nstackpages; i--) {
va = (vm_offset_t)base + i * PAGE_SIZE;
pmap_ptelist_free(head, va);
}
@@ -713,22 +598,6 @@
void
pmap_init(void)
{
- vm_page_t mpte;
- vm_size_t s;
- int i, pv_npg;
-
- /*
- * Initialize the vm page array entries for the kernel pmap's
- * page table pages.
- */
- for (i = 0; i < NKPT; i++) {
- mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT));
- KASSERT(mpte >= vm_page_array &&
- mpte < &vm_page_array[vm_page_array_size],
- ("pmap_init: page table page is out of range"));
- mpte->pindex = i + KPTDI;
- mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
- }
/*
* Initialize the address space (zone) for the pv entries. Set a
@@ -741,54 +610,12 @@
pv_entry_max = roundup(pv_entry_max, _NPCPV);
pv_entry_high_water = 9 * (pv_entry_max / 10);
- /*
- * If the kernel is running in a virtual machine on an AMD Family 10h
- * processor, then it must assume that MCA is enabled by the virtual
- * machine monitor.
- */
- if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
- CPUID_TO_FAMILY(cpu_id) == 0x10)
- workaround_erratum383 = 1;
-
- /*
- * Are large page mappings supported and enabled?
- */
- TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
- if (pseflag == 0)
- pg_ps_enabled = 0;
- else if (pg_ps_enabled) {
- KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
- ("pmap_init: can't assign to pagesizes[1]"));
- pagesizes[1] = NBPDR;
- }
-
- /*
- * Calculate the size of the pv head table for superpages.
- */
- for (i = 0; phys_avail[i + 1]; i += 2);
- pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
-
- /*
- * Allocate memory for the pv head table for superpages.
- */
- s = (vm_size_t)(pv_npg * sizeof(struct md_page));
- s = round_page(s);
- pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
- for (i = 0; i < pv_npg; i++)
- TAILQ_INIT(&pv_table[i].pv_list);
-
pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
PAGE_SIZE * pv_maxchunks);
if (pv_chunkbase == NULL)
panic("pmap_init: not enough kvm for pv chunks");
pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
-#ifdef PAE
- pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
- NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
- UMA_ZONE_VM | UMA_ZONE_NOFREE);
- uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
-#endif
}
@@ -800,22 +627,10 @@
static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
"2/4MB page mapping counters");
-static u_long pmap_pde_demotions;
-SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD,
- &pmap_pde_demotions, 0, "2/4MB page demotions");
-
static u_long pmap_pde_mappings;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
&pmap_pde_mappings, 0, "2/4MB page mappings");
-static u_long pmap_pde_p_failures;
-SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD,
- &pmap_pde_p_failures, 0, "2/4MB page promotion failures");
-
-static u_long pmap_pde_promotions;
-SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD,
- &pmap_pde_promotions, 0, "2/4MB page promotions");
-
/***************************************************
* Low level helper routines.....
***************************************************/
@@ -827,90 +642,81 @@
int
pmap_cache_bits(int mode, boolean_t is_pde)
{
- int cache_bits, pat_flag, pat_idx;
-
- if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0)
- panic("Unknown caching mode %d\n", mode);
+ int pat_flag, pat_index, cache_bits;
/* The PAT bit is different for PTE's and PDE's. */
pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
+ /* If we don't support PAT, map extended modes to older ones. */
+ if (!(cpu_feature & CPUID_PAT)) {
+ switch (mode) {
+ case PAT_UNCACHEABLE:
+ case PAT_WRITE_THROUGH:
+ case PAT_WRITE_BACK:
+ break;
+ case PAT_UNCACHED:
+ case PAT_WRITE_COMBINING:
+ case PAT_WRITE_PROTECTED:
+ mode = PAT_UNCACHEABLE;
+ break;
+ }
+ }
+
/* Map the caching mode to a PAT index. */
- pat_idx = pat_index[mode];
+ if (pat_works) {
+ switch (mode) {
+ case PAT_UNCACHEABLE:
+ pat_index = 3;
+ break;
+ case PAT_WRITE_THROUGH:
+ pat_index = 1;
+ break;
+ case PAT_WRITE_BACK:
+ pat_index = 0;
+ break;
+ case PAT_UNCACHED:
+ pat_index = 2;
+ break;
+ case PAT_WRITE_COMBINING:
+ pat_index = 5;
+ break;
+ case PAT_WRITE_PROTECTED:
+ pat_index = 4;
+ break;
+ default:
+ panic("Unknown caching mode %d\n", mode);
+ }
+ } else {
+ switch (mode) {
+ case PAT_UNCACHED:
+ case PAT_UNCACHEABLE:
+ case PAT_WRITE_PROTECTED:
+ pat_index = 3;
+ break;
+ case PAT_WRITE_THROUGH:
+ pat_index = 1;
+ break;
+ case PAT_WRITE_BACK:
+ pat_index = 0;
+ break;
+ case PAT_WRITE_COMBINING:
+ pat_index = 2;
+ break;
+ default:
+ panic("Unknown caching mode %d\n", mode);
+ }
+ }
/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
cache_bits = 0;
- if (pat_idx & 0x4)
+ if (pat_index & 0x4)
cache_bits |= pat_flag;
- if (pat_idx & 0x2)
+ if (pat_index & 0x2)
cache_bits |= PG_NC_PCD;
- if (pat_idx & 0x1)
+ if (pat_index & 0x1)
cache_bits |= PG_NC_PWT;
return (cache_bits);
}
-
-/*
- * The caller is responsible for maintaining TLB consistency.
- */
-static void
-pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
-{
- pd_entry_t *pde;
- pmap_t pmap;
- boolean_t PTD_updated;
-
- PTD_updated = FALSE;
- mtx_lock_spin(&allpmaps_lock);
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
- PG_FRAME))
- PTD_updated = TRUE;
- pde = pmap_pde(pmap, va);
- pde_store(pde, newpde);
- }
- mtx_unlock_spin(&allpmaps_lock);
- KASSERT(PTD_updated,
- ("pmap_kenter_pde: current page table is not in allpmaps"));
-}
-
-/*
- * After changing the page size for the specified virtual address in the page
- * table, flush the corresponding entries from the processor's TLB. Only the
- * calling processor's TLB is affected.
- *
- * The calling thread must be pinned to a processor.
- */
-static void
-pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
-{
- u_long cr4;
-
- if ((newpde & PG_PS) == 0)
- /* Demotion: flush a specific 2MB page mapping. */
- invlpg(va);
- else if ((newpde & PG_G) == 0)
- /*
- * Promotion: flush every 4KB page mapping from the TLB
- * because there are too many to flush individually.
- */
- invltlb();
- else {
- /*
- * Promotion: flush every 4KB page mapping from the TLB,
- * including any global (PG_G) mappings.
- */
- cr4 = rcr4();
- load_cr4(cr4 & ~CR4_PGE);
- /*
- * Although preemption at this point could be detrimental to
- * performance, it would not lead to an error. PG_G is simply
- * ignored if CR4.PGE is clear. Moreover, in case this block
- * is re-entered, the load_cr4() either above or below will
- * modify CR4.PGE flushing the TLB.
- */
- load_cr4(cr4 | CR4_PGE);
- }
-}
#ifdef SMP
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
@@ -937,6 +743,9 @@
cpuset_t other_cpus;
u_int cpuid;
+ CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
+ pmap, va);
+
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invlpg(va);
@@ -952,6 +761,7 @@
smp_masked_invlpg(other_cpus, va);
}
sched_unpin();
+ PT_UPDATES_FLUSH();
}
void
@@ -961,6 +771,9 @@
vm_offset_t addr;
u_int cpuid;
+ CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x",
+ pmap, sva, eva);
+
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
@@ -978,6 +791,7 @@
smp_masked_invlpg_range(other_cpus, sva, eva);
}
sched_unpin();
+ PT_UPDATES_FLUSH();
}
void
@@ -986,6 +800,8 @@
cpuset_t other_cpus;
u_int cpuid;
+ CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap);
+
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invltlb();
@@ -1012,98 +828,6 @@
smp_cache_flush();
sched_unpin();
}
-
-struct pde_action {
- cpuset_t invalidate; /* processors that invalidate their TLB */
- vm_offset_t va;
- pd_entry_t *pde;
- pd_entry_t newpde;
- u_int store; /* processor that updates the PDE */
-};
-
-static void
-pmap_update_pde_kernel(void *arg)
-{
- struct pde_action *act = arg;
- pd_entry_t *pde;
- pmap_t pmap;
-
- if (act->store == PCPU_GET(cpuid)) {
-
- /*
- * Elsewhere, this operation requires allpmaps_lock for
- * synchronization. Here, it does not because it is being
- * performed in the context of an all_cpus rendezvous.
- */
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- pde = pmap_pde(pmap, act->va);
- pde_store(pde, act->newpde);
- }
- }
-}
-
-static void
-pmap_update_pde_user(void *arg)
-{
- struct pde_action *act = arg;
-
- if (act->store == PCPU_GET(cpuid))
- pde_store(act->pde, act->newpde);
-}
-
-static void
-pmap_update_pde_teardown(void *arg)
-{
- struct pde_action *act = arg;
-
- if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate))
- pmap_update_pde_invalidate(act->va, act->newpde);
-}
-
-/*
- * Change the page size for the specified virtual address in a way that
- * prevents any possibility of the TLB ever having two entries that map the
- * same virtual address using different page sizes. This is the recommended
- * workaround for Erratum 383 on AMD Family 10h processors. It prevents a
- * machine check exception for a TLB state that is improperly diagnosed as a
- * hardware error.
- */
-static void
-pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
-{
- struct pde_action act;
- cpuset_t active, other_cpus;
- u_int cpuid;
-
- sched_pin();
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (pmap == kernel_pmap)
- active = all_cpus;
- else
- active = pmap->pm_active;
- if (CPU_OVERLAP(&active, &other_cpus)) {
- act.store = cpuid;
- act.invalidate = active;
- act.va = va;
- act.pde = pde;
- act.newpde = newpde;
- CPU_SET(cpuid, &active);
- smp_rendezvous_cpus(active,
- smp_no_rendevous_barrier, pmap == kernel_pmap ?
- pmap_update_pde_kernel : pmap_update_pde_user,
- pmap_update_pde_teardown, &act);
- } else {
- if (pmap == kernel_pmap)
- pmap_kenter_pde(va, newpde);
- else
- pde_store(pde, newpde);
- if (CPU_ISSET(cpuid, &active))
- pmap_update_pde_invalidate(va, newpde);
- }
- sched_unpin();
-}
#else /* !SMP */
/*
* Normal, non-SMP, 486+ invalidation functions.
@@ -1112,9 +836,12 @@
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
+ CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
+ pmap, va);
if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invlpg(va);
+ PT_UPDATES_FLUSH();
}
PMAP_INLINE void
@@ -1122,15 +849,22 @@
{
vm_offset_t addr;
+ if (eva - sva > PAGE_SIZE)
+ CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
+ pmap, sva, eva);
+
if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
+ PT_UPDATES_FLUSH();
}
PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{
+ CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
+
if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invltlb();
}
@@ -1141,18 +875,6 @@
wbinvd();
}
-
-static void
-pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
-{
-
- if (pmap == kernel_pmap)
- pmap_kenter_pde(va, newpde);
- else
- pde_store(pde, newpde);
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- pmap_update_pde_invalidate(va, newpde);
-}
#endif /* !SMP */
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
@@ -1242,8 +964,11 @@
mtx_lock(&PMAP2mutex);
newpf = *pde & PG_FRAME;
if ((*PMAP2 & PG_FRAME) != newpf) {
- *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M;
- pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
+ vm_page_lock_queues();
+ PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
+ vm_page_unlock_queues();
+ CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
+ pmap, va, (*PMAP2 & 0xffffffff));
}
return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
}
@@ -1258,8 +983,14 @@
pmap_pte_release(pt_entry_t *pte)
{
- if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2)
+ if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) {
+ CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
+ *PMAP2);
+ vm_page_lock_queues();
+ PT_SET_VA(PMAP2, 0, TRUE);
+ vm_page_unlock_queues();
mtx_unlock(&PMAP2mutex);
+ }
}
static __inline void
@@ -1267,6 +998,7 @@
{
invlpg((u_int)caddr);
+ PT_UPDATES_FLUSH();
}
/*
@@ -1296,11 +1028,13 @@
KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
newpf = *pde & PG_FRAME;
if ((*PMAP1 & PG_FRAME) != newpf) {
- *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
+ PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
+ CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
+ pmap, va, (u_long)*PMAP1);
+
#ifdef SMP
PMAP1cpu = PCPU_GET(cpuid);
#endif
- invlcaddr(PADDR1);
PMAP1changed++;
} else
#ifdef SMP
@@ -1328,18 +1062,50 @@
vm_paddr_t rtval;
pt_entry_t *pte;
pd_entry_t pde;
+ pt_entry_t pteval;
rtval = 0;
PMAP_LOCK(pmap);
pde = pmap->pm_pdir[va >> PDRSHIFT];
if (pde != 0) {
- if ((pde & PG_PS) != 0)
- rtval = (pde & PG_PS_FRAME) | (va & PDRMASK);
- else {
- pte = pmap_pte(pmap, va);
- rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
- pmap_pte_release(pte);
+ if ((pde & PG_PS) != 0) {
+ rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
+ PMAP_UNLOCK(pmap);
+ return rtval;
+ }
+ pte = pmap_pte(pmap, va);
+ pteval = *pte ? xpmap_mtop(*pte) : 0;
+ rtval = (pteval & PG_FRAME) | (va & PAGE_MASK);
+ pmap_pte_release(pte);
+ }
+ PMAP_UNLOCK(pmap);
+ return (rtval);
+}
+
+/*
+ * Routine: pmap_extract_ma
+ * Function:
+ * Like pmap_extract, but returns machine address
+ */
+vm_paddr_t
+pmap_extract_ma(pmap_t pmap, vm_offset_t va)
+{
+ vm_paddr_t rtval;
+ pt_entry_t *pte;
+ pd_entry_t pde;
+
+ rtval = 0;
+ PMAP_LOCK(pmap);
+ pde = pmap->pm_pdir[va >> PDRSHIFT];
+ if (pde != 0) {
+ if ((pde & PG_PS) != 0) {
+ rtval = (pde & ~PDRMASK) | (va & PDRMASK);
+ PMAP_UNLOCK(pmap);
+ return rtval;
}
+ pte = pmap_pte(pmap, va);
+ rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
+ pmap_pte_release(pte);
}
PMAP_UNLOCK(pmap);
return (rtval);
@@ -1356,7 +1122,7 @@
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
pd_entry_t pde;
- pt_entry_t pte, *ptep;
+ pt_entry_t pte;
vm_page_t m;
vm_paddr_t pa;
@@ -1364,7 +1130,7 @@
m = NULL;
PMAP_LOCK(pmap);
retry:
- pde = *pmap_pde(pmap, va);
+ pde = PT_GET(pmap_pde(pmap, va));
if (pde != 0) {
if (pde & PG_PS) {
if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
@@ -1376,16 +1142,18 @@
vm_page_hold(m);
}
} else {
- ptep = pmap_pte(pmap, va);
- pte = *ptep;
- pmap_pte_release(ptep);
- if (pte != 0 &&
+ sched_pin();
+ pte = PT_GET(pmap_pte_quick(pmap, va));
+ if (*PMAP1)
+ PT_SET_MA(PADDR1, 0);
+ if ((pte & PG_V) &&
((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa))
goto retry;
m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
vm_page_hold(m);
}
+ sched_unpin();
}
}
PA_UNLOCK_COND(pa);
@@ -1403,22 +1171,27 @@
*
* This function may be used before pmap_bootstrap() is called.
*/
-PMAP_INLINE void
+void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
+
+ PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag);
+}
+
+void
+pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
+{
pt_entry_t *pte;
pte = vtopte(va);
- pte_store(pte, pa | PG_RW | PG_V | pgeflag);
+ pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
}
static __inline void
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
{
- pt_entry_t *pte;
- pte = vtopte(va);
- pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
+ PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
}
/*
@@ -1433,7 +1206,7 @@
pt_entry_t *pte;
pte = vtopte(va);
- pte_clear(pte);
+ PT_CLEAR_VA(pte, FALSE);
}
/*
@@ -1452,40 +1225,14 @@
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
vm_offset_t va, sva;
- vm_paddr_t superpage_offset;
- pd_entry_t newpde;
- va = *virt;
- /*
- * Does the physical address range's size and alignment permit at
- * least one superpage mapping to be created?
- */
- superpage_offset = start & PDRMASK;
- if ((end - start) - ((NBPDR - superpage_offset) & PDRMASK) >= NBPDR) {
- /*
- * Increase the starting virtual address so that its alignment
- * does not preclude the use of superpage mappings.
- */
- if ((va & PDRMASK) < superpage_offset)
- va = (va & ~PDRMASK) + superpage_offset;
- else if ((va & PDRMASK) > superpage_offset)
- va = ((va + PDRMASK) & ~PDRMASK) + superpage_offset;
- }
- sva = va;
+ va = sva = *virt;
+ CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x",
+ va, start, end, prot);
while (start < end) {
- if ((start & PDRMASK) == 0 && end - start >= NBPDR &&
- pseflag) {
- KASSERT((va & PDRMASK) == 0,
- ("pmap_map: misaligned va %#x", va));
- newpde = start | PG_PS | pgeflag | PG_RW | PG_V;
- pmap_kenter_pde(va, newpde);
- va += NBPDR;
- start += NBPDR;
- } else {
- pmap_kenter(va, start);
- va += PAGE_SIZE;
- start += PAGE_SIZE;
- }
+ pmap_kenter(va, start);
+ va += PAGE_SIZE;
+ start += PAGE_SIZE;
}
pmap_invalidate_range(kernel_pmap, sva, va);
*virt = va;
@@ -1505,24 +1252,47 @@
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
- pt_entry_t *endpte, oldpte, pa, *pte;
- vm_page_t m;
+ pt_entry_t *endpte, *pte;
+ vm_paddr_t pa;
+ vm_offset_t va = sva;
+ int mclcount = 0;
+ multicall_entry_t mcl[16];
+ multicall_entry_t *mclp = mcl;
+ int error;
- oldpte = 0;
+ CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count);
pte = vtopte(sva);
endpte = pte + count;
while (pte < endpte) {
- m = *ma++;
- pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
- if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) {
- oldpte |= *pte;
- pte_store(pte, pa | pgeflag | PG_RW | PG_V);
- }
- pte++;
+ pa = VM_PAGE_TO_MACH(*ma) | pgeflag | PG_RW | PG_V | PG_M | PG_A;
+
+ mclp->op = __HYPERVISOR_update_va_mapping;
+ mclp->args[0] = va;
+ mclp->args[1] = (uint32_t)(pa & 0xffffffff);
+ mclp->args[2] = (uint32_t)(pa >> 32);
+ mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0;
+
+ va += PAGE_SIZE;
+ pte++;
+ ma++;
+ mclp++;
+ mclcount++;
+ if (mclcount == 16) {
+ error = HYPERVISOR_multicall(mcl, mclcount);
+ mclp = mcl;
+ mclcount = 0;
+ KASSERT(error == 0, ("bad multicall %d", error));
+ }
+ }
+ if (mclcount) {
+ error = HYPERVISOR_multicall(mcl, mclcount);
+ KASSERT(error == 0, ("bad multicall %d", error));
}
- if (__predict_false((oldpte & PG_V) != 0))
- pmap_invalidate_range(kernel_pmap, sva, sva + count *
- PAGE_SIZE);
+
+#ifdef INVARIANTS
+ for (pte = vtopte(sva), mclcount = 0; mclcount < count; mclcount++, pte++)
+ KASSERT(*pte, ("pte not set for va=0x%x", sva + mclcount*PAGE_SIZE));
+#endif
}
/*
@@ -1535,12 +1305,18 @@
{
vm_offset_t va;
+ CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count);
va = sva;
+ vm_page_lock_queues();
+ critical_enter();
while (count-- > 0) {
pmap_kremove(va);
va += PAGE_SIZE;
}
+ PT_UPDATES_FLUSH();
pmap_invalidate_range(kernel_pmap, sva, va);
+ critical_exit();
+ vm_page_unlock_queues();
}
/***************************************************
@@ -1554,101 +1330,8 @@
while (free != NULL) {
m = free;
free = m->right;
- /* Preserve the page's PG_ZERO setting. */
- vm_page_free_toq(m);
- }
-}
-
-/*
- * Schedule the specified unused page table page to be freed. Specifically,
- * add the page to the specified list of pages that will be released to the
- * physical memory manager after the TLB has been updated.
- */
-static __inline void
-pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
-{
-
- if (set_PG_ZERO)
- m->flags |= PG_ZERO;
- else
- m->flags &= ~PG_ZERO;
- m->right = *free;
- *free = m;
-}
-
-/*
- * Inserts the specified page table page into the specified pmap's collection
- * of idle page table pages. Each of a pmap's page table pages is responsible
- * for mapping a distinct range of virtual addresses. The pmap's collection is
- * ordered by this virtual address range.
- */
-static void
-pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
-{
- vm_page_t root;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- root = pmap->pm_root;
- if (root == NULL) {
- mpte->left = NULL;
- mpte->right = NULL;
- } else {
- root = vm_page_splay(mpte->pindex, root);
- if (mpte->pindex < root->pindex) {
- mpte->left = root->left;
- mpte->right = root;
- root->left = NULL;
- } else if (mpte->pindex == root->pindex)
- panic("pmap_insert_pt_page: pindex already inserted");
- else {
- mpte->right = root->right;
- mpte->left = root;
- root->right = NULL;
- }
- }
- pmap->pm_root = mpte;
-}
-
-/*
- * Looks for a page table page mapping the specified virtual address in the
- * specified pmap's collection of idle page table pages. Returns NULL if there
- * is no page table page corresponding to the specified virtual address.
- */
-static vm_page_t
-pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
-{
- vm_page_t mpte;
- vm_pindex_t pindex = va >> PDRSHIFT;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) {
- mpte = vm_page_splay(pindex, mpte);
- if ((pmap->pm_root = mpte)->pindex != pindex)
- mpte = NULL;
- }
- return (mpte);
-}
-
-/*
- * Removes the specified page table page from the specified pmap's collection
- * of idle page table pages. The specified page table page must be a member of
- * the pmap's collection.
- */
-static void
-pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
-{
- vm_page_t root;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if (mpte != pmap->pm_root)
- vm_page_splay(mpte->pindex, pmap->pm_root);
- if (mpte->left == NULL)
- root = mpte->right;
- else {
- root = vm_page_splay(mpte->pindex, mpte->left);
- root->right = mpte->right;
+ vm_page_free_zero(m);
}
- pmap->pm_root = root;
}
/*
@@ -1671,10 +1354,16 @@
{
vm_offset_t pteva;
+ PT_UPDATES_FLUSH();
/*
* unmap the page table page
*/
- pmap->pm_pdir[m->pindex] = 0;
+ xen_pt_unpin(pmap->pm_pdir[m->pindex]);
+ /*
+ * page *might* contain residual mapping :-/
+ */
+ PD_CLEAR_VA(pmap, m->pindex, TRUE);
+ pmap_zero_page(m);
--pmap->pm_stats.resident_count;
/*
@@ -1695,7 +1384,8 @@
* Put page on a list so that it is released after
* *ALL* TLB shootdown is done
*/
- pmap_add_delayed_free_list(m, free, TRUE);
+ m->right = *free;
+ *free = m;
return (1);
}
@@ -1712,7 +1402,7 @@
if (va >= VM_MAXUSER_ADDRESS)
return (0);
- ptepde = *pmap_pde(pmap, va);
+ ptepde = PT_GET(pmap_pde(pmap, va));
mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
return (pmap_unwire_pte_hold(pmap, mpte, free));
}
@@ -1734,7 +1424,6 @@
#ifdef PAE
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
- pmap->pm_root = NULL;
CPU_ZERO(&pmap->pm_active);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
@@ -1748,10 +1437,14 @@
int
pmap_pinit(pmap_t pmap)
{
- vm_page_t m, ptdpg[NPGPTD];
- vm_paddr_t pa;
+ vm_page_t m, ptdpg[NPGPTD + 1];
+ int npgptd = NPGPTD + 1;
int i;
+#ifdef HAMFISTED_LOCKING
+ mtx_lock(&createdelete_lock);
+#endif
+
PMAP_LOCK_INIT(pmap);
/*
@@ -1763,25 +1456,20 @@
NBPTD);
if (pmap->pm_pdir == NULL) {
PMAP_LOCK_DESTROY(pmap);
+#ifdef HAMFISTED_LOCKING
+ mtx_unlock(&createdelete_lock);
+#endif
return (0);
}
#ifdef PAE
- pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
- KASSERT(((vm_offset_t)pmap->pm_pdpt &
- ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
- ("pmap_pinit: pdpt misaligned"));
- KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
- ("pmap_pinit: pdpt above 4g"));
+ pmap->pm_pdpt = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1);
#endif
- pmap->pm_root = NULL;
}
- KASSERT(pmap->pm_root == NULL,
- ("pmap_pinit: pmap has reserved page table page(s)"));
/*
* allocate the page directory page(s)
*/
- for (i = 0; i < NPGPTD;) {
+ for (i = 0; i < npgptd;) {
m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED | VM_ALLOC_ZERO);
if (m == NULL)
@@ -1803,19 +1491,49 @@
bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
mtx_unlock_spin(&allpmaps_lock);
- /* install self-referential address mapping entry(s) */
- for (i = 0; i < NPGPTD; i++) {
- pa = VM_PAGE_TO_PHYS(ptdpg[i]);
- pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
#ifdef PAE
- pmap->pm_pdpt[i] = pa | PG_V;
+ pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
+ if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
+ bzero(pmap->pm_pdpt, PAGE_SIZE);
+ for (i = 0; i < NPGPTD; i++) {
+ vm_paddr_t ma;
+
+ ma = VM_PAGE_TO_MACH(ptdpg[i]);
+ pmap->pm_pdpt[i] = ma | PG_V;
+
+ }
+#endif
+ for (i = 0; i < NPGPTD; i++) {
+ pt_entry_t *pd;
+ vm_paddr_t ma;
+
+ ma = VM_PAGE_TO_MACH(ptdpg[i]);
+ pd = pmap->pm_pdir + (i * NPDEPG);
+ PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
+#if 0
+ xen_pgd_pin(ma);
+#endif
+ }
+
+#ifdef PAE
+ PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
#endif
+ vm_page_lock_queues();
+ xen_flush_queue();
+ xen_pgdpt_pin(VM_PAGE_TO_MACH(ptdpg[NPGPTD]));
+ for (i = 0; i < NPGPTD; i++) {
+ vm_paddr_t ma = VM_PAGE_TO_MACH(ptdpg[i]);
+ PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE);
}
-
+ xen_flush_queue();
+ vm_page_unlock_queues();
CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+#ifdef HAMFISTED_LOCKING
+ mtx_unlock(&createdelete_lock);
+#endif
return (1);
}
@@ -1826,7 +1544,7 @@
static vm_page_t
_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags)
{
- vm_paddr_t ptepa;
+ vm_paddr_t ptema;
vm_page_t m;
KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
@@ -1862,10 +1580,13 @@
pmap->pm_stats.resident_count++;
- ptepa = VM_PAGE_TO_PHYS(m);
- pmap->pm_pdir[ptepindex] =
- (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
-
+ ptema = VM_PAGE_TO_MACH(m);
+ xen_pt_pin(ptema);
+ PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
+ (ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
+
+ KASSERT(pmap->pm_pdir[ptepindex],
+ ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
return (m);
}
@@ -1873,7 +1594,7 @@
pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
{
u_int ptepindex;
- pd_entry_t ptepa;
+ pd_entry_t ptema;
vm_page_t m;
KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
@@ -1888,32 +1609,41 @@
/*
* Get the page directory entry
*/
- ptepa = pmap->pm_pdir[ptepindex];
+ ptema = pmap->pm_pdir[ptepindex];
/*
* This supports switching from a 4MB page to a
* normal 4K page.
*/
- if (ptepa & PG_PS) {
- (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va);
- ptepa = pmap->pm_pdir[ptepindex];
+ if (ptema & PG_PS) {
+ /*
+ * XXX
+ */
+ pmap->pm_pdir[ptepindex] = 0;
+ ptema = 0;
+ pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+ pmap_invalidate_all(kernel_pmap);
}
/*
* If the page table page is mapped, we just increment the
* hold count, and activate it.
*/
- if (ptepa) {
- m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME);
+ if (ptema & PG_V) {
+ m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
m->wire_count++;
} else {
/*
* Here if the pte page isn't mapped, or if it has
* been deallocated.
*/
+ CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
+ pmap, va, flags);
m = _pmap_allocpte(pmap, ptepindex, flags);
if (m == NULL && (flags & M_WAITOK))
goto retry;
+
+ KASSERT(pmap->pm_pdir[ptepindex], ("ptepindex=%d did not get mapped", ptepindex));
}
return (m);
}
@@ -2033,14 +1763,23 @@
void
pmap_release(pmap_t pmap)
{
- vm_page_t m, ptdpg[NPGPTD];
+ vm_page_t m, ptdpg[2*NPGPTD+1];
+ vm_paddr_t ma;
int i;
+#ifdef PAE
+ int npgptd = NPGPTD + 1;
+#else
+ int npgptd = NPGPTD;
+#endif
KASSERT(pmap->pm_stats.resident_count == 0,
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
- KASSERT(pmap->pm_root == NULL,
- ("pmap_release: pmap has reserved page table page(s)"));
+ PT_UPDATES_FLUSH();
+
+#ifdef HAMFISTED_LOCKING
+ mtx_lock(&createdelete_lock);
+#endif
pmap_lazyfix(pmap);
mtx_lock_spin(&allpmaps_lock);
@@ -2048,25 +1787,39 @@
mtx_unlock_spin(&allpmaps_lock);
for (i = 0; i < NPGPTD; i++)
- ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] &
- PG_FRAME);
-
- bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
- sizeof(*pmap->pm_pdir));
-
+ ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir + (i*NPDEPG)) & PG_FRAME);
pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
+#ifdef PAE
+ ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
+#endif
- for (i = 0; i < NPGPTD; i++) {
+ for (i = 0; i < npgptd; i++) {
m = ptdpg[i];
+ ma = VM_PAGE_TO_MACH(m);
+ /* unpinning L1 and L2 treated the same */
+#if 0
+ xen_pgd_unpin(ma);
+#else
+ if (i == NPGPTD)
+ xen_pgd_unpin(ma);
+#endif
#ifdef PAE
- KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
- ("pmap_release: got wrong ptd page"));
+ if (i < NPGPTD)
+ KASSERT(VM_PAGE_TO_MACH(m) == (pmap->pm_pdpt[i] & PG_FRAME),
+ ("pmap_release: got wrong ptd page"));
#endif
m->wire_count--;
atomic_subtract_int(&cnt.v_wire_count, 1);
- vm_page_free_zero(m);
+ vm_page_free(m);
}
+#ifdef PAE
+ pmap_qremove((vm_offset_t)pmap->pm_pdpt, 1);
+#endif
PMAP_LOCK_DESTROY(pmap);
+
+#ifdef HAMFISTED_LOCKING
+ mtx_unlock(&createdelete_lock);
+#endif
}
static int
@@ -2095,11 +1848,24 @@
void
pmap_growkernel(vm_offset_t addr)
{
+ struct pmap *pmap;
vm_paddr_t ptppaddr;
vm_page_t nkpg;
pd_entry_t newpdir;
mtx_assert(&kernel_map->system_mtx, MA_OWNED);
+ if (kernel_vm_end == 0) {
+ kernel_vm_end = KERNBASE;
+ nkpt = 0;
+ while (pdir_pde(PTD, kernel_vm_end)) {
+ kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+ nkpt++;
+ if (kernel_vm_end - 1 >= kernel_map->max_offset) {
+ kernel_vm_end = kernel_map->max_offset;
+ break;
+ }
+ }
+ }
addr = roundup2(addr, NBPDR);
if (addr - 1 >= kernel_map->max_offset)
addr = kernel_map->max_offset;
@@ -2125,9 +1891,15 @@
pmap_zero_page(nkpg);
ptppaddr = VM_PAGE_TO_PHYS(nkpg);
newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
- pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir;
+ vm_page_lock_queues();
+ PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
+ mtx_lock_spin(&allpmaps_lock);
+ LIST_FOREACH(pmap, &allpmaps, pm_list)
+ PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
+
+ mtx_unlock_spin(&allpmaps_lock);
+ vm_page_unlock_queues();
- pmap_kenter_pde(kernel_vm_end, newpdir);
kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
if (kernel_vm_end - 1 >= kernel_map->max_offset) {
kernel_vm_end = kernel_map->max_offset;
@@ -2205,7 +1977,6 @@
static void
pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
{
- pd_entry_t *pde;
pmap_t pmap;
pt_entry_t *pte, tpte;
pv_entry_t next_pv, pv;
@@ -2225,9 +1996,6 @@
else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
continue;
pmap->pm_stats.resident_count--;
- pde = pmap_pde(pmap, va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
- " a 4mpage in page %p's pv list", m));
pte = pmap_pte_quick(pmap, va);
tpte = pte_load_clear(pte);
KASSERT((tpte & PG_W) == 0,
@@ -2245,8 +2013,7 @@
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
- if (TAILQ_EMPTY(&m->md.pv_list) &&
- TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
+ if (TAILQ_EMPTY(&m->md.pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
sched_unpin();
@@ -2373,6 +2140,8 @@
PV_STAT(pc_chunk_allocs++);
pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
pmap_qenter((vm_offset_t)pc, &m, 1);
+ if ((m->flags & PG_ZERO) == 0)
+ pagezero(pc);
pc->pc_pmap = pmap;
pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
for (field = 1; field < _NPCM; field++)
@@ -2399,73 +2168,6 @@
}
static void
-pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
-{
- struct md_page *pvh;
- pv_entry_t pv;
- vm_offset_t va_last;
- vm_page_t m;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- KASSERT((pa & PDRMASK) == 0,
- ("pmap_pv_demote_pde: pa is not 4mpage aligned"));
-
- /*
- * Transfer the 4mpage's pv entry for this mapping to the first
- * page's pv list.
- */
- pvh = pa_to_pvh(pa);
- va = trunc_4mpage(va);
- pv = pmap_pvh_remove(pvh, pmap, va);
- KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found"));
- m = PHYS_TO_VM_PAGE(pa);
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
- /* Instantiate the remaining NPTEPG - 1 pv entries. */
- va_last = va + NBPDR - PAGE_SIZE;
- do {
- m++;
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_pv_demote_pde: page %p is not managed", m));
- va += PAGE_SIZE;
- pmap_insert_entry(pmap, va, m);
- } while (va < va_last);
-}
-
-static void
-pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
-{
- struct md_page *pvh;
- pv_entry_t pv;
- vm_offset_t va_last;
- vm_page_t m;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- KASSERT((pa & PDRMASK) == 0,
- ("pmap_pv_promote_pde: pa is not 4mpage aligned"));
-
- /*
- * Transfer the first page's pv entry for this mapping to the
- * 4mpage's pv list. Aside from avoiding the cost of a call
- * to get_pv_entry(), a transfer avoids the possibility that
- * get_pv_entry() calls pmap_collect() and that pmap_collect()
- * removes one of the mappings that is being promoted.
- */
- m = PHYS_TO_VM_PAGE(pa);
- va = trunc_4mpage(va);
- pv = pmap_pvh_remove(&m->md, pmap, va);
- KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found"));
- pvh = pa_to_pvh(pa);
- TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
- /* Free the remaining NPTEPG - 1 pv entries. */
- va_last = va + NBPDR - PAGE_SIZE;
- do {
- m++;
- va += PAGE_SIZE;
- pmap_pvh_free(&m->md, pmap, va);
- } while (va < va_last);
-}
-
-static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
pv_entry_t pv;
@@ -2478,31 +2180,11 @@
static void
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{
- struct md_page *pvh;
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
pmap_pvh_free(&m->md, pmap, va);
- if (TAILQ_EMPTY(&m->md.pv_list)) {
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- if (TAILQ_EMPTY(&pvh->pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- }
-}
-
-/*
- * Create a pv entry for page at pa for
- * (pmap, va).
- */
-static void
-pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
-{
- pv_entry_t pv;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- pv = get_pv_entry(pmap, FALSE);
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
}
/*
@@ -2525,243 +2207,6 @@
}
/*
- * Create the pv entries for each of the pages within a superpage.
- */
-static boolean_t
-pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
-{
- struct md_page *pvh;
- pv_entry_t pv;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- if (pv_entry_count < pv_entry_high_water &&
- (pv = get_pv_entry(pmap, TRUE)) != NULL) {
- pv->pv_va = va;
- pvh = pa_to_pvh(pa);
- TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
- return (TRUE);
- } else
- return (FALSE);
-}
-
-/*
- * Fills a page table page with mappings to consecutive physical pages.
- */
-static void
-pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
-{
- pt_entry_t *pte;
-
- for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
- *pte = newpte;
- newpte += PAGE_SIZE;
- }
-}
-
-/*
- * Tries to demote a 2- or 4MB page mapping. If demotion fails, the
- * 2- or 4MB page mapping is invalidated.
- */
-static boolean_t
-pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
-{
- pd_entry_t newpde, oldpde;
- pt_entry_t *firstpte, newpte;
- vm_paddr_t mptepa;
- vm_page_t free, mpte;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- oldpde = *pde;
- KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
- ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
- mpte = pmap_lookup_pt_page(pmap, va);
- if (mpte != NULL)
- pmap_remove_pt_page(pmap, mpte);
- else {
- KASSERT((oldpde & PG_W) == 0,
- ("pmap_demote_pde: page table page for a wired mapping"
- " is missing"));
-
- /*
- * Invalidate the 2- or 4MB page mapping and return
- * "failure" if the mapping was never accessed or the
- * allocation of the new page table page fails.
- */
- if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
- va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
- VM_ALLOC_WIRED)) == NULL) {
- free = NULL;
- pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
- pmap_invalidate_page(pmap, trunc_4mpage(va));
- pmap_free_zero_pages(free);
- CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
- " in pmap %p", va, pmap);
- return (FALSE);
- }
- if (va < VM_MAXUSER_ADDRESS)
- pmap->pm_stats.resident_count++;
- }
- mptepa = VM_PAGE_TO_PHYS(mpte);
-
- /*
- * If the page mapping is in the kernel's address space, then the
- * KPTmap can provide access to the page table page. Otherwise,
- * temporarily map the page table page (mpte) into the kernel's
- * address space at either PADDR1 or PADDR2.
- */
- if (va >= KERNBASE)
- firstpte = &KPTmap[i386_btop(trunc_4mpage(va))];
- else if (curthread->td_pinned > 0 && mtx_owned(&vm_page_queue_mtx)) {
- if ((*PMAP1 & PG_FRAME) != mptepa) {
- *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M;
-#ifdef SMP
- PMAP1cpu = PCPU_GET(cpuid);
-#endif
- invlcaddr(PADDR1);
- PMAP1changed++;
- } else
-#ifdef SMP
- if (PMAP1cpu != PCPU_GET(cpuid)) {
- PMAP1cpu = PCPU_GET(cpuid);
- invlcaddr(PADDR1);
- PMAP1changedcpu++;
- } else
-#endif
- PMAP1unchanged++;
- firstpte = PADDR1;
- } else {
- mtx_lock(&PMAP2mutex);
- if ((*PMAP2 & PG_FRAME) != mptepa) {
- *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M;
- pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
- }
- firstpte = PADDR2;
- }
- newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
- KASSERT((oldpde & PG_A) != 0,
- ("pmap_demote_pde: oldpde is missing PG_A"));
- KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
- ("pmap_demote_pde: oldpde is missing PG_M"));
- newpte = oldpde & ~PG_PS;
- if ((newpte & PG_PDE_PAT) != 0)
- newpte ^= PG_PDE_PAT | PG_PTE_PAT;
-
- /*
- * If the page table page is new, initialize it.
- */
- if (mpte->wire_count == 1) {
- mpte->wire_count = NPTEPG;
- pmap_fill_ptp(firstpte, newpte);
- }
- KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
- ("pmap_demote_pde: firstpte and newpte map different physical"
- " addresses"));
-
- /*
- * If the mapping has changed attributes, update the page table
- * entries.
- */
- if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
- pmap_fill_ptp(firstpte, newpte);
-
- /*
- * Demote the mapping. This pmap is locked. The old PDE has
- * PG_A set. If the old PDE has PG_RW set, it also has PG_M
- * set. Thus, there is no danger of a race with another
- * processor changing the setting of PG_A and/or PG_M between
- * the read above and the store below.
- */
- if (workaround_erratum383)
- pmap_update_pde(pmap, va, pde, newpde);
- else if (pmap == kernel_pmap)
- pmap_kenter_pde(va, newpde);
- else
- pde_store(pde, newpde);
- if (firstpte == PADDR2)
- mtx_unlock(&PMAP2mutex);
-
- /*
- * Invalidate the recursive mapping of the page table page.
- */
- pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
-
- /*
- * Demote the pv entry. This depends on the earlier demotion
- * of the mapping. Specifically, the (re)creation of a per-
- * page pv entry might trigger the execution of pmap_collect(),
- * which might reclaim a newly (re)created per-page pv entry
- * and destroy the associated mapping. In order to destroy
- * the mapping, the PDE must have already changed from mapping
- * the 2mpage to referencing the page table page.
- */
- if ((oldpde & PG_MANAGED) != 0)
- pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME);
-
- pmap_pde_demotions++;
- CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x"
- " in pmap %p", va, pmap);
- return (TRUE);
-}
-
-/*
- * pmap_remove_pde: do the things to unmap a superpage in a process
- */
-static void
-pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- vm_page_t *free)
-{
- struct md_page *pvh;
- pd_entry_t oldpde;
- vm_offset_t eva, va;
- vm_page_t m, mpte;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- KASSERT((sva & PDRMASK) == 0,
- ("pmap_remove_pde: sva is not 4mpage aligned"));
- oldpde = pte_load_clear(pdq);
- if (oldpde & PG_W)
- pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
-
- /*
- * Machines that don't support invlpg, also don't support
- * PG_G.
- */
- if (oldpde & PG_G)
- pmap_invalidate_page(kernel_pmap, sva);
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- if (oldpde & PG_MANAGED) {
- pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
- pmap_pvh_free(pvh, pmap, sva);
- eva = sva + NBPDR;
- for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
- va < eva; va += PAGE_SIZE, m++) {
- if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- if (oldpde & PG_A)
- vm_page_aflag_set(m, PGA_REFERENCED);
- if (TAILQ_EMPTY(&m->md.pv_list) &&
- TAILQ_EMPTY(&pvh->pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- }
- }
- if (pmap == kernel_pmap) {
- if (!pmap_demote_pde(pmap, pdq, sva))
- panic("pmap_remove_pde: failed demotion");
- } else {
- mpte = pmap_lookup_pt_page(pmap, sva);
- if (mpte != NULL) {
- pmap_remove_pt_page(pmap, mpte);
- pmap->pm_stats.resident_count--;
- KASSERT(mpte->wire_count == NPTEPG,
- ("pmap_remove_pde: pte page wire count error"));
- mpte->wire_count = 0;
- pmap_add_delayed_free_list(mpte, free, FALSE);
- atomic_subtract_int(&cnt.v_wire_count, 1);
- }
- }
-}
-
-/*
* pmap_remove_pte: do the things to unmap a page in a process
*/
static int
@@ -2770,9 +2215,13 @@
pt_entry_t oldpte;
vm_page_t m;
+ CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x",
+ pmap, (u_long)*ptq, va);
+
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- oldpte = pte_load_clear(ptq);
+ oldpte = *ptq;
+ PT_SET_VA_MA(ptq, 0, TRUE);
if (oldpte & PG_W)
pmap->pm_stats.wired_count -= 1;
/*
@@ -2783,7 +2232,7 @@
pmap_invalidate_page(kernel_pmap, va);
pmap->pm_stats.resident_count -= 1;
if (oldpte & PG_MANAGED) {
- m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
+ m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME);
if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
vm_page_dirty(m);
if (oldpte & PG_A)
@@ -2801,13 +2250,19 @@
{
pt_entry_t *pte;
+ CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x",
+ pmap, va);
+
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0)
+ if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0)
return;
pmap_remove_pte(pmap, pte, va, free);
pmap_invalidate_page(pmap, va);
+ if (*PMAP1)
+ PT_SET_MA(PADDR1, 0);
+
}
/*
@@ -2825,6 +2280,9 @@
vm_page_t free = NULL;
int anyvalid;
+ CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
+ pmap, sva, eva);
+
/*
* Perform an unsynchronized read. This is, however, safe.
*/
@@ -2855,8 +2313,6 @@
* Calculate index for next page table.
*/
pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
if (pmap->pm_stats.resident_count == 0)
break;
@@ -2874,25 +2330,10 @@
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- /*
- * Are we removing the entire large page? If not,
- * demote the mapping and fall through.
- */
- if (sva + NBPDR == pdnxt && eva >= pdnxt) {
- /*
- * The TLB entry for a PG_G mapping is
- * invalidated by pmap_remove_pde().
- */
- if ((ptpaddr & PG_G) == 0)
- anyvalid = 1;
- pmap_remove_pde(pmap,
- &pmap->pm_pdir[pdirindex], sva, &free);
- continue;
- } else if (!pmap_demote_pde(pmap,
- &pmap->pm_pdir[pdirindex], sva)) {
- /* The large page mapping was destroyed. */
- continue;
- }
+ PD_CLEAR_VA(pmap, pdirindex, TRUE);
+ pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+ anyvalid = 1;
+ continue;
}
/*
@@ -2905,7 +2346,7 @@
for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
sva += PAGE_SIZE) {
- if (*pte == 0)
+ if ((*pte & PG_V) == 0)
continue;
/*
@@ -2918,10 +2359,13 @@
break;
}
}
+ PT_UPDATES_FLUSH();
+ if (*PMAP1)
+ PT_SET_VA_MA(PMAP1, 0, TRUE);
out:
- sched_unpin();
if (anyvalid)
pmap_invalidate_all(pmap);
+ sched_unpin();
vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
pmap_free_zero_pages(free);
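
For anyone reading the diff without the Xen pmap macros in mind: the recurring shape in the pmap_remove()/pmap_remove_pte() hunks above is that direct PTE stores (pte_load_clear(), atomic_*) become PT_SET_VA_MA()/PT_SET_MA() writes, followed by a PT_UPDATES_FLUSH() and a reset of the PMAP1/PADDR1 scratch mapping before unpinning. A minimal sketch of that shape, not code from the patch (the PT_* macros are the ones the Xen pmap headers already provide):

    /*
     * Illustrative only: remove a single PTE the Xen way.  The PT_*
     * macros queue or issue hypervisor page-table updates instead of
     * writing the (read-only) page tables directly.
     */
    static void
    xen_clear_pte(pt_entry_t *pte, vm_offset_t va)
    {
            pt_entry_t oldpte;

            oldpte = *pte;                  /* snapshot before the update */
            PT_SET_VA_MA(pte, 0, TRUE);     /* zero the PTE via Xen */
            PT_UPDATES_FLUSH();             /* drain any queued updates */
            if (oldpte & PG_G)              /* global mappings still need */
                    pmap_invalidate_page(kernel_pmap, va);  /* an invlpg */
    }
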
@@ -2943,12 +2387,9 @@
void
pmap_remove_all(vm_page_t m)
{
- struct md_page *pvh;
pv_entry_t pv;
pmap_t pmap;
pt_entry_t *pte, tpte;
- pd_entry_t *pde;
- vm_offset_t va;
vm_page_t free;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
@@ -2956,24 +2397,13 @@
free = NULL;
vm_page_lock_queues();
sched_pin();
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
- va = pv->pv_va;
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, va);
- (void)pmap_demote_pde(pmap, pde, va);
- PMAP_UNLOCK(pmap);
- }
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pmap->pm_stats.resident_count--;
- pde = pmap_pde(pmap, pv->pv_va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found"
- " a 4mpage in page %p's pv list", m));
pte = pmap_pte_quick(pmap, pv->pv_va);
- tpte = pte_load_clear(pte);
+ tpte = *pte;
+ PT_SET_VA_MA(pte, 0, TRUE);
if (tpte & PG_W)
pmap->pm_stats.wired_count--;
if (tpte & PG_A)
@@ -2991,53 +2421,15 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
+ PT_UPDATES_FLUSH();
+ if (*PMAP1)
+ PT_SET_MA(PADDR1, 0);
sched_unpin();
vm_page_unlock_queues();
pmap_free_zero_pages(free);
}
/*
- * pmap_protect_pde: do the things to protect a 4mpage in a process
- */
-static boolean_t
-pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot)
-{
- pd_entry_t newpde, oldpde;
- vm_offset_t eva, va;
- vm_page_t m;
- boolean_t anychanged;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- KASSERT((sva & PDRMASK) == 0,
- ("pmap_protect_pde: sva is not 4mpage aligned"));
- anychanged = FALSE;
-retry:
- oldpde = newpde = *pde;
- if (oldpde & PG_MANAGED) {
- eva = sva + NBPDR;
- for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
- va < eva; va += PAGE_SIZE, m++)
- if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- }
- if ((prot & VM_PROT_WRITE) == 0)
- newpde &= ~(PG_RW | PG_M);
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- newpde |= pg_nx;
-#endif
- if (newpde != oldpde) {
- if (!pde_cmpset(pde, oldpde, newpde))
- goto retry;
- if (oldpde & PG_G)
- pmap_invalidate_page(pmap, sva);
- else
- anychanged = TRUE;
- }
- return (anychanged);
-}
-
-/*
* Set the physical protection on the
* specified range of this map as requested.
*/
@@ -3049,6 +2441,9 @@
pt_entry_t *pte;
int anychanged;
+ CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x",
+ pmap, sva, eva, prot);
+
if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
return;
@@ -3073,8 +2468,6 @@
u_int pdirindex;
pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
pdirindex = sva >> PDRSHIFT;
ptpaddr = pmap->pm_pdir[pdirindex];
@@ -3090,24 +2483,14 @@
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- /*
- * Are we protecting the entire large page? If not,
- * demote the mapping and fall through.
- */
- if (sva + NBPDR == pdnxt && eva >= pdnxt) {
- /*
- * The TLB entry for a PG_G mapping is
- * invalidated by pmap_protect_pde().
- */
- if (pmap_protect_pde(pmap,
- &pmap->pm_pdir[pdirindex], sva, prot))
- anychanged = 1;
- continue;
- } else if (!pmap_demote_pde(pmap,
- &pmap->pm_pdir[pdirindex], sva)) {
- /* The large page mapping was destroyed. */
- continue;
- }
+ if ((prot & VM_PROT_WRITE) == 0)
+ pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
+#ifdef PAE
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ pmap->pm_pdir[pdirindex] |= pg_nx;
+#endif
+ anychanged = 1;
+ continue;
}
if (pdnxt > eva)
@@ -3130,7 +2513,8 @@
if ((prot & VM_PROT_WRITE) == 0) {
if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
(PG_MANAGED | PG_M | PG_RW)) {
- m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
+ m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) &
+ PG_FRAME);
vm_page_dirty(m);
}
pbits &= ~(PG_RW | PG_M);
@@ -3141,14 +2525,10 @@
#endif
if (pbits != obits) {
-#ifdef PAE
- if (!atomic_cmpset_64(pte, obits, pbits))
+ obits = *pte;
+ PT_SET_VA_MA(pte, pbits, TRUE);
+ if (*pte != pbits)
goto retry;
-#else
- if (!atomic_cmpset_int((u_int *)pte, obits,
- pbits))
- goto retry;
-#endif
if (obits & PG_G)
pmap_invalidate_page(pmap, sva);
else
@@ -3156,145 +2536,17 @@
}
}
}
- sched_unpin();
+ PT_UPDATES_FLUSH();
+ if (*PMAP1)
+ PT_SET_VA_MA(PMAP1, 0, TRUE);
if (anychanged)
pmap_invalidate_all(pmap);
+ sched_unpin();
vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
}
/*
- * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are
- * within a single page table page (PTP) to a single 2- or 4MB page mapping.
- * For promotion to occur, two conditions must be met: (1) the 4KB page
- * mappings must map aligned, contiguous physical memory and (2) the 4KB page
- * mappings must have identical characteristics.
- *
- * Managed (PG_MANAGED) mappings within the kernel address space are not
- * promoted. The reason is that kernel PDEs are replicated in each pmap but
- * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel
- * pmap.
- */
-static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
-{
- pd_entry_t newpde;
- pt_entry_t *firstpte, oldpte, pa, *pte;
- vm_offset_t oldpteva;
- vm_page_t mpte;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
- /*
- * Examine the first PTE in the specified PTP. Abort if this PTE is
- * either invalid, unused, or does not map the first 4KB physical page
- * within a 2- or 4MB page.
- */
- firstpte = pmap_pte_quick(pmap, trunc_4mpage(va));
-setpde:
- newpde = *firstpte;
- if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
- pmap_pde_p_failures++;
- CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
- " in pmap %p", va, pmap);
- return;
- }
- if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) {
- pmap_pde_p_failures++;
- CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
- " in pmap %p", va, pmap);
- return;
- }
- if ((newpde & (PG_M | PG_RW)) == PG_RW) {
- /*
- * When PG_M is already clear, PG_RW can be cleared without
- * a TLB invalidation.
- */
- if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde &
- ~PG_RW))
- goto setpde;
- newpde &= ~PG_RW;
- }
-
- /*
- * Examine each of the other PTEs in the specified PTP. Abort if this
- * PTE maps an unexpected 4KB physical page or does not have identical
- * characteristics to the first PTE.
- */
- pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
- for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
-setpte:
- oldpte = *pte;
- if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
- pmap_pde_p_failures++;
- CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
- " in pmap %p", va, pmap);
- return;
- }
- if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
- /*
- * When PG_M is already clear, PG_RW can be cleared
- * without a TLB invalidation.
- */
- if (!atomic_cmpset_int((u_int *)pte, oldpte,
- oldpte & ~PG_RW))
- goto setpte;
- oldpte &= ~PG_RW;
- oldpteva = (oldpte & PG_FRAME & PDRMASK) |
- (va & ~PDRMASK);
- CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x"
- " in pmap %p", oldpteva, pmap);
- }
- if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) {
- pmap_pde_p_failures++;
- CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
- " in pmap %p", va, pmap);
- return;
- }
- pa -= PAGE_SIZE;
- }
-
- /*
- * Save the page table page in its current state until the PDE
- * mapping the superpage is demoted by pmap_demote_pde() or
- * destroyed by pmap_remove_pde().
- */
- mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
- KASSERT(mpte >= vm_page_array &&
- mpte < &vm_page_array[vm_page_array_size],
- ("pmap_promote_pde: page table page is out of range"));
- KASSERT(mpte->pindex == va >> PDRSHIFT,
- ("pmap_promote_pde: page table page's pindex is wrong"));
- pmap_insert_pt_page(pmap, mpte);
-
- /*
- * Promote the pv entries.
- */
- if ((newpde & PG_MANAGED) != 0)
- pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME);
-
- /*
- * Propagate the PAT index to its proper position.
- */
- if ((newpde & PG_PTE_PAT) != 0)
- newpde ^= PG_PDE_PAT | PG_PTE_PAT;
-
- /*
- * Map the superpage.
- */
- if (workaround_erratum383)
- pmap_update_pde(pmap, va, pde, PG_PS | newpde);
- else if (pmap == kernel_pmap)
- pmap_kenter_pde(va, PG_PS | newpde);
- else
- pde_store(pde, PG_PS | newpde);
-
- pmap_pde_promotions++;
- CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x"
- " in pmap %p", va, pmap);
-}
-
-/*
* Insert the given physical page (p) at
* the specified virtual address (v) in the
* target physical map with the protection requested.
@@ -3318,6 +2570,8 @@
vm_page_t mpte, om;
boolean_t invlva;
+ CTR6(KTR_PMAP, "pmap_enter: pmap=%08p va=0x%08x access=0x%x ma=0x%08x prot=0x%x wired=%d",
+ pmap, va, access, VM_PAGE_TO_MACH(m), prot, wired);
va = trunc_page(va);
KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
@@ -3351,12 +2605,20 @@
*/
if (pte == NULL) {
panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x",
- (uintmax_t)pmap->pm_pdir[PTDPTDI], va);
+ (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va);
}
pa = VM_PAGE_TO_PHYS(m);
om = NULL;
+ opa = origpte = 0;
+
+#if 0
+ KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx",
+ pte, *pte));
+#endif
origpte = *pte;
+ if (origpte)
+ origpte = xpmap_mtop(origpte);
opa = origpte & PG_FRAME;
/*
@@ -3399,7 +2661,9 @@
if (origpte & PG_MANAGED) {
om = PHYS_TO_VM_PAGE(opa);
pv = pmap_pvh_remove(&om->md, pmap, va);
- }
+ } else if (va < VM_MAXUSER_ADDRESS)
+ printf("va=0x%x is unmanaged :-( \n", va);
+
if (mpte != NULL) {
mpte->wire_count--;
KASSERT(mpte->wire_count > 0,
@@ -3433,7 +2697,7 @@
/*
* Now validate mapping with desired protection/wiring.
*/
- newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V);
+ newpte = (pt_entry_t)(pa | PG_V);
if ((prot & VM_PROT_WRITE) != 0) {
newpte |= PG_RW;
if ((newpte & PG_MANAGED) != 0)
@@ -3450,17 +2714,16 @@
if (pmap == kernel_pmap)
newpte |= pgeflag;
+ critical_enter();
/*
* if the mapping or permission bits are different, we need
* to update the pte.
*/
if ((origpte & ~(PG_M|PG_A)) != newpte) {
- newpte |= PG_A;
- if ((access & VM_PROT_WRITE) != 0)
- newpte |= PG_M;
- if (origpte & PG_V) {
+ if (origpte) {
invlva = FALSE;
- origpte = pte_load_store(pte, newpte);
+ origpte = *pte;
+ PT_SET_VA(pte, newpte | PG_A, FALSE);
if (origpte & PG_A) {
if (origpte & PG_MANAGED)
vm_page_aflag_set(om, PGA_REFERENCED);
@@ -3479,85 +2742,25 @@
invlva = TRUE;
}
if ((origpte & PG_MANAGED) != 0 &&
- TAILQ_EMPTY(&om->md.pv_list) &&
- TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))
+ TAILQ_EMPTY(&om->md.pv_list))
vm_page_aflag_clear(om, PGA_WRITEABLE);
if (invlva)
pmap_invalidate_page(pmap, va);
- } else
- pte_store(pte, newpte);
+ } else{
+ PT_SET_VA(pte, newpte | PG_A, FALSE);
+ }
+
}
-
- /*
- * If both the page table page and the reservation are fully
- * populated, then attempt promotion.
- */
- if ((mpte == NULL || mpte->wire_count == NPTEPG) &&
- pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0)
- pmap_promote_pde(pmap, pde, va);
-
+ PT_UPDATES_FLUSH();
+ critical_exit();
+ if (*PMAP1)
+ PT_SET_VA_MA(PMAP1, 0, TRUE);
sched_unpin();
vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
}
/*
- * Tries to create a 2- or 4MB page mapping. Returns TRUE if successful and
- * FALSE otherwise. Fails if (1) a page table page cannot be allocated without
- * blocking, (2) a mapping already exists at the specified virtual address, or
- * (3) a pv entry cannot be allocated without reclaiming another pv entry.
- */
-static boolean_t
-pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
-{
- pd_entry_t *pde, newpde;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- pde = pmap_pde(pmap, va);
- if (*pde != 0) {
- CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
- " in pmap %p", va, pmap);
- return (FALSE);
- }
- newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) |
- PG_PS | PG_V;
- if ((m->oflags & VPO_UNMANAGED) == 0) {
- newpde |= PG_MANAGED;
-
- /*
- * Abort this mapping if its PV entry could not be created.
- */
- if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) {
- CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
- " in pmap %p", va, pmap);
- return (FALSE);
- }
- }
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- newpde |= pg_nx;
-#endif
- if (va < VM_MAXUSER_ADDRESS)
- newpde |= PG_U;
-
- /*
- * Increment counters.
- */
- pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
-
- /*
- * Map the superpage.
- */
- pde_store(pde, newpde);
-
- pmap_pde_mappings++;
- CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
- " in pmap %p", va, pmap);
- return (TRUE);
-}
-
-/*
* Maps a sequence of resident pages belonging to the same object.
* The sequence begins with the given page m_start. This page is
* mapped at the given virtual address start. Each subsequent page is
@@ -3573,9 +2776,11 @@
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_page_t m_start, vm_prot_t prot)
{
- vm_offset_t va;
vm_page_t m, mpte;
vm_pindex_t diff, psize;
+ multicall_entry_t mcl[16];
+ multicall_entry_t *mclp = mcl;
+ int error, count = 0;
VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
psize = atop(end - start);
@@ -3584,16 +2789,19 @@
vm_page_lock_queues();
PMAP_LOCK(pmap);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
- va = start + ptoa(diff);
- if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
- (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 &&
- pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 &&
- pmap_enter_pde(pmap, va, m, prot))
- m = &m[NBPDR / PAGE_SIZE - 1];
- else
- mpte = pmap_enter_quick_locked(pmap, va, m, prot,
- mpte);
+ mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m,
+ prot, mpte);
m = TAILQ_NEXT(m, listq);
+ if (count == 16) {
+ error = HYPERVISOR_multicall(mcl, count);
+ KASSERT(error == 0, ("bad multicall %d", error));
+ mclp = mcl;
+ count = 0;
+ }
+ }
+ if (count) {
+ error = HYPERVISOR_multicall(mcl, count);
+ KASSERT(error == 0, ("bad multicall %d", error));
}
vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
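
The loop above is the interesting part of the pmap_enter_object() change: instead of storing PTEs one at a time, pmap_enter_quick_locked() now fills in multicall entries and the caller flushes them to the hypervisor 16 at a time. Reduced to its essentials (an illustration built only from the interfaces visible in the patch, not code from it):

    /*
     * Sketch: batch va -> machine-address mappings and hand them to Xen
     * in one hypercall per 16 entries.  The "ma" values are assumed to
     * be machine addresses, i.e. already passed through xpmap_ptom().
     */
    static void
    enter_mappings(vm_offset_t *va, uint64_t *ma, int n)
    {
            multicall_entry_t mcl[16];
            int i, count, error;

            count = 0;
            for (i = 0; i < n; i++) {
                    mcl[count].op = __HYPERVISOR_update_va_mapping;
                    mcl[count].args[0] = va[i];
                    mcl[count].args[1] = (uint32_t)(ma[i] & 0xffffffff);
                    mcl[count].args[2] = (uint32_t)(ma[i] >> 32);
                    mcl[count].args[3] = 0;         /* no TLB flush flags */
                    if (++count == 16) {            /* flush a full batch */
                            error = HYPERVISOR_multicall(mcl, count);
                            KASSERT(error == 0, ("bad multicall %d", error));
                            count = 0;
                    }
            }
            if (count != 0) {                       /* flush the remainder */
                    error = HYPERVISOR_multicall(mcl, count);
                    KASSERT(error == 0, ("bad multicall %d", error));
            }
    }
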
@@ -3611,21 +2819,60 @@
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
+ multicall_entry_t mcl, *mclp;
+ int count = 0;
+ mclp = &mcl;
+ CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x",
+ pmap, va, m, prot);
+
vm_page_lock_queues();
PMAP_LOCK(pmap);
- (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
+ (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL);
+ if (count)
+ HYPERVISOR_multicall(&mcl, count);
vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
}
+#ifdef notyet
+void
+pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count)
+{
+ int i, error, index = 0;
+ multicall_entry_t mcl[16];
+ multicall_entry_t *mclp = mcl;
+
+ PMAP_LOCK(pmap);
+ for (i = 0; i < count; i++, addrs++, pages++, prots++) {
+ if (!pmap_is_prefaultable_locked(pmap, *addrs))
+ continue;
+
+ (void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL);
+ if (index == 16) {
+ error = HYPERVISOR_multicall(mcl, index);
+ mclp = mcl;
+ index = 0;
+ KASSERT(error == 0, ("bad multicall %d", error));
+ }
+ }
+ if (index) {
+ error = HYPERVISOR_multicall(mcl, index);
+ KASSERT(error == 0, ("bad multicall %d", error));
+ }
+
+ PMAP_UNLOCK(pmap);
+}
+#endif
+
static vm_page_t
-pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
+pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot, vm_page_t mpte)
{
pt_entry_t *pte;
vm_paddr_t pa;
vm_page_t free;
+ multicall_entry_t *mcl = *mclpp;
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
(m->oflags & VPO_UNMANAGED) != 0,
@@ -3639,7 +2886,7 @@
*/
if (va < VM_MAXUSER_ADDRESS) {
u_int ptepindex;
- pd_entry_t ptepa;
+ pd_entry_t ptema;
/*
* Calculate pagetable page index
@@ -3651,16 +2898,16 @@
/*
* Get the page directory entry
*/
- ptepa = pmap->pm_pdir[ptepindex];
+ ptema = pmap->pm_pdir[ptepindex];
/*
* If the page table page is mapped, we just increment
* the hold count, and activate it.
*/
- if (ptepa) {
- if (ptepa & PG_PS)
- return (NULL);
- mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME);
+ if (ptema & PG_V) {
+ if (ptema & PG_PS)
+ panic("pmap_enter_quick: unexpected mapping into 4MB page");
+ mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
mpte->wire_count++;
} else {
mpte = _pmap_allocpte(pmap, ptepindex,
@@ -3679,8 +2926,9 @@
* quick entry into any pmap, one would likely use pmap_pte_quick.
* But that isn't as quick as vtopte.
*/
+ KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap"));
pte = vtopte(va);
- if (*pte) {
+ if (*pte & PG_V) {
if (mpte != NULL) {
mpte->wire_count--;
mpte = NULL;
@@ -3710,12 +2958,13 @@
*/
pmap->pm_stats.resident_count++;
- pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
+ pa = VM_PAGE_TO_PHYS(m);
#ifdef PAE
if ((prot & VM_PROT_EXECUTE) == 0)
pa |= pg_nx;
#endif
+#if 0
/*
* Now validate mapping with RO protection
*/
@@ -3723,6 +2972,23 @@
pte_store(pte, pa | PG_V | PG_U);
else
pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
+#else
+ /*
+ * Now validate mapping with RO protection
+ */
+ if ((m->oflags & VPO_UNMANAGED) != 0)
+ pa = xpmap_ptom(pa | PG_V | PG_U);
+ else
+ pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED);
+
+ mcl->op = __HYPERVISOR_update_va_mapping;
+ mcl->args[0] = va;
+ mcl->args[1] = (uint32_t)(pa & 0xffffffff);
+ mcl->args[2] = (uint32_t)(pa >> 32);
+ mcl->args[3] = 0;
+ *mclpp = mcl + 1;
+ *count = *count + 1;
+#endif
return (mpte);
}
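
Much of the churn in pmap_enter() and pmap_enter_quick_locked() is address-space bookkeeping: VM_PAGE_TO_PHYS() gives a pseudo-physical address, while a Xen PTE must hold a machine address, so values are translated with xpmap_ptom() on the way in and xpmap_mtop() on the way out. Roughly (illustration only, not patch code):

    /* Build a PTE value from a vm_page_t: translate the frame to a
     * machine frame, keeping the low protection/status bits intact. */
    static pt_entry_t
    page_to_pteval(vm_page_t m, pt_entry_t bits)
    {
            return (xpmap_ptom(VM_PAGE_TO_PHYS(m) | bits));
    }

    /* Recover the vm_page_t from a live PTE: the PTE holds a machine
     * address, so translate back before masking and indexing. */
    static vm_page_t
    pteval_to_page(pt_entry_t pte)
    {
            return (PHYS_TO_VM_PAGE(xpmap_mtop(pte) & PG_FRAME));
    }
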
@@ -3734,9 +3000,10 @@
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
vm_offset_t va;
+ vm_paddr_t ma = xpmap_ptom(pa);
va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
- pmap_kenter(va, pa);
+ PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag);
invlpg(va);
return ((void *)crashdumpmap);
}
@@ -3824,46 +3091,27 @@
void
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
{
- pd_entry_t *pde;
pt_entry_t *pte;
- boolean_t are_queues_locked;
- are_queues_locked = FALSE;
-retry:
+ vm_page_lock_queues();
PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, va);
- if ((*pde & PG_PS) != 0) {
- if (!wired != ((*pde & PG_W) == 0)) {
- if (!are_queues_locked) {
- are_queues_locked = TRUE;
- if (!mtx_trylock(&vm_page_queue_mtx)) {
- PMAP_UNLOCK(pmap);
- vm_page_lock_queues();
- goto retry;
- }
- }
- if (!pmap_demote_pde(pmap, pde, va))
- panic("pmap_change_wiring: demotion failed");
- } else
- goto out;
- }
pte = pmap_pte(pmap, va);
- if (wired && !pmap_pte_w(pte))
+ if (wired && !pmap_pte_w(pte)) {
+ PT_SET_VA_MA((pte), *(pte) | PG_W, TRUE);
pmap->pm_stats.wired_count++;
- else if (!wired && pmap_pte_w(pte))
+ } else if (!wired && pmap_pte_w(pte)) {
+ PT_SET_VA_MA((pte), *(pte) & ~PG_W, TRUE);
pmap->pm_stats.wired_count--;
-
+ }
+
/*
* Wiring is not a hardware characteristic so there is no need to
* invalidate TLB.
*/
- pmap_pte_set_w(pte, wired);
pmap_pte_release(pte);
-out:
- if (are_queues_locked)
- vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
+ vm_page_unlock_queues();
}
@@ -3888,8 +3136,19 @@
if (dst_addr != src_addr)
return;
- if (!pmap_is_current(src_pmap))
+ if (!pmap_is_current(src_pmap)) {
+ CTR2(KTR_PMAP,
+ "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx",
+ (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME));
+
return;
+ }
+ CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x",
+ dst_pmap, src_pmap, dst_addr, len, src_addr);
+
+#ifdef HAMFISTED_LOCKING
+ mtx_lock(&createdelete_lock);
+#endif
vm_page_lock_queues();
if (dst_pmap < src_pmap) {
@@ -3910,21 +3169,15 @@
("pmap_copy: invalid to pmap_copy page tables"));
pdnxt = (addr + NBPDR) & ~PDRMASK;
- if (pdnxt < addr)
- pdnxt = end_addr;
ptepindex = addr >> PDRSHIFT;
- srcptepaddr = src_pmap->pm_pdir[ptepindex];
+ srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
if (srcptepaddr == 0)
continue;
if (srcptepaddr & PG_PS) {
- if (dst_pmap->pm_pdir[ptepindex] == 0 &&
- ((srcptepaddr & PG_MANAGED) == 0 ||
- pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
- PG_PS_FRAME))) {
- dst_pmap->pm_pdir[ptepindex] = srcptepaddr &
- ~PG_W;
+ if (dst_pmap->pm_pdir[ptepindex] == 0) {
+ PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE);
dst_pmap->pm_stats.resident_count +=
NBPDR / PAGE_SIZE;
}
@@ -3953,14 +3206,17 @@
dst_pte = pmap_pte_quick(dst_pmap, addr);
if (*dst_pte == 0 &&
pmap_try_insert_pv_entry(dst_pmap, addr,
- PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) {
+ PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) {
/*
* Clear the wired, modified, and
* accessed (referenced) bits
* during the copy.
*/
- *dst_pte = ptetemp & ~(PG_W | PG_M |
- PG_A);
+ KASSERT(ptetemp != 0, ("src_pte not set"));
+ PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */);
+ KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)),
+ ("no pmap copy expected: 0x%jx saw: 0x%jx",
+ ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte));
dst_pmap->pm_stats.resident_count++;
} else {
free = NULL;
@@ -3980,10 +3236,15 @@
}
}
out:
+ PT_UPDATES_FLUSH();
sched_unpin();
vm_page_unlock_queues();
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
+
+#ifdef HAMFISTED_LOCKING
+ mtx_unlock(&createdelete_lock);
+#endif
}
static __inline void
@@ -4016,11 +3277,9 @@
if (*sysmaps->CMAP2)
panic("pmap_zero_page: CMAP2 busy");
sched_pin();
- *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M |
- pmap_cache_bits(m->md.pat_mode, 0);
- invlcaddr(sysmaps->CADDR2);
+ PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
pagezero(sysmaps->CADDR2);
- *sysmaps->CMAP2 = 0;
+ PT_SET_MA(sysmaps->CADDR2, 0);
sched_unpin();
mtx_unlock(&sysmaps->lock);
}
@@ -4041,14 +3300,13 @@
if (*sysmaps->CMAP2)
panic("pmap_zero_page_area: CMAP2 busy");
sched_pin();
- *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M |
- pmap_cache_bits(m->md.pat_mode, 0);
- invlcaddr(sysmaps->CADDR2);
+ PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
+
if (off == 0 && size == PAGE_SIZE)
pagezero(sysmaps->CADDR2);
else
bzero((char *)sysmaps->CADDR2 + off, size);
- *sysmaps->CMAP2 = 0;
+ PT_SET_MA(sysmaps->CADDR2, 0);
sched_unpin();
mtx_unlock(&sysmaps->lock);
}
@@ -4066,11 +3324,9 @@
if (*CMAP3)
panic("pmap_zero_page_idle: CMAP3 busy");
sched_pin();
- *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M |
- pmap_cache_bits(m->md.pat_mode, 0);
- invlcaddr(CADDR3);
+ PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
pagezero(CADDR3);
- *CMAP3 = 0;
+ PT_SET_MA(CADDR3, 0);
sched_unpin();
}
@@ -4092,15 +3348,11 @@
if (*sysmaps->CMAP2)
panic("pmap_copy_page: CMAP2 busy");
sched_pin();
- invlpg((u_int)sysmaps->CADDR1);
- invlpg((u_int)sysmaps->CADDR2);
- *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A |
- pmap_cache_bits(src->md.pat_mode, 0);
- *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M |
- pmap_cache_bits(dst->md.pat_mode, 0);
+ PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(src) | PG_A);
+ PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(dst) | PG_A | PG_M);
bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
- *sysmaps->CMAP1 = 0;
- *sysmaps->CMAP2 = 0;
+ PT_SET_MA(sysmaps->CADDR1, 0);
+ PT_SET_MA(sysmaps->CADDR2, 0);
sched_unpin();
mtx_unlock(&sysmaps->lock);
}
@@ -4115,7 +3367,6 @@
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
- struct md_page *pvh;
pv_entry_t pv;
int loops = 0;
boolean_t rv;
@@ -4133,18 +3384,6 @@
if (loops >= 16)
break;
}
- if (!rv && loops < 16) {
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
- if (PV_PMAP(pv) == pmap) {
- rv = TRUE;
- break;
- }
- loops++;
- if (loops >= 16)
- break;
- }
- }
vm_page_unlock_queues();
return (rv);
}
@@ -4158,33 +3397,17 @@
int
pmap_page_wired_mappings(vm_page_t m)
{
+ pv_entry_t pv;
+ pt_entry_t *pte;
+ pmap_t pmap;
int count;
count = 0;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (count);
vm_page_lock_queues();
- count = pmap_pvh_wired_mappings(&m->md, count);
- count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count);
- vm_page_unlock_queues();
- return (count);
-}
-
-/*
- * pmap_pvh_wired_mappings:
- *
- * Return the updated number "count" of managed mappings that are wired.
- */
-static int
-pmap_pvh_wired_mappings(struct md_page *pvh, int count)
-{
- pmap_t pmap;
- pt_entry_t *pte;
- pv_entry_t pv;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
sched_pin();
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pte = pmap_pte_quick(pmap, pv->pv_va);
@@ -4193,25 +3416,20 @@
PMAP_UNLOCK(pmap);
}
sched_unpin();
+ vm_page_unlock_queues();
return (count);
}
/*
- * Returns TRUE if the given page is mapped individually or as part of
- * a 4mpage. Otherwise, returns FALSE.
+ * Returns TRUE if the given page is mapped. Otherwise, returns FALSE.
*/
boolean_t
pmap_page_is_mapped(vm_page_t m)
{
- boolean_t rv;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
- vm_page_lock_queues();
- rv = !TAILQ_EMPTY(&m->md.pv_list) ||
- !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list);
- vm_page_unlock_queues();
- return (rv);
+ return (!TAILQ_EMPTY(&m->md.pv_list));
}
/*
@@ -4226,21 +3444,22 @@
pmap_remove_pages(pmap_t pmap)
{
pt_entry_t *pte, tpte;
- vm_page_t free = NULL;
- vm_page_t m, mpte, mt;
+ vm_page_t m, free = NULL;
pv_entry_t pv;
- struct md_page *pvh;
struct pv_chunk *pc, *npc;
int field, idx;
int32_t bit;
uint32_t inuse, bitmask;
int allfree;
- if (pmap != PCPU_GET(curpmap)) {
+ CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap);
+
+ if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
printf("warning: pmap_remove_pages called with non-current pmap\n");
return;
}
vm_page_lock_queues();
+ KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap"));
PMAP_LOCK(pmap);
sched_pin();
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
@@ -4254,12 +3473,8 @@
pv = &pc->pc_pventry[idx];
inuse &= ~bitmask;
- pte = pmap_pde(pmap, pv->pv_va);
- tpte = *pte;
- if ((tpte & PG_PS) == 0) {
- pte = vtopte(pv->pv_va);
- tpte = *pte & ~PG_PTE_PAT;
- }
+ pte = vtopte(pv->pv_va);
+ tpte = *pte ? xpmap_mtop(*pte) : 0;
if (tpte == 0) {
printf(
@@ -4286,55 +3501,30 @@
("pmap_remove_pages: bad tpte %#jx",
(uintmax_t)tpte));
- pte_clear(pte);
+ PT_CLEAR_VA(pte, FALSE);
+
/*
* Update the vm_page_t clean/reference bits.
*/
- if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- if ((tpte & PG_PS) != 0) {
- for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
- vm_page_dirty(mt);
- } else
- vm_page_dirty(m);
- }
+ if (tpte & PG_M)
+ vm_page_dirty(m);
+
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+
+ pmap_unuse_pt(pmap, pv->pv_va, &free);
/* Mark free */
PV_STAT(pv_entry_frees++);
PV_STAT(pv_entry_spare++);
pv_entry_count--;
pc->pc_map[field] |= bitmask;
- if ((tpte & PG_PS) != 0) {
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- pvh = pa_to_pvh(tpte & PG_PS_FRAME);
- TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
- if (TAILQ_EMPTY(&pvh->pv_list)) {
- for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
- if (TAILQ_EMPTY(&mt->md.pv_list))
- vm_page_aflag_clear(mt, PGA_WRITEABLE);
- }
- mpte = pmap_lookup_pt_page(pmap, pv->pv_va);
- if (mpte != NULL) {
- pmap_remove_pt_page(pmap, mpte);
- pmap->pm_stats.resident_count--;
- KASSERT(mpte->wire_count == NPTEPG,
- ("pmap_remove_pages: pte page wire count error"));
- mpte->wire_count = 0;
- pmap_add_delayed_free_list(mpte, &free, FALSE);
- atomic_subtract_int(&cnt.v_wire_count, 1);
- }
- } else {
- pmap->pm_stats.resident_count--;
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- if (TAILQ_EMPTY(&m->md.pv_list)) {
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- if (TAILQ_EMPTY(&pvh->pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- }
- pmap_unuse_pt(pmap, pv->pv_va, &free);
- }
+ pmap->pm_stats.resident_count--;
}
}
+ PT_UPDATES_FLUSH();
if (allfree) {
PV_STAT(pv_entry_spare -= _NPCPV);
PV_STAT(pc_chunk_count--);
@@ -4347,6 +3537,10 @@
pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
}
}
+ PT_UPDATES_FLUSH();
+ if (*PMAP1)
+ PT_SET_MA(PADDR1, 0);
+
sched_unpin();
pmap_invalidate_all(pmap);
vm_page_unlock_queues();
@@ -4363,10 +3557,14 @@
boolean_t
pmap_is_modified(vm_page_t m)
{
+ pv_entry_t pv;
+ pt_entry_t *pte;
+ pmap_t pmap;
boolean_t rv;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_is_modified: page %p is not managed", m));
+ rv = FALSE;
/*
* If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
@@ -4376,40 +3574,22 @@
VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
- return (FALSE);
+ return (rv);
vm_page_lock_queues();
- rv = pmap_is_modified_pvh(&m->md) ||
- pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)));
- vm_page_unlock_queues();
- return (rv);
-}
-
-/*
- * Returns TRUE if any of the given mappings were used to modify
- * physical memory. Otherwise, returns FALSE. Both page and 2mpage
- * mappings are supported.
- */
-static boolean_t
-pmap_is_modified_pvh(struct md_page *pvh)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- pmap_t pmap;
- boolean_t rv;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- rv = FALSE;
sched_pin();
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pte = pmap_pte_quick(pmap, pv->pv_va);
- rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW);
+ rv = (*pte & PG_M) != 0;
PMAP_UNLOCK(pmap);
if (rv)
break;
}
+ if (*PMAP1)
+ PT_SET_MA(PADDR1, 0);
sched_unpin();
+ vm_page_unlock_queues();
return (rv);
}
@@ -4419,60 +3599,46 @@
* Return whether or not the specified virtual address is elgible
* for prefault.
*/
-boolean_t
-pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
+static boolean_t
+pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr)
{
- pd_entry_t *pde;
pt_entry_t *pte;
- boolean_t rv;
+ boolean_t rv = FALSE;
- rv = FALSE;
- PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, addr);
- if (*pde != 0 && (*pde & PG_PS) == 0) {
+ return (rv);
+
+ if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) {
pte = vtopte(addr);
- rv = *pte == 0;
+ rv = (*pte == 0);
}
- PMAP_UNLOCK(pmap);
return (rv);
}
-/*
- * pmap_is_referenced:
- *
- * Return whether or not the specified physical page was referenced
- * in any physical maps.
- */
boolean_t
-pmap_is_referenced(vm_page_t m)
+pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
boolean_t rv;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_is_referenced: page %p is not managed", m));
- vm_page_lock_queues();
- rv = pmap_is_referenced_pvh(&m->md) ||
- pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)));
- vm_page_unlock_queues();
+
+ PMAP_LOCK(pmap);
+ rv = pmap_is_prefaultable_locked(pmap, addr);
+ PMAP_UNLOCK(pmap);
return (rv);
}
-/*
- * Returns TRUE if any of the given mappings were referenced and FALSE
- * otherwise. Both page and 4mpage mappings are supported.
- */
-static boolean_t
-pmap_is_referenced_pvh(struct md_page *pvh)
+boolean_t
+pmap_is_referenced(vm_page_t m)
{
pv_entry_t pv;
pt_entry_t *pte;
pmap_t pmap;
boolean_t rv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_is_referenced: page %p is not managed", m));
rv = FALSE;
+ vm_page_lock_queues();
sched_pin();
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pte = pmap_pte_quick(pmap, pv->pv_va);
@@ -4481,22 +3647,52 @@
if (rv)
break;
}
+ if (*PMAP1)
+ PT_SET_MA(PADDR1, 0);
sched_unpin();
+ vm_page_unlock_queues();
return (rv);
}
+void
+pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
+{
+ int i, npages = round_page(len) >> PAGE_SHIFT;
+ for (i = 0; i < npages; i++) {
+ pt_entry_t *pte;
+ pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
+ vm_page_lock_queues();
+ pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M)));
+ vm_page_unlock_queues();
+ PMAP_MARK_PRIV(xpmap_mtop(*pte));
+ pmap_pte_release(pte);
+ }
+}
+
+void
+pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
+{
+ int i, npages = round_page(len) >> PAGE_SHIFT;
+ for (i = 0; i < npages; i++) {
+ pt_entry_t *pte;
+ pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
+ PMAP_MARK_UNPRIV(xpmap_mtop(*pte));
+ vm_page_lock_queues();
+ pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M));
+ vm_page_unlock_queues();
+ pmap_pte_release(pte);
+ }
+}
+
/*
* Clear the write and modified bits in each of the given page's mappings.
*/
void
pmap_remove_write(vm_page_t m)
{
- struct md_page *pvh;
- pv_entry_t next_pv, pv;
+ pv_entry_t pv;
pmap_t pmap;
- pd_entry_t *pde;
pt_entry_t oldpte, *pte;
- vm_offset_t va;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_write: page %p is not managed", m));
@@ -4512,34 +3708,24 @@
return;
vm_page_lock_queues();
sched_pin();
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
- va = pv->pv_va;
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, va);
- if ((*pde & PG_RW) != 0)
- (void)pmap_demote_pde(pmap, pde, va);
- PMAP_UNLOCK(pmap);
- }
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, pv->pv_va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found"
- " a 4mpage in page %p's pv list", m));
pte = pmap_pte_quick(pmap, pv->pv_va);
retry:
oldpte = *pte;
if ((oldpte & PG_RW) != 0) {
+ vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M);
+
/*
* Regardless of whether a pte is 32 or 64 bits
* in size, PG_RW and PG_M are among the least
* significant 32 bits.
*/
- if (!atomic_cmpset_int((u_int *)pte, oldpte,
- oldpte & ~(PG_RW | PG_M)))
+ PT_SET_VA_MA(pte, newpte, TRUE);
+ if (*pte != newpte)
goto retry;
+
if ((oldpte & PG_M) != 0)
vm_page_dirty(m);
pmap_invalidate_page(pmap, pv->pv_va);
@@ -4547,6 +3733,9 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
+ PT_UPDATES_FLUSH();
+ if (*PMAP1)
+ PT_SET_MA(PADDR1, 0);
sched_unpin();
vm_page_unlock_queues();
}
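
One more pattern worth calling out from the pmap_remove_write() hunk above: where the native pmap clears PG_RW/PG_M with atomic_cmpset_int(), the Xen pmap cannot compare-and-swap a PTE it is not allowed to write directly, so it issues the update through PT_SET_VA_MA() and re-reads the entry, retrying if the value changed underneath it. In outline (again an illustration, not code from the patch):

    /*
     * Sketch: make a mapping read-only when the PTE can only be changed
     * via the hypervisor.  Write the new value, then verify; if the PTE
     * no longer matches, something raced and we retry.
     */
    static void
    xen_clear_write(pt_entry_t *pte)
    {
            pt_entry_t oldpte, newpte;

            do {
                    oldpte = *pte;
                    if ((oldpte & PG_RW) == 0)
                            return;                 /* already read-only */
                    newpte = oldpte & ~(PG_RW | PG_M);
                    PT_SET_VA_MA(pte, newpte, TRUE);
            } while (*pte != newpte);               /* re-check for a race */
    }
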
@@ -4566,49 +3755,15 @@
int
pmap_ts_referenced(vm_page_t m)
{
- struct md_page *pvh;
pv_entry_t pv, pvf, pvn;
pmap_t pmap;
- pd_entry_t oldpde, *pde;
pt_entry_t *pte;
- vm_offset_t va;
int rtval = 0;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
vm_page_lock_queues();
sched_pin();
- TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) {
- va = pv->pv_va;
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, va);
- oldpde = *pde;
- if ((oldpde & PG_A) != 0) {
- if (pmap_demote_pde(pmap, pde, va)) {
- if ((oldpde & PG_W) == 0) {
- /*
- * Remove the mapping to a single page
- * so that a subsequent access may
- * repromote. Since the underlying
- * page table page is fully populated,
- * this removal never frees a page
- * table page.
- */
- va += VM_PAGE_TO_PHYS(m) - (oldpde &
- PG_PS_FRAME);
- pmap_remove_page(pmap, va, NULL);
- rtval++;
- if (rtval > 4) {
- PMAP_UNLOCK(pmap);
- goto out;
- }
- }
- }
- }
- PMAP_UNLOCK(pmap);
- }
if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pvf = pv;
do {
@@ -4617,12 +3772,9 @@
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, pv->pv_va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:"
- " found a 4mpage in page %p's pv list", m));
pte = pmap_pte_quick(pmap, pv->pv_va);
if ((*pte & PG_A) != 0) {
- atomic_clear_int((u_int *)pte, PG_A);
+ PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE);
pmap_invalidate_page(pmap, pv->pv_va);
rtval++;
if (rtval > 4)
@@ -4631,7 +3783,9 @@
PMAP_UNLOCK(pmap);
} while ((pv = pvn) != NULL && pv != pvf);
}
-out:
+ PT_UPDATES_FLUSH();
+ if (*PMAP1)
+ PT_SET_MA(PADDR1, 0);
sched_unpin();
vm_page_unlock_queues();
return (rtval);
@@ -4643,12 +3797,9 @@
void
pmap_clear_modify(vm_page_t m)
{
- struct md_page *pvh;
- pv_entry_t next_pv, pv;
+ pv_entry_t pv;
pmap_t pmap;
- pd_entry_t oldpde, *pde;
- pt_entry_t oldpte, *pte;
- vm_offset_t va;
+ pt_entry_t *pte;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
@@ -4665,49 +3816,9 @@
return;
vm_page_lock_queues();
sched_pin();
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
- va = pv->pv_va;
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, va);
- oldpde = *pde;
- if ((oldpde & PG_RW) != 0) {
- if (pmap_demote_pde(pmap, pde, va)) {
- if ((oldpde & PG_W) == 0) {
- /*
- * Write protect the mapping to a
- * single page so that a subsequent
- * write access may repromote.
- */
- va += VM_PAGE_TO_PHYS(m) - (oldpde &
- PG_PS_FRAME);
- pte = pmap_pte_quick(pmap, va);
- oldpte = *pte;
- if ((oldpte & PG_V) != 0) {
- /*
- * Regardless of whether a pte is 32 or 64 bits
- * in size, PG_RW and PG_M are among the least
- * significant 32 bits.
- */
- while (!atomic_cmpset_int((u_int *)pte,
- oldpte,
- oldpte & ~(PG_M | PG_RW)))
- oldpte = *pte;
- vm_page_dirty(m);
- pmap_invalidate_page(pmap, va);
- }
- }
- }
- }
- PMAP_UNLOCK(pmap);
- }
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, pv->pv_va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found"
- " a 4mpage in page %p's pv list", m));
pte = pmap_pte_quick(pmap, pv->pv_va);
if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
/*
@@ -4715,7 +3826,7 @@
* in size, PG_M is among the least significant
* 32 bits.
*/
- atomic_clear_int((u_int *)pte, PG_M);
+ PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE);
pmap_invalidate_page(pmap, pv->pv_va);
}
PMAP_UNLOCK(pmap);
@@ -4732,46 +3843,17 @@
void
pmap_clear_reference(vm_page_t m)
{
- struct md_page *pvh;
- pv_entry_t next_pv, pv;
+ pv_entry_t pv;
pmap_t pmap;
- pd_entry_t oldpde, *pde;
pt_entry_t *pte;
- vm_offset_t va;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_reference: page %p is not managed", m));
vm_page_lock_queues();
sched_pin();
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
- va = pv->pv_va;
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, va);
- oldpde = *pde;
- if ((oldpde & PG_A) != 0) {
- if (pmap_demote_pde(pmap, pde, va)) {
- /*
- * Remove the mapping to a single page so
- * that a subsequent access may repromote.
- * Since the underlying page table page is
- * fully populated, this removal never frees
- * a page table page.
- */
- va += VM_PAGE_TO_PHYS(m) - (oldpde &
- PG_PS_FRAME);
- pmap_remove_page(pmap, va, NULL);
- }
- }
- PMAP_UNLOCK(pmap);
- }
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
- pde = pmap_pde(pmap, pv->pv_va);
- KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found"
- " a 4mpage in page %p's pv list", m));
pte = pmap_pte_quick(pmap, pv->pv_va);
if ((*pte & PG_A) != 0) {
/*
@@ -4779,7 +3861,7 @@
* in size, PG_A is among the least significant
* 32 bits.
*/
- atomic_clear_int((u_int *)pte, PG_A);
+ PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE);
pmap_invalidate_page(pmap, pv->pv_va);
}
PMAP_UNLOCK(pmap);
@@ -4792,40 +3874,6 @@
* Miscellaneous support routines follow
*/
-/* Adjust the cache mode for a 4KB page mapped via a PTE. */
-static __inline void
-pmap_pte_attr(pt_entry_t *pte, int cache_bits)
-{
- u_int opte, npte;
-
- /*
- * The cache mode bits are all in the low 32-bits of the
- * PTE, so we can just spin on updating the low 32-bits.
- */
- do {
- opte = *(u_int *)pte;
- npte = opte & ~PG_PTE_CACHE;
- npte |= cache_bits;
- } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte));
-}
-
-/* Adjust the cache mode for a 2/4MB page mapped via a PDE. */
-static __inline void
-pmap_pde_attr(pd_entry_t *pde, int cache_bits)
-{
- u_int opde, npde;
-
- /*
- * The cache mode bits are all in the low 32-bits of the
- * PDE, so we can just spin on updating the low 32-bits.
- */
- do {
- opde = *(u_int *)pde;
- npde = opde & ~PG_PDE_CACHE;
- npde |= cache_bits;
- } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde));
-}
-
/*
* Map a set of physical memory pages into the kernel virtual
* address space. Return a pointer to where it is mapped. This
@@ -4880,9 +3928,11 @@
base = trunc_page(va);
offset = va & PAGE_MASK;
size = roundup(offset + size, PAGE_SIZE);
+ critical_enter();
for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
pmap_kremove(tmpva);
pmap_invalidate_range(kernel_pmap, va, tmpva);
+ critical_exit();
kmem_free(kernel_map, base, size);
}
@@ -4930,8 +3980,9 @@
if (*sysmaps->CMAP2)
panic("pmap_flush_page: CMAP2 busy");
sched_pin();
- *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) |
- PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0);
+ PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
+ VM_PAGE_TO_MACH(m) | PG_A | PG_M |
+ pmap_cache_bits(m->md.pat_mode, 0));
invlcaddr(sysmaps->CADDR2);
sva = (vm_offset_t)sysmaps->CADDR2;
eva = sva + PAGE_SIZE;
@@ -4945,7 +3996,7 @@
for (; sva < eva; sva += cpu_clflush_line_size)
clflush(sva);
mfence();
- *sysmaps->CMAP2 = 0;
+ PT_SET_MA(sysmaps->CADDR2, 0);
sched_unpin();
mtx_unlock(&sysmaps->lock);
} else
@@ -4966,93 +4017,54 @@
pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
{
vm_offset_t base, offset, tmpva;
- pd_entry_t *pde;
pt_entry_t *pte;
- int cache_bits_pte, cache_bits_pde;
+ u_int opte, npte;
+ pd_entry_t *pde;
boolean_t changed;
base = trunc_page(va);
offset = va & PAGE_MASK;
size = roundup(offset + size, PAGE_SIZE);
- /*
- * Only supported on kernel virtual addresses above the recursive map.
- */
- if (base < VM_MIN_KERNEL_ADDRESS)
+ /* Only supported on kernel virtual addresses. */
+ if (base <= VM_MAXUSER_ADDRESS)
return (EINVAL);
- cache_bits_pde = pmap_cache_bits(mode, 1);
- cache_bits_pte = pmap_cache_bits(mode, 0);
- changed = FALSE;
-
- /*
- * Pages that aren't mapped aren't supported. Also break down
- * 2/4MB pages into 4KB pages if required.
- */
- PMAP_LOCK(kernel_pmap);
- for (tmpva = base; tmpva < base + size; ) {
+ /* 4MB pages and pages that aren't mapped aren't supported. */
+ for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
pde = pmap_pde(kernel_pmap, tmpva);
- if (*pde == 0) {
- PMAP_UNLOCK(kernel_pmap);
+ if (*pde & PG_PS)
return (EINVAL);
- }
- if (*pde & PG_PS) {
- /*
- * If the current 2/4MB page already has
- * the required memory type, then we need not
- * demote this page. Just increment tmpva to
- * the next 2/4MB page frame.
- */
- if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
- tmpva = trunc_4mpage(tmpva) + NBPDR;
- continue;
- }
-
- /*
- * If the current offset aligns with a 2/4MB
- * page frame and there is at least 2/4MB left
- * within the range, then we need not break
- * down this page into 4KB pages.
- */
- if ((tmpva & PDRMASK) == 0 &&
- tmpva + PDRMASK < base + size) {
- tmpva += NBPDR;
- continue;
- }
- if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) {
- PMAP_UNLOCK(kernel_pmap);
- return (ENOMEM);
- }
- }
- pte = vtopte(tmpva);
- if (*pte == 0) {
- PMAP_UNLOCK(kernel_pmap);
+ if ((*pde & PG_V) == 0)
+ return (EINVAL);
+ pte = vtopte(va);
+ if ((*pte & PG_V) == 0)
return (EINVAL);
- }
- tmpva += PAGE_SIZE;
}
- PMAP_UNLOCK(kernel_pmap);
+
+ changed = FALSE;
/*
- * Ok, all the pages exist, so run through them updating their
- * cache mode if required.
+ * Ok, all the pages exist and are 4k, so run through them updating
+ * their cache mode.
*/
- for (tmpva = base; tmpva < base + size; ) {
- pde = pmap_pde(kernel_pmap, tmpva);
- if (*pde & PG_PS) {
- if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
- pmap_pde_attr(pde, cache_bits_pde);
- changed = TRUE;
- }
- tmpva = trunc_4mpage(tmpva) + NBPDR;
- } else {
- pte = vtopte(tmpva);
- if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
- pmap_pte_attr(pte, cache_bits_pte);
- changed = TRUE;
- }
- tmpva += PAGE_SIZE;
- }
+ for (tmpva = base; size > 0; ) {
+ pte = vtopte(tmpva);
+
+ /*
+ * The cache mode bits are all in the low 32-bits of the
+ * PTE, so we can just spin on updating the low 32-bits.
+ */
+ do {
+ opte = *(u_int *)pte;
+ npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
+ npte |= pmap_cache_bits(mode, 0);
+ PT_SET_VA_MA(pte, npte, TRUE);
+ } while (npte != opte && (*pte != npte));
+ if (npte != opte)
+ changed = TRUE;
+ tmpva += PAGE_SIZE;
+ size -= PAGE_SIZE;
}
/*
@@ -5072,33 +4084,16 @@
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
- pd_entry_t *pdep;
pt_entry_t *ptep, pte;
vm_paddr_t pa;
int val;
PMAP_LOCK(pmap);
retry:
- pdep = pmap_pde(pmap, addr);
- if (*pdep != 0) {
- if (*pdep & PG_PS) {
- pte = *pdep;
- /* Compute the physical address of the 4KB page. */
- pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) &
- PG_FRAME;
- val = MINCORE_SUPER;
- } else {
- ptep = pmap_pte(pmap, addr);
- pte = *ptep;
- pmap_pte_release(ptep);
- pa = pte & PG_FRAME;
- val = 0;
- }
- } else {
- pte = 0;
- pa = 0;
- val = 0;
- }
+ ptep = pmap_pte(pmap, addr);
+ pte = (ptep != NULL) ? PT_GET(ptep) : 0;
+ pmap_pte_release(ptep);
+ val = 0;
if ((pte & PG_V) != 0) {
val |= MINCORE_INCORE;
if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
@@ -5109,6 +4104,7 @@
if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
(MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
(pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
+ pa = pte & PG_FRAME;
/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
goto retry;
@@ -5145,6 +4141,7 @@
* pmap_activate is for the current thread on the current cpu
*/
td->td_pcb->pcb_cr3 = cr3;
+ PT_UPDATES_FLUSH();
load_cr3(cr3);
PCPU_SET(curpmap, pmap);
critical_exit();
@@ -5179,6 +4176,67 @@
*addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
}
+void
+pmap_suspend()
+{
+ pmap_t pmap;
+ int i, pdir, offset;
+ vm_paddr_t pdirma;
+ mmu_update_t mu[4];
+
+ /*
+ * We need to remove the recursive mapping structure from all
+ * our pmaps so that Xen doesn't get confused when it restores
+ * the page tables. The recursive map lives at page directory
+ * index PTDPTDI. We assume that the suspend code has stopped
+ * the other vcpus (if any).
+ */
+ LIST_FOREACH(pmap, &allpmaps, pm_list) {
+ for (i = 0; i < 4; i++) {
+ /*
+ * Figure out which page directory (L2) page
+ * contains this bit of the recursive map and
+ * the offset within that page of the map
+ * entry
+ */
+ pdir = (PTDPTDI + i) / NPDEPG;
+ offset = (PTDPTDI + i) % NPDEPG;
+ pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
+ mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
+ mu[i].val = 0;
+ }
+ HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
+ }
+}
+
+void
+pmap_resume()
+{
+ pmap_t pmap;
+ int i, pdir, offset;
+ vm_paddr_t pdirma;
+ mmu_update_t mu[4];
+
+ /*
+ * Restore the recursive map that we removed on suspend.
+ */
+ LIST_FOREACH(pmap, &allpmaps, pm_list) {
+ for (i = 0; i < 4; i++) {
+ /*
+ * Figure out which page directory (L2) page
+ * contains this bit of the recursive map and
+ * the offset within that page of the map
+ * entry
+ */
+ pdir = (PTDPTDI + i) / NPDEPG;
+ offset = (PTDPTDI + i) % NPDEPG;
+ pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
+ mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
+ mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V;
+ }
+ HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
+ }
+}
#if defined(PMAP_DEBUG)
pmap_pid_dump(int pid)
@@ -5218,7 +4276,7 @@
if (pte && pmap_pte_v(pte)) {
pt_entry_t pa;
vm_page_t m;
- pa = *pte;
+ pa = PT_GET(pte);
m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
va, pa, m->hold_count, m->wire_count, m->flags);