PERFORCE change 138710 for review
John Birrell jb at FreeBSD.org
Wed Mar 26 22:21:11 PDT 2008
http://perforce.freebsd.org/chv.cgi?CH=138710
Change 138710 by jb at jb_freebsd1 on 2008/03/27 05:20:19
IFC
Affected files ...
.. //depot/projects/dtrace/src/etc/newsyslog.conf#4 integrate
.. //depot/projects/dtrace/src/sys/amd64/include/param.h#6 integrate
.. //depot/projects/dtrace/src/sys/arm/include/param.h#6 integrate
.. //depot/projects/dtrace/src/sys/i386/i386/pmap.c#31 integrate
.. //depot/projects/dtrace/src/sys/i386/include/param.h#6 integrate
.. //depot/projects/dtrace/src/sys/i386/include/pmap.h#13 integrate
.. //depot/projects/dtrace/src/sys/ia64/include/param.h#6 integrate
.. //depot/projects/dtrace/src/sys/powerpc/include/param.h#6 integrate
.. //depot/projects/dtrace/src/sys/sparc64/include/param.h#7 integrate
.. //depot/projects/dtrace/src/sys/sun4v/include/param.h#7 integrate
Differences ...
==== //depot/projects/dtrace/src/etc/newsyslog.conf#4 (text+ko) ====
@@ -1,5 +1,5 @@
# configuration file for newsyslog
-# $FreeBSD: src/etc/newsyslog.conf,v 1.50 2005/03/02 00:40:55 brooks Exp $
+# $FreeBSD: src/etc/newsyslog.conf,v 1.51 2008/03/27 03:30:14 brooks Exp $
#
# Entries which do not specify the '/pid_file' field will cause the
# syslogd process to be signalled when that log file is rotated. This
@@ -33,7 +33,7 @@
/var/log/ppp.log root:network 640 3 100 * JC
/var/log/security 600 10 100 * JC
/var/log/sendmail.st 640 10 * 168 B
-/var/log/slip.log root:network 640 3 100 * JC
+/var/log/slip.log root:network 640 3 100 * J
/var/log/weekly.log 640 5 1 $W6D0 JN
/var/log/wtmp 644 3 * @01T05 B
/var/log/xferlog 600 7 100 * JC
==== //depot/projects/dtrace/src/sys/amd64/include/param.h#6 (text+ko) ====
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)param.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/amd64/include/param.h,v 1.20 2006/01/09 06:05:56 imp Exp $
+ * $FreeBSD: src/sys/amd64/include/param.h,v 1.21 2008/03/27 05:03:24 jb Exp $
*/
/*
==== //depot/projects/dtrace/src/sys/arm/include/param.h#6 (text+ko) ====
@@ -35,7 +35,7 @@
* SUCH DAMAGE.
*
* from: @(#)param.h 5.8 (Berkeley) 6/28/91
- * $FreeBSD: src/sys/arm/include/param.h,v 1.11 2006/01/09 06:05:56 imp Exp $
+ * $FreeBSD: src/sys/arm/include/param.h,v 1.12 2008/03/27 05:03:25 jb Exp $
*/
/*
@@ -76,7 +76,7 @@
#define MAXCPU 2
#else
#define MAXCPU 1
-#endif /* SMP */
+#endif /* SMP || KLD_MODULE */
#define ALIGNBYTES _ALIGNBYTES
#define ALIGN(p) _ALIGN(p)
==== //depot/projects/dtrace/src/sys/i386/i386/pmap.c#31 (text+ko) ====
@@ -5,7 +5,7 @@
* All rights reserved.
* Copyright (c) 1994 David Greenman
* All rights reserved.
- * Copyright (c) 2005 Alan L. Cox <alc at cs.rice.edu>
+ * Copyright (c) 2005-2008 Alan L. Cox <alc at cs.rice.edu>
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
@@ -75,7 +75,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/pmap.c,v 1.609 2008/03/23 07:07:27 kib Exp $");
+__FBSDID("$FreeBSD: src/sys/i386/i386/pmap.c,v 1.610 2008/03/27 04:34:17 alc Exp $");
/*
* Manages physical address maps.
@@ -112,6 +112,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
@@ -135,6 +136,7 @@
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_reserv.h>
#include <vm/uma.h>
#include <machine/cpu.h>
@@ -171,6 +173,9 @@
#define PV_STAT(x) do { } while (0)
#endif
+#define pa_index(pa) ((pa) >> PDRSHIFT)
+#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
+
/*
* Get PDEs and PTEs for user/kernel address space
*/
@@ -206,10 +211,17 @@
static uma_zone_t pdptzone;
#endif
+SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
+
+static int pg_ps_enabled;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RD, &pg_ps_enabled, 0,
+ "Are large page mappings enabled?");
+
/*
* Data for the pv entry allocation mechanism
*/
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
+static struct md_page *pv_table;
static int shpgperproc = PMAP_SHPGPERPROC;
struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */
@@ -259,11 +271,29 @@
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
+static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
+static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_page_t m);
+static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
+static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
+static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
+ vm_offset_t va);
+static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ vm_prot_t prot);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
+static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
+static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
+static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
+ vm_prot_t prot);
+static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
+ vm_page_t *free);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
vm_page_t *free);
+static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
vm_page_t *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
@@ -351,6 +381,7 @@
#ifdef PAE
kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
+ kernel_pmap->pm_root = NULL;
kernel_pmap->pm_active = -1; /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
LIST_INIT(&allpmaps);
@@ -599,8 +630,24 @@
void
pmap_init(void)
{
+ vm_page_t mpte;
+ vm_size_t s;
+ int i, pv_npg;
/*
+ * Initialize the vm page array entries for the kernel pmap's
+ * page table pages.
+ */
+ for (i = 0; i < nkpt; i++) {
+ mpte = PHYS_TO_VM_PAGE(PTD[i + KPTDI] & PG_FRAME);
+ KASSERT(mpte >= vm_page_array &&
+ mpte < &vm_page_array[vm_page_array_size],
+ ("pmap_init: page table page is out of range"));
+ mpte->pindex = i + KPTDI;
+ mpte->phys_addr = PTD[i + KPTDI] & PG_FRAME;
+ }
+
+ /*
* Initialize the address space (zone) for the pv entries. Set a
* high water mark so that the system can recover from excessive
* numbers of pv entries.
@@ -611,6 +658,26 @@
pv_entry_max = roundup(pv_entry_max, _NPCPV);
pv_entry_high_water = 9 * (pv_entry_max / 10);
+ /*
+ * Are large page mappings enabled?
+ */
+ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
+
+ /*
+ * Calculate the size of the pv head table for superpages.
+ */
+ for (i = 0; phys_avail[i + 1]; i += 2);
+ pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
+
+ /*
+ * Allocate memory for the pv head table for superpages.
+ */
+ s = (vm_size_t)(pv_npg * sizeof(struct md_page));
+ s = round_page(s);
+ pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
+ for (i = 0; i < pv_npg; i++)
+ TAILQ_INIT(&pv_table[i].pv_list);
+
pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
PAGE_SIZE * pv_maxchunks);
@@ -626,12 +693,30 @@
}
-SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
"Max number of PV entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
"Page share factor per proc");
+SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
+ "2/4MB page mapping counters");
+
+static u_long pmap_pde_demotions;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD,
+ &pmap_pde_demotions, 0, "2/4MB page demotions");
+
+static u_long pmap_pde_mappings;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
+ &pmap_pde_mappings, 0, "2/4MB page mappings");
+
+static u_long pmap_pde_p_failures;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD,
+ &pmap_pde_p_failures, 0, "2/4MB page promotion failures");
+
+static u_long pmap_pde_promotions;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD,
+ &pmap_pde_promotions, 0, "2/4MB page promotions");
+
/***************************************************
* Low level helper routines.....
***************************************************/
@@ -1154,8 +1239,101 @@
while (free != NULL) {
m = free;
free = m->right;
- vm_page_free_zero(m);
+ /* Preserve the page's PG_ZERO setting. */
+ vm_page_free_toq(m);
+ }
+}
+
+/*
+ * Schedule the specified unused page table page to be freed. Specifically,
+ * add the page to the specified list of pages that will be released to the
+ * physical memory manager after the TLB has been updated.
+ */
+static __inline void
+pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
+{
+
+ if (set_PG_ZERO)
+ m->flags |= PG_ZERO;
+ else
+ m->flags &= ~PG_ZERO;
+ m->right = *free;
+ *free = m;
+}
+
+/*
+ * Inserts the specified page table page into the specified pmap's collection
+ * of idle page table pages. Each of a pmap's page table pages is responsible
+ * for mapping a distinct range of virtual addresses. The pmap's collection is
+ * ordered by this virtual address range.
+ */
+static void
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
+{
+ vm_page_t root;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ root = pmap->pm_root;
+ if (root == NULL) {
+ mpte->left = NULL;
+ mpte->right = NULL;
+ } else {
+ root = vm_page_splay(mpte->pindex, root);
+ if (mpte->pindex < root->pindex) {
+ mpte->left = root->left;
+ mpte->right = root;
+ root->left = NULL;
+ } else if (mpte->pindex == root->pindex)
+ panic("pmap_insert_pt_page: pindex already inserted");
+ else {
+ mpte->right = root->right;
+ mpte->left = root;
+ root->right = NULL;
+ }
+ }
+ pmap->pm_root = mpte;
+}
+
+/*
+ * Looks for a page table page mapping the specified virtual address in the
+ * specified pmap's collection of idle page table pages. Returns NULL if there
+ * is no page table page corresponding to the specified virtual address.
+ */
+static vm_page_t
+pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
+{
+ vm_page_t mpte;
+ vm_pindex_t pindex = va >> PDRSHIFT;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) {
+ mpte = vm_page_splay(pindex, mpte);
+ if ((pmap->pm_root = mpte)->pindex != pindex)
+ mpte = NULL;
+ }
+ return (mpte);
+}
+
+/*
+ * Removes the specified page table page from the specified pmap's collection
+ * of idle page table pages. The specified page table page must be a member of
+ * the pmap's collection.
+ */
+static void
+pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
+{
+ vm_page_t root;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (mpte != pmap->pm_root)
+ vm_page_splay(mpte->pindex, pmap->pm_root);
+ if (mpte->left == NULL)
+ root = mpte->right;
+ else {
+ root = vm_page_splay(mpte->pindex, mpte->left);
+ root->right = mpte->right;
}
+ pmap->pm_root = root;
}
/*
@@ -1202,8 +1380,7 @@
* Put page on a list so that it is released after
* *ALL* TLB shootdown is done
*/
- m->right = *free;
- *free = m;
+ pmap_add_delayed_free_list(m, free, TRUE);
return 1;
}
@@ -1234,6 +1411,7 @@
#ifdef PAE
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
+ pmap->pm_root = NULL;
pmap->pm_active = 0;
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
@@ -1277,7 +1455,10 @@
KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
("pmap_pinit: pdpt above 4g"));
#endif
+ pmap->pm_root = NULL;
}
+ KASSERT(pmap->pm_root == NULL,
+ ("pmap_pinit: pmap has reserved page table page(s)"));
/*
* allocate the page directory page(s)
@@ -1398,10 +1579,8 @@
* normal 4K page.
*/
if (ptepa & PG_PS) {
- pmap->pm_pdir[ptepindex] = 0;
- ptepa = 0;
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- pmap_invalidate_all(kernel_pmap);
+ (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va);
+ ptepa = pmap->pm_pdir[ptepindex];
}
/*
@@ -1535,6 +1714,8 @@
KASSERT(pmap->pm_stats.resident_count == 0,
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
+ KASSERT(pmap->pm_root == NULL,
+ ("pmap_release: pmap has reserved page table page(s)"));
pmap_lazyfix(pmap);
mtx_lock_spin(&allpmaps_lock);
@@ -1716,6 +1897,8 @@
static void
pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
{
+ struct md_page *pvh;
+ pd_entry_t *pde;
pmap_t pmap;
pt_entry_t *pte, tpte;
pv_entry_t next_pv, pv;
@@ -1735,25 +1918,27 @@
else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
continue;
pmap->pm_stats.resident_count--;
+ pde = pmap_pde(pmap, va);
+ KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
+ " a 4mpage in page %p's pv list", m));
pte = pmap_pte_quick(pmap, va);
tpte = pte_load_clear(pte);
KASSERT((tpte & PG_W) == 0,
("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
if (tpte & PG_A)
vm_page_flag_set(m, PG_REFERENCED);
- if (tpte & PG_M) {
- KASSERT((tpte & PG_RW),
- ("pmap_collect: modified page not writable: va: %#x, pte: %#jx",
- va, (uintmax_t)tpte));
+ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
vm_page_dirty(m);
- }
free = NULL;
pmap_unuse_pt(pmap, va, &free);
pmap_invalidate_page(pmap, va);
pmap_free_zero_pages(free);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_flag_clear(m, PG_WRITEABLE);
+ if (TAILQ_EMPTY(&m->md.pv_list)) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_flag_clear(m, PG_WRITEABLE);
+ }
free_pv_entry(pmap, pv);
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
@@ -1895,24 +2080,112 @@
return (pv);
}
-static void
-pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
+static __inline pv_entry_t
+pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
pv_entry_t pv;
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- if (pmap == PV_PMAP(pv) && va == pv->pv_va)
+ TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+ if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
+ TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
break;
+ }
}
- KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_flag_clear(m, PG_WRITEABLE);
+ return (pv);
+}
+
+static void
+pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+ vm_offset_t va_last;
+ vm_page_t m;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ KASSERT((pa & PDRMASK) == 0,
+ ("pmap_pv_demote_pde: pa is not 4mpage aligned"));
+
+ /*
+ * Transfer the 4mpage's pv entry for this mapping to the first
+ * page's pv list.
+ */
+ pvh = pa_to_pvh(pa);
+ va = trunc_4mpage(va);
+ pv = pmap_pvh_remove(pvh, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found"));
+ m = PHYS_TO_VM_PAGE(pa);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ /* Instantiate the remaining NPTEPG - 1 pv entries. */
+ va_last = va + NBPDR - PAGE_SIZE;
+ do {
+ m++;
+ KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
+ ("pmap_pv_demote_pde: page %p is not managed", m));
+ va += PAGE_SIZE;
+ pmap_insert_entry(pmap, va, m);
+ } while (va < va_last);
+}
+
+static void
+pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+ vm_offset_t va_last;
+ vm_page_t m;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ KASSERT((pa & PDRMASK) == 0,
+ ("pmap_pv_promote_pde: pa is not 4mpage aligned"));
+
+ /*
+ * Transfer the first page's pv entry for this mapping to the
+ * 4mpage's pv list. Aside from avoiding the cost of a call
+ * to get_pv_entry(), a transfer avoids the possibility that
+ * get_pv_entry() calls pmap_collect() and that pmap_collect()
+ * removes one of the mappings that is being promoted.
+ */
+ m = PHYS_TO_VM_PAGE(pa);
+ va = trunc_4mpage(va);
+ pv = pmap_pvh_remove(&m->md, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found"));
+ pvh = pa_to_pvh(pa);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
+ /* Free the remaining NPTEPG - 1 pv entries. */
+ va_last = va + NBPDR - PAGE_SIZE;
+ do {
+ m++;
+ va += PAGE_SIZE;
+ pmap_pvh_free(&m->md, pmap, va);
+ } while (va < va_last);
+}
+
+static void
+pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
+{
+ pv_entry_t pv;
+
+ pv = pmap_pvh_remove(pvh, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
free_pv_entry(pmap, pv);
}
+static void
+pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
+{
+ struct md_page *pvh;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ pmap_pvh_free(&m->md, pmap, va);
+ if (TAILQ_EMPTY(&m->md.pv_list)) {
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ if (TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_flag_clear(m, PG_WRITEABLE);
+ }
+}
+
/*
* Create a pv entry for page at pa for
* (pmap, va).
@@ -1949,6 +2222,222 @@
}
/*
+ * Create the pv entries for each of the pages within a superpage.
+ */
+static boolean_t
+pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ if (pv_entry_count < pv_entry_high_water &&
+ (pv = get_pv_entry(pmap, TRUE)) != NULL) {
+ pv->pv_va = va;
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
+ return (TRUE);
+ } else
+ return (FALSE);
+}
+
+/*
+ * Tries to demote a 2- or 4MB page mapping.
+ */
+static boolean_t
+pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
+{
+ pd_entry_t newpde, oldpde;
+ pmap_t allpmaps_entry;
+ pt_entry_t *firstpte, newpte, *pte;
+ vm_paddr_t mptepa;
+ vm_page_t free, mpte;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ mpte = pmap_lookup_pt_page(pmap, va);
+ if (mpte != NULL)
+ pmap_remove_pt_page(pmap, mpte);
+ else {
+ KASSERT((*pde & PG_W) == 0,
+ ("pmap_demote_pde: page table page for a wired mapping"
+ " is missing"));
+ free = NULL;
+ pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
+ pmap_invalidate_page(pmap, trunc_4mpage(va));
+ pmap_free_zero_pages(free);
+ CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
+ " in pmap %p", va, pmap);
+ return (FALSE);
+ }
+ mptepa = VM_PAGE_TO_PHYS(mpte);
+
+ /*
+ * Temporarily map the page table page (mpte) into the kernel's
+ * address space at either PADDR1 or PADDR2.
+ */
+ if (curthread->td_pinned > 0 && mtx_owned(&vm_page_queue_mtx)) {
+ if ((*PMAP1 & PG_FRAME) != mptepa) {
+ *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M;
+#ifdef SMP
+ PMAP1cpu = PCPU_GET(cpuid);
+#endif
+ invlcaddr(PADDR1);
+ PMAP1changed++;
+ } else
+#ifdef SMP
+ if (PMAP1cpu != PCPU_GET(cpuid)) {
+ PMAP1cpu = PCPU_GET(cpuid);
+ invlcaddr(PADDR1);
+ PMAP1changedcpu++;
+ } else
+#endif
+ PMAP1unchanged++;
+ firstpte = PADDR1;
+ } else {
+ mtx_lock(&PMAP2mutex);
+ if ((*PMAP2 & PG_FRAME) != mptepa) {
+ *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M;
+ pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
+ }
+ firstpte = PADDR2;
+ }
+ oldpde = *pde;
+ newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
+ KASSERT((oldpde & (PG_A | PG_V)) == (PG_A | PG_V),
+ ("pmap_demote_pde: oldpde is missing PG_A and/or PG_V"));
+ KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
+ ("pmap_demote_pde: oldpde is missing PG_M"));
+ KASSERT((oldpde & PG_PS) != 0,
+ ("pmap_demote_pde: oldpde is missing PG_PS"));
+ newpte = oldpde & ~PG_PS;
+ if ((newpte & PG_PDE_PAT) != 0)
+ newpte ^= PG_PDE_PAT | PG_PTE_PAT;
+
+ /*
+ * If the mapping has changed attributes, update the page table
+ * entries.
+ */
+ KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
+ ("pmap_demote_pde: firstpte and newpte map different physical"
+ " addresses"));
+ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
+ for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
+ *pte = newpte;
+ newpte += PAGE_SIZE;
+ }
+
+ /*
+ * Demote the mapping. This pmap is locked. The old PDE has
+ * PG_A set. If the old PDE has PG_RW set, it also has PG_M
+ * set. Thus, there is no danger of a race with another
+ * processor changing the setting of PG_A and/or PG_M between
+ * the read above and the store below.
+ */
+ if (pmap == kernel_pmap) {
+ /*
+ * A harmless race exists between this loop and the bcopy()
+ * in pmap_pinit() that initializes the kernel segment of
+ * the new page table. Specifically, that bcopy() may copy
+ * the new PDE from the PTD, which is first in allpmaps, to
+ * the new page table before this loop updates that new
+ * page table.
+ */
+ mtx_lock_spin(&allpmaps_lock);
+ LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) {
+ pde = pmap_pde(allpmaps_entry, va);
+ KASSERT(*pde == newpde || (*pde & PG_PTE_PROMOTE) ==
+ (oldpde & PG_PTE_PROMOTE),
+ ("pmap_demote_pde: pde was %#jx, expected %#jx",
+ (uintmax_t)*pde, (uintmax_t)oldpde));
+ pde_store(pde, newpde);
+ }
+ mtx_unlock_spin(&allpmaps_lock);
+ } else
+ pde_store(pde, newpde);
+ if (firstpte == PADDR2)
+ mtx_unlock(&PMAP2mutex);
+
+ /*
+ * Invalidate the recursive mapping of the page table page.
+ */
+ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
+
+ /*
+ * Demote the pv entry. This depends on the earlier demotion
+ * of the mapping. Specifically, the (re)creation of a per-
+ * page pv entry might trigger the execution of pmap_collect(),
+ * which might reclaim a newly (re)created per-page pv entry
+ * and destroy the associated mapping. In order to destroy
+ * the mapping, the PDE must have already changed from mapping
+ * the 2mpage to referencing the page table page.
+ */
+ if ((oldpde & PG_MANAGED) != 0)
+ pmap_pv_demote_pde(pmap, va, oldpde & PG_FRAME);
+
+ pmap_pde_demotions++;
+ CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x"
+ " in pmap %p", va, pmap);
+ return (TRUE);
+}
+
+/*
+ * pmap_remove_pde: do the things to unmap a superpage in a process
+ */
+static void
+pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
+ vm_page_t *free)
+{
+ struct md_page *pvh;
+ pd_entry_t oldpde;
+ vm_offset_t eva, va;
+ vm_page_t m, mpte;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((sva & PDRMASK) == 0,
+ ("pmap_remove_pde: sva is not 4mpage aligned"));
+ oldpde = pte_load_clear(pdq);
+ if (oldpde & PG_W)
+ pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
+
+ /*
+ * Machines that don't support invlpg, also don't support
+ * PG_G.
+ */
+ if (oldpde & PG_G)
+ pmap_invalidate_page(kernel_pmap, sva);
+ pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+ if (oldpde & PG_MANAGED) {
+ pvh = pa_to_pvh(oldpde & PG_FRAME);
+ pmap_pvh_free(pvh, pmap, sva);
+ eva = sva + NBPDR;
+ for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME);
+ va < eva; va += PAGE_SIZE, m++) {
+ if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+ if (oldpde & PG_A)
+ vm_page_flag_set(m, PG_REFERENCED);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_flag_clear(m, PG_WRITEABLE);
+ }
+ }
+ if (pmap == kernel_pmap) {
+ if (!pmap_demote_pde(pmap, pdq, sva))
+ panic("pmap_remove_pde: failed demotion");
+ } else {
+ mpte = pmap_lookup_pt_page(pmap, sva);
+ if (mpte != NULL) {
+ pmap_remove_pt_page(pmap, mpte);
+ KASSERT(mpte->wire_count == NPTEPG,
+ ("pmap_remove_pde: pte page wire count error"));
+ mpte->wire_count = 0;
+ pmap_add_delayed_free_list(mpte, free, FALSE);
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+ }
+ }
+}
+
+/*
* pmap_remove_pte: do the things to unmap a page in a process
*/
static int
@@ -1971,12 +2460,8 @@
pmap->pm_stats.resident_count -= 1;
if (oldpte & PG_MANAGED) {
m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
- if (oldpte & PG_M) {
- KASSERT((oldpte & PG_RW),
- ("pmap_remove_pte: modified page not writable: va: %#x, pte: %#jx",
- va, (uintmax_t)oldpte));
+ if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
vm_page_dirty(m);
- }
if (oldpte & PG_A)
vm_page_flag_set(m, PG_REFERENCED);
pmap_remove_entry(pmap, m, va);
@@ -2065,10 +2550,25 @@
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- pmap->pm_pdir[pdirindex] = 0;
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- anyvalid = 1;
- continue;
+ /*
+ * Are we removing the entire large page? If not,
+ * demote the mapping and fall through.
+ */
+ if (sva + NBPDR == pdnxt && eva >= pdnxt) {
+ /*
+ * The TLB entry for a PG_G mapping is
+ * invalidated by pmap_remove_pde().
+ */
+ if ((ptpaddr & PG_G) == 0)
+ anyvalid = 1;
+ pmap_remove_pde(pmap,
+ &pmap->pm_pdir[pdirindex], sva, &free);
+ continue;
+ } else if (!pmap_demote_pde(pmap,
+ &pmap->pm_pdir[pdirindex], sva)) {
+ /* The large page mapping was destroyed. */
+ continue;
+ }
}
/*
@@ -2119,19 +2619,34 @@
void
pmap_remove_all(vm_page_t m)
{
+ struct md_page *pvh;
pv_entry_t pv;
pmap_t pmap;
pt_entry_t *pte, tpte;
+ pd_entry_t *pde;
+ vm_offset_t va;
vm_page_t free;
KASSERT((m->flags & PG_FICTITIOUS) == 0,
("pmap_remove_all: page %p is fictitious", m));
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
sched_pin();
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
+ va = pv->pv_va;
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ pde = pmap_pde(pmap, va);
+ (void)pmap_demote_pde(pmap, pde, va);
+ PMAP_UNLOCK(pmap);
+ }
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pmap->pm_stats.resident_count--;
+ pde = pmap_pde(pmap, pv->pv_va);
+ KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found"
+ " a 4mpage in page %p's pv list", m));
pte = pmap_pte_quick(pmap, pv->pv_va);
tpte = pte_load_clear(pte);
if (tpte & PG_W)
@@ -2142,12 +2657,8 @@
/*
* Update the vm_page_t clean and reference bits.
*/
- if (tpte & PG_M) {
- KASSERT((tpte & PG_RW),
- ("pmap_remove_all: modified page not writable: va: %#x, pte: %#jx",
- pv->pv_va, (uintmax_t)tpte));
+ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
vm_page_dirty(m);
- }
free = NULL;
pmap_unuse_pt(pmap, pv->pv_va, &free);
pmap_invalidate_page(pmap, pv->pv_va);
@@ -2161,6 +2672,56 @@
}
/*
+ * pmap_protect_pde: do the things to protect a 4mpage in a process
+ */
+static boolean_t
+pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot)
+{
+ pd_entry_t newpde, oldpde;
+ vm_offset_t eva, va;
+ vm_page_t m;
+ boolean_t anychanged;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((sva & PDRMASK) == 0,
+ ("pmap_protect_pde: sva is not 4mpage aligned"));
+ anychanged = FALSE;
+retry:
+ oldpde = newpde = *pde;
+ if (oldpde & PG_MANAGED) {
+ eva = sva + NBPDR;
+ for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME);
+ va < eva; va += PAGE_SIZE, m++) {
+ /*
+ * In contrast to the analogous operation on a 4KB page
+ * mapping, the mapping's PG_A flag is not cleared and
+ * the page's PG_REFERENCED flag is not set. The
+ * reason is that pmap_demote_pde() expects that a 2/4MB
+ * page mapping with a stored page table page has PG_A
+ * set.
+ */
+ if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+ }
+ }
+ if ((prot & VM_PROT_WRITE) == 0)
+ newpde &= ~(PG_RW | PG_M);
+#ifdef PAE
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ newpde |= pg_nx;
+#endif
+ if (newpde != oldpde) {
+ if (!pde_cmpset(pde, oldpde, newpde))
+ goto retry;
+ if (oldpde & PG_G)
+ pmap_invalidate_page(pmap, sva);
+ else
+ anychanged = TRUE;
+ }
+ return (anychanged);
+}
+
+/*
* Set the physical protection on the
* specified range of this map as requested.
*/
@@ -2213,14 +2774,24 @@
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- if ((prot & VM_PROT_WRITE) == 0)
- pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- pmap->pm_pdir[pdirindex] |= pg_nx;
-#endif
- anychanged = 1;
- continue;
+ /*
+ * Are we protecting the entire large page? If not,
+ * demote the mapping and fall through.
+ */
+ if (sva + NBPDR == pdnxt && eva >= pdnxt) {
+ /*
+ * The TLB entry for a PG_G mapping is
+ * invalidated by pmap_protect_pde().
+ */
+ if (pmap_protect_pde(pmap,
+ &pmap->pm_pdir[pdirindex], sva, prot))
+ anychanged = 1;
+ continue;
+ } else if (!pmap_demote_pde(pmap,
+ &pmap->pm_pdir[pdirindex], sva)) {
+ /* The large page mapping was destroyed. */
+ continue;
+ }
}
if (pdnxt > eva)
@@ -2246,7 +2817,7 @@
vm_page_flag_set(m, PG_REFERENCED);
pbits &= ~PG_A;
}
- if ((pbits & PG_M) != 0) {
+ if ((pbits & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
if (m == NULL)
m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
vm_page_dirty(m);
@@ -2284,6 +2855,127 @@
}
/*
+ * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are
+ * within a single page table page to a single 2- or 4MB page mapping. For
+ * promotion to occur, two conditions must be met: (1) the 4KB page mappings
+ * must map aligned, contiguous physical memory and (2) the 4KB page mappings
>>> TRUNCATED FOR MAIL (1000 lines) <<<
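The pmap.c changes above export four read-only counters under the new vm.pmap.pde sysctl node (demotions, mappings, p_failures, promotions). On a kernel built with this revision they can be read with sysctl(8), e.g. "sysctl vm.pmap.pde", or programmatically. The following is a minimal illustrative sketch, not part of the change itself, using sysctlbyname(3); it assumes only the sysctl names declared in the SYSCTL_ULONG() lines of the diff above:

#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
	/* Counter names taken from the SYSCTL_ULONG() declarations above. */
	static const char *names[] = {
		"vm.pmap.pde.demotions",
		"vm.pmap.pde.mappings",
		"vm.pmap.pde.p_failures",
		"vm.pmap.pde.promotions",
	};
	u_long val;
	size_t len;
	unsigned int i;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
		len = sizeof(val);
		/* Read-only query: no new value is supplied. */
		if (sysctlbyname(names[i], &val, &len, NULL, 0) == -1) {
			perror(names[i]);
			continue;
		}
		printf("%s: %lu\n", names[i], val);
	}
	return (0);
}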