PERFORCE change 84641 for review
Alan Cox
alc at FreeBSD.org
Sat Oct 1 21:54:59 PDT 2005
http://perforce.freebsd.org/chv.cgi?CH=84641
Change 84641 by alc at alc_home on 2005/10/02 04:54:30
Integrate changes from my private branch.
Affected files ...
.. //depot/projects/superpages/src/sys/alpha/include/param.h#2 integrate
.. //depot/projects/superpages/src/sys/amd64/amd64/pmap.c#2 integrate
.. //depot/projects/superpages/src/sys/amd64/include/param.h#2 integrate
.. //depot/projects/superpages/src/sys/arm/include/param.h#2 integrate
.. //depot/projects/superpages/src/sys/conf/NOTES#2 integrate
.. //depot/projects/superpages/src/sys/conf/files#2 integrate
.. //depot/projects/superpages/src/sys/conf/options#2 integrate
.. //depot/projects/superpages/src/sys/i386/i386/pmap.c#2 integrate
.. //depot/projects/superpages/src/sys/i386/include/param.h#2 integrate
.. //depot/projects/superpages/src/sys/ia64/include/param.h#2 integrate
.. //depot/projects/superpages/src/sys/kern/vfs_bio.c#2 integrate
.. //depot/projects/superpages/src/sys/modules/linux/Makefile#2 integrate
.. //depot/projects/superpages/src/sys/modules/nwfs/Makefile#2 integrate
.. //depot/projects/superpages/src/sys/modules/smbfs/Makefile#2 integrate
.. //depot/projects/superpages/src/sys/modules/svr4/Makefile#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm.h#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_buddy.c#1 branch
.. //depot/projects/superpages/src/sys/vm/vm_buddy.h#1 branch
.. //depot/projects/superpages/src/sys/vm/vm_contig.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_fault.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_map.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_object.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_object.h#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_page.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_page.h#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_pageout.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_pageq.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_reserve.c#1 branch
.. //depot/projects/superpages/src/sys/vm/vm_reserve.h#1 branch
.. //depot/projects/superpages/src/sys/vm/vm_zeroidle.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vnode_pager.c#2 integrate
Differences ...
==== //depot/projects/superpages/src/sys/alpha/include/param.h#2 (text+ko) ====
@@ -103,6 +103,23 @@
#define PAGE_MASK (PAGE_SIZE-1)
#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
+#define BUDDY_QUEUES 10 /* Number of queues in the buddy allocator */
+
+/*
+ * SP_LEVELS is the number of superpage sizes.
+ */
+#define SP_LEVELS 3
+
+/*
+ * SP_SMALL_SHIFT is LOG2("The Smallest Superpage Size" / PAGE_SIZE).
+ */
+#define SP_SMALL_SHIFT 3
+
+/*
+ * XXX
+ */
+#define SP_FACTOR_SHIFT 3
+
#define KERNBASE 0xfffffc0000300000LL /* start of kernel virtual */
#define BTOPKERNBASE ((u_long)KERNBASE >> PGSHIFT)
==== //depot/projects/superpages/src/sys/amd64/amd64/pmap.c#2 (text+ko) ====
@@ -133,6 +133,7 @@
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_reserve.h>
#include <vm/uma.h>
#include <machine/cpu.h>
@@ -205,6 +206,8 @@
static pv_entry_t get_pv_entry(void);
static void pmap_clear_ptes(vm_page_t m, long bit);
+static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva);
+static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
vm_offset_t sva, pd_entry_t ptepde);
static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde);
@@ -220,6 +223,9 @@
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
+static void mach_promote(pmap_t pmap, pd_entry_t *pde, reservation_t reserv);
+static boolean_t pmap_demote(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+
CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
@@ -358,21 +364,6 @@
}
-static __inline pt_entry_t *
-pmap_pte_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *ptepde)
-{
- pd_entry_t *pde;
-
- pde = pmap_pde(pmap, va);
- if (pde == NULL || (*pde & PG_V) == 0)
- return NULL;
- *ptepde = *pde;
- if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */
- return ((pt_entry_t *)pde);
- return (pmap_pde_to_pte(pde, va));
-}
-
-
PMAP_INLINE pt_entry_t *
vtopte(vm_offset_t va)
{
@@ -1297,11 +1288,13 @@
* normal 4K page.
*/
if (pd != 0 && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
- *pd = 0;
- pd = 0;
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- pmap_unuse_pt(pmap, va, *pmap_pdpe(pmap, va));
- pmap_invalidate_all(kernel_pmap);
+ if (!pmap_demote(pmap, pd, va)) {
+ /*
+ * Invalidation of the 2MB page mapping may have caused
+ * the deallocation of the underlying PD page.
+ */
+ pd = NULL;
+ }
}
/*
@@ -1519,6 +1512,53 @@
}
/*
+ * pmap_remove_pde: do the things to unmap a superpage in a process
+ */
+static int
+pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva)
+{
+ pd_entry_t oldpde;
+ vm_offset_t eva, va;
+ vm_page_t m;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((sva & PDRMASK) == 0,
+ ("pmap_remove_pde: sva is not 2mpage aligned"));
+ oldpde = pte_load_clear(pdq);
+ if (oldpde & PG_W)
+ pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
+
+ /*
+ * Machines that don't support invlpg, also don't support
+ * PG_G.
+ */
+ if (oldpde & PG_G)
+ pmap_invalidate_page(kernel_pmap, sva);
+ pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+ if (oldpde & PG_MANAGED) {
+ eva = sva + NBPDR;
+ for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME);
+ va < eva; va += PAGE_SIZE, m++) {
+ if (oldpde & PG_M) {
+#if defined(PMAP_DIAGNOSTIC)
+ if (pmap_nw_modified((pt_entry_t) oldpde)) {
+ printf(
+ "pmap_remove_pde: modified 2mpage not writable: va: 0x%lx, pde: 0x%lx\n",
+ va, oldpde);
+ }
+#endif
+ if (pmap_track_modified(va))
+ vm_page_dirty(m);
+ }
+ if (oldpde & PG_A)
+ vm_page_flag_set(m, PG_REFERENCED);
+ pmap_remove_entry(pmap, m, va);
+ }
+ }
+ return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva)));
+}
+
+/*
* pmap_remove_pte: do the things to unmap a page in a process
*/
static int
@@ -1651,11 +1691,25 @@
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- *pde = 0;
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- pmap_unuse_pt(pmap, sva, *pdpe);
- anyvalid = 1;
- continue;
+ if (sva + NBPDR == va_next && eva >= va_next) {
+#ifdef INVARIANTS
+ printf("pmap_remove: superpage at %lx to destroy.\n",
+ sva);
+#endif
+ pmap_remove_pde(pmap, pde, sva);
+ anyvalid = 1;
+ continue;
+ } else {
+#ifdef INVARIANTS
+ printf("pmap_remove: superpage at %lx to demote !!!\n",
+ sva);
+#endif
+ if (!pmap_demote(pmap, pde, sva)) {
+ anyvalid = 1; /* XXX */
+ continue;
+ }
+ ptpaddr = *pde;
+ }
}
/*
@@ -1698,9 +1752,10 @@
void
pmap_remove_all(vm_page_t m)
{
+ pmap_t pmap;
register pv_entry_t pv;
pt_entry_t *pte, tpte;
- pd_entry_t ptepde;
+ pd_entry_t *pde;
#if defined(PMAP_DIAGNOSTIC)
/*
@@ -1713,12 +1768,25 @@
#endif
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- PMAP_LOCK(pv->pv_pmap);
- pv->pv_pmap->pm_stats.resident_count--;
- pte = pmap_pte_pde(pv->pv_pmap, pv->pv_va, &ptepde);
+ pmap = pv->pv_pmap;
+ PMAP_LOCK(pmap);
+ pmap->pm_stats.resident_count--;
+ pde = pmap_pde(pmap, pv->pv_va);
+ if (*pde & PG_PS) {
+ printf("pmap_remove_all: superpage to demote !!!\n");
+ if (!pmap_demote(pmap, pde, pv->pv_va)) {
+ /*
+ * All mappings within the same 2mpage were
+ * destroyed and pv was freed.
+ */
+ PMAP_UNLOCK(pmap);
+ continue;
+ }
+ }
+ pte = pmap_pde_to_pte(pde, pv->pv_va);
tpte = pte_load_clear(pte);
if (tpte & PG_W)
- pv->pv_pmap->pm_stats.wired_count--;
+ pmap->pm_stats.wired_count--;
if (tpte & PG_A)
vm_page_flag_set(m, PG_REFERENCED);
@@ -1736,18 +1804,60 @@
if (pmap_track_modified(pv->pv_va))
vm_page_dirty(m);
}
- pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
- TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
m->md.pv_list_count--;
- pmap_unuse_pt(pv->pv_pmap, pv->pv_va, ptepde);
- PMAP_UNLOCK(pv->pv_pmap);
+ pmap_unuse_pt(pmap, pv->pv_va, *pde);
+ PMAP_UNLOCK(pmap);
free_pv_entry(pv);
}
vm_page_flag_clear(m, PG_WRITEABLE);
}
/*
+ * pmap_protect_pde: do the things to protect a 2mpage in a process
+ */
+static boolean_t
+pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva)
+{
+ pd_entry_t newpde, oldpde;
+ vm_offset_t eva, va;
+ vm_page_t m;
+ boolean_t anychanged;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((sva & PDRMASK) == 0,
+ ("pmap_protect_pde: sva is not 2mpage aligned"));
+ anychanged = FALSE;
+retry:
+ oldpde = newpde = *pde;
+ if (oldpde & PG_MANAGED) {
+ eva = sva + NBPDR;
+ for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME);
+ va < eva; va += PAGE_SIZE, m++) {
+ if (oldpde & PG_A) {
+ vm_page_flag_set(m, PG_REFERENCED);
+ newpde &= ~PG_A;
+ }
+ if ((oldpde & PG_M) != 0 &&
+ pmap_track_modified(va))
+ vm_page_dirty(m);
+ }
+ }
+ newpde &= ~(PG_RW | PG_M);
+ if (newpde != oldpde) {
+ if (!atomic_cmpset_long(pde, oldpde, newpde))
+ goto retry;
+ if (oldpde & PG_G)
+ pmap_invalidate_page(pmap, sva);
+ else
+ anychanged = TRUE;
+ }
+ return (anychanged);
+}
+
+/*
* Set the physical protection on the
* specified range of this map as requested.
*/
@@ -1802,9 +1912,16 @@
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- *pde &= ~(PG_M|PG_RW);
- anychanged = 1;
- continue;
+ if (sva + NBPDR == va_next && eva >= va_next) {
+ if (pmap_protect_pde(pmap, pde, sva))
+ anychanged = 1;
+ continue;
+ } else {
+ if (!pmap_demote(pmap, pde, sva)) {
+ anychanged = 1; /* XXX */
+ continue;
+ }
+ }
}
if (va_next > eva)
@@ -2032,6 +2149,24 @@
} else
pte_store(pte, newpte | PG_A);
}
+
+ /*
+ * Promotion condition:
+ * 1) Page has to be part of a fully populated reservation
+ * 2) Virtual adress corresponding to the reservation has to
+ * be superpage aligned
+ */
+ if (((mpte != NULL && mpte->wire_count == NPTEPG) ||
+ m->object == kernel_object || (m->object == kmem_object && FALSE)) &&
+ m->reserv != NULL &&
+ m->reserv->refcnt == NBPDR / PAGE_SIZE) {
+#ifdef INVARIANTS
+ printf("%s: pmap %p va %lx XXX\n", __func__, pmap, va);
+#endif
+ KASSERT(m->object->flags & OBJ_SUPERPAGES, ("pmap_enter: xxx"));
+ mach_promote(pmap, pmap_pde(pmap, va), m->reserv);
+ }
+
vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
}
@@ -2146,6 +2281,23 @@
pte_store(pte, pa | PG_V | PG_U);
else
pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
+
+ /*
+ * Promotion condition:
+ * 1) Page has to be part of a fully populated reservation
+ * 2) Virtual address corresponding to the reservation has to
+ * be superpage aligned
+ */
+ if (m->reserv != NULL &&
+ m->reserv->refcnt == NBPDR / PAGE_SIZE &&
+ mpte->wire_count == NPTEPG) {
+#ifdef INVARIANTS
+ printf("%s: pmap %p va %lx XXX\n", __func__, pmap, va);
+#endif
+ KASSERT(m->object->flags & OBJ_SUPERPAGES,
+ ("pmap_enter_quick: xxx"));
+ mach_promote(pmap, pmap_pde(pmap, va), m->reserv);
+ }
out:
PMAP_UNLOCK(pmap);
return mpte;
@@ -2372,6 +2524,13 @@
*pde = srcptepaddr;
dst_pmap->pm_stats.resident_count +=
NBPDR / PAGE_SIZE;
+ if (srcptepaddr & PG_MANAGED) {
+ m = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
+ for (; addr < va_next; addr += PAGE_SIZE) {
+ pmap_insert_entry(dst_pmap, addr, m);
+ m++;
+ }
+ }
} else
pmap_unwire_pte_hold(dst_pmap, addr, dstmpde);
continue;
@@ -2530,6 +2689,7 @@
pmap_t pmap;
vm_offset_t sva, eva;
{
+ pd_entry_t *pde;
pt_entry_t *pte, tpte;
vm_page_t m;
pv_entry_t pv, npv;
@@ -2550,9 +2710,43 @@
}
#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
+ pde = vtopde(pv->pv_va);
+#else
+ pde = pmap_pde(pmap, pv->pv_va);
+#endif
+ if (*pde & PG_PS) {
+ if (*pde & PG_W) {
+ npv = TAILQ_NEXT(pv, pv_plist);
+ continue;
+ }
+ if (sva <= trunc_2mpage(pv->pv_va) &&
+ eva >= round_2mpage(pv->pv_va + 1)) {
+#ifdef INVARIANTS
+ printf("pmap_remove_pages: superpage at %lx to destroy.\n",
+ trunc_2mpage(pv->pv_va));
+#endif
+ pmap_remove_pde(pmap, pde, trunc_2mpage(pv->pv_va));
+ npv = TAILQ_FIRST(&pmap->pm_pvlist);
+ continue;
+ }
+#ifdef INVARIANTS
+ printf("pmap_remove_pages: superpage at %lx to demote !!!\n",
+ pv->pv_va);
+#endif
+ if (!pmap_demote(pmap, pde, pv->pv_va)) {
+ /*
+ * All mappings within the same 2mpage were
+ * destroyed and pv was freed.
+ */
+ npv = TAILQ_FIRST(&pmap->pm_pvlist);
+ continue;
+ }
+ }
+
+#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
pte = vtopte(pv->pv_va);
#else
- pte = pmap_pte(pmap, pv->pv_va);
+ pte = pmap_pde_to_pte(pde, pv->pv_va);
#endif
tpte = *pte;
@@ -2597,7 +2791,7 @@
if (TAILQ_EMPTY(&m->md.pv_list))
vm_page_flag_clear(m, PG_WRITEABLE);
- pmap_unuse_pt(pmap, pv->pv_va, *vtopde(pv->pv_va));
+ pmap_unuse_pt(pmap, pv->pv_va, *pde);
free_pv_entry(pv);
}
pmap_invalidate_all(pmap);
@@ -2657,7 +2851,7 @@
rv = FALSE;
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, addr);
- if (pde != NULL && (*pde & PG_V)) {
+ if (pde != NULL && (*pde & (PG_PS | PG_V)) == PG_V) {
pte = vtopte(addr);
rv = (*pte & PG_V) == 0;
}
@@ -2671,7 +2865,9 @@
static __inline void
pmap_clear_ptes(vm_page_t m, long bit)
{
- register pv_entry_t pv;
+ pmap_t pmap;
+ pv_entry_t npv, pv;
+ pd_entry_t *pde;
pt_entry_t pbits, *pte;
if ((m->flags & PG_FICTITIOUS) ||
@@ -2683,7 +2879,7 @@
* Loop over all current mappings setting/clearing as appropos If
* setting RO do we need to clear the VAC?
*/
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, npv) {
/*
* don't write protect pager mappings
*/
@@ -2692,8 +2888,22 @@
continue;
}
- PMAP_LOCK(pv->pv_pmap);
- pte = pmap_pte(pv->pv_pmap, pv->pv_va);
+ pmap = pv->pv_pmap;
+ PMAP_LOCK(pmap);
+ pde = pmap_pde(pmap, pv->pv_va);
+ if (*pde & PG_PS) {
+ printf("pmap_clear_ptes: superpage to demote !!!\n");
+ if ((*pde & bit) == 0 ||
+ !pmap_demote(pmap, pde, pv->pv_va)) {
+ /*
+ * All mappings within the same 2mpage were
+ * destroyed and pv was freed.
+ */
+ PMAP_UNLOCK(pmap);
+ continue;
+ }
+ }
+ pte = pmap_pde_to_pte(pde, pv->pv_va);
retry:
pbits = *pte;
if (pbits & bit) {
@@ -2707,9 +2917,9 @@
} else {
atomic_clear_long(pte, bit);
}
- pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+ pmap_invalidate_page(pmap, pv->pv_va);
}
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
}
if (bit == PG_RW)
vm_page_flag_clear(m, PG_WRITEABLE);
@@ -2747,6 +2957,7 @@
int
pmap_ts_referenced(vm_page_t m)
{
+ pmap_t pmap;
register pv_entry_t pv, pvf, pvn;
pt_entry_t *pte;
pt_entry_t v;
@@ -2770,20 +2981,21 @@
if (!pmap_track_modified(pv->pv_va))
continue;
- PMAP_LOCK(pv->pv_pmap);
- pte = pmap_pte(pv->pv_pmap, pv->pv_va);
+ pmap = pv->pv_pmap;
+ PMAP_LOCK(pmap);
+ pte = pmap_pte(pmap, pv->pv_va);
if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
atomic_clear_long(pte, PG_A);
- pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+ pmap_invalidate_page(pmap, pv->pv_va);
rtval++;
if (rtval > 4) {
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
break;
}
}
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
} while ((pv = pvn) != NULL && pv != pvf);
}
@@ -2873,24 +3085,35 @@
pmap_t pmap;
vm_offset_t addr;
{
- pt_entry_t *ptep, pte;
+ pd_entry_t *pdep;
+ pt_entry_t pte;
+ vm_paddr_t pa;
vm_page_t m;
int val = 0;
PMAP_LOCK(pmap);
- ptep = pmap_pte(pmap, addr);
- pte = (ptep != NULL) ? *ptep : 0;
+ pdep = pmap_pde(pmap, addr);
+ if (pdep != NULL && (*pdep & PG_V)) {
+ if (*pdep & PG_PS) {
+ KASSERT((*pdep & PG_FRAME & PDRMASK) == 0,
+ ("pmap_mincore: bad pde"));
+ pte = *pdep;
+ pa = (*pdep & PG_FRAME) | (addr & PDRMASK);
+ } else {
+ pte = *pmap_pde_to_pte(pdep, addr);
+ pa = pte & PG_FRAME;
+ }
+ } else {
+ pte = 0;
+ pa = 0;
+ }
PMAP_UNLOCK(pmap);
if (pte != 0) {
- vm_paddr_t pa;
-
val = MINCORE_INCORE;
if ((pte & PG_MANAGED) == 0)
return val;
- pa = pte & PG_FRAME;
-
m = PHYS_TO_VM_PAGE(pa);
/*
@@ -2975,3 +3198,131 @@
addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
return addr;
}
+
+#define COMPATIBLE_PTE_MASK (PG_NX | PG_U | PG_RW)
+#define COMPATIBLE_PTE(a,b) ((a & COMPATIBLE_PTE_MASK) == (b & COMPATIBLE_PTE_MASK))
+
+static void
+mach_promote(pmap_t pmap, pd_entry_t *pde, reservation_t reserv)
+{
+ vm_paddr_t pa;
+ pt_entry_t *pte, *first_pte, flags;
+ vm_page_t page_pa;
+ vm_page_t tofree = PHYS_TO_VM_PAGE(*pde & PG_FRAME); /*pte page to free after promotion*/
+
+ first_pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
+ flags = *first_pte;
+ pa = VM_PAGE_TO_PHYS(reserv->first_page);
+
+ /*
+ * Check all the ptes before promotion
+ */
+ for (pte = first_pte; pte < first_pte + NPTEPG; pte++) {
+ if (pa != (*pte & PG_FRAME))
+ return;
+ pa += PAGE_SIZE;
+
+ page_pa = PHYS_TO_VM_PAGE(*pte & PG_FRAME);
+ KASSERT(page_pa->reserv,("mach_promote: page has no reservation"));
+ KASSERT(page_pa->reserv == reserv,("mach_promote: reservation mismatch"));
+
+ if ((*pte & PG_V) == 0 || !COMPATIBLE_PTE(*pte, flags))
+ return;
+
+ /* Add dirty bit and accessed bit to the flags, if necessary */
+ flags |= *pte & (PG_A | PG_M);
+ }
+
+ /* Create a superpage: add PG_PS bit to the pde */
+ *pde = PG_PS | flags;
+
+ if (pmap != kernel_pmap)
+ pmap->pm_stats.resident_count--;
+
+ /* Invalidate old TLB entries */
+ pmap_invalidate_all(pmap);
+
+ /*
+ * XXX
+ *
+ * File system corruption occurs if pte pages belonging to the
+ * kernel pmap are freed.
+ */
+ if (pmap != kernel_pmap) {
+ KASSERT(tofree->wire_count == NPTEPG,
+ ("pmap_promote: pte page wire count error"));
+ tofree->wire_count = 0;
+ vm_page_free(tofree);
+ atomic_subtract_int(&cnt.v_wire_count, 1);
+ }
+
+#ifdef INVARIANTS
+ printf("Promotion successful XXX\n");
+#endif
+}
+
+static boolean_t
+pmap_demote(pmap_t pmap, pd_entry_t *pde0, vm_offset_t va)
+{
+ pd_entry_t save_pde_value, new_pte_value ;
+ pt_entry_t *pte_page_va, *new_pte_va;
+ vm_paddr_t pte_page_pa;
+ vm_page_t pte_page;
+
+ KASSERT((*pde0 & PG_PS) != 0,
+ ("pmap_demote: not a superpage, impossible to demote"));
+
+ /* STEP 1
+ * Allocate the PTE page
+ */
+ if ((pte_page = vm_page_alloc(NULL, pmap_pde_pindex(va),
+ VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) {
+ pmap_remove_pde(pmap, pde0, trunc_2mpage(va));
+ pmap_invalidate_all(pmap);
+ return (FALSE);
+ }
+ pte_page->wire_count += NPTEPG - 1;
+ KASSERT(pte_page->wire_count == NPTEPG,
+ ("pmap_demote: page table page %p has wire count %d",
+ pte_page, pte_page->wire_count));
+ if (pmap != kernel_pmap)
+ pmap->pm_stats.resident_count++;
+
+ pte_page_pa = VM_PAGE_TO_PHYS(pte_page);
+ pte_page_va = (vm_offset_t *) PHYS_TO_DMAP(pte_page_pa);
+ pte_page_pa |= PG_U | PG_RW | PG_V | PG_A | PG_M;
+
+repeat:
+
+ /* STEP 2
+ * Save the value of the pde entry
+ * Define the value of the first pte entry
+ */
+ save_pde_value = *pde0;
+
+ /* STEP 3
+ * Fill the PTE page with the physical address of the base pages
+ */
+ for ( new_pte_va = pte_page_va, new_pte_value = save_pde_value & ~PG_PS;
+ new_pte_va < pte_page_va + NPTEPG;
+ new_pte_va++ , new_pte_value += PAGE_SIZE) {
+
+ *new_pte_va = new_pte_value ;
+
+ }
+
+ /* STEP 4:
+ * Check if pde value has changed
+ * If not, assign the new pde value.
+ * If yes, repeat the pte assignment loop.
+ */
+ if (!atomic_cmpset_long(pde0, save_pde_value, pte_page_pa))
+ goto repeat;
+
+ /*
+ * Some implementations of the amd64 architecture prefetch TLB
+ * entries.
+ */
+ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
+ return (TRUE);
+}
==== //depot/projects/superpages/src/sys/amd64/include/param.h#2 (text+ko) ====
@@ -119,6 +119,23 @@
#define NBPML4 (1ul<<PML4SHIFT)/* bytes/page map lev4 table */
#define PML4MASK (NBPML4-1)
+#define BUDDY_QUEUES 10 /* Number of queues in the buddy allocator */
+
+/*
+ * SP_LEVELS is the number of superpage sizes.
+ */
+#define SP_LEVELS 1
+
+/*
+ * SP_SMALL_SHIFT is LOG2(NBPDR / PAGE_SIZE).
+ */
+#define SP_SMALL_SHIFT 9
+
+/*
+ * If there is only one superpage size, then SP_FACTOR_SHIFT is zero.
+ */
+#define SP_FACTOR_SHIFT 0
+
#define IOPAGES 2 /* pages of i/o permission bitmap */
#ifndef KSTACK_PAGES
==== //depot/projects/superpages/src/sys/arm/include/param.h#2 (text+ko) ====
@@ -94,6 +94,23 @@
#define NBPDR (1 << PDR_SHIFT)
#define NPDEPG (1 << (32 - PDR_SHIFT))
+#define BUDDY_QUEUES 9 /* Number of queues in the buddy allocator */
+
+/*
+ * SP_LEVELS is the number of superpage sizes.
+ */
+#define SP_LEVELS 2
+
+/*
+ * SP_SMALL_SHIFT is LOG2("The Smallest Superpage Size" / PAGE_SIZE).
+ */
+#define SP_SMALL_SHIFT 4
+
+/*
+ * XXX
+ */
+#define SP_FACTOR_SHIFT 4
+
#ifndef KSTACK_PAGES
#define KSTACK_PAGES 2
#endif /* !KSTACK_PAGES */
==== //depot/projects/superpages/src/sys/conf/NOTES#2 (text+ko) ====
@@ -110,16 +110,6 @@
#
options BLKDEV_IOSIZE=8192
-# Options for the VM subsystem
-# L2 cache size (in KB) can be specified in PQ_CACHESIZE
-options PQ_CACHESIZE=512 # color for 512k cache
-# Deprecated options supported for backwards compatibility
-#options PQ_NOOPT # No coloring
-#options PQ_LARGECACHE # color for 512k cache
-#options PQ_HUGECACHE # color for 1024k cache
-#options PQ_MEDIUMCACHE # color for 256k cache
-#options PQ_NORMALCACHE # color for 64k cache
-
# This allows you to actually store this configuration file into
# the kernel binary itself, where it may be later read by saying:
# strings -n 3 /boot/kernel/kernel | sed -n 's/^___//p' > MYKERNEL
==== //depot/projects/superpages/src/sys/conf/files#2 (text+ko) ====
@@ -1890,6 +1890,7 @@
vm/swap_pager.c standard
vm/uma_core.c standard
vm/uma_dbg.c standard
+vm/vm_buddy.c standard
vm/vm_contig.c standard
vm/memguard.c optional DEBUG_MEMGUARD
vm/vm_fault.c standard
@@ -1904,6 +1905,7 @@
vm/vm_pageout.c standard
vm/vm_pageq.c standard
vm/vm_pager.c standard
+vm/vm_reserve.c standard
vm/vm_unix.c standard
vm/vm_zeroidle.c standard
vm/vnode_pager.c standard
==== //depot/projects/superpages/src/sys/conf/options#2 (text+ko) ====
@@ -515,12 +515,6 @@
NO_SWAPPING opt_vm.h
MALLOC_MAKE_FAILURES opt_vm.h
MALLOC_PROFILE opt_vm.h
-PQ_NOOPT opt_vmpage.h
-PQ_NORMALCACHE opt_vmpage.h
-PQ_MEDIUMCACHE opt_vmpage.h
-PQ_LARGECACHE opt_vmpage.h
-PQ_HUGECACHE opt_vmpage.h
-PQ_CACHESIZE opt_vmpage.h
# The MemGuard replacement allocator used for tamper-after-free detection
DEBUG_MEMGUARD opt_vm.h
==== //depot/projects/superpages/src/sys/i386/i386/pmap.c#2 (text+ko) ====
@@ -133,6 +133,7 @@
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_reserve.h>
#include <vm/uma.h>
#include <machine/cpu.h>
@@ -255,6 +256,8 @@
static pv_entry_t get_pv_entry(void);
static void pmap_clear_ptes(vm_page_t m, int bit);
+static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva);
+static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
@@ -273,6 +276,9 @@
static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
#endif
+static void mach_promote(pmap_t pmap, vm_offset_t va, reservation_t reserv);
+static boolean_t pmap_demote(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+
CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
@@ -1199,10 +1205,8 @@
* normal 4K page.
*/
if (ptepa & PG_PS) {
- pmap->pm_pdir[ptepindex] = 0;
- ptepa = 0;
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- pmap_invalidate_all(kernel_pmap);
+ pmap_demote(pmap, &pmap->pm_pdir[ptepindex], va);
+ ptepa = pmap->pm_pdir[ptepindex];
}
/*
@@ -1521,6 +1525,52 @@
}
/*
+ * pmap_remove_pde: do the things to unmap a superpage in a process
+ */
+static void
+pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva)
+{
+ pd_entry_t oldpde;
+ vm_offset_t eva, va;
+ vm_page_t m;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((sva & PDRMASK) == 0,
+ ("pmap_remove_pde: sva is not 4mpage aligned"));
+ oldpde = pte_load_clear(pdq);
+ if (oldpde & PG_W)
+ pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
+
+ /*
+ * Machines that don't support invlpg, also don't support
+ * PG_G.
+ */
+ if (oldpde & PG_G)
+ pmap_invalidate_page(kernel_pmap, sva);
+ pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+ if (oldpde & PG_MANAGED) {
+ eva = sva + NBPDR;
+ for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME);
+ va < eva; va += PAGE_SIZE, m++) {
+ if (oldpde & PG_M) {
+#if defined(PMAP_DIAGNOSTIC)
+ if (pmap_nw_modified((pt_entry_t) oldpde)) {
+ printf(
+ "pmap_remove_pde: modified 4mpage not writable: va: 0x%x, pde: 0x%x\n",
+ va, oldpde);
+ }
+#endif
+ if (pmap_track_modified(va))
+ vm_page_dirty(m);
+ }
+ if (oldpde & PG_A)
+ vm_page_flag_set(m, PG_REFERENCED);
+ pmap_remove_entry(pmap, m, va);
+ }
+ }
+}
+
+/*
* pmap_remove_pte: do the things to unmap a page in a process
*/
static int
@@ -1639,10 +1689,24 @@
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- pmap->pm_pdir[pdirindex] = 0;
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- anyvalid = 1;
- continue;
+ if (sva + NBPDR == pdnxt && eva >= pdnxt) {
+#ifdef INVARIANTS
+ printf("pmap_remove: superpage at %x to destroy.\n",
+ sva);
+#endif
+ pmap_remove_pde(pmap, pmap_pde(pmap, sva), sva);
+ anyvalid = 1;
+ continue;
+ } else {
+#ifdef INVARIANTS
+ printf("pmap_remove: superpage at %x to demote !!!\n",
+ sva);
+#endif
+ if (!pmap_demote(pmap, pmap_pde(pmap, sva), sva)) {
+ anyvalid = 1; /* XXX */
+ continue;
+ }
+ }
}
/*
@@ -1686,8 +1750,10 @@
void
pmap_remove_all(vm_page_t m)
{
+ pmap_t pmap;
register pv_entry_t pv;
pt_entry_t *pte, tpte;
+ pd_entry_t *pde;
#if defined(PMAP_DIAGNOSTIC)
/*
@@ -1701,12 +1767,25 @@
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
sched_pin();
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- PMAP_LOCK(pv->pv_pmap);
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects
mailing list