svn commit: r204855 - in user/kmacy/releng_8_page_lock/sys: amd64/amd64 amd64/conf amd64/include cddl/contrib/opensolaris/uts/common/fs/zfs dev/md kern nfsclient ufs/ffs vm
Author: kmacy
Date: Mon Mar 8 05:03:24 2010
New Revision: 204855
URL: http://svn.freebsd.org/changeset/base/204855
Log:
Integrate the page lock patch from HEAD.
Modified:
user/kmacy/releng_8_page_lock/sys/amd64/amd64/pmap.c
user/kmacy/releng_8_page_lock/sys/amd64/conf/GENERIC
user/kmacy/releng_8_page_lock/sys/amd64/include/pmap.h
user/kmacy/releng_8_page_lock/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
user/kmacy/releng_8_page_lock/sys/dev/md/md.c
user/kmacy/releng_8_page_lock/sys/kern/kern_exec.c
user/kmacy/releng_8_page_lock/sys/kern/kern_subr.c
user/kmacy/releng_8_page_lock/sys/kern/subr_witness.c
user/kmacy/releng_8_page_lock/sys/kern/sys_pipe.c
user/kmacy/releng_8_page_lock/sys/kern/sys_process.c
user/kmacy/releng_8_page_lock/sys/kern/uipc_cow.c
user/kmacy/releng_8_page_lock/sys/kern/uipc_shm.c
user/kmacy/releng_8_page_lock/sys/kern/uipc_syscalls.c
user/kmacy/releng_8_page_lock/sys/kern/vfs_bio.c
user/kmacy/releng_8_page_lock/sys/nfsclient/nfs_bio.c
user/kmacy/releng_8_page_lock/sys/ufs/ffs/ffs_vnops.c
user/kmacy/releng_8_page_lock/sys/vm/device_pager.c
user/kmacy/releng_8_page_lock/sys/vm/pmap.h
user/kmacy/releng_8_page_lock/sys/vm/sg_pager.c
user/kmacy/releng_8_page_lock/sys/vm/swap_pager.c
user/kmacy/releng_8_page_lock/sys/vm/uma_core.c
user/kmacy/releng_8_page_lock/sys/vm/vm_contig.c
user/kmacy/releng_8_page_lock/sys/vm/vm_fault.c
user/kmacy/releng_8_page_lock/sys/vm/vm_glue.c
user/kmacy/releng_8_page_lock/sys/vm/vm_kern.c
user/kmacy/releng_8_page_lock/sys/vm/vm_map.c
user/kmacy/releng_8_page_lock/sys/vm/vm_mmap.c
user/kmacy/releng_8_page_lock/sys/vm/vm_object.c
user/kmacy/releng_8_page_lock/sys/vm/vm_page.c
user/kmacy/releng_8_page_lock/sys/vm/vm_page.h
user/kmacy/releng_8_page_lock/sys/vm/vm_pageout.c
user/kmacy/releng_8_page_lock/sys/vm/vnode_pager.c
Modified: user/kmacy/releng_8_page_lock/sys/amd64/amd64/pmap.c
==============================================================================
--- user/kmacy/releng_8_page_lock/sys/amd64/amd64/pmap.c Mon Mar 8 04:56:39 2010 (r204854)
+++ user/kmacy/releng_8_page_lock/sys/amd64/amd64/pmap.c Mon Mar 8 05:03:24 2010 (r204855)
@@ -165,9 +165,29 @@ __FBSDID("$FreeBSD$");
#define PV_STAT(x) do { } while (0)
#endif
+#define CACHE_LINE_FETCH_SIZE 128
+#define PA_LOCK_PAD CACHE_LINE_FETCH_SIZE
+
+struct vp_lock {
+ struct mtx vp_lock;
+ unsigned char pad[(PA_LOCK_PAD - sizeof(struct mtx))];
+};
+
#define pa_index(pa) ((pa) >> PDRSHIFT)
#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
+#define PA_LOCKPTR(pa) &pa_lock[pa_index((pa)) % PA_LOCK_COUNT].vp_lock
+#define PA_LOCK(pa) mtx_lock(PA_LOCKPTR(pa))
+#define PA_TRYLOCK(pa) mtx_trylock(PA_LOCKPTR(pa))
+#define PA_UNLOCK(pa) mtx_unlock(PA_LOCKPTR(pa))
+#define PA_LOCK_ASSERT(pa, a) mtx_assert(PA_LOCKPTR(pa), (a))
+
+#define PA_LOCK_COUNT 64
+
+struct mtx pv_lock __aligned(128);
+struct vp_lock pa_lock[PA_LOCK_COUNT] __aligned(128);
+
+
struct pmap kernel_pmap_store;
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
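
[Reviewer sketch: the pa_lock table above hashes every physical address to one of
PA_LOCK_COUNT cache-line-padded mutexes; because pa_index() shifts by PDRSHIFT,
all 4KB pages inside the same 2MB region share a lock. A minimal standalone
illustration of that hash, reusing the constants from the hunk above -- the
sketch itself is not part of the commit:]

/*
 * Illustrative only: how a physical address selects its lock under this
 * patch.  PDRSHIFT and PA_LOCK_COUNT mirror the kernel definitions.
 */
#include <stdio.h>

#define PDRSHIFT      21              /* amd64 2MB superpage shift */
#define PA_LOCK_COUNT 64

static unsigned
pa_lock_index(unsigned long long pa)
{
        /* Same computation as PA_LOCKPTR(): pa_index(pa) % PA_LOCK_COUNT. */
        return ((unsigned)((pa >> PDRSHIFT) % PA_LOCK_COUNT));
}

int
main(void)
{
        /* 0x200000 and 0x201000 lie in the same 2MB region: same lock. */
        printf("%u %u\n", pa_lock_index(0x200000ULL),
            pa_lock_index(0x201000ULL));
        /* 0x400000 is the next 2MB region and hashes to the next slot. */
        printf("%u\n", pa_lock_index(0x400000ULL));
        return (0);
}
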
@@ -184,6 +204,15 @@ static int pg_ps_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
"Are large page mappings enabled?");
+static uint64_t pmap_tryrelock_calls;
+SYSCTL_QUAD(_vm_pmap, OID_AUTO, tryrelock_calls, CTLFLAG_RD,
+ &pmap_tryrelock_calls, 0, "Number of tryrelock calls");
+
+static int pmap_tryrelock_restart;
+SYSCTL_INT(_vm_pmap, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
+ &pmap_tryrelock_restart, 0, "Number of tryrelock restarts");
+
+
static u_int64_t KPTphys; /* phys addr of kernel level 1 */
static u_int64_t KPDphys; /* phys addr of kernel level 2 */
u_int64_t KPDPphys; /* phys addr of kernel level 3 */
@@ -212,8 +241,9 @@ struct msgbuf *msgbufp = 0;
static caddr_t crashdumpmap;
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
-static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
+static pv_entry_t get_pv_entry(pmap_t locked_pmap);
+static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct pv_list_head *pv_list);
static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
@@ -222,7 +252,8 @@ static pv_entry_t pmap_pvh_remove(struct
static int pmap_pvh_wired_mappings(struct md_page *pvh, int count);
static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
-static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ struct pv_list_head *pv_list);
static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
vm_offset_t va);
static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
@@ -241,7 +272,7 @@ static boolean_t pmap_protect_pde(pmap_t
vm_prot_t prot);
static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- vm_page_t *free);
+ vm_page_t *free, struct pv_list_head *pv_list);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
vm_offset_t sva, pd_entry_t ptepde, vm_page_t *free);
static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
@@ -249,14 +280,14 @@ static void pmap_remove_page(pmap_t pmap
vm_page_t *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
vm_offset_t va);
-static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
vm_page_t m);
-static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
-static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
+static vm_page_t pmap_allocpde(pmap_t pmap, vm_paddr_t pa, vm_offset_t va, int flags);
+static vm_page_t pmap_allocpte(pmap_t pmap, vm_paddr_t pa, vm_offset_t va, int flags);
-static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags);
+static vm_page_t _pmap_allocpte(pmap_t pmap, vm_paddr_t pa, vm_pindex_t ptepindex,
+ int flags);
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_page_t* free);
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, vm_page_t *);
@@ -265,6 +296,76 @@ static vm_offset_t pmap_kmem_choose(vm_o
CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
+
+#define LS_MAX 4
+struct lock_stack {
+ struct mtx *ls_array[LS_MAX];
+ int ls_top;
+};
+
+static void
+ls_init(struct lock_stack *ls)
+{
+
+ ls->ls_top = 0;
+}
+
+static void
+ls_push(struct lock_stack *ls, struct mtx *lock)
+{
+
+ KASSERT(ls->ls_top < LS_MAX, ("lock stack overflow"));
+
+ ls->ls_array[ls->ls_top] = lock;
+ ls->ls_top++;
+ mtx_lock(lock);
+}
+
+
+static int
+ls_trypush(struct lock_stack *ls, struct mtx *lock)
+{
+
+ KASSERT(ls->ls_top < LS_MAX, ("lock stack overflow"));
+
+ if (mtx_trylock(lock) == 0)
+ return (0);
+
+ ls->ls_array[ls->ls_top] = lock;
+ ls->ls_top++;
+ return (1);
+}
+
+#ifdef notyet
+static void
+ls_pop(struct lock_stack *ls)
+{
+ struct mtx *lock;
+
+ KASSERT(ls->ls_top > 0, ("lock stack underflow"));
+
+ ls->ls_top--;
+ lock = ls->ls_array[ls->ls_top];
+ mtx_unlock(lock);
+}
+#endif
+
+static void
+ls_popa(struct lock_stack *ls)
+{
+ struct mtx *lock;
+
+ KASSERT(ls->ls_top > 0, ("lock stack underflow"));
+
+ while (ls->ls_top > 0) {
+ ls->ls_top--;
+ lock = ls->ls_array[ls->ls_top];
+ mtx_unlock(lock);
+ }
+}
+#ifdef INVARIANTS
+extern void kdb_backtrace(void);
+#endif
/*
* Move the kernel virtual free pointer to the next
* 2MB. This is used to help improve performance
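
[Reviewer sketch: the lock_stack helpers above give a caller LIFO bookkeeping
for several mutexes at once. A hypothetical caller -- not in the commit --
might use ls_trypush() to take a pair of locks and back out cleanly on
failure:]

/*
 * Hypothetical caller of the lock-stack helpers added above; only
 * ls_init(), ls_trypush() and ls_popa() come from the patch.
 */
static int
try_lock_pair(struct lock_stack *ls, struct mtx *a, struct mtx *b)
{

        ls_init(ls);
        if (ls_trypush(ls, a) == 0)
                return (0);
        if (ls_trypush(ls, b) == 0) {
                ls_popa(ls);            /* drop everything acquired so far */
                return (0);
        }
        return (1);                     /* caller releases via ls_popa() */
}
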
@@ -414,6 +515,37 @@ vtopde(vm_offset_t va)
return (PDmap + ((va >> PDRSHIFT) & mask));
}
+/*
+ * Try to acquire a physical address lock while a pmap is locked.  If the
+ * trylock fails, drop the pmap lock, acquire the page lock directly, and
+ * reacquire the pmap lock, caching the newly locked pa in *locked.  The
+ * caller should then restart its loop in case the virtual-to-physical
+ * mapping has changed.
+ */
+static int
+pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked)
+{
+ vm_paddr_t lockpa;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ atomic_add_long((volatile long *)&pmap_tryrelock_calls, 1);
+ lockpa = *locked;
+ *locked = pa;
+ if (lockpa) {
+ PA_LOCK_ASSERT(lockpa, MA_OWNED);
+ if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa))
+ return (0);
+ PA_UNLOCK(lockpa);
+ }
+ if (PA_TRYLOCK(pa))
+ return 0;
+ PMAP_UNLOCK(pmap);
+ PA_LOCK(pa);
+ PMAP_LOCK(pmap);
+ atomic_add_int((volatile int *)&pmap_tryrelock_restart, 1);
+
+ return (EAGAIN);
+}
+
static u_int64_t
allocpages(vm_paddr_t *firstaddr, int n)
{
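
[Reviewer sketch: the intended calling convention for pa_tryrelock() is a
retry loop, as pmap_extract_and_hold() below illustrates. A condensed form of
the idiom; lookup_pte() is a hypothetical stand-in for the pmap_pde()/
pmap_pde_to_pte() walk, everything else is defined by this patch:]

/*
 * Sketch of the pa_tryrelock() retry idiom; not part of the commit.
 */
static vm_page_t
sketch_extract_and_hold(pmap_t pmap, vm_offset_t va)
{
        pt_entry_t pte;
        vm_paddr_t pa;
        vm_page_t m;

        pa = 0;
        m = NULL;
        PMAP_LOCK(pmap);
retry:
        pte = lookup_pte(pmap, va);     /* hypothetical helper */
        if ((pte & PG_V) != 0) {
                /* May drop and reacquire the pmap lock internally. */
                if (pa_tryrelock(pmap, pte & PG_FRAME, &pa))
                        goto retry;     /* mapping may have changed */
                m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
                vm_page_hold(m);
        }
        if (pa != 0)
                PA_UNLOCK(pa);
        PMAP_UNLOCK(pmap);
        return (m);
}
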
@@ -523,6 +655,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
{
vm_offset_t va;
pt_entry_t *pte, *unused;
+ int i;
/*
* Create an initial set of page tables to run the kernel in.
@@ -581,6 +714,13 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
/* Initialize the PAT MSR. */
pmap_init_pat();
+
+ /* Setup page locks. */
+ for (i = 0; i < PA_LOCK_COUNT; i++)
+ mtx_init(&pa_lock[i].vp_lock, "page lock", NULL,
+ MTX_DEF | MTX_RECURSE | MTX_DUPOK);
+ mtx_init(&pv_lock, "pv list lock", NULL, MTX_DEF);
+
}
/*
@@ -618,6 +758,14 @@ pmap_page_init(vm_page_t m)
m->md.pat_mode = PAT_WRITE_BACK;
}
+struct mtx *
+pmap_page_lockptr(vm_page_t m)
+{
+
+ KASSERT(m != NULL, ("pmap_page_lockptr: NULL page"));
+ return (PA_LOCKPTR(VM_PAGE_TO_PHYS(m)));
+}
+
/*
* Initialize the pmap module.
* Called by vm_init, to initialize any structures that the pmap
@@ -1017,15 +1165,20 @@ pmap_extract_and_hold(pmap_t pmap, vm_of
{
pd_entry_t pde, *pdep;
pt_entry_t pte;
+ vm_paddr_t pa;
vm_page_t m;
+ pa = 0;
m = NULL;
- vm_page_lock_queues();
PMAP_LOCK(pmap);
+retry:
pdep = pmap_pde(pmap, va);
if (pdep != NULL && (pde = *pdep)) {
if (pde & PG_PS) {
if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
+ if (pa_tryrelock(pmap, pde & PG_PS_FRAME, &pa))
+ goto retry;
+
m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
(va & PDRMASK));
vm_page_hold(m);
@@ -1034,12 +1187,15 @@ pmap_extract_and_hold(pmap_t pmap, vm_of
pte = *pmap_pde_to_pte(pdep, va);
if ((pte & PG_V) &&
((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
+ if (pa_tryrelock(pmap, pte & PG_FRAME, &pa))
+ goto retry;
m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
vm_page_hold(m);
}
}
}
- vm_page_unlock_queues();
+ if (pa)
+ PA_UNLOCK(pa);
PMAP_UNLOCK(pmap);
return (m);
}
@@ -1437,7 +1593,7 @@ pmap_pinit(pmap_t pmap)
* race conditions.
*/
static vm_page_t
-_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, vm_paddr_t pa, vm_pindex_t ptepindex, int flags)
{
vm_page_t m, pdppg, pdpg;
@@ -1452,9 +1608,9 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t
VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
if (flags & M_WAITOK) {
PMAP_UNLOCK(pmap);
- vm_page_unlock_queues();
+ PA_UNLOCK(pa);
VM_WAIT;
- vm_page_lock_queues();
+ PA_LOCK(pa);
PMAP_LOCK(pmap);
}
@@ -1494,7 +1650,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t
pml4 = &pmap->pm_pml4[pml4index];
if ((*pml4 & PG_V) == 0) {
/* Have to allocate a new pdp, recurse */
- if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index,
+ if (_pmap_allocpte(pmap, pa, NUPDE + NUPDPE + pml4index,
flags) == NULL) {
--m->wire_count;
atomic_subtract_int(&cnt.v_wire_count, 1);
@@ -1527,7 +1683,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t
pml4 = &pmap->pm_pml4[pml4index];
if ((*pml4 & PG_V) == 0) {
/* Have to allocate a new pd, recurse */
- if (_pmap_allocpte(pmap, NUPDE + pdpindex,
+ if (_pmap_allocpte(pmap, pa, NUPDE + pdpindex,
flags) == NULL) {
--m->wire_count;
atomic_subtract_int(&cnt.v_wire_count, 1);
@@ -1541,7 +1697,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t
pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
if ((*pdp & PG_V) == 0) {
/* Have to allocate a new pd, recurse */
- if (_pmap_allocpte(pmap, NUPDE + pdpindex,
+ if (_pmap_allocpte(pmap, pa, NUPDE + pdpindex,
flags) == NULL) {
--m->wire_count;
atomic_subtract_int(&cnt.v_wire_count,
@@ -1568,7 +1724,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t
}
static vm_page_t
-pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags)
+pmap_allocpde(pmap_t pmap, vm_paddr_t pa, vm_offset_t va, int flags)
{
vm_pindex_t pdpindex, ptepindex;
pdp_entry_t *pdpe;
@@ -1587,7 +1743,7 @@ retry:
/* Allocate a pd page. */
ptepindex = pmap_pde_pindex(va);
pdpindex = ptepindex >> NPDPEPGSHIFT;
- pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, flags);
+ pdpg = _pmap_allocpte(pmap, pa, NUPDE + pdpindex, flags);
if (pdpg == NULL && (flags & M_WAITOK))
goto retry;
}
@@ -1595,11 +1751,12 @@ retry:
}
static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
+pmap_allocpte(pmap_t pmap, vm_paddr_t pa, vm_offset_t va, int flags)
{
vm_pindex_t ptepindex;
pd_entry_t *pd;
vm_page_t m;
+ struct pv_list_head pv_list;
KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
(flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
@@ -1620,7 +1777,8 @@ retry:
* normal 4K page.
*/
if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
- if (!pmap_demote_pde(pmap, pd, va)) {
+ TAILQ_INIT(&pv_list);
+ if (!pmap_demote_pde(pmap, pd, va, &pv_list)) {
/*
* Invalidation of the 2MB page mapping may have caused
* the deallocation of the underlying PD page.
@@ -1641,7 +1799,7 @@ retry:
* Here if the pte page isn't mapped, or if it has been
* deallocated.
*/
- m = _pmap_allocpte(pmap, ptepindex, flags);
+ m = _pmap_allocpte(pmap, pa, ptepindex, flags);
if (m == NULL && (flags & M_WAITOK))
goto retry;
}
@@ -1847,6 +2005,7 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_coll
* allocate per-page pv entries until repromotion occurs, thereby
* exacerbating the shortage of free pv entries.
*/
+#ifdef nomore
static void
pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
{
@@ -1862,8 +2021,8 @@ pmap_collect(pmap_t locked_pmap, struct
if (m->hold_count || m->busy)
continue;
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
- va = pv->pv_va;
pmap = PV_PMAP(pv);
+ va = pv->pv_va;
/* Avoid deadlock and lock recursion. */
if (pmap > locked_pmap)
PMAP_LOCK(pmap);
@@ -1897,7 +2056,7 @@ pmap_collect(pmap_t locked_pmap, struct
}
}
}
-
+#endif
/*
* free the pv_entry back to the free list
@@ -1909,8 +2068,8 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
struct pv_chunk *pc;
int idx, field, bit;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ mtx_lock(&pv_lock);
PV_STAT(pv_entry_frees++);
PV_STAT(pv_entry_spare++);
pv_entry_count--;
@@ -1924,6 +2083,7 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
pc->pc_map[2] != PC_FREE2) {
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ mtx_unlock(&pv_lock);
return;
}
PV_STAT(pv_entry_spare -= _NPCPV);
@@ -1932,7 +2092,10 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
/* entire chunk is free, return it */
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
dump_drop_page(m->phys_addr);
- vm_page_unwire(m, 0);
+ mtx_unlock(&pv_lock);
+ KASSERT(m->wire_count == 1, ("wire_count == %d", m->wire_count));
+ m->wire_count--;
+ atomic_subtract_int(&cnt.v_wire_count, 1);
vm_page_free(m);
}
@@ -1941,7 +2104,7 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
* when needed.
*/
static pv_entry_t
-get_pv_entry(pmap_t pmap, int try)
+get_pv_entry(pmap_t pmap)
{
static const struct timeval printinterval = { 60, 0 };
static struct timeval lastprint;
@@ -1953,7 +2116,7 @@ get_pv_entry(pmap_t pmap, int try)
vm_page_t m;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ mtx_lock(&pv_lock);
PV_STAT(pv_entry_allocs++);
pv_entry_count++;
if (pv_entry_count > pv_entry_high_water)
@@ -1962,7 +2125,6 @@ get_pv_entry(pmap_t pmap, int try)
"increasing either the vm.pmap.shpgperproc or the "
"vm.pmap.pv_entry_max sysctl.\n");
pq = NULL;
-retry:
pc = TAILQ_FIRST(&pmap->pm_pvchunk);
if (pc != NULL) {
for (field = 0; field < _NPCM; field++) {
@@ -1981,6 +2143,7 @@ retry:
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
}
PV_STAT(pv_entry_spare--);
+ mtx_unlock(&pv_lock);
return (pv);
}
}
@@ -1989,26 +2152,10 @@ retry:
VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED);
if (m == NULL) {
- if (try) {
- pv_entry_count--;
- PV_STAT(pc_chunk_tryfail++);
- return (NULL);
- }
- /*
- * Reclaim pv entries: At first, destroy mappings to inactive
- * pages. After that, if a pv chunk entry is still needed,
- * destroy mappings to active pages.
- */
- if (pq == NULL) {
- PV_STAT(pmap_collect_inactive++);
- pq = &vm_page_queues[PQ_INACTIVE];
- } else if (pq == &vm_page_queues[PQ_INACTIVE]) {
- PV_STAT(pmap_collect_active++);
- pq = &vm_page_queues[PQ_ACTIVE];
- } else
- panic("get_pv_entry: increase vm.pmap.shpgperproc");
- pmap_collect(pmap, pq);
- goto retry;
+ pv_entry_count--;
+ PV_STAT(pc_chunk_tryfail++);
+ mtx_unlock(&pv_lock);
+ return (NULL);
}
PV_STAT(pc_chunk_count++);
PV_STAT(pc_chunk_allocs++);
@@ -2022,9 +2169,64 @@ retry:
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
PV_STAT(pv_entry_spare += _NPCPV - 1);
+
+ mtx_unlock(&pv_lock);
return (pv);
}
+static void
+pmap_pv_list_free(pmap_t pmap, struct pv_list_head *pv_list)
+{
+ pv_entry_t pv;
+
+ while (!TAILQ_EMPTY(pv_list)) {
+ pv = TAILQ_FIRST(pv_list);
+ TAILQ_REMOVE(pv_list, pv, pv_list);
+ free_pv_entry(pmap, pv);
+ }
+}
+
+static boolean_t
+pmap_pv_list_alloc(pmap_t pmap, int count, struct pv_list_head *pv_list)
+{
+ pv_entry_t pv;
+ int i;
+ boolean_t slept;
+
+ slept = FALSE;
+ for (i = 0; i < count; i++) {
+ while ((pv = get_pv_entry(pmap)) == NULL) {
+ PMAP_UNLOCK(pmap);
+ slept = TRUE;
+ VM_WAIT;
+ PMAP_LOCK(pmap);
+ }
+ TAILQ_INSERT_HEAD(pv_list, pv, pv_list);
+ }
+
+ return (slept);
+}
+
+static boolean_t
+pmap_pv_list_try_alloc(pmap_t pmap, int count, struct pv_list_head *pv_list)
+{
+ pv_entry_t pv;
+ int i;
+ boolean_t success;
+
+ success = TRUE;
+ for (i = 0; i < count; i++) {
+ if ((pv = get_pv_entry(pmap)) == NULL) {
+ success = FALSE;
+ pmap_pv_list_free(pmap, pv_list);
+ goto done;
+ }
+ TAILQ_INSERT_HEAD(pv_list, pv, pv_list);
+ }
+done:
+ return (success);
+}
+
/*
* First find and then remove the pv entry for the specified pmap and virtual
* address from the specified pv list. Returns the pv entry if found and NULL
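
[Reviewer sketch: together these helpers let a caller reserve the NPTEPG - 1
pv entries that demoting one 2MB mapping consumes before any page-table state
is touched. A hypothetical caller, not from the commit:]

/*
 * Hypothetical sketch: reserve pv entries up front, demote, then
 * return any leftovers.  The pv-list helpers and pmap_demote_pde()
 * are the routines added by this patch.
 */
static void
sketch_demote(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
{
        struct pv_list_head pv_list;

        TAILQ_INIT(&pv_list);
        /* One 2MB demotion can need NPTEPG - 1 new 4KB pv entries. */
        if (pmap_pv_list_try_alloc(pmap, NPTEPG - 1, &pv_list) == FALSE)
                return;                 /* caller would retry or fall back */
        (void)pmap_demote_pde(pmap, pde, va, &pv_list);
        pmap_pv_list_free(pmap, &pv_list);      /* unused entries go back */
}
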
@@ -2036,7 +2238,8 @@ pmap_pvh_remove(struct md_page *pvh, pma
{
pv_entry_t pv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
@@ -2052,27 +2255,37 @@ pmap_pvh_remove(struct md_page *pvh, pma
* entries for each of the 4KB page mappings.
*/
static void
-pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
+pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct pv_list_head *pv_list)
{
struct md_page *pvh;
pv_entry_t pv;
vm_offset_t va_last;
vm_page_t m;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PA_LOCK_ASSERT(pa, MA_OWNED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_demote_pde: pa is not 2mpage aligned"));
- /*
- * Transfer the 2mpage's pv entry for this mapping to the first
- * page's pv list.
- */
+ /* Transfer the 2mpage's pv entry for this mapping to the first
+ * page's pv list.
+ */
pvh = pa_to_pvh(pa);
va = trunc_2mpage(va);
pv = pmap_pvh_remove(pvh, pmap, va);
KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found"));
m = PHYS_TO_VM_PAGE(pa);
+#ifdef INVARIANTS
+ if (va == 0) {
+ printf("inserting va==0\n");
+ kdb_backtrace();
+ }
+#endif
+ vm_page_lock(m);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ vm_page_unlock(m);
+
/* Instantiate the remaining NPTEPG - 1 pv entries. */
va_last = va + NBPDR - PAGE_SIZE;
do {
@@ -2080,8 +2293,20 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offse
KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
("pmap_pv_demote_pde: page %p is not managed", m));
va += PAGE_SIZE;
- pmap_insert_entry(pmap, va, m);
+ pv = TAILQ_FIRST(pv_list);
+ TAILQ_REMOVE(pv_list, pv, pv_list);
+#ifdef INVARIANTS
+ if (va == 0) {
+ printf("inserting va==0\n");
+ kdb_backtrace();
+ }
+#endif
+ pv->pv_va = va;
+ vm_page_lock(m);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+ vm_page_unlock(m);
} while (va < va_last);
+
}
/*
@@ -2097,7 +2322,7 @@ pmap_pv_promote_pde(pmap_t pmap, vm_offs
vm_offset_t va_last;
vm_page_t m;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ PA_LOCK_ASSERT(pa, MA_OWNED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_promote_pde: pa is not 2mpage aligned"));
@@ -2143,7 +2368,8 @@ pmap_remove_entry(pmap_t pmap, vm_page_t
{
struct md_page *pvh;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ vm_page_lock_assert(m, MA_OWNED);
+
pmap_pvh_free(&m->md, pmap, va);
if (TAILQ_EMPTY(&m->md.pv_list)) {
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -2153,22 +2379,6 @@ pmap_remove_entry(pmap_t pmap, vm_page_t
}
/*
- * Create a pv entry for page at pa for
- * (pmap, va).
- */
-static void
-pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
-{
- pv_entry_t pv;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- pv = get_pv_entry(pmap, FALSE);
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
-}
-
-/*
* Conditionally create a pv entry.
*/
static boolean_t
@@ -2177,9 +2387,15 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm
pv_entry_t pv;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ vm_page_lock_assert(m, MA_OWNED);
if (pv_entry_count < pv_entry_high_water &&
- (pv = get_pv_entry(pmap, TRUE)) != NULL) {
+ (pv = get_pv_entry(pmap)) != NULL) {
+#ifdef INVARIANTS
+ if (va == 0) {
+ printf("inserting va==0\n");
+ kdb_backtrace();
+ }
+#endif
pv->pv_va = va;
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
return (TRUE);
@@ -2196,9 +2412,16 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offse
struct md_page *pvh;
pv_entry_t pv;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ PA_LOCK_ASSERT(pa, MA_OWNED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if (pv_entry_count < pv_entry_high_water &&
- (pv = get_pv_entry(pmap, TRUE)) != NULL) {
+ (pv = get_pv_entry(pmap)) != NULL) {
+#ifdef INVARIANTS
+ if (va == 0) {
+ printf("inserting va==0\n");
+ kdb_backtrace();
+ }
+#endif
pv->pv_va = va;
pvh = pa_to_pvh(pa);
TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
@@ -2226,7 +2449,8 @@ pmap_fill_ptp(pt_entry_t *firstpte, pt_e
* mapping is invalidated.
*/
static boolean_t
-pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
+pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ struct pv_list_head *pv_list)
{
pd_entry_t newpde, oldpde;
pt_entry_t *firstpte, newpte;
@@ -2262,7 +2486,7 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t
DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
free = NULL;
- pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free);
+ pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free, pv_list);
pmap_invalidate_page(pmap, trunc_2mpage(va));
pmap_free_zero_pages(free);
CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
@@ -2272,6 +2496,10 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t
if (va < VM_MAXUSER_ADDRESS)
pmap->pm_stats.resident_count++;
}
+ if (TAILQ_EMPTY(pv_list) && ((oldpde & PG_MANAGED) != 0)) {
+ if (pmap_pv_list_try_alloc(pmap, NPTEPG-1, pv_list) == FALSE)
+ return (FALSE);
+ }
mptepa = VM_PAGE_TO_PHYS(mpte);
firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
@@ -2326,7 +2554,7 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t
* the 2mpage to referencing the page table page.
*/
if ((oldpde & PG_MANAGED) != 0)
- pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME);
+ pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, pv_list);
pmap_pde_demotions++;
CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx"
@@ -2339,7 +2567,7 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t
*/
static int
pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- vm_page_t *free)
+ vm_page_t *free, struct pv_list_head *pv_list)
{
struct md_page *pvh;
pd_entry_t oldpde;
@@ -2366,6 +2594,10 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t
eva = sva + NBPDR;
for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
va < eva; va += PAGE_SIZE, m++) {
+ /*
+ * XXX do we need to individually lock each page?
+ */
if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
vm_page_dirty(m);
if (oldpde & PG_A)
@@ -2376,7 +2608,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t
}
}
if (pmap == kernel_pmap) {
- if (!pmap_demote_pde(pmap, pdq, sva))
+ if (!pmap_demote_pde(pmap, pdq, sva, pv_list))
panic("pmap_remove_pde: failed demotion");
} else {
mpte = pmap_lookup_pt_page(pmap, sva);
@@ -2393,6 +2625,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t
return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free));
}
+
/*
* pmap_remove_pte: do the things to unmap a page in a process
*/
@@ -2416,6 +2649,7 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t
pmap->pm_stats.resident_count -= 1;
if (oldpte & PG_MANAGED) {
m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
+ vm_page_lock_assert(m, MA_OWNED);
if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
vm_page_dirty(m);
if (oldpte & PG_A)
@@ -2432,6 +2666,7 @@ static void
pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, vm_page_t *free)
{
pt_entry_t *pte;
+ vm_page_t m = NULL;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((*pde & PG_V) == 0)
@@ -2439,10 +2674,94 @@ pmap_remove_page(pmap_t pmap, vm_offset_
pte = pmap_pde_to_pte(pde, va);
if ((*pte & PG_V) == 0)
return;
+ if (*pte & PG_MANAGED) {
+ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME);
+ if (vm_page_trylock(m) == 0) {
+ PMAP_UNLOCK(pmap);
+ vm_page_lock(m);
+ PMAP_LOCK(pmap);
+ }
+ }
pmap_remove_pte(pmap, pte, va, *pde, free);
+ if (m != NULL)
+ vm_page_unlock(m);
pmap_invalidate_page(pmap, va);
}
+static void
+pmap_prealloc_pv_list(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+ struct pv_list_head *pv_list)
+{
+ vm_offset_t va_next;
+ pml4_entry_t *pml4e;
+ pdp_entry_t *pdpe;
+ pd_entry_t ptpaddr, *pde;
+ pt_entry_t *pte;
+ int i, alloc_count;
+
+ alloc_count = 0;
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = va_next) {
+
+ pml4e = pmap_pml4e(pmap, sva);
+ if ((*pml4e & PG_V) == 0) {
+ va_next = (sva + NBPML4) & ~PML4MASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
+ if ((*pdpe & PG_V) == 0) {
+ va_next = (sva + NBPDP) & ~PDPMASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+
+ /*
+ * Calculate index for next page table.
+ */
+ va_next = (sva + NBPDR) & ~PDRMASK;
+ if (va_next < sva)
+ va_next = eva;
+
+ pde = pmap_pdpe_to_pde(pdpe, sva);
+ ptpaddr = *pde;
+
+ /*
+ * Weed out invalid mappings.
+ */
+ if (ptpaddr == 0)
+ continue;
+
+ /*
+ * Check for large page.
+ */
+ if ((ptpaddr & PG_PS) != 0) {
+ alloc_count++;
+ continue;
+ }
+ /*
+ * Limit our scan to either the end of the va represented
+ * by the current page table page, or to the end of the
+ * range being removed.
+ */
+ if (va_next > eva)
+ va_next = eva;
+
+ for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
+ sva += PAGE_SIZE) {
+ if (*pte == 0)
+ continue;
+ }
+ }
+ for (i = 0; i < alloc_count; i++)
+ pmap_pv_list_alloc(pmap, NPTEPG-1, pv_list);
+
+ PMAP_UNLOCK(pmap);
+}
+
/*
* Remove the given range of addresses from the specified map.
*
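
[Reviewer sketch: the trylock dance in pmap_remove_page() above encodes the
patch's lock-ordering rule: never sleep on a page lock while holding the pmap
lock. Distilled into a wrapper for illustration only, not part of the commit:]

/*
 * Illustrative wrapper: acquire a page lock while already holding the
 * pmap lock, without sleeping inside it.
 */
static void
sketch_lock_page_under_pmap(pmap_t pmap, vm_page_t m)
{

        if (vm_page_trylock(m) == 0) {
                /*
                 * Drop, block, reacquire.  Any pte inspected before the
                 * pmap lock was dropped must be revalidated afterwards.
                 */
                PMAP_UNLOCK(pmap);
                vm_page_lock(m);
                PMAP_LOCK(pmap);
        }
}
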
@@ -2457,7 +2776,9 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
pdp_entry_t *pdpe;
pd_entry_t ptpaddr, *pde;
pt_entry_t *pte;
+ vm_paddr_t pa;
vm_page_t free = NULL;
+ struct pv_list_head pv_list;
int anyvalid;
/*
@@ -2466,11 +2787,19 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
if (pmap->pm_stats.resident_count == 0)
return;
- anyvalid = 0;
+ pa = anyvalid = 0;
+ TAILQ_INIT(&pv_list);
- vm_page_lock_queues();
- PMAP_LOCK(pmap);
+ /*
+ * Pre-allocate pv entries when removing a multi-page range
+ * from the kernel pmap.
+ */
+ if ((pmap == kernel_pmap) &&
+ (sva + PAGE_SIZE != eva))
+ pmap_prealloc_pv_list(pmap, sva, eva, &pv_list);
+ PMAP_LOCK(pmap);
+restart:
/*
* special handling of removing one page. a very
* common operation and easy to short circuit some
@@ -2525,6 +2854,11 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
+ if (pa_tryrelock(pmap, ptpaddr & PG_FRAME, &pa)) {
+ va_next = sva;
+ continue;
+ }
+
/*
* Are we removing the entire large page? If not,
* demote the mapping and fall through.
@@ -2536,9 +2870,9 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
*/
if ((ptpaddr & PG_G) == 0)
anyvalid = 1;
- pmap_remove_pde(pmap, pde, sva, &free);
+ pmap_remove_pde(pmap, pde, sva, &free, &pv_list);
continue;
- } else if (!pmap_demote_pde(pmap, pde, sva)) {
+ } else if (!pmap_demote_pde(pmap, pde, sva, &pv_list)) {
/* The large page mapping was destroyed. */
continue;
} else
@@ -2555,23 +2889,39 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
sva += PAGE_SIZE) {
+ int ret;
+
if (*pte == 0)
continue;
+ if ((*pte & PG_MANAGED) &&
+ pa_tryrelock(pmap, *pte & PG_FRAME, &pa))
+ goto restart;
+
/*
* The TLB entry for a PG_G mapping is invalidated
* by pmap_remove_pte().
*/
if ((*pte & PG_G) == 0)
anyvalid = 1;
- if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free))
+ ret = pmap_remove_pte(pmap, pte, sva, ptpaddr, &free);
+
+ if (pa) {
+ PA_UNLOCK(pa);
+ pa = 0;
+ }
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***