git: 902ed64fecbe - main - i386 pmap: Adapt recent amd64/arm64 superpage improvements
Date: Tue, 26 Sep 2023 17:41:55 UTC
The branch main has been updated by alc:
URL: https://cgit.FreeBSD.org/src/commit/?id=902ed64fecbe078e1cdd527b97af3958b413da11
commit 902ed64fecbe078e1cdd527b97af3958b413da11
Author: Alan Cox <alc@FreeBSD.org>
AuthorDate: 2023-09-24 18:21:36 +0000
Commit: Alan Cox <alc@FreeBSD.org>
CommitDate: 2023-09-26 17:41:20 +0000
i386 pmap: Adapt recent amd64/arm64 superpage improvements
Don't recompute mpte during promotion.
Optimize MADV_WILLNEED on existing superpages.
Standardize promotion conditions across amd64, arm64, and i386.
Stop requiring the accessed bit for superpage promotion.
Tidy up pmap_promote_pde() calls.
Retire PMAP_INLINE. It's no longer used.
Note: Some of these changes are a prerequisite to fixing a panic that
arises when attempting to create a wired superpage mapping by
pmap_enter(psind=1) (as opposed to promotion).
Reviewed by: kib
MFC after: 1 week
Differential Revision: https://reviews.freebsd.org/D41944
---
sys/i386/i386/pmap.c | 200 ++++++++++++++++++++++++++++++++----------------
sys/i386/include/pmap.h | 2 +-
2 files changed, 137 insertions(+), 65 deletions(-)
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 4198849b1a5a..2d19fc51dd53 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -159,16 +159,6 @@
#endif
#include <machine/pmap_base.h>
-#if !defined(DIAGNOSTIC)
-#ifdef __GNUC_GNU_INLINE__
-#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
-#else
-#define PMAP_INLINE extern inline
-#endif
-#else
-#define PMAP_INLINE
-#endif
-
#ifdef PV_STATS
#define PV_STAT(x) do { x ; } while (0)
#else
@@ -311,13 +301,14 @@ static int pmap_pvh_wired_mappings(struct md_page *pvh, int count);
static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
-static bool pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
+static int pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot);
static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
u_int flags, vm_page_t m);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
-static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+ bool allpte_PG_A_set);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
@@ -327,7 +318,8 @@ static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
#if VM_NRESERVLEVEL > 0
-static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static bool pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ vm_page_t mpte);
#endif
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
vm_prot_t prot);
@@ -993,7 +985,7 @@ __CONCAT(PMTYPE, init)(void)
*/
if (pseflag != 0 &&
KERNBASE <= i << PDRSHIFT && i << PDRSHIFT < KERNend &&
- pmap_insert_pt_page(kernel_pmap, mpte, true))
+ pmap_insert_pt_page(kernel_pmap, mpte, true, true))
panic("pmap_init: pmap_insert_pt_page failed");
}
PMAP_UNLOCK(kernel_pmap);
@@ -1928,14 +1920,26 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
* for mapping a distinct range of virtual addresses. The pmap's collection is
* ordered by this virtual address range.
*
- * If "promoted" is false, then the page table page "mpte" must be zero filled.
+ * If "promoted" is false, then the page table page "mpte" must be zero filled;
+ * "mpte"'s valid field will be set to 0.
+ *
+ * If "promoted" is true and "allpte_PG_A_set" is false, then "mpte" must
+ * contain valid mappings with identical attributes except for PG_A; "mpte"'s
+ * valid field will be set to 1.
+ *
+ * If "promoted" and "allpte_PG_A_set" are both true, then "mpte" must contain
+ * valid mappings with identical attributes including PG_A; "mpte"'s valid
+ * field will be set to VM_PAGE_BITS_ALL.
*/
static __inline int
-pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+ bool allpte_PG_A_set)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
+ KASSERT(promoted || !allpte_PG_A_set,
+ ("a zero-filled PTP can't have PG_A set in every PTE"));
+ mpte->valid = promoted ? (allpte_PG_A_set ? VM_PAGE_BITS_ALL : 1) : 0;
return (vm_radix_insert(&pmap->pm_root, mpte));
}
@@ -2843,10 +2847,11 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
newpte ^= PG_PDE_PAT | PG_PTE_PAT;
/*
- * If the page table page is not leftover from an earlier promotion,
- * initialize it.
+ * If the PTP is not leftover from an earlier promotion or it does not
+ * have PG_A set in every PTE, then fill it. The new PTEs will all
+ * have PG_A set.
*/
- if (vm_page_none_valid(mpte))
+ if (!vm_page_all_valid(mpte))
pmap_fill_ptp(firstpte, newpte);
KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
@@ -2854,8 +2859,7 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
" addresses"));
/*
- * If the mapping has changed attributes, update the page table
- * entries.
+ * If the mapping has changed attributes, update the PTEs.
*/
if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
pmap_fill_ptp(firstpte, newpte);
@@ -2985,7 +2989,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
} else {
mpte = pmap_remove_pt_page(pmap, sva);
if (mpte != NULL) {
- KASSERT(vm_page_all_valid(mpte),
+ KASSERT(vm_page_any_valid(mpte),
("pmap_remove_pde: pte page not promoted"));
pmap->pm_stats.resident_count--;
KASSERT(mpte->ref_count == NPTEPG,
@@ -3469,38 +3473,56 @@ retry:
* pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel
* pmap.
*/
-static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
+static bool
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte)
{
pd_entry_t newpde;
- pt_entry_t *firstpte, oldpte, pa, *pte;
+ pt_entry_t allpte_PG_A, *firstpte, oldpte, pa, *pte;
#ifdef KTR
vm_offset_t oldpteva;
#endif
- vm_page_t mpte;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (!pg_ps_enabled)
+ return (false);
/*
* Examine the first PTE in the specified PTP. Abort if this PTE is
- * either invalid, unused, or does not map the first 4KB physical page
+ * either invalid or does not map the first 4KB physical page
* within a 2- or 4MB page.
*/
firstpte = pmap_pte_quick(pmap, trunc_4mpage(va));
setpde:
newpde = *firstpte;
- if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
+ if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) {
pmap_pde_p_failures++;
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
" in pmap %p", va, pmap);
- return;
+ return (false);
}
if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) {
pmap_pde_p_failures++;
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
" in pmap %p", va, pmap);
- return;
+ return (false);
}
+
+ /*
+ * Both here and in the below "for" loop, to allow for repromotion
+ * after MADV_FREE, conditionally write protect a clean PTE before
+ * possibly aborting the promotion due to other PTE attributes. Why?
+ * Suppose that MADV_FREE is applied to a part of a superpage, the
+ * address range [S, E). pmap_advise() will demote the superpage
+ * mapping, destroy the 4KB page mapping at the end of [S, E), and
+ * clear PG_M and PG_A in the PTEs for the rest of [S, E). Later,
+ * imagine that the memory in [S, E) is recycled, but the last 4KB
+ * page in [S, E) is not the last to be rewritten, or simply accessed.
+ * In other words, there is still a 4KB page in [S, E), call it P,
+ * that is writeable but PG_M and PG_A are clear in P's PTE. Unless
+ * we write protect P before aborting the promotion, if and when P is
+ * finally rewritten, there won't be a page fault to trigger
+ * repromotion.
+ */
if ((newpde & (PG_M | PG_RW)) == PG_RW) {
/*
* When PG_M is already clear, PG_RW can be cleared without
@@ -3510,6 +3532,8 @@ setpde:
~PG_RW))
goto setpde;
newpde &= ~PG_RW;
+ CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx"
+ " in pmap %p", va & ~PDRMASK, pmap);
}
/*
@@ -3517,15 +3541,16 @@ setpde:
* PTE maps an unexpected 4KB physical page or does not have identical
* characteristics to the first PTE.
*/
- pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
+ allpte_PG_A = newpde & PG_A;
+ pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE;
for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
setpte:
oldpte = *pte;
- if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
+ if ((oldpte & (PG_FRAME | PG_V)) != pa) {
pmap_pde_p_failures++;
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
" in pmap %p", va, pmap);
- return;
+ return (false);
}
if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
/*
@@ -3547,28 +3572,39 @@ setpte:
pmap_pde_p_failures++;
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
" in pmap %p", va, pmap);
- return;
+ return (false);
}
+ allpte_PG_A &= oldpte;
pa -= PAGE_SIZE;
}
/*
- * Save the page table page in its current state until the PDE
- * mapping the superpage is demoted by pmap_demote_pde() or
- * destroyed by pmap_remove_pde().
+ * Unless all PTEs have PG_A set, clear it from the superpage mapping,
+ * so that promotions triggered by speculative mappings, such as
+ * pmap_enter_quick(), don't automatically mark the underlying pages
+ * as referenced.
*/
- mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+ newpde &= ~PG_A | allpte_PG_A;
+
+ /*
+ * Save the PTP in its current state until the PDE mapping the
+ * superpage is demoted by pmap_demote_pde() or destroyed by
+ * pmap_remove_pde(). If PG_A is not set in every PTE, then request
+ * that the PTP be refilled on demotion.
+ */
+ if (mpte == NULL)
+ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
KASSERT(mpte >= vm_page_array &&
mpte < &vm_page_array[vm_page_array_size],
("pmap_promote_pde: page table page is out of range"));
KASSERT(mpte->pindex == va >> PDRSHIFT,
("pmap_promote_pde: page table page's pindex is wrong"));
- if (pmap_insert_pt_page(pmap, mpte, true)) {
+ if (pmap_insert_pt_page(pmap, mpte, true, allpte_PG_A != 0)) {
pmap_pde_p_failures++;
CTR2(KTR_PMAP,
"pmap_promote_pde: failure for va %#x in pmap %p", va,
pmap);
- return;
+ return (false);
}
/*
@@ -3596,6 +3632,7 @@ setpte:
pmap_pde_promotions++;
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x"
" in pmap %p", va, pmap);
+ return (true);
}
#endif /* VM_NRESERVLEVEL > 0 */
@@ -3859,9 +3896,9 @@ unchanged:
* populated, then attempt promotion.
*/
if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
- pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
+ (m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0)
- pmap_promote_pde(pmap, pde, va);
+ (void)pmap_promote_pde(pmap, pde, va, mpte);
#endif
rv = KERN_SUCCESS;
@@ -3874,11 +3911,11 @@ out:
/*
* Tries to create a read- and/or execute-only 2 or 4 MB page mapping. Returns
- * true if successful. Returns false if (1) a mapping already exists at the
- * specified virtual address or (2) a PV entry cannot be allocated without
- * reclaiming another PV entry.
+ * KERN_SUCCESS if the mapping was created. Otherwise, returns an error
+ * value. See pmap_enter_pde() for the possible error values when "no sleep",
+ * "no replace", and "no reclaim" are specified.
*/
-static bool
+static int
pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
pd_entry_t newpde;
@@ -3895,8 +3932,7 @@ pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
if (pmap != kernel_pmap)
newpde |= PG_U;
return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP |
- PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL) ==
- KERN_SUCCESS);
+ PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL));
}
/*
@@ -3919,11 +3955,14 @@ pmap_every_pte_zero(vm_offset_t va)
/*
* Tries to create the specified 2 or 4 MB page mapping. Returns KERN_SUCCESS
- * if the mapping was created, and either KERN_FAILURE or
- * KERN_RESOURCE_SHORTAGE otherwise. Returns KERN_FAILURE if
- * PMAP_ENTER_NOREPLACE was specified and a mapping already exists at the
- * specified virtual address. Returns KERN_RESOURCE_SHORTAGE if
- * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
+ * if the mapping was created, and one of KERN_FAILURE, KERN_NO_SPACE,
+ * or KERN_RESOURCE_SHORTAGE otherwise. Returns KERN_FAILURE if
+ * PMAP_ENTER_NOREPLACE was specified and a 4 KB page mapping already exists
+ * within the 2 or 4 MB virtual address range starting at the specified virtual
+ * address. Returns KERN_NO_SPACE if PMAP_ENTER_NOREPLACE was specified and a
+ * 2 or 4 MB page mapping already exists at the specified virtual address.
+ * Returns KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NORECLAIM was specified and a
+ * PV entry allocation failed.
*
* The parameter "m" is only used when creating a managed, writeable mapping.
*/
@@ -3944,12 +3983,19 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
pde = pmap_pde(pmap, va);
oldpde = *pde;
if ((oldpde & PG_V) != 0) {
- if ((flags & PMAP_ENTER_NOREPLACE) != 0 && (pmap !=
- kernel_pmap || (oldpde & PG_PS) != 0 ||
- !pmap_every_pte_zero(va))) {
- CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
- " in pmap %p", va, pmap);
- return (KERN_FAILURE);
+ if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
+ if ((oldpde & PG_PS) != 0) {
+ CTR2(KTR_PMAP,
+ "pmap_enter_pde: no space for va %#lx"
+ " in pmap %p", va, pmap);
+ return (KERN_NO_SPACE);
+ } else if (pmap != kernel_pmap ||
+ !pmap_every_pte_zero(va)) {
+ CTR2(KTR_PMAP,
+ "pmap_enter_pde: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return (KERN_FAILURE);
+ }
}
/* Break the existing mapping(s). */
SLIST_INIT(&free);
@@ -3978,7 +4024,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
* leave the kernel page table page zero filled.
*/
mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
- if (pmap_insert_pt_page(pmap, mt, false))
+ if (pmap_insert_pt_page(pmap, mt, false, false))
panic("pmap_enter_pde: trie insert failed");
}
}
@@ -4035,6 +4081,7 @@ __CONCAT(PMTYPE, enter_object)(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_offset_t va;
vm_page_t m, mpte;
vm_pindex_t diff, psize;
+ int rv;
VM_OBJECT_ASSERT_LOCKED(m_start->object);
@@ -4047,7 +4094,8 @@ __CONCAT(PMTYPE, enter_object)(pmap_t pmap, vm_offset_t start, vm_offset_t end,
va = start + ptoa(diff);
if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
m->psind == 1 && pg_ps_enabled &&
- pmap_enter_4mpage(pmap, va, m, prot))
+ ((rv = pmap_enter_4mpage(pmap, va, m, prot)) ==
+ KERN_SUCCESS || rv == KERN_NO_SPACE))
m = &m[NBPDR / PAGE_SIZE - 1];
else
mpte = pmap_enter_quick_locked(pmap, va, m, prot,
@@ -4084,12 +4132,14 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot, vm_page_t mpte)
{
pt_entry_t newpte, *pte;
+ pd_entry_t *pde;
KASSERT(pmap != kernel_pmap || !VA_IS_CLEANMAP(va) ||
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ pde = NULL;
/*
* In the case that a page table page is not
@@ -4109,7 +4159,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
/*
* Get the page directory entry
*/
- ptepa = pmap->pm_pdir[ptepindex];
+ pde = &pmap->pm_pdir[ptepindex];
+ ptepa = *pde;
/*
* If the page table page is mapped, we just increment
@@ -4167,6 +4218,27 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
if (pmap != kernel_pmap)
newpte |= PG_U;
pte_store_zero(pte, newpte);
+
+#if VM_NRESERVLEVEL > 0
+ /*
+ * If both the PTP and the reservation are fully populated, then
+ * attempt promotion.
+ */
+ if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
+ (m->flags & PG_FICTITIOUS) == 0 &&
+ vm_reserv_level_iffullpop(m) == 0) {
+ if (pde == NULL)
+ pde = pmap_pde(pmap, va);
+
+ /*
+ * If promotion succeeds, then the next call to this function
+ * should not be given the unmapped PTP as a hint.
+ */
+ if (pmap_promote_pde(pmap, pde, va, mpte))
+ mpte = NULL;
+ }
+#endif
+
sched_unpin();
return (mpte);
}
@@ -4836,7 +4908,7 @@ __CONCAT(PMTYPE, remove_pages)(pmap_t pmap)
}
mpte = pmap_remove_pt_page(pmap, pv->pv_va);
if (mpte != NULL) {
- KASSERT(vm_page_all_valid(mpte),
+ KASSERT(vm_page_any_valid(mpte),
("pmap_remove_pages: pte page not promoted"));
pmap->pm_stats.resident_count--;
KASSERT(mpte->ref_count == NPTEPG,
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index daa1012da96d..48125b775619 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -83,7 +83,7 @@
* 4KB (PTE) page mappings have identical settings for the following fields:
*/
#define PG_PTE_PROMOTE (PG_MANAGED | PG_W | PG_G | PG_PTE_PAT | \
- PG_M | PG_A | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V)
+ PG_M | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V)
/*
* Page Protection Exception bits