git: 88c8cba04395 - main - amd64 pmap: preallocate another page table page in pmap_demote_DMAP()

From: Alan Cox <alc@FreeBSD.org>
Date: Sun, 06 Jul 2025 17:09:00 UTC
The branch main has been updated by alc:

URL: https://cgit.FreeBSD.org/src/commit/?id=88c8cba0439599c00d9d3d586a66a035004e6e84

commit 88c8cba0439599c00d9d3d586a66a035004e6e84
Author:     Alan Cox <alc@FreeBSD.org>
AuthorDate: 2025-06-30 07:36:12 +0000
Commit:     Alan Cox <alc@FreeBSD.org>
CommitDate: 2025-07-06 17:08:37 +0000

    amd64 pmap: preallocate another page table page in pmap_demote_DMAP()
    
    To avoid a possible panic in pmap_demote_DMAP(), preallocate the page
    table page that may be needed by pmap_demote_pde() before acquiring the
    kernel pmap lock.  While that lock is held, the allocation cannot
    sleep, and its failure would lead to a panic; allocating beforehand
    lets us wait (VM_ALLOC_WAITOK) until the allocation succeeds.
    
    Reviewed by:    kib
    MFC after:      1 week
    Differential Revision:  https://reviews.freebsd.org/D51091
---
 sys/amd64/amd64/pmap.c | 121 +++++++++++++++++++++++++++++--------------------
 1 file changed, 73 insertions(+), 48 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 0044f27729f6..cae5436a1ff2 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1301,6 +1301,8 @@ static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
 static bool	pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static bool	pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde,
     vm_offset_t va, struct rwlock **lockp);
+static bool	pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde,
+    vm_offset_t va, struct rwlock **lockp, vm_page_t mpte);
 static bool	pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
     vm_offset_t va, vm_page_t m);
 static int	pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
@@ -6010,12 +6012,18 @@ pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
 static bool
 pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
     struct rwlock **lockp)
+{
+	return (pmap_demote_pde_mpte(pmap, pde, va, lockp, NULL));
+}
+
+static bool
+pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+    struct rwlock **lockp, vm_page_t mpte)
 {
 	pd_entry_t newpde, oldpde;
 	pt_entry_t *firstpte, newpte;
 	pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
 	vm_paddr_t mptepa;
-	vm_page_t mpte;
 	int PG_PTE_CACHE;
 	bool in_kernel;
 
@@ -6028,61 +6036,65 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
 	PG_PKU_MASK = pmap_pku_mask_bit(pmap);
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	in_kernel = va >= VM_MAXUSER_ADDRESS;
 	oldpde = *pde;
 	KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
 	    ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
-
-	/*
-	 * Invalidate the 2MB page mapping and return "failure" if the
-	 * mapping was never accessed.
-	 */
-	if ((oldpde & PG_A) == 0) {
-		KASSERT((oldpde & PG_W) == 0,
-		    ("pmap_demote_pde: a wired mapping is missing PG_A"));
-		pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp);
-		return (false);
-	}
-
-	mpte = pmap_remove_pt_page(pmap, va);
+	KASSERT((oldpde & PG_MANAGED) == 0 || lockp != NULL,
+	    ("pmap_demote_pde: lockp for a managed mapping is NULL"));
+	in_kernel = va >= VM_MAXUSER_ADDRESS;
 	if (mpte == NULL) {
-		KASSERT((oldpde & PG_W) == 0,
-		    ("pmap_demote_pde: page table page for a wired mapping"
-		    " is missing"));
-
 		/*
-		 * If the page table page is missing and the mapping
-		 * is for a kernel address, the mapping must belong to
-		 * the direct map.  Page table pages are preallocated
-		 * for every other part of the kernel address space,
-		 * so the direct map region is the only part of the
-		 * kernel address space that must be handled here.
+		 * Invalidate the 2MB page mapping and return "failure" if the
+		 * mapping was never accessed.
 		 */
-		KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS &&
-		    va < DMAP_MAX_ADDRESS),
-		    ("pmap_demote_pde: No saved mpte for va %#lx", va));
-
-		/*
-		 * If the 2MB page mapping belongs to the direct map
-		 * region of the kernel's address space, then the page
-		 * allocation request specifies the highest possible
-		 * priority (VM_ALLOC_INTERRUPT).  Otherwise, the
-		 * priority is normal.
-		 */
-		mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va),
-		    (in_kernel ? VM_ALLOC_INTERRUPT : 0) | VM_ALLOC_WIRED);
-
-		/*
-		 * If the allocation of the new page table page fails,
-		 * invalidate the 2MB page mapping and return "failure".
-		 */
-		if (mpte == NULL) {
+		if ((oldpde & PG_A) == 0) {
+			KASSERT((oldpde & PG_W) == 0,
+		    ("pmap_demote_pde: a wired mapping is missing PG_A"));
 			pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp);
 			return (false);
 		}
 
-		if (!in_kernel)
-			mpte->ref_count = NPTEPG;
+		mpte = pmap_remove_pt_page(pmap, va);
+		if (mpte == NULL) {
+			KASSERT((oldpde & PG_W) == 0,
+    ("pmap_demote_pde: page table page for a wired mapping is missing"));
+
+			/*
+			 * If the page table page is missing and the mapping
+			 * is for a kernel address, the mapping must belong to
+			 * the direct map.  Page table pages are preallocated
+			 * for every other part of the kernel address space,
+			 * so the direct map region is the only part of the
+			 * kernel address space that must be handled here.
+			 */
+			KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS &&
+			    va < DMAP_MAX_ADDRESS),
+			    ("pmap_demote_pde: No saved mpte for va %#lx", va));
+
+			/*
+			 * If the 2MB page mapping belongs to the direct map
+			 * region of the kernel's address space, then the page
+			 * allocation request specifies the highest possible
+			 * priority (VM_ALLOC_INTERRUPT).  Otherwise, the
+			 * priority is normal.
+			 */
+			mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va),
+			    (in_kernel ? VM_ALLOC_INTERRUPT : 0) |
+			    VM_ALLOC_WIRED);
+
+			/*
+			 * If the allocation of the new page table page fails,
+			 * invalidate the 2MB page mapping and return "failure".
+			 */
+			if (mpte == NULL) {
+				pmap_demote_pde_abort(pmap, va, pde, oldpde,
+				    lockp);
+				return (false);
+			}
+
+			if (!in_kernel)
+				mpte->ref_count = NPTEPG;
+		}
 	}
 	mptepa = VM_PAGE_TO_PHYS(mpte);
 	firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
@@ -9977,8 +9989,8 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate)
 {
 	pdp_entry_t *pdpe;
 	pd_entry_t *pde;
-	vm_page_t m;
 	vm_offset_t va;
+	vm_page_t m, mpte;
 	bool changed;
 
 	if (len == 0)
@@ -9998,6 +10010,11 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate)
 		 * x86_mr_split_dmap() function.
 		 */
 		m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK);
+		if (len < NBPDR) {
+			mpte = vm_page_alloc_noobj(VM_ALLOC_WIRED |
+			    VM_ALLOC_WAITOK);
+		} else
+			mpte = NULL;
 
 		PMAP_LOCK(kernel_pmap);
 		pdpe = pmap_pdpe(kernel_pmap, va);
@@ -10014,9 +10031,13 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate)
 			if ((*pde & X86_PG_V) == 0)
 				panic("pmap_demote_DMAP: invalid PDE");
 			if ((*pde & PG_PS) != 0) {
-				if (!pmap_demote_pde(kernel_pmap, pde, va))
+				mpte->pindex = pmap_pde_pindex(va);
+				pmap_pt_page_count_adj(kernel_pmap, 1);
+				if (!pmap_demote_pde_mpte(kernel_pmap, pde, va,
+				    NULL, mpte))
 					panic("pmap_demote_DMAP: PDE failed");
 				changed = true;
+				mpte = NULL;
 			}
 		}
 		if (changed && invalidate)
@@ -10026,6 +10047,10 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate)
 			vm_page_unwire_noq(m);
 			vm_page_free(m);
 		}
+		if (mpte != NULL) {
+			vm_page_unwire_noq(mpte);
+			vm_page_free(mpte);
+		}
 	}
 }
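
The change follows a common kernel pattern: perform a sleepable
allocation before taking a non-sleepable lock, then hand the
preallocated page to the code running under the lock.  Below is a
minimal sketch of that pattern as it applies here.  The helper
demote_one_2m_page() is hypothetical, written as if it lived inside
sys/amd64/amd64/pmap.c so that it can reach the file's static
functions (pmap_pde(), pmap_demote_pde_mpte(), pmap_pt_page_count_adj())
and macros; it is not part of the commit.

/*
 * Sketch only: demote a single 2MB kernel mapping without risking a
 * panic from a failed page allocation under the kernel pmap lock.
 */
static void
demote_one_2m_page(vm_offset_t va)
{
	pd_entry_t *pde;
	vm_page_t mpte;

	/*
	 * Preallocate while sleeping is still legal: VM_ALLOC_WAITOK
	 * blocks until a page is available, so the code running under
	 * the lock never sees an allocation failure.
	 */
	mpte = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK);

	PMAP_LOCK(kernel_pmap);
	pde = pmap_pde(kernel_pmap, va);
	if ((*pde & PG_PS) != 0) {
		/* Account for the new page table page, then demote. */
		mpte->pindex = pmap_pde_pindex(va);
		pmap_pt_page_count_adj(kernel_pmap, 1);

		/*
		 * For an unmanaged kernel mapping, such as the direct
		 * map, lockp may be NULL.
		 */
		if (!pmap_demote_pde_mpte(kernel_pmap, pde, va, NULL, mpte))
			panic("demote_one_2m_page: demotion failed");
		mpte = NULL;	/* ownership passed to the demotion */
	}
	PMAP_UNLOCK(kernel_pmap);

	/* The mapping was already 4KB; release the unused page. */
	if (mpte != NULL) {
		vm_page_unwire_noq(mpte);
		vm_page_free(mpte);
	}
}

The committed pmap_demote_DMAP() has the same two-step shape: the
vm_page_alloc_noobj() call happens before PMAP_LOCK(kernel_pmap), and
the preallocated page is unwired and freed after the lock is dropped
if the loop never consumed it.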