svn commit: r336288 - in head/sys: i386/i386 i386/include vm

Alan Cox <alc at FreeBSD.org>
Sat Jul 14 17:20:28 UTC 2018


Author: alc
Date: Sat Jul 14 17:20:27 2018
New Revision: 336288
URL: https://svnweb.freebsd.org/changeset/base/336288

Log:
  Add support for pmap_enter(..., psind=1) to the i386 pmap.  In other words,
  add support for explicitly requesting that pmap_enter() create a 2 or 4 MB
  page mapping.  (Essentially, this feature allows the machine-independent
  layer to create superpage mappings preemptively, and not wait for automatic
  promotion to occur.)
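  
  For illustration, a machine-independent caller can now attempt the
  superpage mapping in a single call (a hypothetical sketch in the style
  of vm_fault_soft_fast(); "pmap", "va", "m", "prot", and "fault_type"
  stand in for the caller's state):
  
	/* "va" is superpage-aligned and "m" has m->psind == 1. */
	rv = pmap_enter(pmap, va, m, prot,
	    fault_type | PMAP_ENTER_NOSLEEP, 1);
	if (rv != KERN_SUCCESS) {
		/* e.g., fall back to base-page mappings. */
	}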
  
  Export pmap_ps_enabled() to the machine-independent layer.
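  
  (On i386, as on amd64, pmap_ps_enabled() simply returns the global
  pg_ps_enabled setting.)  A machine-independent caller is expected to
  test it before attempting a psind == 1 mapping, as vm_fault_soft_fast()
  does (abridged sketch):
  
	if ((m->flags & PG_FICTITIOUS) == 0 &&
	    (m_super = vm_reserv_to_superpage(m)) != NULL &&
	    /* ... alignment and range checks elided ... */
	    pmap_ps_enabled(fs->map->pmap)) {
		/* Consider mapping the entire superpage. */
	}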
  
  Add a flag to pmap_pv_insert_pde() that specifies whether it should fail or
  reclaim a PV entry when one is not available.
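  
  With PMAP_ENTER_NORECLAIM set, pmap_pv_insert_pde() fails outright once
  the pv_entry_high_water mark is reached or no PV entry is free; without
  it, the allocation may reclaim a PV entry from another mapping and does
  not fail.  A sketch of the two call styles:
  
	/* May reclaim; does not fail. */
	(void)pmap_pv_insert_pde(pmap, va, newpde, 0);

	/* Must not reclaim; the caller handles failure. */
	if (!pmap_pv_insert_pde(pmap, va, newpde, PMAP_ENTER_NORECLAIM))
		return (KERN_RESOURCE_SHORTAGE);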
  
  Refactor pmap_enter_pde() into two functions: one, retaining the original
  name, that is a general-purpose function for creating PDE PG_PS mappings,
  and another, pmap_enter_4mpage(), that is used to prefault 2 or 4 MB
  read- and/or execute-only mappings for execve(2), mmap(2), and shmat(2).
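  
  Callers of the refactored pmap_enter_pde() now see a KERN_* status
  rather than a boolean (a sketch of the resulting contract; "newpde",
  "flags", and "m" as in the new code below):
  
	switch (pmap_enter_pde(pmap, va, newpde, flags, m)) {
	case KERN_SUCCESS:		/* mapping created */
		break;
	case KERN_FAILURE:		/* NOREPLACE and a mapping exists */
		break;
	case KERN_RESOURCE_SHORTAGE:	/* NORECLAIM and no PV entry */
		break;
	}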
  
  Reviewed by:	kib
  Tested by:	pho
  Differential Revision:	https://reviews.freebsd.org/D16246

Modified:
  head/sys/i386/i386/pmap.c
  head/sys/i386/include/pmap.h
  head/sys/vm/vm_fault.c

Modified: head/sys/i386/i386/pmap.c
==============================================================================
--- head/sys/i386/i386/pmap.c	Sat Jul 14 17:18:17 2018	(r336287)
+++ head/sys/i386/i386/pmap.c	Sat Jul 14 17:20:27 2018	(r336288)
@@ -285,11 +285,18 @@ static struct mtx PMAP2mutex;
 
 int pti;
 
+/*
+ * Internal flags for pmap_enter()'s helper functions.
+ */
+#define	PMAP_ENTER_NORECLAIM	0x1000000	/* Don't reclaim PV entries. */
+#define	PMAP_ENTER_NOREPLACE	0x2000000	/* Don't replace mappings. */
+
 static void	free_pv_chunk(struct pv_chunk *pc);
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
 static void	pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
-static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
+static bool	pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde,
+		    u_int flags);
 #if VM_NRESERVLEVEL > 0
 static void	pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 #endif
@@ -299,8 +306,10 @@ static pv_entry_t pmap_pvh_remove(struct md_page *pvh,
 static int	pmap_pvh_wired_mappings(struct md_page *pvh, int count);
 
 static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
-static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
-    vm_prot_t prot);
+static bool	pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
+		    vm_prot_t prot);
+static int	pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
+		    u_int flags, vm_page_t m);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
 static void pmap_flush_page(vm_page_t m);
@@ -326,6 +335,8 @@ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *pt
 static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
     struct spglist *free);
+static bool	pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+		    struct spglist *free);
 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
 					vm_offset_t va);
 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
@@ -1068,6 +1079,13 @@ pmap_cache_bits(int mode, boolean_t is_pde)
 	return (cache_bits);
 }
 
+bool
+pmap_ps_enabled(pmap_t pmap)
+{
+
+	return (pg_ps_enabled);
+}
+
 /*
  * The caller is responsible for maintaining TLB consistency.
  */
@@ -2702,21 +2720,22 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 
 /*
  * Create the pv entries for each of the pages within a superpage.
  */
-static boolean_t
-pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
+static bool
+pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
+	bool noreclaim;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
-	if (pv_entry_count < pv_entry_high_water && 
-	    (pv = get_pv_entry(pmap, TRUE)) != NULL) {
-		pv->pv_va = va;
-		pvh = pa_to_pvh(pa);
-		TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
-		return (TRUE);
-	} else
-		return (FALSE);
+	noreclaim = (flags & PMAP_ENTER_NORECLAIM) != 0;
+	if ((noreclaim && pv_entry_count >= pv_entry_high_water) ||
+	    (pv = get_pv_entry(pmap, noreclaim)) == NULL)
+		return (false);
+	pv->pv_va = va;
+	pvh = pa_to_pvh(pde & PG_PS_FRAME);
+	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+	return (true);
 }
 
 /*
@@ -3026,6 +3045,38 @@ pmap_remove_page(pmap_t pmap, vm_offset_t va, struct s
 }
 
 /*
+ * Removes the specified range of addresses from the page table page.
+ */
+static bool
+pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+    struct spglist *free)
+{
+	pt_entry_t *pte;
+	bool anyvalid;
+
+	rw_assert(&pvh_global_lock, RA_WLOCKED);
+	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	anyvalid = false;
+	for (pte = pmap_pte_quick(pmap, sva); sva != eva; pte++,
+	    sva += PAGE_SIZE) {
+		if (*pte == 0)
+			continue;
+
+		/*
+		 * The TLB entry for a PG_G mapping is invalidated by
+		 * pmap_remove_pte().
+		 */
+		if ((*pte & PG_G) == 0)
+			anyvalid = true;
+
+		if (pmap_remove_pte(pmap, pte, sva, free))
+			break;
+	}
+	return (anyvalid);
+}
+
+/*
  *	Remove the given range of addresses from the specified map.
  *
  *	It is assumed that the start and end are properly
@@ -3036,7 +3087,6 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t 
 {
 	vm_offset_t pdnxt;
 	pd_entry_t ptpaddr;
-	pt_entry_t *pte;
 	struct spglist free;
 	int anyvalid;
 
@@ -3119,20 +3169,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t 
 		if (pdnxt > eva)
 			pdnxt = eva;
 
-		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
-		    sva += PAGE_SIZE) {
-			if (*pte == 0)
-				continue;
-
-			/*
-			 * The TLB entry for a PG_G mapping is invalidated
-			 * by pmap_remove_pte().
-			 */
-			if ((*pte & PG_G) == 0)
-				anyvalid = 1;
-			if (pmap_remove_pte(pmap, pte, sva, &free))
-				break;
-		}
+		if (pmap_remove_ptes(pmap, sva, pdnxt, &free))
+			anyvalid = 1;
 	}
 out:
 	sched_unpin();
@@ -3614,6 +3652,13 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	sched_pin();
+	if (psind == 1) {
+		/* Assert the required virtual and physical alignment. */
+		KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned"));
+		KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
+		rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m);
+		goto out;
+	}
 
 	pde = pmap_pde(pmap, va);
 	if (pmap != kernel_pmap) {
@@ -3812,48 +3857,111 @@ out:
 }
 
 /*
- * Tries to create a 2- or 4MB page mapping.  Returns TRUE if successful and
- * FALSE otherwise.  Fails if (1) a page table page cannot be allocated without
- * blocking, (2) a mapping already exists at the specified virtual address, or
- * (3) a pv entry cannot be allocated without reclaiming another pv entry. 
+ * Tries to create a read- and/or execute-only 2 or 4 MB page mapping.  Returns
+ * true if successful.  Returns false if (1) a mapping already exists at the
+ * specified virtual address or (2) a PV entry cannot be allocated without
+ * reclaiming another PV entry.
  */
-static boolean_t
-pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
+static bool
+pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
-	pd_entry_t *pde, newpde;
+	pd_entry_t newpde;
 
-	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	pde = pmap_pde(pmap, va);
-	if (*pde != 0) {
-		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
-		    " in pmap %p", va, pmap);
-		return (FALSE);
-	}
 	newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) |
 	    PG_PS | PG_V;
-	if ((m->oflags & VPO_UNMANAGED) == 0) {
+	if ((m->oflags & VPO_UNMANAGED) == 0)
 		newpde |= PG_MANAGED;
+#if defined(PAE) || defined(PAE_TABLES)
+	if ((prot & VM_PROT_EXECUTE) == 0)
+		newpde |= pg_nx;
+#endif
+	if (pmap != kernel_pmap)
+		newpde |= PG_U;
+	return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP |
+	    PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL) ==
+	    KERN_SUCCESS);
+}
 
+/*
+ * Tries to create the specified 2 or 4 MB page mapping.  Returns KERN_SUCCESS
+ * if the mapping was created, and either KERN_FAILURE or
+ * KERN_RESOURCE_SHORTAGE otherwise.  Returns KERN_FAILURE if
+ * PMAP_ENTER_NOREPLACE was specified and a mapping already exists at the
+ * specified virtual address.  Returns KERN_RESOURCE_SHORTAGE if
+ * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
+ *
+ * The parameter "m" is only used when creating a managed, writeable mapping.
+ */
+static int
+pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
+    vm_page_t m)
+{
+	struct spglist free;
+	pd_entry_t oldpde, *pde;
+	vm_page_t mt;
+
+	rw_assert(&pvh_global_lock, RA_WLOCKED);
+	KASSERT((newpde & (PG_M | PG_RW)) != PG_RW,
+	    ("pmap_enter_pde: newpde is missing PG_M"));
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	pde = pmap_pde(pmap, va);
+	oldpde = *pde;
+	if ((oldpde & PG_V) != 0) {
+		if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
+			CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
+			    " in pmap %p", va, pmap);
+			return (KERN_FAILURE);
+		}
+		/* Break the existing mapping(s). */
+		SLIST_INIT(&free);
+		if ((oldpde & PG_PS) != 0) {
+			/*
+			 * If the PDE resulted from a promotion, then a
+			 * reserved PT page could be freed.
+			 */
+			(void)pmap_remove_pde(pmap, pde, va, &free);
+			if ((oldpde & PG_G) == 0)
+				pmap_invalidate_pde_page(pmap, va, oldpde);
+		} else {
+			if (pmap_remove_ptes(pmap, va, va + NBPDR, &free))
+				pmap_invalidate_all(pmap);
+		}
+		vm_page_free_pages_toq(&free, true);
+		if (pmap == kernel_pmap) {
+			mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+			if (pmap_insert_pt_page(pmap, mt)) {
+				/*
+				 * XXX Currently, this can't happen because
+				 * we do not perform pmap_enter(psind == 1)
+				 * on the kernel pmap.
+				 */
+				panic("pmap_enter_pde: trie insert failed");
+			}
+		} else
+			KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p",
+			    pde));
+	}
+	if ((newpde & PG_MANAGED) != 0) {
 		/*
 		 * Abort this mapping if its PV entry could not be created.
 		 */
-		if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) {
+		if (!pmap_pv_insert_pde(pmap, va, newpde, flags)) {
 			CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
-			return (FALSE);
+			return (KERN_RESOURCE_SHORTAGE);
 		}
+		if ((newpde & PG_RW) != 0) {
+			for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
+				vm_page_aflag_set(mt, PGA_WRITEABLE);
+		}
 	}
-#if defined(PAE) || defined(PAE_TABLES)
-	if ((prot & VM_PROT_EXECUTE) == 0)
-		newpde |= pg_nx;
-#endif
-	if (va < VM_MAXUSER_ADDRESS)
-		newpde |= PG_U;
 
 	/*
 	 * Increment counters.
 	 */
+	if ((newpde & PG_W) != 0)
+		pmap->pm_stats.wired_count += NBPDR / PAGE_SIZE;
 	pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
 
 	/*
@@ -3865,7 +3973,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t 
 	pmap_pde_mappings++;
 	CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
 	    " in pmap %p", va, pmap);
-	return (TRUE);
+	return (KERN_SUCCESS);
 }
 
 /*
@@ -3899,7 +4007,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_o
 		va = start + ptoa(diff);
 		if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
 		    m->psind == 1 && pg_ps_enabled &&
-		    pmap_enter_pde(pmap, va, m, prot))
+		    pmap_enter_4mpage(pmap, va, m, prot))
 			m = &m[NBPDR / PAGE_SIZE - 1];
 		else
 			mpte = pmap_enter_quick_locked(pmap, va, m, prot,
@@ -4273,8 +4381,8 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_
 				continue;
 			if (dst_pmap->pm_pdir[ptepindex] == 0 &&
 			    ((srcptepaddr & PG_MANAGED) == 0 ||
-			    pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
-			    PG_PS_FRAME))) {
+			    pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr,
+			    PMAP_ENTER_NORECLAIM))) {
 				dst_pmap->pm_pdir[ptepindex] = srcptepaddr &
 				    ~PG_W;
 				dst_pmap->pm_stats.resident_count +=

Modified: head/sys/i386/include/pmap.h
==============================================================================
--- head/sys/i386/include/pmap.h	Sat Jul 14 17:18:17 2018	(r336287)
+++ head/sys/i386/include/pmap.h	Sat Jul 14 17:20:27 2018	(r336288)
@@ -387,6 +387,7 @@ void	*pmap_mapdev(vm_paddr_t, vm_size_t);
 void	*pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
 boolean_t pmap_page_is_mapped(vm_page_t m);
 void	pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
+bool	pmap_ps_enabled(pmap_t pmap);
 void	pmap_unmapdev(vm_offset_t, vm_size_t);
 pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2;
 void	pmap_invalidate_page(pmap_t, vm_offset_t);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Sat Jul 14 17:18:17 2018	(r336287)
+++ head/sys/vm/vm_fault.c	Sat Jul 14 17:20:27 2018	(r336288)
@@ -270,7 +270,7 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t 
     int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold)
 {
 	vm_page_t m, m_map;
-#if defined(__amd64__) && VM_NRESERVLEVEL > 0
+#if (defined(__amd64__) || defined(__i386__)) && VM_NRESERVLEVEL > 0
 	vm_page_t m_super;
 	int flags;
 #endif
@@ -284,7 +284,7 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t 
 		return (KERN_FAILURE);
 	m_map = m;
 	psind = 0;
-#if defined(__amd64__) && VM_NRESERVLEVEL > 0
+#if (defined(__amd64__) || defined(__i386__)) && VM_NRESERVLEVEL > 0
 	if ((m->flags & PG_FICTITIOUS) == 0 &&
 	    (m_super = vm_reserv_to_superpage(m)) != NULL &&
 	    rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start &&
@@ -460,7 +460,7 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t pro
 	    pidx <= pager_last;
 	    pidx += npages, m = vm_page_next(&m[npages - 1])) {
 		vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset;
-#if defined(__amd64__)
+#if defined(__amd64__) || defined(__i386__)
 		psind = m->psind;
 		if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 ||
 		    pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last ||

