svn commit: r351198 - in head/sys: amd64/amd64 amd64/include vm

Jeff Roberson <jeff@FreeBSD.org>
Sun Aug 18 23:07:58 UTC 2019


Author: jeff
Date: Sun Aug 18 23:07:56 2019
New Revision: 351198
URL: https://svnweb.freebsd.org/changeset/base/351198

Log:
  Allocate amd64's page array using pages and page directory pages from the
  NUMA domain that the pages describe.  Original patch from gallatin.
  
  Reviewed by:	kib
  Tested by:	pho
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D21252
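
As a rough sanity check on why a single PML4 slot is enough for the page
array, the sketch below replays the sizing arithmetic from the clamp in
create_pagetables() in userland.  It is only an illustration: the constants
are local copies rather than the kernel headers, and the 104-byte
sizeof(struct vm_page) is an assumption that varies with kernel
configuration.

#include <stdio.h>

/* Local stand-ins for the amd64 constants; not the kernel headers. */
#define	PAGE_SIZE	4096UL
#define	NBPML4		(512UL * 1024 * 1024 * 1024)	/* VA mapped by one PML4 slot */
#define	NPAPML4E	1UL		/* PML4 slots set aside for the page array */
#define	VM_PAGE_SZ	104UL		/* assumed sizeof(struct vm_page) */

int
main(void)
{
	/*
	 * The reserved VA holds NPAPML4E * NBPML4 / VM_PAGE_SZ vm_page
	 * structures, and each of them describes one PAGE_SIZE page of RAM.
	 */
	unsigned long limit_gb = NPAPML4E * (NBPML4 >> 30) *
	    (PAGE_SIZE / VM_PAGE_SZ);

	printf("page array PML4 slots limit RAM to %lu GB\n", limit_gb);
	return (0);
}

With those assumed values this prints 19968 GB (about 19.5 TB), well beyond
the 4 TB covered by the NDMPML4E = 8 direct-map slots, so NPAPML4E = 1 leaves
plenty of headroom.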

Modified:
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/pmap.h
  head/sys/amd64/include/vmparam.h
  head/sys/vm/vm_page.c
  head/sys/vm/vm_reserv.c
  head/sys/vm/vm_reserv.h

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Sun Aug 18 22:20:28 2019	(r351197)
+++ head/sys/amd64/amd64/pmap.c	Sun Aug 18 23:07:56 2019	(r351198)
@@ -383,6 +383,9 @@ static u_int64_t	DMPDphys;	/* phys addr of direct mapp
 static u_int64_t	DMPDPphys;	/* phys addr of direct mapped level 3 */
 static int		ndmpdpphys;	/* number of DMPDPphys pages */
 
+static uint64_t		PAPDPphys;	/* phys addr of page array level 3 */
+static int		npapdpphys;	/* number of PAPDPphys pages */
+
 static vm_paddr_t	KERNend;	/* phys addr of end of bootstrap data */
 
 /*
@@ -1427,6 +1430,16 @@ create_pagetables(vm_paddr_t *firstaddr)
 	pml4_entry_t *p4_p;
 	uint64_t DMPDkernphys;
 
+	npapdpphys = howmany(ptoa(Maxmem) / sizeof(struct vm_page), NBPML4);
+	if (npapdpphys > NPAPML4E) {
+		printf("NPAPML4E limits system to %lu GB\n",
+		    (NPAPML4E * 512) * (PAGE_SIZE / sizeof(struct vm_page)));
+		npapdpphys = NPAPML4E;
+		Maxmem = atop(NPAPML4E * NBPML4 *
+		    (PAGE_SIZE / sizeof(struct vm_page)));
+	}
+	PAPDPphys = allocpages(firstaddr, npapdpphys);
+
 	/* Allocate page table pages for the direct map */
 	ndmpdp = howmany(ptoa(Maxmem), NBPDP);
 	if (ndmpdp < 4)		/* Minimum 4GB of dirmap */
@@ -1573,6 +1586,12 @@ create_pagetables(vm_paddr_t *firstaddr)
 		p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
 		p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V;
 	}
+
+	/* Connect the page array slots up to the pml4. */
+	for (i = 0; i < npapdpphys; i++) {
+		p4_p[PAPML4I + i] = PAPDPphys + ptoa(i);
+		p4_p[PAPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+	}
 }
 
 /*
@@ -3387,6 +3406,11 @@ pmap_pinit_pml4(vm_page_t pml4pg)
 		    X86_PG_V;
 	}
 
+	for (i = 0; i < npapdpphys; i++) {
+		pm_pml4[PAPML4I + i] = (PAPDPphys + ptoa(i)) | X86_PG_RW |
+		    X86_PG_V;
+	}
+
 	/* install self-referential address mapping entry(s) */
 	pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW |
 	    X86_PG_A | X86_PG_M;
@@ -3743,6 +3767,8 @@ pmap_release(pmap_t pmap)
 		pmap->pm_pml4[KPML4BASE + i] = 0;
 	for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
 		pmap->pm_pml4[DMPML4I + i] = 0;
+	for (i = 0; i < npapdpphys; i++)
+		pmap->pm_pml4[PAPML4I + i] = 0;
 	pmap->pm_pml4[PML4PML4I] = 0;	/* Recursive Mapping */
 	for (i = 0; i < lm_ents; i++)	/* Large Map */
 		pmap->pm_pml4[LMSPML4I + i] = 0;
@@ -3779,6 +3805,44 @@ kvm_free(SYSCTL_HANDLER_ARGS)
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_free, "LU", "Amount of KVM free");
+
+void
+pmap_page_array_startup(long pages)
+{
+	pdp_entry_t *pdpe;
+	pd_entry_t *pde, newpdir;
+	vm_offset_t va, start, end;
+	vm_paddr_t pa;
+	long pfn;
+	int domain, i;
+
+	vm_page_array_size = pages;
+
+	start = va = PA_MIN_ADDRESS;
+	end = va + (pages * sizeof(struct vm_page));
+	while (va < end) {
+		pfn = first_page + ((va - start) / sizeof(struct vm_page));
+		domain = _vm_phys_domain(ctob(pfn));
+		pdpe = pmap_pdpe(kernel_pmap, va);
+		if ((*pdpe & X86_PG_V) == 0) {
+			pa = vm_phys_early_alloc(domain, PAGE_SIZE);
+			bzero((void *)PHYS_TO_DMAP(pa), PAGE_SIZE);
+			*pdpe = (pdp_entry_t)(pa | X86_PG_V | X86_PG_RW |
+			    X86_PG_A | X86_PG_M);
+			continue; /* try again */
+		}
+		pde = pmap_pdpe_to_pde(pdpe, va);
+		if ((*pde & X86_PG_V) != 0)
+			panic("Unexpected pde");
+		pa = vm_phys_early_alloc(domain, NBPDR);
+		for (i = 0; i < NPDEPG; i++)
+			dump_add_page(pa + (i * PAGE_SIZE));
+		newpdir = (pd_entry_t)(pa | X86_PG_V | X86_PG_RW | X86_PG_A |
+		    X86_PG_M | PG_PS | pg_g | pg_nx);
+		pde_store(pde, newpdir);
+		va += NBPDR;
+	}
+}
 
 /*
  * grow the number of kernel page table entries, if needed

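The key step in pmap_page_array_startup() above is translating an offset
within vm_page_array back to the physical pages that slice of the array
describes, and using their domain to decide where the 2MB backing page (and
any page directory page it needs) should come from.  The standalone sketch
below replays just that arithmetic; the 128 GB machine, the two-domain split
at 64 GB, the 104-byte sizeof(struct vm_page) and toy_phys_domain() are all
made up for illustration, standing in for _vm_phys_domain() and the real
segment layout.

#include <stdio.h>

#define	PAGE_SIZE	4096UL
#define	NBPDR		(2UL * 1024 * 1024)	/* bytes mapped by one 2MB PDE */
#define	VM_PAGE_SZ	104UL			/* assumed sizeof(struct vm_page) */

/* Toy domain map: pretend domain 0 owns the first 64 GB, domain 1 the rest. */
static int
toy_phys_domain(unsigned long pa)
{

	return (pa < 64UL * 1024 * 1024 * 1024 ? 0 : 1);
}

int
main(void)
{
	unsigned long ram, array_bytes, off, pfn;
	int dom, last;

	ram = 128UL * 1024 * 1024 * 1024;	/* assumed 128 GB machine */
	array_bytes = ram / PAGE_SIZE * VM_PAGE_SZ;
	last = -1;

	/*
	 * Walk the page array in 2MB chunks, as pmap_page_array_startup()
	 * does, and report where the backing domain changes.  The domain of
	 * a chunk is the domain of the physical pages whose vm_page
	 * structures live in that chunk (first_page is taken to be 0 here).
	 */
	for (off = 0; off < array_bytes; off += NBPDR) {
		pfn = off / VM_PAGE_SZ;	/* first page described by this chunk */
		dom = toy_phys_domain(pfn * PAGE_SIZE);
		if (dom != last) {
			printf("array offset %lu (pfn %lu): domain %d\n",
			    off, pfn, dom);
			last = dom;
		}
	}
	return (0);
}

In the kernel each such chunk is then mapped with a 2MB PG_PS entry whose
backing memory comes from vm_phys_early_alloc(domain, NBPDR), so the vm_page
structures describing a domain's memory are themselves resident on that
domain.
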
Modified: head/sys/amd64/include/pmap.h
==============================================================================
--- head/sys/amd64/include/pmap.h	Sun Aug 18 22:20:28 2019	(r351197)
+++ head/sys/amd64/include/pmap.h	Sun Aug 18 23:07:56 2019	(r351198)
@@ -201,6 +201,13 @@
 #define	NDMPML4E	8
 
 /*
+ * NPAPML4E is the maximum number of PML4 entries that will be
+ * used to implement the page array.  This should be roughly 3% of
+ * NDMPML4E owing to 3% overhead for struct vm_page.
+ */
+#define	NPAPML4E	1
+
+/*
  * These values control the layout of virtual memory.  The starting address
  * of the direct map, which is controlled by DMPML4I, must be a multiple of
  * its size.  (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.)
@@ -219,7 +226,8 @@
 #define	PML4PML4I	(NPML4EPG/2)	/* Index of recursive pml4 mapping */
 
 #define	KPML4BASE	(NPML4EPG-NKPML4E) /* KVM at highest addresses */
-#define	DMPML4I		rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */
+#define	PAPML4I		(KPML4BASE-1-NPAPML4E) /* Below KVM */
+#define	DMPML4I		rounddown(PAPML4I-NDMPML4E, NDMPML4E) /* Below pages */
 
 #define	KPML4I		(NPML4EPG-1)
 #define	KPDPI		(NPDPEPG-2)	/* kernbase at -2GB */
@@ -467,6 +475,7 @@ int	pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_off
 	    u_int keyidx, int flags);
 void	pmap_thread_init_invl_gen(struct thread *td);
 int	pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
+void	pmap_page_array_startup(long count);
 #endif /* _KERNEL */
 
 /* Return various clipped indexes for a given VA */

Modified: head/sys/amd64/include/vmparam.h
==============================================================================
--- head/sys/amd64/include/vmparam.h	Sun Aug 18 22:20:28 2019	(r351197)
+++ head/sys/amd64/include/vmparam.h	Sun Aug 18 23:07:56 2019	(r351198)
@@ -160,7 +160,9 @@
  * 0xffff808000000000 - 0xffff847fffffffff   large map (can be tuned up)
  * 0xffff848000000000 - 0xfffff7ffffffffff   unused (large map extends there)
  * 0xfffff80000000000 - 0xfffffbffffffffff   4TB direct map
- * 0xfffffc0000000000 - 0xfffffdffffffffff   unused
+ * 0xfffffc0000000000 - 0xfffffcffffffffff   unused
+ * 0xfffffd0000000000 - 0xfffffd7fffffffff   page array 512GB
+ * 0xfffffd8000000000 - 0xfffffdffffffffff   unused
  * 0xfffffe0000000000 - 0xffffffffffffffff   2TB kernel map
  *
  * Within the kernel map:
@@ -175,6 +177,8 @@
 #define	DMAP_MIN_ADDRESS	KVADDR(DMPML4I, 0, 0, 0)
 #define	DMAP_MAX_ADDRESS	KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
 
+#define	PA_MIN_ADDRESS		KVADDR(PAPML4I, 0, 0, 0)
+
 #define	LARGEMAP_MIN_ADDRESS	KVADDR(LMSPML4I, 0, 0, 0)
 #define	LARGEMAP_MAX_ADDRESS	KVADDR(LMEPML4I + 1, 0, 0, 0)
 
@@ -210,6 +214,12 @@
 	    ("virtual address %#jx not covered by the DMAP",		\
 	    (uintmax_t)x));						\
 	(x) & ~DMAP_MIN_ADDRESS; })
+
+/*
+ * amd64 statically allocates the page array address so that it can
+ * be more easily allocated on the correct memory domains.
+ */
+#define PMAP_HAS_PAGE_ARRAY	1
 
 /*
  * How many physical pages per kmem arena virtual page.

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Sun Aug 18 22:20:28 2019	(r351197)
+++ head/sys/vm/vm_page.c	Sun Aug 18 23:07:56 2019	(r351198)
@@ -135,7 +135,11 @@ static int vm_pageproc_waiters;
  */
 vm_page_t bogus_page;
 
+#ifdef PMAP_HAS_PAGE_ARRAY
+vm_page_t vm_page_array = (vm_page_t)PA_MIN_ADDRESS;
+#else
 vm_page_t vm_page_array;
+#endif
 long vm_page_array_size;
 long first_page;
 
@@ -522,6 +526,31 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segi
 	pmap_page_init(m);
 }
 
+#ifndef PMAP_HAS_PAGE_ARRAY
+static vm_paddr_t
+vm_page_array_alloc(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t page_range)
+{
+	vm_paddr_t new_end;
+
+	/*
+	 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
+	 * However, because this page is allocated from KVM, out-of-bounds
+	 * accesses using the direct map will not be trapped.
+	 */
+	*vaddr += PAGE_SIZE;
+
+	/*
+	 * Allocate physical memory for the page structures, and map it.
+	 */
+	new_end = trunc_page(end - page_range * sizeof(struct vm_page));
+	vm_page_array = (vm_page_t)pmap_map(vaddr, new_end, end,
+	    VM_PROT_READ | VM_PROT_WRITE);
+	vm_page_array_size = page_range;
+
+	return (new_end);
+}
+#endif
+
 /*
  *	vm_page_startup:
  *
@@ -693,6 +722,11 @@ vm_page_startup(vm_offset_t vaddr)
 #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
 #endif
 
+#ifdef PMAP_HAS_PAGE_ARRAY
+	pmap_page_array_startup(size / PAGE_SIZE);
+	biggestone = vm_phys_avail_largest();
+	end = new_end = phys_avail[biggestone + 1];
+#else
 #ifdef VM_PHYSSEG_DENSE
 	/*
 	 * In the VM_PHYSSEG_DENSE case, the number of pages can account for
@@ -723,31 +757,15 @@ vm_page_startup(vm_offset_t vaddr)
 		}
 	}
 	end = new_end;
+	new_end = vm_page_array_alloc(&vaddr, end, page_range);
+#endif
 
-	/*
-	 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
-	 * However, because this page is allocated from KVM, out-of-bounds
-	 * accesses using the direct map will not be trapped.
-	 */
-	vaddr += PAGE_SIZE;
-
-	/*
-	 * Allocate physical memory for the page structures, and map it.
-	 */
-	new_end = trunc_page(end - page_range * sizeof(struct vm_page));
-	mapped = pmap_map(&vaddr, new_end, end,
-	    VM_PROT_READ | VM_PROT_WRITE);
-	vm_page_array = (vm_page_t)mapped;
-	vm_page_array_size = page_range;
-
 #if VM_NRESERVLEVEL > 0
 	/*
 	 * Allocate physical memory for the reservation management system's
 	 * data structures, and map it.
 	 */
-	if (high_avail == end)
-		high_avail = new_end;
-	new_end = vm_reserv_startup(&vaddr, new_end, high_avail);
+	new_end = vm_reserv_startup(&vaddr, new_end);
 #endif
 #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \
     defined(__riscv)

Modified: head/sys/vm/vm_reserv.c
==============================================================================
--- head/sys/vm/vm_reserv.c	Sun Aug 18 22:20:28 2019	(r351197)
+++ head/sys/vm/vm_reserv.c	Sun Aug 18 23:07:56 2019	(r351198)
@@ -1360,10 +1360,23 @@ vm_reserv_size(int level)
  * management system's data structures, in particular, the reservation array.
  */
 vm_paddr_t
-vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
+vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end)
 {
-	vm_paddr_t new_end;
+	vm_paddr_t new_end, high_water;
 	size_t size;
+	int i;
+
+	high_water = phys_avail[1];
+	for (i = 0; i < vm_phys_nsegs; i++) {
+		if (vm_phys_segs[i].end > high_water)
+			high_water = vm_phys_segs[i].end;
+	}
+
+	/* Skip the first chunk.  It is already accounted for. */
+	for (i = 2; phys_avail[i + 1] != 0; i += 2) {
+		if (phys_avail[i + 1] > high_water)
+			high_water = phys_avail[i + 1];
+	}
 
 	/*
 	 * Calculate the size (in bytes) of the reservation array.  Round up

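With the high_water argument gone, vm_reserv_startup() now derives the
high-water mark itself: it seeds it with phys_avail[1] (the first chunk,
already accounted for) and then takes the maximum over every vm_phys_segs[]
end and every remaining phys_avail[] end.  A minimal standalone replay of
that scan, using invented addresses purely for illustration:

#include <stdio.h>

struct seg {
	unsigned long start, end;
};

int
main(void)
{
	/* Made-up physical segments and avail ranges, for illustration only. */
	struct seg vm_phys_segs[] = {
		{ 0x0000001000UL, 0x000009f000UL },
		{ 0x0100000000UL, 0x1080000000UL },
	};
	unsigned long phys_avail[] = {
		0x0000001000UL, 0x000009f000UL,
		0x0100000000UL, 0x1000000000UL,
		0, 0,				/* terminator */
	};
	unsigned long high_water;
	int i, nsegs;

	nsegs = (int)(sizeof(vm_phys_segs) / sizeof(vm_phys_segs[0]));
	high_water = phys_avail[1];
	for (i = 0; i < nsegs; i++)
		if (vm_phys_segs[i].end > high_water)
			high_water = vm_phys_segs[i].end;

	/* Skip the first chunk; it seeded high_water above. */
	for (i = 2; phys_avail[i + 1] != 0; i += 2)
		if (phys_avail[i + 1] > high_water)
			high_water = phys_avail[i + 1];

	printf("high_water = %#lx\n", high_water);
	return (0);
}
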
Modified: head/sys/vm/vm_reserv.h
==============================================================================
--- head/sys/vm/vm_reserv.h	Sun Aug 18 22:20:28 2019	(r351197)
+++ head/sys/vm/vm_reserv.h	Sun Aug 18 23:07:56 2019	(r351198)
@@ -66,8 +66,7 @@ boolean_t	vm_reserv_reclaim_inactive(int domain);
 void		vm_reserv_rename(vm_page_t m, vm_object_t new_object,
 		    vm_object_t old_object, vm_pindex_t old_object_offset);
 int		vm_reserv_size(int level);
-vm_paddr_t	vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,
-		    vm_paddr_t high_water);
+vm_paddr_t	vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end);
 vm_page_t	vm_reserv_to_superpage(vm_page_t m);
 
 #endif	/* VM_NRESERVLEVEL > 0 */

