svn commit: r351198 - in head/sys: amd64/amd64 amd64/include vm
Jeff Roberson
jeff at FreeBSD.org
Sun Aug 18 23:07:58 UTC 2019
Author: jeff
Date: Sun Aug 18 23:07:56 2019
New Revision: 351198
URL: https://svnweb.freebsd.org/changeset/base/351198
Log:
Allocate amd64's page array using pages and page directory pages from the
NUMA domain that the pages describe. Original patch from gallatin.
Reviewed by: kib
Tested by: pho
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D21252
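For scale, assuming sizeof(struct vm_page) is 104 bytes on amd64 at this
revision: one PML4 slot (NBPML4) spans 512GB of KVA, and each vm_page
describes one 4KB page, so the single page-array slot reserved below can
describe

    NPAPML4E * NBPML4 * (PAGE_SIZE / sizeof(struct vm_page))
        = 1 * 512GB * (4096 / 104) = 512GB * 39 ~= 19.5TB

of physical memory. The new code in create_pagetables() clamps Maxmem to
exactly this product when the machine would otherwise exceed it.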
Modified:
head/sys/amd64/amd64/pmap.c
head/sys/amd64/include/pmap.h
head/sys/amd64/include/vmparam.h
head/sys/vm/vm_page.c
head/sys/vm/vm_reserv.c
head/sys/vm/vm_reserv.h
Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c Sun Aug 18 22:20:28 2019 (r351197)
+++ head/sys/amd64/amd64/pmap.c Sun Aug 18 23:07:56 2019 (r351198)
@@ -383,6 +383,9 @@ static u_int64_t DMPDphys; /* phys addr of direct mapp
static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */
static int ndmpdpphys; /* number of DMPDPphys pages */
+static uint64_t PAPDPphys; /* phys addr of page array level 3 */
+static int npapdpphys; /* number of PAPDPphys pages */
+
static vm_paddr_t KERNend; /* phys addr of end of bootstrap data */
/*
@@ -1427,6 +1430,16 @@ create_pagetables(vm_paddr_t *firstaddr)
pml4_entry_t *p4_p;
uint64_t DMPDkernphys;
+ npapdpphys = howmany(ptoa(Maxmem) / sizeof(struct vm_page), NBPML4);
+ if (npapdpphys > NPAPML4E) {
+ printf("NDMPML4E limits system to %lu GB\n",
+ (NDMPML4E * 512) * (PAGE_SIZE / sizeof(struct vm_page)));
+ npapdpphys = NPAPML4E;
+ Maxmem = atop(NPAPML4E * NBPML4 *
+ (PAGE_SIZE / sizeof(struct vm_page)));
+ }
+ PAPDPphys = allocpages(firstaddr, npapdpphys);
+
/* Allocate page table pages for the direct map */
ndmpdp = howmany(ptoa(Maxmem), NBPDP);
if (ndmpdp < 4) /* Minimum 4GB of dirmap */
@@ -1573,6 +1586,12 @@ create_pagetables(vm_paddr_t *firstaddr)
p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V;
}
+
+ /* Connect the page array slots up to the pml4. */
+ for (i = 0; i < npapdpphys; i++) {
+ p4_p[PAPML4I + i] = PAPDPphys + ptoa(i);
+ p4_p[PAPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+ }
}
/*
@@ -3387,6 +3406,11 @@ pmap_pinit_pml4(vm_page_t pml4pg)
X86_PG_V;
}
+ for (i = 0; i < npapdpphys; i++) {
+ pm_pml4[PAPML4I + i] = (PAPDPphys + ptoa(i)) | X86_PG_RW |
+ X86_PG_V;
+ }
+
/* install self-referential address mapping entry(s) */
pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW |
X86_PG_A | X86_PG_M;
@@ -3743,6 +3767,8 @@ pmap_release(pmap_t pmap)
pmap->pm_pml4[KPML4BASE + i] = 0;
for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
pmap->pm_pml4[DMPML4I + i] = 0;
+ for (i = 0; i < npapdpphys; i++)
+ pmap->pm_pml4[PAPML4I + i] = 0;
pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */
for (i = 0; i < lm_ents; i++) /* Large Map */
pmap->pm_pml4[LMSPML4I + i] = 0;
@@ -3779,6 +3805,44 @@ kvm_free(SYSCTL_HANDLER_ARGS)
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
0, 0, kvm_free, "LU", "Amount of KVM free");
+
+void
+pmap_page_array_startup(long pages)
+{
+ pdp_entry_t *pdpe;
+ pd_entry_t *pde, newpdir;
+ vm_offset_t va, start, end;
+ vm_paddr_t pa;
+ long pfn;
+ int domain, i;
+
+ vm_page_array_size = pages;
+
+ start = va = PA_MIN_ADDRESS;
+ end = va + (pages * sizeof(struct vm_page));
+ while (va < end) {
+ pfn = first_page + ((va - start) / sizeof(struct vm_page));
+ domain = _vm_phys_domain(ctob(pfn));
+ pdpe = pmap_pdpe(kernel_pmap, va);
+ if ((*pdpe & X86_PG_V) == 0) {
+ pa = vm_phys_early_alloc(domain, PAGE_SIZE);
+ bzero((void *)PHYS_TO_DMAP(pa), PAGE_SIZE);
+ *pdpe = (pdp_entry_t)(pa | X86_PG_V | X86_PG_RW |
+ X86_PG_A | X86_PG_M);
+ continue; /* try again */
+ }
+ pde = pmap_pdpe_to_pde(pdpe, va);
+ if ((*pde & X86_PG_V) != 0)
+ panic("Unexpected pde");
+ pa = vm_phys_early_alloc(domain, NBPDR);
+ for (i = 0; i < NPDEPG; i++)
+ dump_add_page(pa + (i * PAGE_SIZE));
+ newpdir = (pd_entry_t)(pa | X86_PG_V | X86_PG_RW | X86_PG_A |
+ X86_PG_M | PG_PS | pg_g | pg_nx);
+ pde_store(pde, newpdir);
+ va += NBPDR;
+ }
+}
/*
* grow the number of kernel page table entries, if needed
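A note on pmap_page_array_startup() above: the array is mapped in 2MB
(NBPDR) slabs, and each slab holds NBPDR / sizeof(struct vm_page) entries,
about 20,164 with the 104-byte vm_page assumed earlier, so one slab
describes roughly 79MB of physical memory. That is the granularity at which
the array's backing pages can follow NUMA domain boundaries. Note also that
the `continue' after filling in a missing PDP entry intentionally retries
the same va, so the PDE check runs again with the new page-directory page
in place.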
Modified: head/sys/amd64/include/pmap.h
==============================================================================
--- head/sys/amd64/include/pmap.h Sun Aug 18 22:20:28 2019 (r351197)
+++ head/sys/amd64/include/pmap.h Sun Aug 18 23:07:56 2019 (r351198)
@@ -201,6 +201,13 @@
#define NDMPML4E 8
/*
+ * NPAPML4E is the maximum number of PML4 entries that will be
+ * used to implement the page array. This should be roughly 3% of
+ * NDMPML4E owing to 3% overhead for struct vm_page.
+ */
+#define NPAPML4E 1
+
+/*
* These values control the layout of virtual memory. The starting address
* of the direct map, which is controlled by DMPML4I, must be a multiple of
* its size. (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.)
@@ -219,7 +226,8 @@
#define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */
#define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */
-#define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */
+#define PAPML4I (KPML4BASE-1-NPAPML4E) /* Below KVM */
+#define DMPML4I rounddown(PAPML4I-NDMPML4E, NDMPML4E) /* Below pages */
#define KPML4I (NPML4EPG-1)
#define KPDPI (NPDPEPG-2) /* kernbase at -2GB */
@@ -467,6 +475,7 @@ int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_off
u_int keyidx, int flags);
void pmap_thread_init_invl_gen(struct thread *td);
int pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
+void pmap_page_array_startup(long count);
#endif /* _KERNEL */
/* Return various clipped indexes for a given VA */
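Working out the new constants, assuming NPML4EPG == 512 and NKPML4E == 4
(their values at this revision): KPML4BASE = 512 - 4 = 508, so
PAPML4I = 508 - 1 - 1 = 506 and DMPML4I = rounddown(506 - 8, 8) = 496.
Fed through KVADDR() these give the addresses in the vmparam.h map below:
slot 496 is 0xfffff80000000000 (the 4TB direct map, slots 496-503) and
slot 506 is 0xfffffd0000000000 (the 512GB page-array window), leaving
slots 504-505 and 507 unused below the kernel map in slots 508-511.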
Modified: head/sys/amd64/include/vmparam.h
==============================================================================
--- head/sys/amd64/include/vmparam.h Sun Aug 18 22:20:28 2019 (r351197)
+++ head/sys/amd64/include/vmparam.h Sun Aug 18 23:07:56 2019 (r351198)
@@ -160,7 +160,9 @@
* 0xffff808000000000 - 0xffff847fffffffff large map (can be tuned up)
* 0xffff848000000000 - 0xfffff7ffffffffff unused (large map extends there)
* 0xfffff80000000000 - 0xfffffbffffffffff 4TB direct map
- * 0xfffffc0000000000 - 0xfffffdffffffffff unused
+ * 0xfffffc0000000000 - 0xfffffcffffffffff unused
+ * 0xfffffd0000000000 - 0xfffffd7fffffffff page array 512GB
+ * 0xfffffd8000000000 - 0xfffffdffffffffff unused
* 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map
*
* Within the kernel map:
@@ -175,6 +177,8 @@
#define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0)
#define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
+#define PA_MIN_ADDRESS KVADDR(PAPML4I, 0, 0, 0)
+
#define LARGEMAP_MIN_ADDRESS KVADDR(LMSPML4I, 0, 0, 0)
#define LARGEMAP_MAX_ADDRESS KVADDR(LMEPML4I + 1, 0, 0, 0)
@@ -210,6 +214,12 @@
("virtual address %#jx not covered by the DMAP", \
(uintmax_t)x)); \
(x) & ~DMAP_MIN_ADDRESS; })
+
+/*
+ * amd64 statically allocates the page array address so that it can
+ * be more easily allocated on the correct memory domains.
+ */
+#define PMAP_HAS_PAGE_ARRAY 1
/*
* How many physical pages per kmem arena virtual page.
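Taken together, the machine-dependent opt-in contract is small; the names
below are exactly the ones introduced in this diff, and no architecture
other than amd64 adopts them in this commit:

    /* machine/vmparam.h: take over placement of the page array. */
    #define PMAP_HAS_PAGE_ARRAY 1
    /* machine/vmparam.h: fixed KVA where vm_page_array will live. */
    #define PA_MIN_ADDRESS ...
    /* machine/pmap.h: hook called early in vm_page_startup() to map
     * backing memory for `count' vm_page entries at PA_MIN_ADDRESS. */
    void pmap_page_array_startup(long count);

vm_page.c then initializes vm_page_array to PA_MIN_ADDRESS at compile time
instead of assigning it during startup, as the next hunk shows.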
Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c Sun Aug 18 22:20:28 2019 (r351197)
+++ head/sys/vm/vm_page.c Sun Aug 18 23:07:56 2019 (r351198)
@@ -135,7 +135,11 @@ static int vm_pageproc_waiters;
*/
vm_page_t bogus_page;
+#ifdef PMAP_HAS_PAGE_ARRAY
+vm_page_t vm_page_array = (vm_page_t)PA_MIN_ADDRESS;
+#else
vm_page_t vm_page_array;
+#endif
long vm_page_array_size;
long first_page;
@@ -522,6 +526,31 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segi
pmap_page_init(m);
}
+#ifndef PMAP_HAS_PAGE_ARRAY
+static vm_paddr_t
+vm_page_array_alloc(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t page_range)
+{
+ vm_paddr_t new_end;
+
+ /*
+ * Reserve an unmapped guard page to trap access to vm_page_array[-1].
+ * However, because this page is allocated from KVM, out-of-bounds
+ * accesses using the direct map will not be trapped.
+ */
+ *vaddr += PAGE_SIZE;
+
+ /*
+ * Allocate physical memory for the page structures, and map it.
+ */
+ new_end = trunc_page(end - page_range * sizeof(struct vm_page));
+ vm_page_array = (vm_page_t)pmap_map(vaddr, new_end, end,
+ VM_PROT_READ | VM_PROT_WRITE);
+ vm_page_array_size = page_range;
+
+ return (new_end);
+}
+#endif
+
/*
* vm_page_startup:
*
@@ -693,6 +722,11 @@ vm_page_startup(vm_offset_t vaddr)
#error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
#endif
+#ifdef PMAP_HAS_PAGE_ARRAY
+ pmap_page_array_startup(size / PAGE_SIZE);
+ biggestone = vm_phys_avail_largest();
+ end = new_end = phys_avail[biggestone + 1];
+#else
#ifdef VM_PHYSSEG_DENSE
/*
* In the VM_PHYSSEG_DENSE case, the number of pages can account for
@@ -723,31 +757,15 @@ vm_page_startup(vm_offset_t vaddr)
}
}
end = new_end;
+ new_end = vm_page_array_alloc(&vaddr, end, page_range);
+#endif
- /*
- * Reserve an unmapped guard page to trap access to vm_page_array[-1].
- * However, because this page is allocated from KVM, out-of-bounds
- * accesses using the direct map will not be trapped.
- */
- vaddr += PAGE_SIZE;
-
- /*
- * Allocate physical memory for the page structures, and map it.
- */
- new_end = trunc_page(end - page_range * sizeof(struct vm_page));
- mapped = pmap_map(&vaddr, new_end, end,
- VM_PROT_READ | VM_PROT_WRITE);
- vm_page_array = (vm_page_t)mapped;
- vm_page_array_size = page_range;
-
#if VM_NRESERVLEVEL > 0
/*
* Allocate physical memory for the reservation management system's
* data structures, and map it.
*/
- if (high_avail == end)
- high_avail = new_end;
- new_end = vm_reserv_startup(&vaddr, new_end, high_avail);
+ new_end = vm_reserv_startup(&vaddr, new_end);
#endif
#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \
defined(__riscv)
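Since the array now sits at a fixed KVA on amd64, the dense-physseg page
lookup remains a pure index computation. For orientation, this mirrors the
existing VM_PHYSSEG_DENSE case of PHYS_TO_VM_PAGE() in vm_page.c (bounds
checks omitted); it is not added by this commit:

    static inline vm_page_t
    pa_to_vm_page(vm_paddr_t pa)
    {
            /* One vm_page per page frame, offset by first_page. */
            return (&vm_page_array[atop(pa) - first_page]);
    }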
Modified: head/sys/vm/vm_reserv.c
==============================================================================
--- head/sys/vm/vm_reserv.c Sun Aug 18 22:20:28 2019 (r351197)
+++ head/sys/vm/vm_reserv.c Sun Aug 18 23:07:56 2019 (r351198)
@@ -1360,10 +1360,23 @@ vm_reserv_size(int level)
* management system's data structures, in particular, the reservation array.
*/
vm_paddr_t
-vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
+vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end)
{
- vm_paddr_t new_end;
+ vm_paddr_t new_end, high_water;
size_t size;
+ int i;
+
+ high_water = phys_avail[1];
+ for (i = 0; i < vm_phys_nsegs; i++) {
+ if (vm_phys_segs[i].end > high_water)
+ high_water = vm_phys_segs[i].end;
+ }
+
+ /* Skip the first chunk. It is already accounted for. */
+ for (i = 2; phys_avail[i + 1] != 0; i += 2) {
+ if (phys_avail[i + 1] > high_water)
+ high_water = phys_avail[i + 1];
+ }
/*
* Calculate the size (in bytes) of the reservation array. Round up
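The caller-visible half of this change is in the vm_page.c hunk above:
vm_reserv_startup() now derives the high-water mark itself from
vm_phys_segs[] and the phys_avail[] map, so vm_page_startup() drops both
the high_avail argument and its `if (high_avail == end)' fixup:

    - new_end = vm_reserv_startup(&vaddr, new_end, high_avail);
    + new_end = vm_reserv_startup(&vaddr, new_end);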
Modified: head/sys/vm/vm_reserv.h
==============================================================================
--- head/sys/vm/vm_reserv.h Sun Aug 18 22:20:28 2019 (r351197)
+++ head/sys/vm/vm_reserv.h Sun Aug 18 23:07:56 2019 (r351198)
@@ -66,8 +66,7 @@ boolean_t vm_reserv_reclaim_inactive(int domain);
void vm_reserv_rename(vm_page_t m, vm_object_t new_object,
vm_object_t old_object, vm_pindex_t old_object_offset);
int vm_reserv_size(int level);
-vm_paddr_t vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,
- vm_paddr_t high_water);
+vm_paddr_t vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end);
vm_page_t vm_reserv_to_superpage(vm_page_t m);
#endif /* VM_NRESERVLEVEL > 0 */