svn commit: r361595 - in head/sys: amd64/amd64 i386/i386 vm

Mark Johnston markj at FreeBSD.org
Thu May 28 19:41:02 UTC 2020


Author: markj
Date: Thu May 28 19:41:00 2020
New Revision: 361595
URL: https://svnweb.freebsd.org/changeset/base/361595

Log:
  Fix boot on systems where NUMA domain 0 is unpopulated.
  
  - Add vm_phys_early_add_seg(), complementing vm_phys_early_alloc(), to
    ensure that segments registered during hammer_time() are placed in the
    right domain.  Otherwise, since the SRAT is not parsed at that point,
    we just add them to domain 0, which may be incorrect and results in a
    domain with only several MB worth of memory.
  - Fix uma_startup1() to try allocating memory for zones from any domain.
    Previously, if domain 0 was unpopulated, the allocation would simply
    fail, resulting in a page fault slightly later during boot.
  - Change _vm_phys_domain() to return -1 for addresses not covered by the
    affinity table, and change vm_phys_early_alloc() to handle wildcard
    domains.  This is necessary on amd64, where the page array is dense
    and pmap_page_array_startup() may allocate page table pages for
    non-existent page frames.
  
  Reported and tested by:	Rafael Kitover <rkitover at gmail.com>
  Reviewed by:	cem (earlier version), kib
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D25001

Modified:
  head/sys/amd64/amd64/machdep.c
  head/sys/amd64/amd64/pmap.c
  head/sys/i386/i386/machdep.c
  head/sys/i386/i386/pmap.c
  head/sys/vm/uma_core.c
  head/sys/vm/vm_phys.c
  head/sys/vm/vm_phys.h

Modified: head/sys/amd64/amd64/machdep.c
==============================================================================
--- head/sys/amd64/amd64/machdep.c	Thu May 28 19:14:44 2020	(r361594)
+++ head/sys/amd64/amd64/machdep.c	Thu May 28 19:41:00 2020	(r361595)
@@ -1223,7 +1223,7 @@ getmemsize(caddr_t kmdp, u_int64_t first)
 	 * Tell the physical memory allocator about pages used to store
 	 * the kernel and preloaded data.  See kmem_bootstrap_free().
 	 */
-	vm_phys_add_seg((vm_paddr_t)kernphys, trunc_page(first));
+	vm_phys_early_add_seg((vm_paddr_t)kernphys, trunc_page(first));
 
 	bzero(physmap, sizeof(physmap));
 	physmap_idx = 0;

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Thu May 28 19:14:44 2020	(r361594)
+++ head/sys/amd64/amd64/pmap.c	Thu May 28 19:41:00 2020	(r361595)
@@ -1700,7 +1700,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	 * are required for promotion of the corresponding kernel virtual
 	 * addresses to superpage mappings.
 	 */
-	vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt));
+	vm_phys_early_add_seg(KPTphys, KPTphys + ptoa(nkpt));
 
 	/*
 	 * Account for the virtual addresses mapped by create_pagetables().

Modified: head/sys/i386/i386/machdep.c
==============================================================================
--- head/sys/i386/i386/machdep.c	Thu May 28 19:14:44 2020	(r361594)
+++ head/sys/i386/i386/machdep.c	Thu May 28 19:41:00 2020	(r361595)
@@ -1828,7 +1828,7 @@ getmemsize(int first)
 	 * Tell the physical memory allocator about pages used to store
 	 * the kernel and preloaded data.  See kmem_bootstrap_free().
 	 */
-	vm_phys_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));
+	vm_phys_early_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));
 
 	TUNABLE_INT_FETCH("hw.above4g_allow", &above4g_allow);
 	TUNABLE_INT_FETCH("hw.above24g_allow", &above24g_allow);

Modified: head/sys/i386/i386/pmap.c
==============================================================================
--- head/sys/i386/i386/pmap.c	Thu May 28 19:14:44 2020	(r361594)
+++ head/sys/i386/i386/pmap.c	Thu May 28 19:41:00 2020	(r361595)
@@ -633,7 +633,7 @@ __CONCAT(PMTYPE, bootstrap)(vm_paddr_t firstaddr)
 	 * are required for promotion of the corresponding kernel virtual
 	 * addresses to superpage mappings.
 	 */
-	vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt));
+	vm_phys_early_add_seg(KPTphys, KPTphys + ptoa(nkpt));
 
 	/*
 	 * Initialize the first available kernel virtual address.

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c	Thu May 28 19:14:44 2020	(r361594)
+++ head/sys/vm/uma_core.c	Thu May 28 19:41:00 2020	(r361595)
@@ -2810,6 +2810,7 @@ uma_startup1(vm_offset_t virtual_avail)
 	size_t ksize, zsize, size;
 	uma_keg_t masterkeg;
 	uintptr_t m;
+	int domain;
 	uint8_t pflag;
 
 	bootstart = bootmem = virtual_avail;
@@ -2827,7 +2828,12 @@ uma_startup1(vm_offset_t virtual_avail)
 
 	/* Allocate the zone of zones, zone of kegs, and zone of zones keg. */
 	size = (zsize * 2) + ksize;
-	m = (uintptr_t)startup_alloc(NULL, size, 0, &pflag, M_NOWAIT | M_ZERO);
+	for (domain = 0; domain < vm_ndomains; domain++) {
+		m = (uintptr_t)startup_alloc(NULL, size, domain, &pflag,
+		    M_NOWAIT | M_ZERO);
+		if (m != 0)
+			break;
+	}
 	zones = (uma_zone_t)m;
 	m += zsize;
 	kegs = (uma_zone_t)m;
@@ -3191,6 +3197,17 @@ item_dtor(uma_zone_t zone, void *item, int size, void 
 	}
 }
 
+static int
+item_domain(void *item)
+{
+	int domain;
+
+	domain = _vm_phys_domain(vtophys(item));
+	KASSERT(domain >= 0 && domain < vm_ndomains,
+	    ("%s: unknown domain for item %p", __func__, item));
+	return (domain);
+}
+
 #if defined(INVARIANTS) || defined(DEBUG_MEMGUARD) || defined(WITNESS)
 #define	UMA_ZALLOC_DEBUG
 static int
@@ -4001,7 +4018,7 @@ uma_zfree_smr(uma_zone_t zone, void *item)
 	itemdomain = 0;
 #ifdef NUMA
 	if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
-		itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+		itemdomain = item_domain(item);
 #endif
 	critical_enter();
 	do {
@@ -4085,7 +4102,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata
 	itemdomain = 0;
 #ifdef NUMA
 	if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
-		itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+		itemdomain = item_domain(item);
 #endif
 	critical_enter();
 	do {
@@ -4159,7 +4176,7 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, 
 	ZONE_CROSS_LOCK(zone);
 	while (bucket->ub_cnt > 0) {
 		item = bucket->ub_bucket[bucket->ub_cnt - 1];
-		domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+		domain = item_domain(item);
 		zdom = ZDOM_GET(zone, domain);
 		if (zdom->uzd_cross == NULL) {
 			zdom->uzd_cross = bucket_alloc(zone, udata, M_NOWAIT);
@@ -4182,8 +4199,7 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, 
 
 	while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) {
 		STAILQ_REMOVE_HEAD(&fullbuckets, ub_link);
-		domain = _vm_phys_domain(pmap_kextract(
-		    (vm_offset_t)b->ub_bucket[0]));
+		domain = item_domain(b->ub_bucket[0]);
 		zone_put_bucket(zone, domain, b, udata, true);
 	}
 }

Modified: head/sys/vm/vm_phys.c
==============================================================================
--- head/sys/vm/vm_phys.c	Thu May 28 19:14:44 2020	(r361594)
+++ head/sys/vm/vm_phys.c	Thu May 28 19:41:00 2020	(r361595)
@@ -82,6 +82,8 @@ domainset_t __read_mostly all_domains = DOMAINSET_T_IN
 
 struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
 int __read_mostly vm_phys_nsegs;
+static struct vm_phys_seg vm_phys_early_segs[8];
+static int vm_phys_early_nsegs;
 
 struct vm_phys_fictitious_seg;
 static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
@@ -653,18 +655,16 @@ _vm_phys_domain(vm_paddr_t pa)
 #ifdef NUMA
 	int i;
 
-	if (vm_ndomains == 1 || mem_affinity == NULL)
+	if (vm_ndomains == 1)
 		return (0);
-
-	/*
-	 * Check for any memory that overlaps.
-	 */
 	for (i = 0; mem_affinity[i].end != 0; i++)
 		if (mem_affinity[i].start <= pa &&
 		    mem_affinity[i].end >= pa)
 			return (mem_affinity[i].domain);
-#endif
+	return (-1);
+#else
 	return (0);
+#endif
 }
 
 /*
@@ -1611,6 +1611,21 @@ vm_phys_avail_split(vm_paddr_t pa, int i)
 	return (0);
 }
 
+void
+vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end)
+{
+	struct vm_phys_seg *seg;
+
+	if (vm_phys_early_nsegs == -1)
+		panic("%s: called after initialization", __func__);
+	if (vm_phys_early_nsegs == nitems(vm_phys_early_segs))
+		panic("%s: ran out of early segments", __func__);
+
+	seg = &vm_phys_early_segs[vm_phys_early_nsegs++];
+	seg->start = start;
+	seg->end = end;
+}
+
 /*
  * This routine allocates NUMA node specific memory before the page
  * allocator is bootstrapped.
@@ -1621,6 +1636,8 @@ vm_phys_early_alloc(int domain, size_t alloc_size)
 	int i, mem_index, biggestone;
 	vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align;
 
+	KASSERT(domain == -1 || (domain >= 0 && domain < vm_ndomains),
+	    ("%s: invalid domain index %d", __func__, domain));
 
 	/*
 	 * Search the mem_affinity array for the biggest address
@@ -1633,11 +1650,11 @@ vm_phys_early_alloc(int domain, size_t alloc_size)
 	mem_end = -1;
 #ifdef NUMA
 	if (mem_affinity != NULL) {
-		for (i = 0; ; i++) {
+		for (i = 0;; i++) {
 			size = mem_affinity[i].end - mem_affinity[i].start;
 			if (size == 0)
 				break;
-			if (mem_affinity[i].domain != domain)
+			if (domain != -1 && mem_affinity[i].domain != domain)
 				continue;
 			if (size > biggestsize) {
 				mem_index = i;
@@ -1699,12 +1716,19 @@ vm_phys_early_alloc(int domain, size_t alloc_size)
 void
 vm_phys_early_startup(void)
 {
+	struct vm_phys_seg *seg;
 	int i;
 
 	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
 		phys_avail[i] = round_page(phys_avail[i]);
 		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
 	}
+
+	for (i = 0; i < vm_phys_early_nsegs; i++) {
+		seg = &vm_phys_early_segs[i];
+		vm_phys_add_seg(seg->start, seg->end);
+	}
+	vm_phys_early_nsegs = -1;
 
 #ifdef NUMA
 	/* Force phys_avail to be split by domain. */

Modified: head/sys/vm/vm_phys.h
==============================================================================
--- head/sys/vm/vm_phys.h	Thu May 28 19:14:44 2020	(r361594)
+++ head/sys/vm/vm_phys.h	Thu May 28 19:41:00 2020	(r361595)
@@ -103,6 +103,7 @@ vm_page_t vm_phys_scan_contig(int domain, u_long npage
 void vm_phys_set_pool(int pool, vm_page_t m, int order);
 boolean_t vm_phys_unfree_page(vm_page_t m);
 int vm_phys_mem_affinity(int f, int t);
+void vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end);
 vm_paddr_t vm_phys_early_alloc(int domain, size_t alloc_size);
 void vm_phys_early_startup(void);
 int vm_phys_avail_largest(void);


More information about the svn-src-head mailing list