svn commit: r329058 - in head/sys: kern vm

Gleb Smirnoff glebius at FreeBSD.org
Fri Feb 9 04:45:40 UTC 2018


Author: glebius
Date: Fri Feb  9 04:45:39 2018
New Revision: 329058
URL: https://svnweb.freebsd.org/changeset/base/329058

Log:
  Fix boot_pages exhaustion on machines with many domains and cores, where
  the size of a UMA zone allocation is greater than the page size. In this
  case the zone of zones cannot use UMA_MD_SMALL_ALLOC, and we need to
  postpone switching this zone off of startup_alloc() until the full launch
  of VM.
  
  o Always supply number of VM zones to uma_startup_count(). On machines
    with UMA_MD_SMALL_ALLOC ignore it completely, unless zsize goes over
    a page. In the latter case account VM zones for number of allocations
    from the zone of zones.
  o Rewrite startup_alloc() so that it immediately switches any zone that
    is already capable of using the real allocator away from itself.
    In the worst case scenario we may leak a single page here. See comment
    in uma_startup_count().
  o Hardcode the call to uma_startup2() into vm_mem_init(). Otherwise some
    extra SYSINITs, e.g. vm_page_init(), may sneak in before it.
  o While here, remove uma_boot_pages_mtx. With recent changes to boot
    pages calculation, we are guaranteed to use all of the boot_pages
    in the early single threaded stage.
  
  Reported & tested by:	mav

Modified:
  head/sys/kern/kern_malloc.c
  head/sys/vm/uma_core.c
  head/sys/vm/vm_init.c
  head/sys/vm/vm_page.c

Modified: head/sys/kern/kern_malloc.c
==============================================================================
--- head/sys/kern/kern_malloc.c	Fri Feb  9 03:07:12 2018	(r329057)
+++ head/sys/kern/kern_malloc.c	Fri Feb  9 04:45:39 2018	(r329058)
@@ -96,8 +96,6 @@ __FBSDID("$FreeBSD$");
 dtrace_malloc_probe_func_t	dtrace_malloc_probe;
 #endif
 
-extern void	uma_startup2(void);
-
 #if defined(INVARIANTS) || defined(MALLOC_MAKE_FAILURES) ||		\
     defined(DEBUG_MEMGUARD) || defined(DEBUG_REDZONE)
 #define	MALLOC_DEBUG	1
@@ -928,8 +926,6 @@ mallocinit(void *dummy)
 	mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
 
 	kmeminit();
-
-	uma_startup2();
 
 	if (kmem_zmax < PAGE_SIZE || kmem_zmax > KMEM_ZMAX)
 		kmem_zmax = KMEM_ZMAX;

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c	Fri Feb  9 03:07:12 2018	(r329057)
+++ head/sys/vm/uma_core.c	Fri Feb  9 04:45:39 2018	(r329058)
@@ -134,13 +134,10 @@ static struct rwlock_padalign __exclusive_cache_line u
 
 /*
  * Pointer and counter to pool of pages, that is preallocated at
- * startup to bootstrap UMA.  Early zones continue to use the pool
- * until it is depleted, so allocations may happen after boot, thus
- * we need a mutex to protect it.
+ * startup to bootstrap UMA.
  */
 static char *bootmem;
 static int boot_pages;
-static struct mtx uma_boot_pages_mtx;
 
 static struct sx uma_drain_lock;
 
@@ -1081,37 +1078,46 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, int do
 	int pages;
 
 	keg = zone_first_keg(zone);
-	pages = howmany(bytes, PAGE_SIZE);
-	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
 
 	/*
-	 * Check our small startup cache to see if it has pages remaining.
+	 * If we are in BOOT_BUCKETS or higher, then switch to real
+	 * allocator.  Zones with page sized slabs switch at BOOT_PAGEALLOC.
 	 */
-	mtx_lock(&uma_boot_pages_mtx);
-	if (pages <= boot_pages) {
-#ifdef DIAGNOSTIC
-		printf("%s from \"%s\", %d boot pages left\n", __func__,
-		    zone->uz_name, boot_pages);
+	switch (booted) {
+		case BOOT_COLD:
+		case BOOT_STRAPPED:
+			break;
+		case BOOT_PAGEALLOC:
+			if (keg->uk_ppera > 1)
+				break;
+		case BOOT_BUCKETS:
+		case BOOT_RUNNING:
+#ifdef UMA_MD_SMALL_ALLOC
+			keg->uk_allocf = (keg->uk_ppera > 1) ?
+			    page_alloc : uma_small_alloc;
+#else
+			keg->uk_allocf = page_alloc;
 #endif
-		mem = bootmem;
-		boot_pages -= pages;
-		bootmem += pages * PAGE_SIZE;
-		mtx_unlock(&uma_boot_pages_mtx);
-		*pflag = UMA_SLAB_BOOT;
-		return (mem);
+			return keg->uk_allocf(zone, bytes, domain, pflag, wait);
 	}
-	mtx_unlock(&uma_boot_pages_mtx);
-	if (booted < BOOT_PAGEALLOC)
-		panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name);
+
 	/*
-	 * Now that we've booted reset these users to their real allocator.
+	 * Check our small startup cache to see if it has pages remaining.
 	 */
-#ifdef UMA_MD_SMALL_ALLOC
-	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
-#else
-	keg->uk_allocf = page_alloc;
+	pages = howmany(bytes, PAGE_SIZE);
+	KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__));
+	if (pages > boot_pages)
+		panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name);
+#ifdef DIAGNOSTIC
+	printf("%s from \"%s\", %d boot pages left\n", __func__, zone->uz_name,
+	    boot_pages);
 #endif
-	return keg->uk_allocf(zone, bytes, domain, pflag, wait);
+	mem = bootmem;
+	boot_pages -= pages;
+	bootmem += pages * PAGE_SIZE;
+	*pflag = UMA_SLAB_BOOT;
+
+	return (mem);
 }
 
 /*
@@ -1789,9 +1795,9 @@ zone_foreach(void (*zfunc)(uma_zone_t))
 #define	UMA_BOOT_ALIGN	32
 static int zsize, ksize;
 int
-uma_startup_count(int zones)
+uma_startup_count(int vm_zones)
 {
-	int pages;
+	int zones, pages;
 
 	ksize = sizeof(struct uma_keg) +
 	    (sizeof(struct uma_domain) * vm_ndomains);
@@ -1806,12 +1812,17 @@ uma_startup_count(int zones)
 	pages = howmany(roundup(zsize, CACHE_LINE_SIZE) * 2 +
 	    roundup(ksize, CACHE_LINE_SIZE), PAGE_SIZE);
 
-	zones += UMA_BOOT_ZONES;
+#ifdef	UMA_MD_SMALL_ALLOC
+	zones = UMA_BOOT_ZONES;
+#else
+	zones = UMA_BOOT_ZONES + vm_zones;
+	vm_zones = 0;
+#endif
 
 	/* Memory for the rest of startup zones, UMA and VM, ... */
 	if (zsize > UMA_SLAB_SIZE)
-		pages += zones * howmany(roundup2(zsize, UMA_BOOT_ALIGN),
-		    UMA_SLAB_SIZE);
+		pages += (zones + vm_zones) *
+		    howmany(roundup2(zsize, UMA_BOOT_ALIGN), UMA_SLAB_SIZE);
 	else
 		pages += howmany(zones,
 		    UMA_SLAB_SPACE / roundup2(zsize, UMA_BOOT_ALIGN));
@@ -1872,7 +1883,6 @@ uma_startup(void *mem, int npages)
 	args.flags = UMA_ZFLAG_INTERNAL;
 	zone_ctor(kegs, zsize, &args, M_WAITOK);
 
-	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
 	bootmem = mem;
 	boot_pages = npages;
 
@@ -1917,6 +1927,9 @@ void
 uma_startup2(void)
 {
 
+#ifdef DIAGNOSTIC
+	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
+#endif
 	booted = BOOT_BUCKETS;
 	sx_init(&uma_drain_lock, "umadrain");
 	bucket_enable();

Modified: head/sys/vm/vm_init.c
==============================================================================
--- head/sys/vm/vm_init.c	Fri Feb  9 03:07:12 2018	(r329057)
+++ head/sys/vm/vm_init.c	Fri Feb  9 04:45:39 2018	(r329058)
@@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_extern.h>
 
 extern void	uma_startup1(void);
+extern void	uma_startup2(void);
 extern void	vm_radix_reserve_kva(void);
 
 #if VM_NRESERVLEVEL > 0
@@ -183,9 +184,9 @@ vm_mem_init(dummy)
 #ifndef	UMA_MD_SMALL_ALLOC
 	/* Set up radix zone to use noobj_alloc. */
 	vm_radix_reserve_kva();
-	/* Announce page availability to UMA. */
-	uma_startup1();
 #endif
+	/* Announce full page availability to UMA. */
+	uma_startup2();
 	kmem_init_zero_region();
 	pmap_init();
 	vm_pager_init();

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Fri Feb  9 03:07:12 2018	(r329057)
+++ head/sys/vm/vm_page.c	Fri Feb  9 04:45:39 2018	(r329058)
@@ -506,16 +506,13 @@ vm_page_startup(vm_offset_t vaddr)
 	 * Allocate memory for use when boot strapping the kernel memory
 	 * allocator.  Tell UMA how many zones we are going to create
 	 * before going fully functional.  UMA will add its zones.
-	 */
-#ifdef UMA_MD_SMALL_ALLOC
-	boot_pages = uma_startup_count(0);
-#else
-	/*
+	 *
 	 * VM startup zones: vmem, vmem_btag, VM OBJECT, RADIX NODE, MAP,
 	 * KMAP ENTRY, MAP ENTRY, VMSPACE.
 	 */
 	boot_pages = uma_startup_count(8);
 
+#ifndef UMA_MD_SMALL_ALLOC
 	/* vmem_startup() calls uma_prealloc(). */
 	boot_pages += vmem_startup_count();
 	/* vm_map_startup() calls uma_prealloc(). */


More information about the svn-src-all mailing list