svn commit: r356349 - in head: lib/libmemstat sys/vm

Jeff Roberson jeff at FreeBSD.org
Sat Jan 4 03:30:09 UTC 2020


Author: jeff
Date: Sat Jan  4 03:30:08 2020
New Revision: 356349
URL: https://svnweb.freebsd.org/changeset/base/356349

Log:
  Use per-domain keg locks.  This provides both a lock and separate space
  accounting for each NUMA domain.  Independent keg domain locks are important
  with cross-domain frees.  Hashed zones are non-NUMA and use a single keg
  lock to protect the hash table.
  
  Reviewed by:	markj, rlibby
  Differential Revision:	https://reviews.freebsd.org/D22829
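  
  In outline, the new locking discipline is (an illustrative sketch,
  not code from the diff below; identifiers match the patch):
  
	/*
	 * Freeing an item takes only the lock of the domain that owns
	 * the slab; hashed zones keep all of their pages in domain 0.
	 */
	int dom;

	dom = (keg->uk_flags & UMA_ZONE_HASH) != 0 ? 0 : slab->us_domain;
	KEG_LOCK(keg, dom);
	slab_free_item(zone, slab, item);
	KEG_UNLOCK(keg, dom);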

Modified:
  head/lib/libmemstat/memstat_uma.c
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/lib/libmemstat/memstat_uma.c
==============================================================================
--- head/lib/libmemstat/memstat_uma.c	Sat Jan  4 03:15:34 2020	(r356348)
+++ head/lib/libmemstat/memstat_uma.c	Sat Jan  4 03:30:08 2020	(r356349)
@@ -311,10 +311,12 @@ memstat_kvm_uma(struct memory_type_list *list, void *k
 	LIST_HEAD(, uma_keg) uma_kegs;
 	struct memory_type *mtp;
 	struct uma_zone_domain uzd;
+	struct uma_domain ukd;
 	struct uma_bucket *ubp, ub;
 	struct uma_cache *ucp, *ucp_array;
 	struct uma_zone *uzp, uz;
 	struct uma_keg *kzp, kz;
+	uint64_t kegfree;
 	int hint_dontsearch, i, mp_maxid, ndomains, ret;
 	char name[MEMTYPE_MAXNAME];
 	cpuset_t all_cpus;
@@ -454,18 +456,29 @@ skip_percpu:
 			for (i = 0; i < ndomains; i++) {
 				ret = kread(kvm, &uz.uz_domain[i], &uzd,
 				   sizeof(uzd), 0);
+				if (ret != 0)
+					continue;
 				for (ubp =
 				    TAILQ_FIRST(&uzd.uzd_buckets);
 				    ubp != NULL;
 				    ubp = TAILQ_NEXT(&ub, ub_link)) {
 					ret = kread(kvm, ubp, &ub,
 					   sizeof(ub), 0);
+					if (ret != 0)
+						break;
 					mtp->mt_zonefree += ub.ub_cnt;
 				}
 			}
 			if (!((kz.uk_flags & UMA_ZONE_SECONDARY) &&
 			    LIST_FIRST(&kz.uk_zones) != uzp)) {
-				mtp->mt_kegfree = kz.uk_free;
+				kegfree = 0;
+				for (i = 0; i < ndomains; i++) {
+					ret = kread(kvm, &kzp->uk_domain[i],
+					    &ukd, sizeof(ukd), 0);
+					if (ret == 0)
+						kegfree += ukd.ud_free;
+				}
+				mtp->mt_kegfree = kegfree;
 				mtp->mt_free += mtp->mt_kegfree;
 			}
 			mtp->mt_free += mtp->mt_zonefree;
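
With uk_free and uk_pages now split across domains, userland consumers
such as libmemstat must sum the per-domain counters to recover keg-wide
totals.  A condensed sketch of the loop added above (assuming, as
elsewhere in this file, that kread() returns 0 on success):

	uint64_t kegfree = 0;

	for (i = 0; i < ndomains; i++)
		if (kread(kvm, &kzp->uk_domain[i], &ukd, sizeof(ukd), 0) == 0)
			kegfree += ukd.ud_free;
	mtp->mt_kegfree = kegfree;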

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c	Sat Jan  4 03:15:34 2020	(r356348)
+++ head/sys/vm/uma_core.c	Sat Jan  4 03:30:08 2020	(r356349)
@@ -740,23 +740,28 @@ static void
 zone_timeout(uma_zone_t zone, void *unused)
 {
 	uma_keg_t keg;
-	u_int slabs;
+	u_int slabs, pages;
 
 	if ((zone->uz_flags & UMA_ZONE_HASH) == 0)
 		goto update_wss;
 
 	keg = zone->uz_keg;
-	KEG_LOCK(keg);
+
 	/*
+	 * Hash zones are non-NUMA by definition, so the first domain
+	 * is the only one present.
+	 */
+	KEG_LOCK(keg, 0);
+	pages = keg->uk_domain[0].ud_pages;
+
+	/*
 	 * Expand the keg hash table.
 	 *
 	 * This is done if the number of slabs is larger than the hash size.
 	 * What I'm trying to do here is completely reduce collisions.  This
 	 * may be a little aggressive.  Should I allow for two collisions max?
 	 */
-	if (keg->uk_flags & UMA_ZONE_HASH &&
-	    (slabs = keg->uk_pages / keg->uk_ppera) >
-	     keg->uk_hash.uh_hashsize) {
+	if ((slabs = pages / keg->uk_ppera) > keg->uk_hash.uh_hashsize) {
 		struct uma_hash newhash;
 		struct uma_hash oldhash;
 		int ret;
@@ -767,9 +772,9 @@ zone_timeout(uma_zone_t zone, void *unused)
 		 * I have to do everything in stages and check for
 		 * races.
 		 */
-		KEG_UNLOCK(keg);
+		KEG_UNLOCK(keg, 0);
 		ret = hash_alloc(&newhash, 1 << fls(slabs));
-		KEG_LOCK(keg);
+		KEG_LOCK(keg, 0);
 		if (ret) {
 			if (hash_expand(&keg->uk_hash, &newhash)) {
 				oldhash = keg->uk_hash;
@@ -777,12 +782,12 @@ zone_timeout(uma_zone_t zone, void *unused)
 			} else
 				oldhash = newhash;
 
-			KEG_UNLOCK(keg);
+			KEG_UNLOCK(keg, 0);
 			hash_free(&oldhash);
-			return;
+			goto update_wss;
 		}
 	}
-	KEG_UNLOCK(keg);
+	KEG_UNLOCK(keg, 0);
 
 update_wss:
 	ZONE_LOCK(zone);
@@ -1166,7 +1171,7 @@ keg_drain(uma_keg_t keg)
 	struct slabhead freeslabs = { 0 };
 	uma_domain_t dom;
 	uma_slab_t slab, tmp;
-	int i;
+	int i, n;
 
 	/*
 	 * We don't want to take pages from statically allocated kegs at this
@@ -1175,33 +1180,27 @@ keg_drain(uma_keg_t keg)
 	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
 		return;
 
-	CTR3(KTR_UMA, "keg_drain %s(%p) free items: %u",
-	    keg->uk_name, keg, keg->uk_free);
-	KEG_LOCK(keg);
-	if (keg->uk_free == 0)
-		goto finished;
-
 	for (i = 0; i < vm_ndomains; i++) {
+		n = 0;
 		dom = &keg->uk_domain[i];
+		CTR4(KTR_UMA, "keg_drain %s(%p) domain %d free items: %u",
+		    keg->uk_name, keg, i, dom->ud_free);
+		KEG_LOCK(keg, i);
 		LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) {
 			/* We have nowhere to free these to. */
 			if (slab->us_flags & UMA_SLAB_BOOT)
 				continue;
-
-			LIST_REMOVE(slab, us_link);
-			keg->uk_pages -= keg->uk_ppera;
-			keg->uk_free -= keg->uk_ipers;
-
 			if (keg->uk_flags & UMA_ZONE_HASH)
 				UMA_HASH_REMOVE(&keg->uk_hash, slab);
-
+			n++;
+			LIST_REMOVE(slab, us_link);
 			LIST_INSERT_HEAD(&freeslabs, slab, us_link);
 		}
+		dom->ud_pages -= n * keg->uk_ppera;
+		dom->ud_free -= n * keg->uk_ipers;
+		KEG_UNLOCK(keg, i);
 	}
 
-finished:
-	KEG_UNLOCK(keg);
-
 	while ((slab = LIST_FIRST(&freeslabs)) != NULL) {
 		LIST_REMOVE(slab, us_link);
 		keg_free_slab(keg, slab, keg->uk_ipers);
@@ -1257,9 +1256,9 @@ zone_trim(uma_zone_t zone, void *unused)
 }
 
 /*
- * Allocate a new slab for a keg.  This does not insert the slab onto a list.
- * The keg should be locked on entry and will be dropped and reacquired on
- * return.
+ * Allocate a new slab for a keg and insert it into the partial slab list.
+ * The keg should be unlocked on entry.  If the allocation succeeds, the
+ * keg's per-domain lock will be held on return.
  *
  * Arguments:
  *	flags   Wait flags for the item initialization routine
@@ -1273,6 +1272,7 @@ static uma_slab_t
 keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags,
     int aflags)
 {
+	uma_domain_t dom;
 	uma_alloc allocf;
 	uma_slab_t slab;
 	unsigned long size;
@@ -1282,10 +1282,8 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int dom
 
 	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("keg_alloc_slab: domain %d out of range", domain));
-	KEG_LOCK_ASSERT(keg);
-	allocf = keg->uk_allocf;
-	KEG_UNLOCK(keg);
 
+	allocf = keg->uk_allocf;
 	slab = NULL;
 	mem = NULL;
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
@@ -1319,6 +1317,10 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int dom
 	}
 	uma_total_inc(size);
 
+	/* For HASH zones all pages go to the same uma_domain. */
+	if ((keg->uk_flags & UMA_ZONE_HASH) != 0)
+		domain = 0;
+
 	/* Point the slab into the allocated memory */
 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
 		slab = (uma_slab_t )(mem + keg->uk_pgoff);
@@ -1333,6 +1335,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int dom
 	slab->us_freecount = keg->uk_ipers;
 	slab->us_flags = sflags;
 	slab->us_domain = domain;
+
 	BIT_FILL(keg->uk_ipers, &slab->us_free);
 #ifdef INVARIANTS
 	BIT_ZERO(keg->uk_ipers, slab_dbg_bits(slab, keg));
@@ -1348,7 +1351,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int dom
 			goto fail;
 		}
 	}
-	KEG_LOCK(keg);
+	KEG_LOCK(keg, domain);
 
 	CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
 	    slab, keg->uk_name, keg);
@@ -1356,13 +1359,19 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int dom
 	if (keg->uk_flags & UMA_ZONE_HASH)
 		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
 
-	keg->uk_pages += keg->uk_ppera;
-	keg->uk_free += keg->uk_ipers;
+	/*
+	 * If we got a slab here it's safe to mark it partially used
+	 * and return.  We assume that the caller is going to remove
+	 * at least one item.
+	 */
+	dom = &keg->uk_domain[domain];
+	LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
+	dom->ud_pages += keg->uk_ppera;
+	dom->ud_free += keg->uk_ipers;
 
 	return (slab);
 
 fail:
-	KEG_LOCK(keg);
 	return (NULL);
 }
 
@@ -1875,15 +1884,14 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 	struct uma_kctor_args *arg = udata;
 	uma_keg_t keg = mem;
 	uma_zone_t zone;
+	int i;
 
 	bzero(keg, size);
 	keg->uk_size = arg->size;
 	keg->uk_init = arg->uminit;
 	keg->uk_fini = arg->fini;
 	keg->uk_align = arg->align;
-	keg->uk_free = 0;
 	keg->uk_reserve = 0;
-	keg->uk_pages = 0;
 	keg->uk_flags = arg->flags;
 	keg->uk_slabzone = NULL;
 
@@ -1926,6 +1934,15 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 			keg_small_init(keg);
 	}
 
+	/*
+	 * Set all kegs whose memory comes from the page array to a
+	 * first-touch domain policy.
+	 */
+#ifdef UMA_FIRSTTOUCH
+	if ((keg->uk_flags & UMA_ZONE_HASH) == 0)
+		keg->uk_flags |= UMA_ZONE_NUMA;
+#endif
+
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		keg->uk_slabzone = slabzone;
 
@@ -1954,9 +1971,10 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 		keg->uk_freef = page_free;
 
 	/*
-	 * Initialize keg's lock
+	 * Initialize keg's locks.
 	 */
-	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
+	for (i = 0; i < vm_ndomains; i++)
+		KEG_LOCK_INIT(keg, i, (arg->flags & UMA_ZONE_MTXCLASS));
 
 	/*
 	 * If we're putting the slab header in the actual page we need to
@@ -1983,10 +2001,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 	if (keg->uk_flags & UMA_ZONE_HASH)
 		hash_alloc(&keg->uk_hash, 0);
 
-	CTR5(KTR_UMA, "keg_ctor %p zone %s(%p) out %d free %d\n",
-	    keg, zone->uz_name, zone,
-	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
-	    keg->uk_free);
+	CTR3(KTR_UMA, "keg_ctor %p zone %s(%p)\n", keg, zone->uz_name, zone);
 
 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
 
@@ -2009,6 +2024,7 @@ static void
 zone_alloc_sysctl(uma_zone_t zone, void *unused)
 {
 	uma_zone_domain_t zdom;
+	uma_domain_t dom;
 	uma_keg_t keg;
 	struct sysctl_oid *oid, *domainoid;
 	int domains, i, cnt;
@@ -2056,6 +2072,10 @@ zone_alloc_sysctl(uma_zone_t zone, void *unused)
 	/*
 	 * keg if present.
 	 */
+	if ((zone->uz_flags & UMA_ZONE_HASH) == 0)
+		domains = vm_ndomains;
+	else
+		domains = 1;
 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
 	    "keg", CTLFLAG_RD, NULL, "");
 	keg = zone->uz_keg;
@@ -2074,16 +2094,24 @@ zone_alloc_sysctl(uma_zone_t zone, void *unused)
 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "align", CTLFLAG_RD, &keg->uk_align, 0,
 		    "item alignment mask");
-		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
-		    "pages", CTLFLAG_RD, &keg->uk_pages, 0,
-		    "Total pages currently allocated from VM");
-		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
-		    "free", CTLFLAG_RD, &keg->uk_free, 0,
-		    "items free in the slab layer");
 		SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "efficiency", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
 		    keg, 0, sysctl_handle_uma_slab_efficiency, "I",
 		    "Slab utilization (100 - internal fragmentation %)");
+		domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(oid),
+		    OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
+		for (i = 0; i < domains; i++) {
+			dom = &keg->uk_domain[i];
+			oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
+			    OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD,
+			    NULL, "");
+			SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+			    "pages", CTLFLAG_RD, &dom->ud_pages, 0,
+			    "Total pages currently allocated from VM");
+			SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+			    "free", CTLFLAG_RD, &dom->ud_free, 0,
+			    "items free in the slab layer");
+		}
 	} else
 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
 		    "name", CTLFLAG_RD, nokeg, "Keg name");
@@ -2114,14 +2142,12 @@ zone_alloc_sysctl(uma_zone_t zone, void *unused)
 	    "Number of items in the bucket cache");
 
 	/*
-	 * Per-domain information.
+	 * Per-domain zone information.
 	 */
-	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
-		domains = vm_ndomains;
-	else
-		domains = 1;
 	domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid),
 	    OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
+	if ((zone->uz_flags & UMA_ZONE_NUMA) == 0)
+		domains = 1;
 	for (i = 0; i < domains; i++) {
 		zdom = &zone->uz_domain[i];
 		oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
@@ -2359,20 +2385,23 @@ static void
 keg_dtor(void *arg, int size, void *udata)
 {
 	uma_keg_t keg;
+	uint32_t free, pages;
+	int i;
 
 	keg = (uma_keg_t)arg;
-	KEG_LOCK(keg);
-	if (keg->uk_free != 0) {
-		printf("Freed UMA keg (%s) was not empty (%d items). "
-		    " Lost %d pages of memory.\n",
-		    keg->uk_name ? keg->uk_name : "",
-		    keg->uk_free, keg->uk_pages);
+	free = pages = 0;
+	for (i = 0; i < vm_ndomains; i++) {
+		free += keg->uk_domain[i].ud_free;
+		pages += keg->uk_domain[i].ud_pages;
+		KEG_LOCK_FINI(keg, i);
 	}
-	KEG_UNLOCK(keg);
+	if (free != 0)
+		printf("Freed UMA keg (%s) was not empty (%u items). "
+		    " Lost %u pages of memory.\n",
+		    keg->uk_name ? keg->uk_name : "",
+		    free, pages);
 
 	hash_free(&keg->uk_hash);
-
-	KEG_LOCK_FINI(keg);
 }
 
 /*
@@ -2659,11 +2688,6 @@ uma_zcreate(const char *name, size_t size, uma_ctor ct
 	KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
 	    align, name));
 
-	/* Sets all zones to a first-touch domain policy. */
-#ifdef UMA_FIRSTTOUCH
-	flags |= UMA_ZONE_NUMA;
-#endif
-
 	/* This stuff is essential for the zone ctor */
 	memset(&args, 0, sizeof(args));
 	args.name = name;
@@ -3135,7 +3159,7 @@ keg_first_slab(uma_keg_t keg, int domain, bool rr)
 
 	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("keg_first_slab: domain %d out of range", domain));
-	KEG_LOCK_ASSERT(keg);
+	KEG_LOCK_ASSERT(keg, domain);
 
 	slab = NULL;
 	start = domain;
@@ -3156,31 +3180,39 @@ keg_first_slab(uma_keg_t keg, int domain, bool rr)
 	return (NULL);
 }
 
+/*
+ * Fetch an existing slab from a free or partial list.  Returns with the
+ * keg domain lock held if a slab was found or unlocked if not.
+ */
 static uma_slab_t
 keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags)
 {
+	uma_slab_t slab;
 	uint32_t reserve;
 
-	KEG_LOCK_ASSERT(keg);
+	/* HASH has a single free list. */
+	if ((keg->uk_flags & UMA_ZONE_HASH) != 0)
+		domain = 0;
 
+	KEG_LOCK(keg, domain);
 	reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
-	if (keg->uk_free <= reserve)
+	if (keg->uk_domain[domain].ud_free <= reserve ||
+	    (slab = keg_first_slab(keg, domain, rr)) == NULL) {
+		KEG_UNLOCK(keg, domain);
 		return (NULL);
-	return (keg_first_slab(keg, domain, rr));
+	}
+	return (slab);
 }
 
 static uma_slab_t
 keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags)
 {
 	struct vm_domainset_iter di;
-	uma_domain_t dom;
 	uma_slab_t slab;
 	int aflags, domain;
 	bool rr;
 
 restart:
-	KEG_LOCK_ASSERT(keg);
-
 	/*
 	 * Use the keg's policy if upper layers haven't already specified a
 	 * domain (as happens with first-touch zones).
@@ -3211,23 +3243,13 @@ restart:
 			break;
 
 		slab = keg_alloc_slab(keg, zone, domain, flags, aflags);
-		/*
-		 * If we got a slab here it's safe to mark it partially used
-		 * and return.  We assume that the caller is going to remove
-		 * at least one item.
-		 */
-		if (slab) {
-			dom = &keg->uk_domain[slab->us_domain];
-			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
+		if (slab != NULL)
 			return (slab);
-		}
 		if (!rr && (flags & M_WAITOK) == 0)
 			break;
 		if (rr && vm_domainset_iter_policy(&di, &domain) != 0) {
 			if ((flags & M_WAITOK) != 0) {
-				KEG_UNLOCK(keg);
 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
-				KEG_LOCK(keg);
 				goto restart;
 			}
 			break;
@@ -3239,9 +3261,9 @@ restart:
 	 * could have while we were unlocked.  Check again before we
 	 * fail.
 	 */
-	if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL) {
+	if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL)
 		return (slab);
-	}
+
 	return (NULL);
 }
 
@@ -3252,18 +3274,18 @@ slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
 	void *item;
 	uint8_t freei;
 
-	KEG_LOCK_ASSERT(keg);
+	KEG_LOCK_ASSERT(keg, slab->us_domain);
 
+	dom = &keg->uk_domain[slab->us_domain];
 	freei = BIT_FFS(keg->uk_ipers, &slab->us_free) - 1;
 	BIT_CLR(keg->uk_ipers, freei, &slab->us_free);
 	item = slab_item(slab, keg, freei);
 	slab->us_freecount--;
-	keg->uk_free--;
+	dom->ud_free--;
 
 	/* Move this slab to the full list */
 	if (slab->us_freecount == 0) {
 		LIST_REMOVE(slab, us_link);
-		dom = &keg->uk_domain[slab->us_domain];
 		LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link);
 	}
 
@@ -3273,6 +3295,7 @@ slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
 static int
 zone_import(void *arg, void **bucket, int max, int domain, int flags)
 {
+	uma_domain_t dom;
 	uma_zone_t zone;
 	uma_slab_t slab;
 	uma_keg_t keg;
@@ -3284,7 +3307,6 @@ zone_import(void *arg, void **bucket, int max, int dom
 	zone = arg;
 	slab = NULL;
 	keg = zone->uz_keg;
-	KEG_LOCK(keg);
 	/* Try to keep the buckets totally full */
 	for (i = 0; i < max; ) {
 		if ((slab = keg_fetch_slab(keg, zone, domain, flags)) == NULL)
@@ -3292,9 +3314,10 @@ zone_import(void *arg, void **bucket, int max, int dom
 #ifdef NUMA
 		stripe = howmany(max, vm_ndomains);
 #endif
+		dom = &keg->uk_domain[slab->us_domain];
 		while (slab->us_freecount && i < max) { 
 			bucket[i++] = slab_alloc_item(keg, slab);
-			if (keg->uk_free <= keg->uk_reserve)
+			if (dom->ud_free <= keg->uk_reserve)
 				break;
 #ifdef NUMA
 			/*
@@ -3310,11 +3333,11 @@ zone_import(void *arg, void **bucket, int max, int dom
 				break;
 #endif
 		}
+		KEG_UNLOCK(keg, slab->us_domain);
 		/* Don't block if we allocated any successfully. */
 		flags &= ~M_WAITOK;
 		flags |= M_NOWAIT;
 	}
-	KEG_UNLOCK(keg);
 
 	return i;
 }
@@ -3866,11 +3889,10 @@ slab_free_item(uma_zone_t zone, uma_slab_t slab, void 
 	uint8_t freei;
 
 	keg = zone->uz_keg;
-	KEG_LOCK_ASSERT(keg);
+	KEG_LOCK_ASSERT(keg, slab->us_domain);
 
-	dom = &keg->uk_domain[slab->us_domain];
-
 	/* Do we need to remove from any lists? */
+	dom = &keg->uk_domain[slab->us_domain];
 	if (slab->us_freecount+1 == keg->uk_ipers) {
 		LIST_REMOVE(slab, us_link);
 		LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
@@ -3885,37 +3907,45 @@ slab_free_item(uma_zone_t zone, uma_slab_t slab, void 
 	slab->us_freecount++;
 
 	/* Keg statistics. */
-	keg->uk_free++;
+	dom->ud_free++;
 }
 
 static void
 zone_release(void *arg, void **bucket, int cnt)
 {
+	struct mtx *lock;
 	uma_zone_t zone;
-	void *item;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	uint8_t *mem;
+	void *item;
 	int i;
 
 	zone = arg;
 	keg = zone->uz_keg;
-	KEG_LOCK(keg);
+	lock = NULL;
+	if (__predict_false((zone->uz_flags & UMA_ZONE_HASH) != 0))
+		lock = KEG_LOCK(keg, 0);
 	for (i = 0; i < cnt; i++) {
 		item = bucket[i];
-		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
+		if (__predict_true((zone->uz_flags & UMA_ZONE_VTOSLAB) != 0)) {
+			slab = vtoslab((vm_offset_t)item);
+		} else {
 			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
-			if (zone->uz_flags & UMA_ZONE_HASH) {
+			if ((zone->uz_flags & UMA_ZONE_HASH) != 0)
 				slab = hash_sfind(&keg->uk_hash, mem);
-			} else {
-				mem += keg->uk_pgoff;
-				slab = (uma_slab_t)mem;
-			}
-		} else
-			slab = vtoslab((vm_offset_t)item);
+			else
+				slab = (uma_slab_t)(mem + keg->uk_pgoff);
+		}
+		if (lock != KEG_LOCKPTR(keg, slab->us_domain)) {
+			if (lock != NULL)
+				mtx_unlock(lock);
+			lock = KEG_LOCK(keg, slab->us_domain);
+		}
 		slab_free_item(zone, slab, item);
 	}
-	KEG_UNLOCK(keg);
+	if (lock != NULL)
+		mtx_unlock(lock);
 }
 
 /*
@@ -4203,7 +4233,6 @@ uma_prealloc(uma_zone_t zone, int items)
 	int aflags, domain, slabs;
 
 	KEG_GET(zone, keg);
-	KEG_LOCK(keg);
 	slabs = items / keg->uk_ipers;
 	if (slabs * keg->uk_ipers < items)
 		slabs++;
@@ -4216,18 +4245,16 @@ uma_prealloc(uma_zone_t zone, int items)
 			    aflags);
 			if (slab != NULL) {
 				dom = &keg->uk_domain[slab->us_domain];
+				LIST_REMOVE(slab, us_link);
 				LIST_INSERT_HEAD(&dom->ud_free_slab, slab,
 				    us_link);
+				KEG_UNLOCK(keg, slab->us_domain);
 				break;
 			}
-			if (vm_domainset_iter_policy(&di, &domain) != 0) {
-				KEG_UNLOCK(keg);
+			if (vm_domainset_iter_policy(&di, &domain) != 0)
 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
-				KEG_LOCK(keg);
-			}
 		}
 	}
-	KEG_UNLOCK(keg);
 }
 
 /* See uma.h */
@@ -4465,6 +4492,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
 	uma_keg_t kz;
 	uma_zone_t z;
 	uint64_t items;
+	uint32_t kfree, pages;
 	int count, error, i;
 
 	error = sysctl_wire_old_buffer(req, 0);
@@ -4494,6 +4522,11 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
 	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
 
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
+		kfree = pages = 0;
+		for (i = 0; i < vm_ndomains; i++) {
+			kfree += kz->uk_domain[i].ud_free;
+			pages += kz->uk_domain[i].ud_pages;
+		}
 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 			bzero(&uth, sizeof(uth));
 			ZONE_LOCK(z);
@@ -4506,11 +4539,11 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
 				uth.uth_pages = (items / kz->uk_ipers) *
 					kz->uk_ppera;
 			} else
-				uth.uth_pages = kz->uk_pages;
+				uth.uth_pages = pages;
 			uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) *
 			    kz->uk_ppera;
 			uth.uth_limit = z->uz_max_items;
-			uth.uth_keg_free = z->uz_keg->uk_free;
+			uth.uth_keg_free = kfree;
 
 			/*
 			 * A zone is secondary if it is not the first entry
@@ -4670,9 +4703,9 @@ uma_dbg_getslab(uma_zone_t zone, void *item)
 	keg = zone->uz_keg;
 	if ((keg->uk_flags & UMA_ZONE_HASH) == 0)
 		return ((uma_slab_t)(mem + keg->uk_pgoff));
-	KEG_LOCK(keg);
+	KEG_LOCK(keg, 0);
 	slab = hash_sfind(&keg->uk_hash, mem);
-	KEG_UNLOCK(keg);
+	KEG_UNLOCK(keg, 0);
 
 	return (slab);
 }
@@ -4791,11 +4824,12 @@ get_uma_stats(uma_keg_t kz, uma_zone_t z, uint64_t *al
 	} else
 		uma_zone_sumstat(z, cachefree, allocs, &frees, sleeps,
 		    xdomain);
-	if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
-	    (LIST_FIRST(&kz->uk_zones) != z)))
-		*cachefree += kz->uk_free;
-	for (i = 0; i < vm_ndomains; i++)
+	for (i = 0; i < vm_ndomains; i++) {
 		*cachefree += z->uz_domain[i].uzd_nitems;
+		if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
+		    (LIST_FIRST(&kz->uk_zones) != z)))
+			*cachefree += kz->uk_domain[i].ud_free;
+	}
 	*used = *allocs - frees;
 	return (((int64_t)*used + *cachefree) * kz->uk_size);
 }
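
Because a released bucket may hold items whose slabs live in different
domains (cross-domain frees), zone_release() above caches the current
lock pointer and switches only when the slab's domain changes.  Reduced
to its essentials (an illustrative sketch of the VTOSLAB case, not
verbatim from the diff):

	struct mtx *lock = NULL;

	for (i = 0; i < cnt; i++) {
		slab = vtoslab((vm_offset_t)bucket[i]);
		if (lock != KEG_LOCKPTR(keg, slab->us_domain)) {
			if (lock != NULL)
				mtx_unlock(lock);
			lock = KEG_LOCK(keg, slab->us_domain);
		}
		slab_free_item(zone, slab, bucket[i]);
	}
	if (lock != NULL)
		mtx_unlock(lock);

The per-domain counters are also exported via sysctl; on a NUMA kernel
they should appear as vm.uma.<zone>.keg.domain.<name>.pages and .free,
with <name> taken from VM_DOMAIN(i)->vmd_name.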

Modified: head/sys/vm/uma_int.h
==============================================================================
--- head/sys/vm/uma_int.h	Sat Jan  4 03:15:34 2020	(r356348)
+++ head/sys/vm/uma_int.h	Sat Jan  4 03:30:08 2020	(r356349)
@@ -254,9 +254,12 @@ cache_uz_size(uma_cache_t cache)
  * Per-domain slab lists.  Embedded in the kegs.
  */
 struct uma_domain {
+	struct mtx_padalign ud_lock;	/* Lock for the domain lists. */
 	struct slabhead	ud_part_slab;	/* partially allocated slabs */
 	struct slabhead	ud_free_slab;	/* completely unallocated slabs */
 	struct slabhead ud_full_slab;	/* fully allocated slabs */
+	uint32_t	ud_pages;	/* Total page count */
+	uint32_t	ud_free;	/* Count of items free in slabs */
 } __aligned(CACHE_LINE_SIZE);
 
 typedef struct uma_domain * uma_domain_t;
@@ -268,14 +271,11 @@ typedef struct uma_domain * uma_domain_t;
  *
  */
 struct uma_keg {
-	struct mtx	uk_lock;	/* Lock for the keg. */
 	struct uma_hash	uk_hash;
 	LIST_HEAD(,uma_zone)	uk_zones;	/* Keg's zones */
 
 	struct domainset_ref uk_dr;	/* Domain selection policy. */
 	uint32_t	uk_align;	/* Alignment mask */
-	uint32_t	uk_pages;	/* Total page count */
-	uint32_t	uk_free;	/* Count of items free in slabs */
 	uint32_t	uk_reserve;	/* Number of reserved items. */
 	uint32_t	uk_size;	/* Requested size of each item */
 	uint32_t	uk_rsize;	/* Real size of each item */
@@ -305,8 +305,8 @@ typedef struct uma_keg	* uma_keg_t;
 
 #ifdef _KERNEL
 #define	KEG_ASSERT_COLD(k)						\
-	KASSERT((k)->uk_pages == 0, ("keg %s initialization after use.",\
-	    (k)->uk_name))
+	KASSERT((k)->uk_domain[0].ud_pages == 0,			\
+	    ("keg %s initialization after use.", (k)->uk_name))
 
 /*
  * Free bits per-slab.
@@ -536,20 +536,22 @@ static __inline uma_slab_t hash_sfind(struct uma_hash 
 
 /* Lock Macros */
 
-#define	KEG_LOCK_INIT(k, lc)					\
-	do {							\
-		if ((lc))					\
-			mtx_init(&(k)->uk_lock, (k)->uk_name,	\
-			    (k)->uk_name, MTX_DEF | MTX_DUPOK);	\
-		else						\
-			mtx_init(&(k)->uk_lock, (k)->uk_name,	\
-			    "UMA zone", MTX_DEF | MTX_DUPOK);	\
+#define	KEG_LOCKPTR(k, d)	(struct mtx *)&(k)->uk_domain[(d)].ud_lock
+#define	KEG_LOCK_INIT(k, d, lc)						\
+	do {								\
+		if ((lc))						\
+			mtx_init(KEG_LOCKPTR(k, d), (k)->uk_name,	\
+			    (k)->uk_name, MTX_DEF | MTX_DUPOK);		\
+		else							\
+			mtx_init(KEG_LOCKPTR(k, d), (k)->uk_name,	\
+			    "UMA zone", MTX_DEF | MTX_DUPOK);		\
 	} while (0)
 
-#define	KEG_LOCK_FINI(k)	mtx_destroy(&(k)->uk_lock)
-#define	KEG_LOCK(k)	mtx_lock(&(k)->uk_lock)
-#define	KEG_UNLOCK(k)	mtx_unlock(&(k)->uk_lock)
-#define	KEG_LOCK_ASSERT(k)	mtx_assert(&(k)->uk_lock, MA_OWNED)
+#define	KEG_LOCK_FINI(k, d)	mtx_destroy(KEG_LOCKPTR(k, d))
+#define	KEG_LOCK(k, d)							\
+	({ mtx_lock(KEG_LOCKPTR(k, d)); KEG_LOCKPTR(k, d); })
+#define	KEG_UNLOCK(k, d)	mtx_unlock(KEG_LOCKPTR(k, d))
+#define	KEG_LOCK_ASSERT(k, d)	mtx_assert(KEG_LOCKPTR(k, d), MA_OWNED)
 
 #define	KEG_GET(zone, keg) do {					\
 	(keg) = (zone)->uz_keg;					\
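
Note that KEG_LOCK is now a statement expression that both acquires the
per-domain mutex and evaluates to its pointer, which is what lets
zone_release() compare and hand off locks cheaply.  A minimal usage
sketch:

	struct mtx *lock;

	lock = KEG_LOCK(keg, 0);	/* lock domain 0, remember the mutex */
	/* ... manipulate keg->uk_domain[0] ... */
	mtx_unlock(lock);		/* equivalent to KEG_UNLOCK(keg, 0) */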

