svn commit: r356350 - head/sys/vm

Jeff Roberson jeff at FreeBSD.org
Sat Jan 4 07:56:29 UTC 2020


Author: jeff
Date: Sat Jan  4 07:56:28 2020
New Revision: 356350
URL: https://svnweb.freebsd.org/changeset/base/356350

Log:
  Sort cross-domain frees into per-domain buckets before inserting them
  onto their respective bucket lists.  This reduces contention on the
  keg lock by several orders of magnitude under heavy free traffic,
  while requiring only one additional bucket's worth of memory per
  domain.
  
  Discussed with:		markj, rlibby
  Differential Revision:	https://reviews.freebsd.org/D22830
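
  In outline, the new zone_free_cross() (in the uma_core.c hunk below)
  sorts each freed item by the domain of its backing page, accumulating
  items in one pending "cross" bucket per domain and handing a bucket
  over only once it fills.  A condensed sketch of the sort loop,
  simplified from the patch (the later caching of the full buckets
  under the zone lock is sketched after the uma_core.c diff):

	struct uma_bucketlist fullbuckets;
	uma_zone_domain_t zdom;
	void *item;
	int domain;

	TAILQ_INIT(&fullbuckets);
	ZONE_CROSS_LOCK(zone);
	while (bucket->ub_cnt > 0) {
		/* Find the NUMA domain backing this item. */
		item = bucket->ub_bucket[bucket->ub_cnt - 1];
		domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
		zdom = &zone->uz_domain[domain];

		/* Lazily allocate this domain's pending sort bucket. */
		if (zdom->uzd_cross == NULL &&
		    (zdom->uzd_cross = bucket_alloc(zone, udata,
		    M_NOWAIT)) == NULL)
			break;

		/* Move the item into its home domain's bucket. */
		zdom->uzd_cross->ub_bucket[zdom->uzd_cross->ub_cnt++] = item;
		if (zdom->uzd_cross->ub_cnt == zdom->uzd_cross->ub_entries) {
			/* Full: queue for caching under the zone lock. */
			TAILQ_INSERT_HEAD(&fullbuckets, zdom->uzd_cross,
			    ub_link);
			zdom->uzd_cross = NULL;
		}
		bucket->ub_cnt--;
	}
	ZONE_CROSS_UNLOCK(zone);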

Modified:
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c	Sat Jan  4 03:30:08 2020	(r356349)
+++ head/sys/vm/uma_core.c	Sat Jan  4 07:56:28 2020	(r356350)
@@ -951,10 +951,6 @@ cache_drain(uma_zone_t zone)
 	 *
 	 * XXX: It would be good to be able to assert that the zone is being
 	 * torn down to prevent improper use of cache_drain().
-	 *
-	 * XXX: We lock the zone before passing into bucket_cache_reclaim() as
-	 * it is used elsewhere.  Should the tear-down path be made special
-	 * there in some form?
 	 */
 	CPU_FOREACH(cpu) {
 		cache = &zone->uz_cpu[cpu];
@@ -974,9 +970,7 @@ cache_drain(uma_zone_t zone)
 			bucket_free(zone, bucket, NULL);
 		}
 	}
-	ZONE_LOCK(zone);
 	bucket_cache_reclaim(zone, true);
-	ZONE_UNLOCK(zone);
 }
 
 static void
@@ -1082,9 +1076,29 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
 	int i;
 
 	for (i = 0; i < vm_ndomains; i++) {
+		/*
+		 * The cross bucket is partially filled and not part of
+		 * the item count.  Reclaim it individually here.
+		 */
 		zdom = &zone->uz_domain[i];
+		ZONE_CROSS_LOCK(zone);
+		bucket = zdom->uzd_cross;
+		zdom->uzd_cross = NULL;
+		ZONE_CROSS_UNLOCK(zone);
+		if (bucket != NULL) {
+			bucket_drain(zone, bucket);
+			bucket_free(zone, bucket, NULL);
+		}
 
 		/*
+		 * Shrink the zone bucket size to ensure that the per-CPU caches
+		 * don't grow too large.
+		 */
+		ZONE_LOCK(zone);
+		if (i == 0 && zone->uz_bucket_size > zone->uz_bucket_size_min)
+			zone->uz_bucket_size--;
+
+		/*
 		 * If we were asked to drain the zone, we are done only once
 		 * this bucket cache is empty.  Otherwise, we reclaim items in
 		 * excess of the zone's estimated working set size.  If the
@@ -1114,14 +1128,8 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
 			bucket_free(zone, bucket, NULL);
 			ZONE_LOCK(zone);
 		}
+		ZONE_UNLOCK(zone);
 	}
-
-	/*
-	 * Shrink the zone bucket size to ensure that the per-CPU caches
-	 * don't grow too large.
-	 */
-	if (zone->uz_bucket_size > zone->uz_bucket_size_min)
-		zone->uz_bucket_size--;
 }
 
 static void
@@ -1224,8 +1232,8 @@ zone_reclaim(uma_zone_t zone, int waitok, bool drain)
 		msleep(zone, &zone->uz_lock, PVM, "zonedrain", 1);
 	}
 	zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
-	bucket_cache_reclaim(zone, drain);
 	ZONE_UNLOCK(zone);
+	bucket_cache_reclaim(zone, drain);
 
 	/*
 	 * The DRAINING flag protects us from being freed while
@@ -2263,6 +2271,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
 	zone_foreach(zone_count, &cnt);
 	zone->uz_namecnt = cnt.count;
 	ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
+	ZONE_CROSS_LOCK_INIT(zone);
 
 	for (i = 0; i < vm_ndomains; i++)
 		TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
@@ -2448,6 +2457,7 @@ zone_dtor(void *arg, int size, void *udata)
 	counter_u64_free(zone->uz_fails);
 	free(zone->uz_ctlname, M_UMA);
 	ZONE_LOCK_FINI(zone);
+	ZONE_CROSS_LOCK_FINI(zone);
 }
 
 /*
@@ -3724,7 +3734,76 @@ zfree_item:
 	zone_free_item(zone, item, udata, SKIP_DTOR);
 }
 
+#ifdef UMA_XDOMAIN
+/*
+ * Sort cross-domain free buckets into the correct per-domain buckets
+ * and cache them.
+ */
 static void
+zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata)
+{
+	struct uma_bucketlist fullbuckets;
+	uma_zone_domain_t zdom;
+	uma_bucket_t b;
+	void *item;
+	int domain;
+
+	CTR3(KTR_UMA,
+	    "uma_zfree: zone %s(%p) draining cross bucket %p",
+	    zone->uz_name, zone, bucket);
+
+	TAILQ_INIT(&fullbuckets);
+
+	/*
+	 * To avoid needing ndomain * ndomain buckets for sorting, a
+	 * single lock protects the current cross-free buckets.  A full
+	 * matrix with per-domain locking could be used if necessary.
+	 */
+	ZONE_CROSS_LOCK(zone);
+	while (bucket->ub_cnt > 0) {
+		item = bucket->ub_bucket[bucket->ub_cnt - 1];
+		domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+		zdom = &zone->uz_domain[domain];
+		if (zdom->uzd_cross == NULL) {
+			zdom->uzd_cross = bucket_alloc(zone, udata, M_NOWAIT);
+			if (zdom->uzd_cross == NULL)
+				break;
+		}
+		zdom->uzd_cross->ub_bucket[zdom->uzd_cross->ub_cnt++] = item;
+		if (zdom->uzd_cross->ub_cnt == zdom->uzd_cross->ub_entries) {
+			TAILQ_INSERT_HEAD(&fullbuckets, zdom->uzd_cross,
+			    ub_link);
+			zdom->uzd_cross = NULL;
+		}
+		bucket->ub_cnt--;
+	}
+	ZONE_CROSS_UNLOCK(zone);
+	if (!TAILQ_EMPTY(&fullbuckets)) {
+		ZONE_LOCK(zone);
+		while ((b = TAILQ_FIRST(&fullbuckets)) != NULL) {
+			TAILQ_REMOVE(&fullbuckets, b, ub_link);
+			if (zone->uz_bkt_count >= zone->uz_bkt_max) {
+				ZONE_UNLOCK(zone);
+				bucket_drain(zone, b);
+				bucket_free(zone, b, udata);
+				ZONE_LOCK(zone);
+			} else {
+				domain = _vm_phys_domain(
+				    pmap_kextract(
+				    (vm_offset_t)b->ub_bucket[0]));
+				zdom = &zone->uz_domain[domain];
+				zone_put_bucket(zone, zdom, b, true);
+			}
+		}
+		ZONE_UNLOCK(zone);
+	}
+	if (bucket->ub_cnt != 0)
+		bucket_drain(zone, bucket);
+	bucket_free(zone, bucket, udata);
+}
+#endif
+
+static void
 zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
     int domain, int itemdomain)
 {
@@ -3735,17 +3814,14 @@ zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket,
 	 * Buckets coming from the wrong domain will be entirely for the
 	 * only other domain on two-domain systems.  In this case we can
 	 * simply cache them.  Otherwise we need to sort them back to
-	 * correct domains by freeing the contents to the slab layer.
+	 * the correct domains.
 	 */
 	if (domain != itemdomain && vm_ndomains > 2) {
-		CTR3(KTR_UMA,
-		    "uma_zfree: zone %s(%p) draining cross bucket %p",
-		    zone->uz_name, zone, bucket);
-		bucket_drain(zone, bucket);
-		bucket_free(zone, bucket, udata);
+		zone_free_cross(zone, bucket, udata);
 		return;
 	}
 #endif
+
 	/*
 	 * Attempt to save the bucket in the zone's domain bucket cache.
 	 *
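
Two locking points are worth noting in the uma_core.c changes above.
First, bucket_cache_reclaim() now acquires and drops the zone lock
itself, once per domain, so cache_drain() and zone_reclaim() call it
unlocked.  Second, zone_free_cross() never holds uz_cross_lock and
uz_lock simultaneously: full buckets collect on a local list under the
cross lock, which is dropped before the zone lock is taken to cache
them.  A schematic of that two-phase handoff (a sketch, not the
verbatim code):

	struct uma_bucketlist fullbuckets;
	uma_bucket_t b;
	int domain;

	TAILQ_INIT(&fullbuckets);

	/* Phase 1: sort under the cross lock; full buckets collect on
	 * the local fullbuckets list, never touching uz_lock. */
	ZONE_CROSS_LOCK(zone);
	/* ... bin items into zdom->uzd_cross buckets ... */
	ZONE_CROSS_UNLOCK(zone);

	/* Phase 2: cache the full buckets under the zone lock only. */
	ZONE_LOCK(zone);
	while ((b = TAILQ_FIRST(&fullbuckets)) != NULL) {
		TAILQ_REMOVE(&fullbuckets, b, ub_link);
		/*
		 * Recompute the home domain from the first item; the
		 * real code also drains buckets past uz_bkt_max.
		 */
		domain = _vm_phys_domain(
		    pmap_kextract((vm_offset_t)b->ub_bucket[0]));
		zone_put_bucket(zone, &zone->uz_domain[domain], b, true);
	}
	ZONE_UNLOCK(zone);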

Modified: head/sys/vm/uma_int.h
==============================================================================
--- head/sys/vm/uma_int.h	Sat Jan  4 03:30:08 2020	(r356349)
+++ head/sys/vm/uma_int.h	Sat Jan  4 07:56:28 2020	(r356350)
@@ -399,6 +399,7 @@ TAILQ_HEAD(uma_bucketlist, uma_bucket);
 
 struct uma_zone_domain {
 	struct uma_bucketlist uzd_buckets; /* full buckets */
+	uma_bucket_t	uzd_cross;	/* Fills from cross-domain frees. */
 	long		uzd_nitems;	/* total item count */
 	long		uzd_imax;	/* maximum item count this period */
 	long		uzd_imin;	/* minimum item count this period */
@@ -449,6 +450,8 @@ struct uma_zone {
 	struct task	uz_maxaction;	/* Task to run when at limit */
 	uint16_t	uz_bucket_size_min; /* Min number of items in bucket */
 
+	struct mtx_padalign	uz_cross_lock;	/* Cross domain free lock */
+
 	/* Offset 256+, stats and misc. */
 	counter_u64_t	uz_allocs;	/* Total number of allocations */
 	counter_u64_t	uz_frees;	/* Total number of frees */
@@ -574,6 +577,12 @@ static __inline uma_slab_t hash_sfind(struct uma_hash 
 #define	ZONE_UNLOCK(z)	mtx_unlock(&(z)->uz_lock)
 #define	ZONE_LOCK_FINI(z)	mtx_destroy(&(z)->uz_lock)
 #define	ZONE_LOCK_ASSERT(z)	mtx_assert(&(z)->uz_lock, MA_OWNED)
+
+#define	ZONE_CROSS_LOCK_INIT(z)					\
+	mtx_init(&(z)->uz_cross_lock, "UMA Cross", NULL, MTX_DEF)
+#define	ZONE_CROSS_LOCK(z)	mtx_lock(&(z)->uz_cross_lock)
+#define	ZONE_CROSS_UNLOCK(z)	mtx_unlock(&(z)->uz_cross_lock)
+#define	ZONE_CROSS_LOCK_FINI(z)	mtx_destroy(&(z)->uz_cross_lock)
 
 /*
  * Find a slab within a hash table.  This is used for OFFPAGE zones to lookup
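
The new ZONE_CROSS_* macros mirror the existing ZONE_LOCK family,
wrapping the padded uz_cross_lock mutex added above.  A brief sketch of
their expected lifetime, matching the zone_ctor()/zone_dtor() hunks in
uma_core.c:

	/* At zone construction (zone_ctor). */
	ZONE_CROSS_LOCK_INIT(zone);

	/* Around any manipulation of the zdom->uzd_cross buckets. */
	ZONE_CROSS_LOCK(zone);
	/* ... fill, steal, or reclaim the per-domain cross buckets ... */
	ZONE_CROSS_UNLOCK(zone);

	/* At zone destruction (zone_dtor). */
	ZONE_CROSS_LOCK_FINI(zone);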

