svn commit: r351673 - in head: lib/libmemstat share/man/man9 sys/cddl/compat/opensolaris/kern sys/kern sys/vm

Sun Sep 1 22:22:45 UTC 2019

Author: markj
Date: Sun Sep  1 22:22:43 2019
New Revision: 351673
URL: https://svnweb.freebsd.org/changeset/base/351673

Log:
  Extend uma_reclaim() to permit different reclamation targets.
  
  The page daemon periodically invokes uma_reclaim() to reclaim cached
  items from each zone when the system is under memory pressure.  This
  is important since the size of these caches is unbounded by default.
  However, it also results in bursts of high latency when allocating from
  heavily used zones as threads miss in the per-CPU caches and must
  access the keg in order to allocate new items.
  
  With r340405 we maintain an estimate of each zone's usage of its
  (per-NUMA domain) cache of full buckets.  Start making use of this
  estimate to avoid reclaiming the entire cache when under memory
  pressure.  In particular, introduce TRIM, DRAIN and DRAIN_CPU
  verbs for uma_reclaim() and uma_zone_reclaim().  When trimming, only
  items in excess of the estimate are reclaimed.  Draining a zone
  reclaims all of the cached full buckets (the previous behaviour of
  uma_reclaim()), and may further drain the per-CPU caches in extreme
  cases.
  
  Now, when under memory pressure, the page daemon will trim zones
  rather than draining them.  As a result, heavily used zones do not incur
  bursts of bucket cache misses following reclamation, but large, unused
  caches will be reclaimed as before.
  
  Reviewed by:	jeff
  Tested by:	pho (an earlier version)
  MFC after:	2 months
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D16667

Modified:
  head/lib/libmemstat/memstat_uma.c
  head/share/man/man9/Makefile
  head/share/man/man9/zone.9
  head/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
  head/sys/kern/kern_mbuf.c
  head/sys/kern/subr_vmem.c
  head/sys/kern/vfs_subr.c
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h
  head/sys/vm/vm_pageout.c

Modified: head/lib/libmemstat/memstat_uma.c
==============================================================================

--- head/lib/libmemstat/memstat_uma.c	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/lib/libmemstat/memstat_uma.c	Sun Sep  1 22:22:43 2019	(r351673)
@@ -474,9 +474,9 @@ skip_percpu:
 				ret = kread(kvm, &uz.uz_domain[i], &uzd,
 				   sizeof(uzd), 0);
 				for (ubp =
-				    LIST_FIRST(&uzd.uzd_buckets);
+				    TAILQ_FIRST(&uzd.uzd_buckets);
 				    ubp != NULL;
-				    ubp = LIST_NEXT(&ub, ub_link)) {
+				    ubp = TAILQ_NEXT(&ub, ub_link)) {
 					ret = kread(kvm, ubp, &ub,
 					   sizeof(ub), 0);
 					mtp->mt_zonefree += ub.ub_cnt;

Modified: head/share/man/man9/Makefile
==============================================================================
--- head/share/man/man9/Makefile	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/share/man/man9/Makefile	Sun Sep  1 22:22:43 2019	(r351673)
@@ -2281,6 +2281,8 @@ MLINKS+=vrele.9 vput.9 \
 	vrele.9 vunref.9
 MLINKS+=vslock.9 vsunlock.9
 MLINKS+=zone.9 uma.9 \
+	zone.9 uma_prealloc.9 \
+	zone.9 uma_reclaim.9 \
 	zone.9 uma_zalloc.9 \
 	zone.9 uma_zalloc_arg.9 \
 	zone.9 uma_zalloc_domain.9 \
@@ -2296,7 +2298,7 @@ MLINKS+=zone.9 uma.9 \
 	zone.9 uma_zfree_pcpu_arg.9 \
 	zone.9 uma_zone_get_cur.9 \
 	zone.9 uma_zone_get_max.9 \
-	zone.9 uma_zone_prealloc.9 \
+	zone.9 uma_zone_reclaim.9 \
 	zone.9 uma_zone_reserve.9 \
 	zone.9 uma_zone_reserve_kva.9 \
 	zone.9 uma_zone_set_allocf.9 \

Modified: head/share/man/man9/zone.9
==============================================================================
--- head/share/man/man9/zone.9	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/share/man/man9/zone.9	Sun Sep  1 22:22:43 2019	(r351673)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 30, 2019
+.Dd September 1, 2019
 .Dt UMA 9
 .Os
 .Sh NAME
@@ -98,6 +98,10 @@ typedef void (*uma_free)(void *item, vm_size_t size, u
 .Ft void
 .Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems"
 .Ft void
+.Fn uma_reclaim "int req"
+.Ft void
+.Fn uma_zone_reclaim "uma_zone_t zone" "int req"
+.Ft void
 .Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
 .Ft void
 .Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
@@ -436,6 +440,32 @@ Note that unlike
 does not restrict the use of the pre-allocation to
 .Dv M_USE_RESERVE
 requests.
+.Pp
+The
+.Fn uma_reclaim
+and
+.Fn uma_zone_reclaim
+functions reclaim cached items from UMA zones, releasing unused memory.
+The
+.Fn uma_reclaim
+function reclaims items from all regular zones, while
+.Fn uma_zone_reclaim
+reclaims items only from the specified zone.
+The
+.Fa req
+parameter must be one of three values which specify how aggressively
+items are to be reclaimed:
+.Bl -tag -width indent
+.It Dv UMA_RECLAIM_TRIM
+Reclaim items only in excess of the zone's estimated working set size.
+The working set size is periodically updated and tracks the recent history
+of the zone's usage.
+.It Dv UMA_RECLAIM_DRAIN
+Reclaim all items from the unbounded cache.
+Free items in the per-CPU caches are left alone.
+.It Dv UMA_RECLAIM_DRAIN_CPU
+Reclaim all cached items.
+.El
 .Pp
 The
 .Fn uma_zone_set_allocf

Modified: head/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
==============================================================================
--- head/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c	Sun Sep  1 22:22:43 2019	(r351673)
@@ -238,14 +238,14 @@ void
 kmem_cache_reap_soon(kmem_cache_t *cache)
 {
 #ifndef KMEM_DEBUG
-	zone_drain(cache->kc_zone);
+	uma_zone_reclaim(cache->kc_zone, UMA_RECLAIM_DRAIN);
 #endif
 }
 
 void
 kmem_reap(void)
 {
-	uma_reclaim();
+	uma_reclaim(UMA_RECLAIM_TRIM);
 }
 #else
 void

Modified: head/sys/kern/kern_mbuf.c
==============================================================================
--- head/sys/kern/kern_mbuf.c	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/sys/kern/kern_mbuf.c	Sun Sep  1 22:22:43 2019	(r351673)
@@ -711,14 +711,14 @@ mb_dtor_pack(void *mem, int size, void *arg)
 #endif
 	/*
 	 * If there are processes blocked on zone_clust, waiting for pages
-	 * to be freed up, * cause them to be woken up by draining the
-	 * packet zone.  We are exposed to a race here * (in the check for
+	 * to be freed up, cause them to be woken up by draining the
+	 * packet zone.  We are exposed to a race here (in the check for
 	 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that
 	 * is deliberate. We don't want to acquire the zone lock for every
 	 * mbuf free.
 	 */
 	if (uma_zone_exhausted_nolock(zone_clust))
-		zone_drain(zone_pack);
+		uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN);
 }
 
 /*
@@ -1362,7 +1362,7 @@ m_clget(struct mbuf *m, int how)
 	 * we might be able to loosen a few clusters up on the drain.
 	 */
 	if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
-		zone_drain(zone_pack);
+		uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN);
 		uma_zalloc_arg(zone_clust, m, how);
 	}
 	MBUF_PROBE2(m__clget, m, how);

Modified: head/sys/kern/subr_vmem.c
==============================================================================
--- head/sys/kern/subr_vmem.c	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/sys/kern/subr_vmem.c	Sun Sep  1 22:22:43 2019	(r351673)
@@ -588,7 +588,7 @@ qc_drain(vmem_t *vm)
 
 	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
 	for (i = 0; i < qcache_idx_max; i++)
-		zone_drain(vm->vm_qcache[i].qc_cache);
+		uma_zone_reclaim(vm->vm_qcache[i].qc_cache, UMA_RECLAIM_DRAIN);
 }
 
 #ifndef UMA_MD_SMALL_ALLOC

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/sys/kern/vfs_subr.c	Sun Sep  1 22:22:43 2019	(r351673)
@@ -1321,7 +1321,7 @@ vnlru_proc(void)
 		}
 		mtx_unlock(&mountlist_mtx);
 		if (onumvnodes > desiredvnodes && numvnodes <= desiredvnodes)
-			uma_reclaim();
+			uma_reclaim(UMA_RECLAIM_DRAIN);
 		if (done == 0) {
 			if (force == 0 || force == 1) {
 				force = 2;

Modified: head/sys/vm/uma.h
==============================================================================
--- head/sys/vm/uma.h	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/sys/vm/uma.h	Sun Sep  1 22:22:43 2019	(r351673)
@@ -50,8 +50,6 @@ struct uma_zone;
 /* Opaque type used as a handle to the zone */
 typedef struct uma_zone * uma_zone_t;
 
-void zone_drain(uma_zone_t);
-
 /*
  * Item constructor
  *
@@ -438,17 +436,18 @@ typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t 
 typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
 
 /*
- * Reclaims unused memory for all zones
+ * Reclaims unused memory
  *
  * Arguments:
- *	None
+ *	req  Reclamation request type.
  * Returns:
  *	None
- *
- * This should only be called by the page out daemon.
  */
-
-void uma_reclaim(void);
+#define	UMA_RECLAIM_DRAIN	1	/* release bucket cache */
+#define	UMA_RECLAIM_DRAIN_CPU	2	/* release bucket and per-CPU caches */
+#define	UMA_RECLAIM_TRIM	3	/* trim bucket cache to WSS */
+void uma_reclaim(int req);
+void uma_zone_reclaim(uma_zone_t, int req);
 
 /*
  * Sets the alignment mask to be used for all zones requesting cache

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/sys/vm/uma_core.c	Sun Sep  1 22:22:43 2019	(r351673)
@@ -142,7 +142,7 @@ static struct rwlock_padalign __exclusive_cache_line u
 static char *bootmem;
 static int boot_pages;
 
-static struct sx uma_drain_lock;
+static struct sx uma_reclaim_lock;
 
 /*
  * kmem soft limit, initialized by uma_set_limit().  Ensure that early
@@ -250,7 +250,7 @@ static void pcpu_page_free(void *, vm_size_t, uint8_t)
 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
 static void cache_drain(uma_zone_t);
 static void bucket_drain(uma_zone_t, uma_bucket_t);
-static void bucket_cache_drain(uma_zone_t zone);
+static void bucket_cache_reclaim(uma_zone_t zone, bool);
 static int keg_ctor(void *, int, void *, int);
 static void keg_dtor(void *, int, void *);
 static int zone_ctor(void *, int, void *, int);
@@ -467,27 +467,36 @@ bucket_zone_drain(void)
 	struct uma_bucket_zone *ubz;
 
 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
-		zone_drain(ubz->ubz_zone);
+		uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
 }
 
+/*
+ * Attempt to satisfy an allocation by retrieving a full bucket from one of the
+ * zone's caches.
+ */
 static uma_bucket_t
-zone_try_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, const bool ws)
+zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom)
 {
 	uma_bucket_t bucket;
 
 	ZONE_LOCK_ASSERT(zone);
 
-	if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
+	if ((bucket = TAILQ_FIRST(&zdom->uzd_buckets)) != NULL) {
 		MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
-		LIST_REMOVE(bucket, ub_link);
+		TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
 		zdom->uzd_nitems -= bucket->ub_cnt;
-		if (ws && zdom->uzd_imin > zdom->uzd_nitems)
+		if (zdom->uzd_imin > zdom->uzd_nitems)
 			zdom->uzd_imin = zdom->uzd_nitems;
 		zone->uz_bkt_count -= bucket->ub_cnt;
 	}
 	return (bucket);
 }
 
+/*
+ * Insert a full bucket into the specified cache.  The "ws" parameter indicates
+ * whether the bucket's contents should be counted as part of the zone's working
+ * set.
+ */
 static void
 zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
     const bool ws)
@@ -497,7 +506,10 @@ zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdo
 	KASSERT(zone->uz_bkt_count < zone->uz_bkt_max, ("%s: zone %p overflow",
 	    __func__, zone));
 
-	LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
+	if (ws)
+		TAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
+	else
+		TAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
 	zdom->uzd_nitems += bucket->ub_cnt;
 	if (ws && zdom->uzd_imax < zdom->uzd_nitems)
 		zdom->uzd_imax = zdom->uzd_nitems;
@@ -558,7 +570,7 @@ zone_domain_update_wss(uma_zone_domain_t zdom)
 	MPASS(zdom->uzd_imax >= zdom->uzd_imin);
 	wss = zdom->uzd_imax - zdom->uzd_imin;
 	zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
-	zdom->uzd_wss = (3 * wss + 2 * zdom->uzd_wss) / 5;
+	zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5;
 }
 
 /*
@@ -609,11 +621,12 @@ zone_timeout(uma_zone_t zone)
 			return;
 		}
 	}
+	KEG_UNLOCK(keg);
 
+	ZONE_LOCK(zone);
 	for (int i = 0; i < vm_ndomains; i++)
 		zone_domain_update_wss(&zone->uz_domain[i]);
-
-	KEG_UNLOCK(keg);
+	ZONE_UNLOCK(zone);
 }
 
 /*
@@ -777,7 +790,7 @@ cache_drain(uma_zone_t zone)
 	 * XXX: It would good to be able to assert that the zone is being
 	 * torn down to prevent improper use of cache_drain().
 	 *
-	 * XXX: We lock the zone before passing into bucket_cache_drain() as
+	 * XXX: We lock the zone before passing into bucket_cache_reclaim() as
 	 * it is used elsewhere.  Should the tear-down path be made special
 	 * there in some form?
 	 */
@@ -797,7 +810,7 @@ cache_drain(uma_zone_t zone)
 		cache->uc_crossbucket = NULL;
 	}
 	ZONE_LOCK(zone);
-	bucket_cache_drain(zone);
+	bucket_cache_reclaim(zone, true);
 	ZONE_UNLOCK(zone);
 }
 
@@ -869,7 +882,7 @@ cache_drain_safe_cpu(uma_zone_t zone)
  * Zone lock must not be held on call this function.
  */
 static void
-cache_drain_safe(uma_zone_t zone)
+pcpu_cache_drain_safe(uma_zone_t zone)
 {
 	int cpu;
 
@@ -897,22 +910,46 @@ cache_drain_safe(uma_zone_t zone)
 }
 
 /*
- * Drain the cached buckets from a zone.  Expects a locked zone on entry.
+ * Reclaim cached buckets from a zone.  All buckets are reclaimed if the caller
+ * requested a drain, otherwise the per-domain caches are trimmed to their
+ * estimated working set size.
  */
 static void
-bucket_cache_drain(uma_zone_t zone)
+bucket_cache_reclaim(uma_zone_t zone, bool drain)
 {
 	uma_zone_domain_t zdom;
 	uma_bucket_t bucket;
+	long target, tofree;
 	int i;
 
-	/*
-	 * Drain the bucket queues and free the buckets.
-	 */
 	for (i = 0; i < vm_ndomains; i++) {
 		zdom = &zone->uz_domain[i];
-		while ((bucket = zone_try_fetch_bucket(zone, zdom, false)) !=
-		    NULL) {
+
+		/*
+		 * If we were asked to drain the zone, we are done only once
+		 * this bucket cache is empty.  Otherwise, we reclaim items in
+		 * excess of the zone's estimated working set size.  If the
+		 * difference nitems - imin is larger than the WSS estimate,
+		 * then the estimate will grow at the end of this interval and
+		 * we ignore the historical average.
+		 */
+		target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
+		    zdom->uzd_imin);
+		while (zdom->uzd_nitems > target) {
+			bucket = TAILQ_LAST(&zdom->uzd_buckets, uma_bucketlist);
+			if (bucket == NULL)
+				break;
+			tofree = bucket->ub_cnt;
+			TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
+			zdom->uzd_nitems -= tofree;
+
+			/*
+			 * Shift the bounds of the current WSS interval to avoid
+			 * perturbing the estimate.
+			 */
+			zdom->uzd_imax -= lmin(zdom->uzd_imax, tofree);
+			zdom->uzd_imin -= lmin(zdom->uzd_imin, tofree);
+
 			ZONE_UNLOCK(zone);
 			bucket_drain(zone, bucket);
 			bucket_free(zone, bucket, NULL);
@@ -921,8 +958,8 @@ bucket_cache_drain(uma_zone_t zone)
 	}
 
 	/*
-	 * Shrink further bucket sizes.  Price of single zone lock collision
-	 * is probably lower then price of global cache drain.
+	 * Shrink the zone bucket size to ensure that the per-CPU caches
+	 * don't grow too large.
 	 */
 	if (zone->uz_count > zone->uz_count_min)
 		zone->uz_count--;
@@ -1020,7 +1057,7 @@ finished:
 }
 
 static void
-zone_drain_wait(uma_zone_t zone, int waitok)
+zone_reclaim(uma_zone_t zone, int waitok, bool drain)
 {
 
 	/*
@@ -1030,14 +1067,15 @@ zone_drain_wait(uma_zone_t zone, int waitok)
 	 * when it wakes up.
 	 */
 	ZONE_LOCK(zone);
-	while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
+	while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
 		if (waitok == M_NOWAIT)
 			goto out;
 		msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
 	}
-	zone->uz_flags |= UMA_ZFLAG_DRAINING;
-	bucket_cache_drain(zone);
+	zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
+	bucket_cache_reclaim(zone, drain);
 	ZONE_UNLOCK(zone);
+
 	/*
 	 * The DRAINING flag protects us from being freed while
 	 * we're running.  Normally the uma_rwlock would protect us but we
@@ -1045,19 +1083,26 @@ zone_drain_wait(uma_zone_t zone, int waitok)
 	 */
 	keg_drain(zone->uz_keg);
 	ZONE_LOCK(zone);
-	zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
+	zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING;
 	wakeup(zone);
 out:
 	ZONE_UNLOCK(zone);
 }
 
-void
+static void
 zone_drain(uma_zone_t zone)
 {
 
-	zone_drain_wait(zone, M_NOWAIT);
+	zone_reclaim(zone, M_NOWAIT, true);
 }
 
+static void
+zone_trim(uma_zone_t zone)
+{
+
+	zone_reclaim(zone, M_NOWAIT, false);
+}
+
 /*
  * Allocate a new slab for a keg.  This does not insert the slab onto a list.
  * If the allocation was successful, the keg lock will be held upon return,
@@ -1756,6 +1801,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
 	uma_zone_t zone = mem;
 	uma_zone_t z;
 	uma_keg_t keg;
+	int i;
 
 	bzero(zone, size);
 	zone->uz_name = arg->name;
@@ -1783,6 +1829,9 @@ zone_ctor(void *mem, int size, void *udata, int flags)
 		zone->uz_fails = EARLY_COUNTER;
 	}
 
+	for (i = 0; i < vm_ndomains; i++)
+		TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
+
 	/*
 	 * This is a pure cache zone, no kegs.
 	 */
@@ -1933,7 +1982,7 @@ zone_dtor(void *arg, int size, void *udata)
 	 * released and then refilled before we
 	 * remove it... we dont care for now
 	 */
-	zone_drain_wait(zone, M_WAITOK);
+	zone_reclaim(zone, M_WAITOK, true);
 	/*
 	 * We only destroy kegs from non secondary/non cache zones.
 	 */
@@ -2138,7 +2187,7 @@ uma_startup2(void)
 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
 #endif
 	booted = BOOT_BUCKETS;
-	sx_init(&uma_drain_lock, "umadrain");
+	sx_init(&uma_reclaim_lock, "umareclaim");
 	bucket_enable();
 }
 
@@ -2233,12 +2282,12 @@ uma_zcreate(const char *name, size_t size, uma_ctor ct
 	if (booted < BOOT_BUCKETS) {
 		locked = false;
 	} else {
-		sx_slock(&uma_drain_lock);
+		sx_slock(&uma_reclaim_lock);
 		locked = true;
 	}
 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
 	if (locked)
-		sx_sunlock(&uma_drain_lock);
+		sx_sunlock(&uma_reclaim_lock);
 	return (res);
 }
 
@@ -2267,13 +2316,13 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor
 	if (booted < BOOT_BUCKETS) {
 		locked = false;
 	} else {
-		sx_slock(&uma_drain_lock);
+		sx_slock(&uma_reclaim_lock);
 		locked = true;
 	}
 	/* XXX Attaches only one keg of potentially many. */
 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
 	if (locked)
-		sx_sunlock(&uma_drain_lock);
+		sx_sunlock(&uma_reclaim_lock);
 	return (res);
 }
 
@@ -2306,9 +2355,9 @@ void
 uma_zdestroy(uma_zone_t zone)
 {
 
-	sx_slock(&uma_drain_lock);
+	sx_slock(&uma_reclaim_lock);
 	zone_free_item(zones, zone, NULL, SKIP_NONE);
-	sx_sunlock(&uma_drain_lock);
+	sx_sunlock(&uma_reclaim_lock);
 }
 
 void
@@ -2521,7 +2570,7 @@ zalloc_start:
 		zdom = &zone->uz_domain[0];
 	}
 
-	if ((bucket = zone_try_fetch_bucket(zone, zdom, true)) != NULL) {
+	if ((bucket = zone_fetch_bucket(zone, zdom)) != NULL) {
 		KASSERT(bucket->ub_cnt != 0,
 		    ("uma_zalloc_arg: Returning an empty bucket."));
 		cache->uc_allocbucket = bucket;
@@ -3672,17 +3721,28 @@ uma_prealloc(uma_zone_t zone, int items)
 }
 
 /* See uma.h */
-static void
-uma_reclaim_locked(bool kmem_danger)
+void
+uma_reclaim(int req)
 {
 
 	CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
-	sx_assert(&uma_drain_lock, SA_XLOCKED);
+	sx_xlock(&uma_reclaim_lock);
 	bucket_enable();
-	zone_foreach(zone_drain);
-	if (vm_page_count_min() || kmem_danger) {
-		cache_drain_safe(NULL);
+
+	switch (req) {
+	case UMA_RECLAIM_TRIM:
+		zone_foreach(zone_trim);
+		break;
+	case UMA_RECLAIM_DRAIN:
+	case UMA_RECLAIM_DRAIN_CPU:
 		zone_foreach(zone_drain);
+		if (req == UMA_RECLAIM_DRAIN_CPU) {
+			pcpu_cache_drain_safe(NULL);
+			zone_foreach(zone_drain);
+		}
+		break;
+	default:
+		panic("unhandled reclamation request %d", req);
 	}
 
 	/*
@@ -3692,17 +3752,9 @@ uma_reclaim_locked(bool kmem_danger)
 	 */
 	zone_drain(slabzone);
 	bucket_zone_drain();
+	sx_xunlock(&uma_reclaim_lock);
 }
 
-void
-uma_reclaim(void)
-{
-
-	sx_xlock(&uma_drain_lock);
-	uma_reclaim_locked(false);
-	sx_xunlock(&uma_drain_lock);
-}
-
 static volatile int uma_reclaim_needed;
 
 void
@@ -3718,18 +3770,37 @@ uma_reclaim_worker(void *arg __unused)
 {
 
 	for (;;) {
-		sx_xlock(&uma_drain_lock);
+		sx_xlock(&uma_reclaim_lock);
 		while (atomic_load_int(&uma_reclaim_needed) == 0)
-			sx_sleep(uma_reclaim, &uma_drain_lock, PVM, "umarcl",
+			sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl",
 			    hz);
-		sx_xunlock(&uma_drain_lock);
+		sx_xunlock(&uma_reclaim_lock);
 		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
-		sx_xlock(&uma_drain_lock);
-		uma_reclaim_locked(true);
+		uma_reclaim(UMA_RECLAIM_DRAIN_CPU);
 		atomic_store_int(&uma_reclaim_needed, 0);
-		sx_xunlock(&uma_drain_lock);
 		/* Don't fire more than once per-second. */
 		pause("umarclslp", hz);
+	}
+}
+
+/* See uma.h */
+void
+uma_zone_reclaim(uma_zone_t zone, int req)
+{
+
+	switch (req) {
+	case UMA_RECLAIM_TRIM:
+		zone_trim(zone);
+		break;
+	case UMA_RECLAIM_DRAIN:
+		zone_drain(zone);
+		break;
+	case UMA_RECLAIM_DRAIN_CPU:
+		pcpu_cache_drain_safe(zone);
+		zone_drain(zone);
+		break;
+	default:
+		panic("unhandled reclamation request %d", req);
 	}
 }
 

Modified: head/sys/vm/uma_int.h
==============================================================================
--- head/sys/vm/uma_int.h	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/sys/vm/uma_int.h	Sun Sep  1 22:22:43 2019	(r351673)
@@ -197,7 +197,7 @@ struct uma_hash {
  */
 
 struct uma_bucket {
-	LIST_ENTRY(uma_bucket)	ub_link;	/* Link into the zone */
+	TAILQ_ENTRY(uma_bucket)	ub_link;	/* Link into the zone */
 	int16_t	ub_cnt;				/* Count of items in bucket. */
 	int16_t	ub_entries;			/* Max items. */
 	void	*ub_bucket[];			/* actual allocation storage */
@@ -306,8 +306,10 @@ struct uma_slab {
 
 typedef struct uma_slab * uma_slab_t;
 
+TAILQ_HEAD(uma_bucketlist, uma_bucket);
+
 struct uma_zone_domain {
-	LIST_HEAD(,uma_bucket)	uzd_buckets;	/* full buckets */
+	struct uma_bucketlist uzd_buckets; /* full buckets */
 	long		uzd_nitems;	/* total item count */
 	long		uzd_imax;	/* maximum item count this period */
 	long		uzd_imin;	/* minimum item count this period */
@@ -384,7 +386,7 @@ struct uma_zone {
  * These flags must not overlap with the UMA_ZONE flags specified in uma.h.
  */
 #define	UMA_ZFLAG_CACHE		0x04000000	/* uma_zcache_create()d it */
-#define	UMA_ZFLAG_DRAINING	0x08000000	/* Running zone_drain. */
+#define	UMA_ZFLAG_RECLAIMING	0x08000000	/* Running zone_reclaim(). */
 #define	UMA_ZFLAG_BUCKET	0x10000000	/* Bucket zone. */
 #define UMA_ZFLAG_INTERNAL	0x20000000	/* No offpage no PCPU. */
 #define UMA_ZFLAG_CACHEONLY	0x80000000	/* Don't ask VM for buckets. */

Modified: head/sys/vm/vm_pageout.c
==============================================================================
--- head/sys/vm/vm_pageout.c	Sun Sep  1 21:38:08 2019	(r351672)
+++ head/sys/vm/vm_pageout.c	Sun Sep  1 22:22:43 2019	(r351673)
@@ -1871,9 +1871,12 @@ vm_pageout_lowmem(void)
 
 		/*
 		 * We do this explicitly after the caches have been
-		 * drained above.
+		 * drained above.  If we have a severe page shortage on
+		 * our hands, completely drain all UMA zones.  Otherwise,
+		 * just prune the caches.
 		 */
-		uma_reclaim();
+		uma_reclaim(vm_page_count_min() ? UMA_RECLAIM_DRAIN_CPU :
+		    UMA_RECLAIM_TRIM);
 		return (true);
 	}
 	return (false);