git: 8b0dafdb2f18 - main - vm: implement vm_page_reclaim_contig_domain_ext()

From: Andrew Gallatin <gallatin_at_FreeBSD.org>
Date: Tue, 09 May 2023 17:11:58 UTC
The branch main has been updated by gallatin:

URL: https://cgit.FreeBSD.org/src/commit/?id=8b0dafdb2f18b9bdc464a4ddbcfd749c3d3875f1

commit 8b0dafdb2f18b9bdc464a4ddbcfd749c3d3875f1
Author:     Andrew Gallatin <gallatin@FreeBSD.org>
AuthorDate: 2023-05-08 13:25:40 +0000
Commit:     Andrew Gallatin <gallatin@FreeBSD.org>
CommitDate: 2023-05-09 17:09:34 +0000

    vm: implement vm_page_reclaim_contig_domain_ext()
    
    Implement vm_page_reclaim_contig_domain_ext() to reclaim multiple
    contiguous regions at once.  This lets users that need multiple
    contiguous regions reclaim them with a single call, which is more
    efficient than reclaiming them one run at a time.
    
    This is needed because callers like ktls may need to reclaim many
    contiguous regions, and each scan of physical memory can take
    multiple seconds on a large memory machine (order of 100GB of
    RAM).  Rather than modifying the core algorithm, I extended
    vm_page_reclaim_contig_domain() to take a "desired_runs" argument to
    allow the caller to request that it reclaim more than just a single
    run. There is no functional change intended for all existing
    callers.
    
    The first user for this interface is the ktls code
    (https://reviews.freebsd.org/D39421). By reclaiming multiple runs,
    ktls goes from consuming hours of CPU to refill its buffer zone to
    just seconds or minutes.
    
    Differential Revision: https://reviews.freebsd.org/D39739
    Sponsored by:   Netflix
    Reviewed by:    alc, jhb, markj
---
 sys/vm/vm_page.c | 69 +++++++++++++++++++++++++++++++++++++++++++-------------
 sys/vm/vm_page.h |  3 +++
 2 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 90413f235ec0..4b967a94aa1f 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2995,9 +2995,7 @@ unlock:
 
 #define	NRUNS	16
 
-CTASSERT(powerof2(NRUNS));
-
-#define	RUN_INDEX(count)	((count) & (NRUNS - 1))
+#define	RUN_INDEX(count, nruns)	((count) % (nruns))
 
 #define	MIN_RECLAIM	8
 
@@ -3025,19 +3023,42 @@ CTASSERT(powerof2(NRUNS));
  *	must be a power of two.
  */
 bool
-vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
-    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
+vm_page_reclaim_contig_domain_ext(int domain, int req, u_long npages,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    int desired_runs)
 {
 	struct vm_domain *vmd;
 	vm_paddr_t curr_low;
-	vm_page_t m_run, m_runs[NRUNS];
+	vm_page_t m_run, _m_runs[NRUNS], *m_runs;
 	u_long count, minalign, reclaimed;
-	int error, i, options, req_class;
+	int error, i, min_reclaim, nruns, options, req_class;
+	bool ret;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
 
+	ret = false;
+
+	/*
+	 * If the caller wants to reclaim multiple runs, try to allocate
+	 * space to store the runs.  If that fails, fall back to the old
+	 * behavior of just reclaiming MIN_RECLAIM pages.
+	 */
+	if (desired_runs > 1)
+		m_runs = malloc((NRUNS + desired_runs) * sizeof(*m_runs),
+		    M_TEMP, M_NOWAIT);
+	else
+		m_runs = NULL;
+
+	if (m_runs == NULL) {
+		m_runs = _m_runs;
+		nruns = NRUNS;
+	} else {
+		nruns = NRUNS + desired_runs - 1;
+	}
+	min_reclaim = MAX(desired_runs * npages, MIN_RECLAIM);
+
 	/*
 	 * The caller will attempt an allocation after some runs have been
 	 * reclaimed and added to the vm_phys buddy lists.  Due to limitations
@@ -3066,7 +3087,7 @@ vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
 	if (count < npages + vmd->vmd_free_reserved || (count < npages +
 	    vmd->vmd_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
 	    (count < npages && req_class == VM_ALLOC_INTERRUPT))
-		return (false);
+		goto done;
 
 	/*
 	 * Scan up to three times, relaxing the restrictions ("options") on
@@ -3085,27 +3106,29 @@ vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
 			if (m_run == NULL)
 				break;
 			curr_low = VM_PAGE_TO_PHYS(m_run) + ptoa(npages);
-			m_runs[RUN_INDEX(count)] = m_run;
+			m_runs[RUN_INDEX(count, nruns)] = m_run;
 			count++;
 		}
 
 		/*
 		 * Reclaim the highest runs in LIFO (descending) order until
 		 * the number of reclaimed pages, "reclaimed", is at least
-		 * MIN_RECLAIM.  Reset "reclaimed" each time because each
+		 * "min_reclaim".  Reset "reclaimed" each time because each
 		 * reclamation is idempotent, and runs will (likely) recur
 		 * from one scan to the next as restrictions are relaxed.
 		 */
 		reclaimed = 0;
-		for (i = 0; count > 0 && i < NRUNS; i++) {
+		for (i = 0; count > 0 && i < nruns; i++) {
 			count--;
-			m_run = m_runs[RUN_INDEX(count)];
+			m_run = m_runs[RUN_INDEX(count, nruns)];
 			error = vm_page_reclaim_run(req_class, domain, npages,
 			    m_run, high);
 			if (error == 0) {
 				reclaimed += npages;
-				if (reclaimed >= MIN_RECLAIM)
-					return (true);
+				if (reclaimed >= min_reclaim) {
+					ret = true;
+					goto done;
+				}
 			}
 		}
 
@@ -3117,9 +3140,23 @@ vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
 			options = VPSC_NOSUPER;
 		else if (options == VPSC_NOSUPER)
 			options = VPSC_ANY;
-		else if (options == VPSC_ANY)
-			return (reclaimed != 0);
+		else if (options == VPSC_ANY) {
+			ret = reclaimed != 0;
+			goto done;
+		}
 	}
+done:
+	if (m_runs != _m_runs)
+		free(m_runs, M_TEMP);
+	return (ret);
+}
+
+bool
+vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
+{
+	return (vm_page_reclaim_contig_domain_ext(domain, req, npages, low, high,
+	    alignment, boundary, 1));
 }
 
 bool
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 9563f4ac714c..824a853fb0f7 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -668,6 +668,9 @@ bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
+bool vm_page_reclaim_contig_domain_ext(int domain, int req, u_long npages,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    int desired_runs);
 void vm_page_reference(vm_page_t m);
 #define	VPR_TRYFREE	0x01
 #define	VPR_NOREUSE	0x02