git: 8deb5f2f6415 - stable/13 - Add a VM flag to prevent reclaim on a failed contig allocation

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Wed, 03 Nov 2021 17:53:58 UTC
The branch stable/13 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=8deb5f2f641581fc547710e9db04e5d784f374b9

commit 8deb5f2f641581fc547710e9db04e5d784f374b9
Author:     Ryan Stone <rstone@FreeBSD.org>
AuthorDate: 2021-01-29 21:13:57 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2021-11-03 17:35:16 +0000

    Add a VM flag to prevent reclaim on a failed contig allocation
    
    If an M_WAITOK contig alloc fails, the VM subsystem will try to
    reclaim contiguous memory twice before actually failing the
    request.  On a system with 64GB of RAM I've observed this take
    400-500ms before it finally gives up, and I believe that this
    will only be worse on systems with even more memory.
    
    In certain contexts this delay is extremely harmful, so add a flag
    that will skip reclaim for allocation requests to allow those
    paths to opt out of doing an expensive reclaim.
    
    Sponsored by: Dell Inc
    Differential Revision:  https://reviews.freebsd.org/D28422
    Reviewed by: markj, kib
    
    (cherry picked from commit 660344ca44c63bfe4a16c3e57d0f6dbcbb5e083e)
---
 sys/sys/malloc.h | 1 +
 sys/vm/vm_kern.c | 9 +++++++--
 sys/vm/vm_page.h | 3 +++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
index 93ec81c252ff..3d88460a751e 100644
--- a/sys/sys/malloc.h
+++ b/sys/sys/malloc.h
@@ -54,6 +54,7 @@
  */
 #define	M_NOWAIT	0x0001		/* do not block */
 #define	M_WAITOK	0x0002		/* ok to block */
+#define	M_NORECLAIM	0x0080		/* do not reclaim after failure */
 #define	M_ZERO		0x0100		/* bzero the allocation */
 #define	M_NOVM		0x0200		/* don't ask VM for pages */
 #define	M_USE_RESERVE	0x0400		/* can alloc out of reserve memory */
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index f25784857440..7ab1fdb8950e 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -178,17 +178,22 @@ kmem_alloc_contig_pages(vm_object_t object, vm_pindex_t pindex, int domain,
 {
 	vm_page_t m;
 	int tries;
-	bool wait;
+	bool wait, reclaim;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
+	/* Disallow an invalid combination of flags. */
+	MPASS((pflags & (VM_ALLOC_WAITOK | VM_ALLOC_NORECLAIM)) !=
+	    (VM_ALLOC_WAITOK | VM_ALLOC_NORECLAIM));
+
 	wait = (pflags & VM_ALLOC_WAITOK) != 0;
+	reclaim = (pflags & VM_ALLOC_NORECLAIM) == 0;
 	pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
 	pflags |= VM_ALLOC_NOWAIT;
 	for (tries = wait ? 3 : 1;; tries--) {
 		m = vm_page_alloc_contig_domain(object, pindex, domain, pflags,
 		    npages, low, high, alignment, boundary, memattr);
-		if (m != NULL || tries == 0)
+		if (m != NULL || tries == 0 || !reclaim)
 			break;
 
 		VM_OBJECT_WUNLOCK(object);
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index d897a088244f..6e0a4328e260 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -541,6 +541,7 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
 #define	VM_ALLOC_WAITFAIL	0x0010	/* (acf) Sleep and return error */
 #define	VM_ALLOC_WIRED		0x0020	/* (acfgp) Allocate a wired page */
 #define	VM_ALLOC_ZERO		0x0040	/* (acfgp) Allocate a prezeroed page */
+#define	VM_ALLOC_NORECLAIM	0x0080	/* (c) Do not reclaim after failure */
 #define	VM_ALLOC_NOOBJ		0x0100	/* (acg) No associated object */
 #define	VM_ALLOC_NOBUSY		0x0200	/* (acgp) Do not excl busy the page */
 #define	VM_ALLOC_NOCREAT	0x0400	/* (gp) Don't create a page */
@@ -570,6 +571,8 @@ malloc2vm_flags(int malloc_flags)
 		pflags |= VM_ALLOC_NOWAIT;
 	if ((malloc_flags & M_WAITOK))
 		pflags |= VM_ALLOC_WAITOK;
+	if ((malloc_flags & M_NORECLAIM))
+		pflags |= VM_ALLOC_NORECLAIM;
 	return (pflags);
 }
 #endif