git: d44095394237 - main - vm_domainset: Only probe domains once when iterating, instead of up to 4 times
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 09 Sep 2025 07:58:00 UTC
The branch main has been updated by olce:
URL: https://cgit.FreeBSD.org/src/commit/?id=d440953942372ca275d0743a6e220631bde440ee
commit d440953942372ca275d0743a6e220631bde440ee
Author: Olivier Certner <olce@FreeBSD.org>
AuthorDate: 2025-07-07 20:29:12 +0000
Commit: Olivier Certner <olce@FreeBSD.org>
CommitDate: 2025-09-09 07:56:45 +0000
vm_domainset: Only probe domains once when iterating, instead of up to 4 times
Because of the 'di_minskip' logic, which resets the initial domain, an
iterator starts by considering only domains that have more than
'free_min' pages in a first phase, and then all domains in a second one.
Non-"underpaged" domains are thus examined twice, even if the allocation
can't succeed.
Re-scanning the same domains just wastes time: allocation attempts that
must not wait may rely on failing sooner, and those that must wait will
loop anyway (a domain that was previously scanned twice has more pages
than 'free_min', and consequently vm_wait_doms() will just return
immediately).
Additionally, the DOMAINSET_POLICY_FIRSTTOUCH policy would aggravate
this situation by reexamining the current domain at the end of
each phase. In the case of a single domain, this means doubling yet again
the number of times domain 0 is probed (up to 4 times in total).
The implementation consists of adding two 'domainset_t' fields to 'struct
vm_domainset_iter' (and removing the 'di_n' counter). The first,
'di_remain_mask', contains the domains still to be explored in the current
phase, the first phase concerning only domains with more pages than
'free_min' ('di_minskip' true) and the second one concerning only
domains previously under 'free_min' ('di_minskip' false). The second,
'di_min_mask', holds the domains with fewer pages than 'free_min'
encountered during the first phase, and serves as the reset value for
'di_remain_mask' when transitioning to the second phase.
PR: 277476
Fixes: e5818a53dbd2 ("Implement several enhancements to NUMA policies.")
Fixes: 23984ce5cd24 ("Avoid resource deadlocks when one domain has exhausted its memory."...)
MFC after: 10 days
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D51249
---
sys/vm/vm_domainset.c | 53 ++++++++++++++++++++++++++++++---------------------
sys/vm/vm_domainset.h | 6 +++++-
2 files changed, 36 insertions(+), 23 deletions(-)
diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c
index b44bdb96b0d4..bd15449559a5 100644
--- a/sys/vm/vm_domainset.c
+++ b/sys/vm/vm_domainset.c
@@ -131,7 +131,8 @@ static void
vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
{
- KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n));
+ KASSERT(!DOMAINSET_EMPTY(&di->di_remain_mask),
+ ("%s: Already iterated on all domains", __func__));
switch (di->di_policy) {
case DOMAINSET_POLICY_FIRSTTOUCH:
/*
@@ -161,37 +162,39 @@ vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
switch (di->di_policy) {
case DOMAINSET_POLICY_FIRSTTOUCH:
*domain = PCPU_GET(domain);
- if (DOMAINSET_ISSET(*domain, &di->di_valid_mask)) {
- /*
- * Add an extra iteration because we will visit the
- * current domain a second time in the rr iterator.
- */
- di->di_n = di->di_domain->ds_cnt + 1;
+ if (DOMAINSET_ISSET(*domain, &di->di_valid_mask))
break;
- }
/*
* To prevent impossible allocations we convert an invalid
* first-touch to round-robin.
*/
/* FALLTHROUGH */
case DOMAINSET_POLICY_ROUNDROBIN:
- di->di_n = di->di_domain->ds_cnt;
vm_domainset_iter_rr(di, domain);
break;
case DOMAINSET_POLICY_PREFER:
*domain = di->di_domain->ds_prefer;
- di->di_n = di->di_domain->ds_cnt;
break;
case DOMAINSET_POLICY_INTERLEAVE:
vm_domainset_iter_interleave(di, domain);
- di->di_n = di->di_domain->ds_cnt;
break;
default:
panic("%s: Unknown policy %d", __func__, di->di_policy);
}
- KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n));
KASSERT(*domain < vm_ndomains,
("%s: Invalid domain %d", __func__, *domain));
+
+ /* Initialize the mask of domains to visit. */
+ if (di->di_minskip) {
+ /* Phase 1: Skip domains under 'v_free_min'. */
+ DOMAINSET_COPY(&di->di_valid_mask, &di->di_remain_mask);
+ DOMAINSET_ZERO(&di->di_min_mask);
+ } else
+ /* Phase 2: Browse domains that were under 'v_free_min'. */
+ DOMAINSET_COPY(&di->di_min_mask, &di->di_remain_mask);
+
+ /* Mark first domain as seen. */
+ DOMAINSET_CLR(*domain, &di->di_remain_mask);
}
void
@@ -225,12 +228,15 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
if (__predict_false(DOMAINSET_EMPTY(&di->di_valid_mask)))
return (ENOMEM);
- /* If there are more domains to visit we run the iterator. */
- while (--di->di_n != 0) {
+ /* If there are more domains to visit in this phase, run the iterator. */
+ while (!DOMAINSET_EMPTY(&di->di_remain_mask)) {
vm_domainset_iter_next(di, domain);
- if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
- (!di->di_minskip || !vm_page_count_min_domain(*domain)))
- return (0);
+ if (DOMAINSET_ISSET(*domain, &di->di_remain_mask)) {
+ DOMAINSET_CLR(*domain, &di->di_remain_mask);
+ if (!di->di_minskip || !vm_page_count_min_domain(*domain))
+ return (0);
+ DOMAINSET_SET(*domain, &di->di_min_mask);
+ }
}
/* If we skipped domains below min restart the search. */
@@ -298,12 +304,15 @@ vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
if (DOMAINSET_EMPTY(&di->di_valid_mask))
return (ENOMEM);
- /* If there are more domains to visit we run the iterator. */
- while (--di->di_n != 0) {
+ /* If there are more domains to visit in this phase, run the iterator. */
+ while (!DOMAINSET_EMPTY(&di->di_remain_mask)) {
vm_domainset_iter_next(di, domain);
- if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
- (!di->di_minskip || !vm_page_count_min_domain(*domain)))
- return (0);
+ if (DOMAINSET_ISSET(*domain, &di->di_remain_mask)) {
+ DOMAINSET_CLR(*domain, &di->di_remain_mask);
+ if (!di->di_minskip || !vm_page_count_min_domain(*domain))
+ return (0);
+ DOMAINSET_SET(*domain, &di->di_min_mask);
+ }
}
/* If we skipped domains below min restart the search. */
diff --git a/sys/vm/vm_domainset.h b/sys/vm/vm_domainset.h
index 0d325a642f40..b223a4d03df9 100644
--- a/sys/vm/vm_domainset.h
+++ b/sys/vm/vm_domainset.h
@@ -33,11 +33,15 @@ struct pctrie_iter;
struct vm_domainset_iter {
struct domainset *di_domain;
unsigned int *di_iter;
+ /* Initialized from 'di_domain', initial value after reset. */
domainset_t di_valid_mask;
+ /* Domains to browse in the current phase. */
+ domainset_t di_remain_mask;
+ /* Domains skipped in phase 1 because under 'v_free_min'. */
+ domainset_t di_min_mask;
vm_pindex_t di_offset;
int di_flags;
uint16_t di_policy;
- domainid_t di_n;
bool di_minskip;
};