git: 8f58b693814e - main - vm_grab_pages_unlocked: read all the pages at once
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 29 Apr 2025 06:33:46 UTC
The branch main has been updated by dougm:
URL: https://cgit.FreeBSD.org/src/commit/?id=8f58b693814e06afc51eade99a8c38e4abe9a804
commit 8f58b693814e06afc51eade99a8c38e4abe9a804
Author: Doug Moore <dougm@FreeBSD.org>
AuthorDate: 2025-04-29 06:28:32 +0000
Commit: Doug Moore <dougm@FreeBSD.org>
CommitDate: 2025-04-29 06:28:32 +0000
vm_page_grab_pages_unlocked: read all the pages at once
Define a function pctrie_lookup_range_unlocked, which looks up
consecutive elements in the pctrie, until a count limit is reached or
an item is discovered missing, and writes them into the first elements
of an array, returning the count of items found. It does not require
a lock. It uses an iterator, strictly between smr_enter and smr_exit;
during that window some of the nodes that were in the pctrie on entry
may come to have only one child, but none of them can be freed before exit.
Define a vm_radix interface to it for reading page ranges.
Change vm_page_grab_pages_unlocked to read all requested pages at
once, without relying on the existence of a linked list of pages.
Drop smr arguments from pctrie_iter_stride, since there's no smr
version in use. Drop _pctrie_iter_lookup, since it isn't needed.
Reviewed by: alc, kib, markj
Differential Revision: https://reviews.freebsd.org/D47114
---
sys/kern/subr_pctrie.c | 83 +++++++++++++++++++++++++++++++-------------------
sys/sys/pctrie.h | 26 ++++++++++++++--
sys/vm/vm_page.c | 24 ++++-----------
sys/vm/vm_radix.h | 12 ++++++++
4 files changed, 93 insertions(+), 52 deletions(-)
diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c
index 03cf3e1e5990..feb29cf82330 100644
--- a/sys/kern/subr_pctrie.c
+++ b/sys/kern/subr_pctrie.c
@@ -494,7 +494,7 @@ _pctrie_lookup_node(struct pctrie *ptree, struct pctrie_node *node,
* search for a value matching 'index'.
*/
while (node != NULL) {
- KASSERT(!powerof2(node->pn_popmap),
+ KASSERT(access == PCTRIE_SMR || !powerof2(node->pn_popmap),
("%s: freed node in iter path", __func__));
if (!pctrie_keybarr(node, index, &slot))
break;
@@ -520,29 +520,20 @@ _pctrie_lookup_node(struct pctrie *ptree, struct pctrie_node *node,
}
/*
- * Returns the value stored at a given index value, possibly NULL.
+ * Returns the value stored at a given index value, possibly NULL, assuming
+ * access is externally synchronized by a lock.
*/
-static __always_inline uint64_t *
-_pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index, smr_t smr,
- enum pctrie_access access)
+uint64_t *
+pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
{
struct pctrie_node *node;
it->index = index;
node = _pctrie_lookup_node(it->ptree, it->node, index, &it->node,
- smr, access);
+ NULL, PCTRIE_LOCKED);
return (pctrie_match_value(node, index));
}
-/*
- * Returns the value stored at a given index value, possibly NULL.
- */
-uint64_t *
-pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
-{
- return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED));
-}
-
/*
* Insert the val in the trie, starting search with iterator. Return a pointer
* to indicate where a new node must be allocated to complete insertion.
@@ -581,9 +572,8 @@ pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
* Returns the value stored at a fixed offset from the current index value,
* possibly NULL.
*/
-static __always_inline uint64_t *
-_pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr,
- enum pctrie_access access)
+uint64_t *
+pctrie_iter_stride(struct pctrie_iter *it, int stride)
{
uint64_t index = it->index + stride;
@@ -594,17 +584,7 @@ _pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr,
if ((index < it->limit) != (it->index < it->limit))
return (NULL);
- return (_pctrie_iter_lookup(it, index, smr, access));
-}
-
-/*
- * Returns the value stored at a fixed offset from the current index value,
- * possibly NULL.
- */
-uint64_t *
-pctrie_iter_stride(struct pctrie_iter *it, int stride)
-{
- return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED));
+ return (pctrie_iter_lookup(it, index));
}
/*
@@ -614,7 +594,7 @@ pctrie_iter_stride(struct pctrie_iter *it, int stride)
uint64_t *
pctrie_iter_next(struct pctrie_iter *it)
{
- return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED));
+ return (pctrie_iter_stride(it, 1));
}
/*
@@ -624,7 +604,48 @@ pctrie_iter_next(struct pctrie_iter *it)
uint64_t *
pctrie_iter_prev(struct pctrie_iter *it)
{
- return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED));
+ return (pctrie_iter_stride(it, -1));
+}
+
+/*
+ * Returns the number of contiguous, non-NULL entries read into the value[]
+ * array, without requiring an external lock. These entries *may* never have
+ * been in the pctrie all at one time, but for a series of times t0, t1, t2,
+ * ..., with ti <= t(i+1), value[i] was in the trie at time ti.
+ */
+int
+pctrie_lookup_range_unlocked(struct pctrie *ptree, uint64_t index,
+ uint64_t *value[], int count, smr_t smr)
+{
+ struct pctrie_node *parent, *node;
+ int base, end, i;
+
+ parent = NULL;
+ smr_enter(smr);
+ for (i = 0; i < count;) {
+ node = _pctrie_lookup_node(ptree, parent, index + i, &parent,
+ smr, PCTRIE_SMR);
+ value[i] = pctrie_match_value(node, index + i);
+ if (value[i] == NULL)
+ break;
+ ++i;
+ base = (index + i) % PCTRIE_COUNT;
+ if (base == 0 || parent == NULL || parent->pn_clev != 0)
+ continue;
+ end = MIN(count, i + PCTRIE_COUNT - base);
+ while (i < end) {
+ node = pctrie_node_load(&parent->pn_child[base++],
+ smr, PCTRIE_SMR);
+ value[i] = pctrie_toval(node);
+ if (value[i] == NULL)
+ break;
+ ++i;
+ }
+ if (i < end)
+ break;
+ }
+ smr_exit(smr);
+ return (i);
}
/*
diff --git a/sys/sys/pctrie.h b/sys/sys/pctrie.h
index dac18b58498b..01d9f34f11b9 100644
--- a/sys/sys/pctrie.h
+++ b/sys/sys/pctrie.h
@@ -83,6 +83,19 @@ name##_PCTRIE_LOOKUP_UNLOCKED(struct pctrie *ptree, uint64_t key) \
return name##_PCTRIE_VAL2PTR(pctrie_lookup_unlocked(ptree, \
key, (smr))); \
} \
+ \
+static __inline __unused int \
+name##_PCTRIE_LOOKUP_RANGE_UNLOCKED(struct pctrie *ptree, uint64_t key, \
+ struct type *value[], int count) \
+{ \
+ uint64_t **data = (uint64_t **)value; \
+ \
+ count = pctrie_lookup_range_unlocked(ptree, key, data, count, \
+ (smr)); \
+ for (int i = 0; i < count; i++) \
+ value[i] = name##_PCTRIE_NZVAL2PTR(data[i]); \
+ return (count); \
+} \
#define PCTRIE_DEFINE(name, type, field, allocfn, freefn) \
\
@@ -94,13 +107,18 @@ CTASSERT(sizeof(((struct type *)0)->field) == sizeof(uint64_t)); \
CTASSERT((__offsetof(struct type, field) & (sizeof(uint32_t) - 1)) == 0); \
\
static __inline struct type * \
-name##_PCTRIE_VAL2PTR(uint64_t *val) \
+name##_PCTRIE_NZVAL2PTR(uint64_t *val) \
{ \
+ return (struct type *) \
+ ((uintptr_t)val - __offsetof(struct type, field)); \
+} \
\
+static __inline struct type * \
+name##_PCTRIE_VAL2PTR(uint64_t *val) \
+{ \
if (val == NULL) \
return (NULL); \
- return (struct type *) \
- ((uintptr_t)val - __offsetof(struct type, field)); \
+ return (name##_PCTRIE_NZVAL2PTR(val)); \
} \
\
static __inline uint64_t * \
@@ -365,6 +383,8 @@ void pctrie_insert_node(uint64_t *val, struct pctrie_node *parent,
uint64_t *pctrie_lookup(struct pctrie *ptree, uint64_t key);
uint64_t *pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t key,
smr_t smr);
+int pctrie_lookup_range_unlocked(struct pctrie *ptree,
+ uint64_t index, uint64_t *value[], int count, smr_t smr);
uint64_t *pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index);
uint64_t *pctrie_iter_stride(struct pctrie_iter *it, int stride);
uint64_t *pctrie_iter_next(struct pctrie_iter *it);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index e2f11eb57100..8dce6feaca09 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -5269,7 +5269,7 @@ vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex,
{
vm_page_t m;
int flags;
- int i;
+ int i, num_fetched;
KASSERT(count > 0,
("vm_page_grab_pages_unlocked: invalid page count %d", count));
@@ -5281,22 +5281,10 @@ vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex,
*/
flags = allocflags & ~VM_ALLOC_NOBUSY;
vm_page_grab_check(flags);
- m = NULL;
- for (i = 0; i < count; i++, pindex++) {
- /*
- * We may see a false NULL here because the previous page has
- * been removed or just inserted and the list is loaded without
- * barriers. Switch to radix to verify.
- */
- if (m == NULL || QMD_IS_TRASHED(m) || m->pindex != pindex ||
- atomic_load_ptr(&m->object) != object) {
- /*
- * This guarantees the result is instantaneously
- * correct.
- */
- m = NULL;
- }
- m = vm_page_acquire_unlocked(object, pindex, m, flags);
+ num_fetched = vm_radix_lookup_range_unlocked(&object->rtree, pindex,
+ ma, count);
+ for (i = 0; i < num_fetched; i++, pindex++) {
+ m = vm_page_acquire_unlocked(object, pindex, ma[i], flags);
if (m == PAGE_NOT_ACQUIRED)
return (i);
if (m == NULL)
@@ -5308,8 +5296,8 @@ vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex,
}
/* m will still be wired or busy according to flags. */
vm_page_grab_release(m, allocflags);
+ /* vm_page_acquire_unlocked() may not return ma[i]. */
ma[i] = m;
- m = TAILQ_NEXT(m, listq);
}
if (i == count || (allocflags & VM_ALLOC_NOCREAT) != 0)
return (i);
diff --git a/sys/vm/vm_radix.h b/sys/vm/vm_radix.h
index df3639091d58..231075d32754 100644
--- a/sys/vm/vm_radix.h
+++ b/sys/vm/vm_radix.h
@@ -121,6 +121,18 @@ vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
return (VM_RADIX_PCTRIE_LOOKUP_UNLOCKED(&rtree->rt_trie, index));
}
+/*
+ * Returns the number of contiguous, non-NULL pages read into the ma[]
+ * array, without requiring an external lock.
+ */
+static __inline int
+vm_radix_lookup_range_unlocked(struct vm_radix *rtree, vm_pindex_t index,
+ vm_page_t ma[], int count)
+{
+ return (VM_RADIX_PCTRIE_LOOKUP_RANGE_UNLOCKED(&rtree->rt_trie, index,
+ ma, count));
+}
+
/*
* Initialize an iterator for vm_radix.
*/