git: e2414d91d33f - main - vfs_subr: maintain sorted tailq
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 22 Oct 2024 21:58:42 UTC
The branch main has been updated by dougm:
URL: https://cgit.FreeBSD.org/src/commit/?id=e2414d91d33f31d6f2c9f49eef7a1553b5798c9e
commit e2414d91d33f31d6f2c9f49eef7a1553b5798c9e
Author: Doug Moore <dougm@FreeBSD.org>
AuthorDate: 2024-10-22 21:54:34 +0000
Commit: Doug Moore <dougm@FreeBSD.org>
CommitDate: 2024-10-22 21:54:34 +0000
vfs_subr: maintain sorted tailq
Pctries are based on unsigned index values. Type daddr_t is
signed. Using daddr_t as an index type for a pctrie works, except that
the pctrie considers negative values greater than nonnegative
ones. Building a sorted tailq of bufs, based on pctrie results, sorts
negative daddr_ts larger than nonnegative ones, and makes code that
depends on the tailq being actually sorted broken.
Write wrappers for the functions that do pctrie operations that depend
on index ordering that fix the order problem, and use them in place of
direct pctrie operations.
PR: 282134
Reported by: pho
Reviewed by: kib, markj
Tested by: pho
Fixes: 2c8caa4b3925aa7335 vfs_subr: optimize inval_buf_range
Differential Revision: https://reviews.freebsd.org/D47200
---
sys/kern/vfs_subr.c | 56 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 44 insertions(+), 12 deletions(-)
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index ff18c50546dd..3b00fdbe93b4 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -537,6 +537,42 @@ buf_trie_free(struct pctrie *ptree, void *node)
PCTRIE_DEFINE_SMR(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free,
buf_trie_smr);
+/*
+ * Lookup the next element greater than or equal to lblkno, accounting for the
+ * fact that, for pctries, negative values are greater than nonnegative ones.
+ */
+static struct buf *
+buf_lookup_ge(struct bufv *bv, daddr_t lblkno)
+{
+ struct buf *bp;
+
+ bp = BUF_PCTRIE_LOOKUP_GE(&bv->bv_root, lblkno);
+ if (bp == NULL && lblkno < 0)
+ bp = BUF_PCTRIE_LOOKUP_GE(&bv->bv_root, 0);
+ if (bp != NULL && bp->b_lblkno < lblkno)
+ bp = NULL;
+ return (bp);
+}
+
+/*
+ * Insert bp, and find the next element smaller than bp, accounting for the fact
+ * that, for pctries, negative values are greater than nonnegative ones.
+ */
+static int
+buf_insert_lookup_le(struct bufv *bv, struct buf *bp, struct buf **n)
+{
+ int error;
+
+ error = BUF_PCTRIE_INSERT_LOOKUP_LE(&bv->bv_root, bp, n);
+ if (error != EEXIST) {
+ if (*n == NULL && bp->b_lblkno >= 0)
+ *n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, ~0L);
+ if (*n != NULL && (*n)->b_lblkno >= bp->b_lblkno)
+ *n = NULL;
+ }
+ return (error);
+}
+
/*
* Initialize the vnode management data structures.
*
@@ -2489,9 +2525,8 @@ bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, daddr_t endn)
for (lblkno = startn;;) {
again:
- bp = BUF_PCTRIE_LOOKUP_GE(&bufv->bv_root, lblkno);
- if (bp == NULL || bp->b_lblkno >= endn ||
- bp->b_lblkno < startn)
+ bp = buf_lookup_ge(bufv, lblkno);
+ if (bp == NULL || bp->b_lblkno >= endn)
break;
error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
LK_INTERLOCK, BO_LOCKPTR(bo), "brlsfl", 0, 0);
@@ -2628,9 +2663,8 @@ v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo,
clean = true;
do {
bv = clean ? &bo->bo_clean : &bo->bo_dirty;
- bp = BUF_PCTRIE_LOOKUP_GE(&bv->bv_root, startlbn);
- if (bp == NULL || bp->b_lblkno >= endlbn ||
- bp->b_lblkno < startlbn)
+ bp = buf_lookup_ge(bv, startlbn);
+ if (bp == NULL)
continue;
TAILQ_FOREACH_FROM_SAFE(bp, &bv->bv_hd, b_bobufs, nbp) {
if (bp->b_lblkno >= endlbn)
@@ -2709,13 +2743,13 @@ buf_vlist_find_or_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
else
bv = &bo->bo_clean;
- error = BUF_PCTRIE_INSERT_LOOKUP_LE(&bv->bv_root, bp, &n);
+ error = buf_insert_lookup_le(bv, bp, &n);
if (n == NULL) {
KASSERT(error != EEXIST,
("buf_vlist_add: EEXIST but no existing buf found: bp %p",
bp));
} else {
- KASSERT((uint64_t)n->b_lblkno <= (uint64_t)bp->b_lblkno,
+ KASSERT(n->b_lblkno <= bp->b_lblkno,
("buf_vlist_add: out of order insert/lookup: bp %p n %p",
bp, n));
KASSERT((n->b_lblkno == bp->b_lblkno) == (error == EEXIST),
@@ -2728,16 +2762,14 @@ buf_vlist_find_or_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
/* Keep the list ordered. */
if (n == NULL) {
KASSERT(TAILQ_EMPTY(&bv->bv_hd) ||
- (uint64_t)bp->b_lblkno <
- (uint64_t)TAILQ_FIRST(&bv->bv_hd)->b_lblkno,
+ bp->b_lblkno < TAILQ_FIRST(&bv->bv_hd)->b_lblkno,
("buf_vlist_add: queue order: "
"%p should be before first %p",
bp, TAILQ_FIRST(&bv->bv_hd)));
TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs);
} else {
KASSERT(TAILQ_NEXT(n, b_bobufs) == NULL ||
- (uint64_t)bp->b_lblkno <
- (uint64_t)TAILQ_NEXT(n, b_bobufs)->b_lblkno,
+ bp->b_lblkno < TAILQ_NEXT(n, b_bobufs)->b_lblkno,
("buf_vlist_add: queue order: "
"%p should be before next %p",
bp, TAILQ_NEXT(n, b_bobufs)));