git: 0dc98b57f321 - main - getblk: track "non-sterile" bufobj to avoid bo lock on miss if sterile

From: Ryan Libby <rlibby_at_FreeBSD.org>
Date: Sun, 16 Jun 2024 21:18:33 UTC
The branch main has been updated by rlibby:

URL: https://cgit.FreeBSD.org/src/commit/?id=0dc98b57f321945b24a160282eb402bf0a6e4220

commit 0dc98b57f321945b24a160282eb402bf0a6e4220
Author:     Ryan Libby <rlibby@FreeBSD.org>
AuthorDate: 2024-06-16 21:09:45 +0000
Commit:     Ryan Libby <rlibby@FreeBSD.org>
CommitDate: 2024-06-16 21:09:45 +0000

    getblk: track "non-sterile" bufobj to avoid bo lock on miss if sterile
    
    This is a scheme to avoid taking the bufobj lock and doing a second
    lookup in the case where getblk's unlocked lookup finds no buf.  Was
    there really no buf, or were we in the middle of a reassignbuf race?
    By tracking any use of reassignbuf with a flag, we know that there
    cannot have been a race when no reassignbuf has occurred.
    Because this scheme is spoiled by the first use of reassignbuf, it is
    mainly beneficial for vnodes that are never expected to use dirty
    bufs at all.
    
    Reviewed by:    kib
    Sponsored by:   Dell EMC Isilon
    Differential Revision:  https://reviews.freebsd.org/D45571
---
 sys/kern/vfs_bio.c  |  8 +++++++-
 sys/kern/vfs_subr.c | 10 ++++++++++
 sys/sys/bufobj.h    |  1 +
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 4f1df9711dec..7bcc61c27109 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -4002,9 +4002,15 @@ getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag,
 		/*
 		 * With GB_NOCREAT we must be sure about not finding the buffer
 		 * as it may have been reassigned during unlocked lookup.
+		 * If BO_NONSTERILE is still unset, no reassign has occurred.
 		 */
-		if ((flags & GB_NOCREAT) != 0)
+		if ((flags & GB_NOCREAT) != 0) {
+			/* Ensure bo_flag is loaded after gbincore_unlocked. */
+			atomic_thread_fence_acq();
+			if ((bo->bo_flag & BO_NONSTERILE) == 0)
+				return (EEXIST);
 			goto loop;
+		}
 		goto newbuf_unlocked;
 	}
 
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 398eda7ed897..f8f4c2a868f3 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -3207,6 +3207,16 @@ reassignbuf(struct buf *bp)
 	    bp, bp->b_vp, bp->b_flags);
 
 	BO_LOCK(bo);
+	if ((bo->bo_flag & BO_NONSTERILE) == 0) {
+		/*
+		 * Coordinate with getblk's unlocked lookup.  Make
+		 * BO_NONSTERILE visible before the first reassignbuf produces
+		 * any side effect.  This could be outside the bo lock if we
+		 * used a separate atomic flag field.
+		 */
+		bo->bo_flag |= BO_NONSTERILE;
+		atomic_thread_fence_rel();
+	}
 	buf_vlist_remove(bp);
 
 	/*
diff --git a/sys/sys/bufobj.h b/sys/sys/bufobj.h
index a6a9c5c2bf01..1c8fdcebe4d4 100644
--- a/sys/sys/bufobj.h
+++ b/sys/sys/bufobj.h
@@ -116,6 +116,7 @@ struct bufobj {
 #define	BO_WWAIT	(1 << 1)	/* Wait for output to complete */
 #define	BO_DEAD		(1 << 2)	/* Dead; only with INVARIANTS */
 #define	BO_NOBUFS	(1 << 3)	/* No bufs allowed */
+#define	BO_NONSTERILE	(1 << 4)	/* Ever called reassignbuf() */
 
 #define	BO_LOCKPTR(bo)		(&(bo)->bo_lock)
 #define	BO_LOCK(bo)		rw_wlock(BO_LOCKPTR((bo)))