git: 6c406b5b9312 - main - exterror(9): add infra for bufs and bios

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Tue, 04 Nov 2025 04:11:57 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=6c406b5b93125d030f0e63716ff389ce1a6ec4c5

commit 6c406b5b93125d030f0e63716ff389ce1a6ec4c5
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2025-10-25 09:18:28 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2025-11-04 04:11:12 +0000

    exterror(9): add infra for bufs and bios
    
    The extended error can be stored in either struct bio or struct buf;
    its presence is indicated by the BIO_EXTERR flag in bio_flags
    (b_ioflags for bufs).  At some strategic places, it is copied into
    the current thread's extended error.
    
    This structure is required because an I/O request from the top might
    pass down through several I/O threads, and the context that can report
    a meaningful extended error does not belong to the thread that
    initiated the I/O.
    
    Sizes before the change, on amd64 nodebug:
    sizeof(struct buf) =  456
    sizeof(struct bio) = 376
    
    after:
    sizeof(struct buf) =  496
    sizeof(struct bio) = 408
    
    WIP: more geom providers should handle BIO_EXTERR when passing cloned
    bios down and then handling completions.
    
    Reviewed by:    mckusick
    Sponsored by:   The FreeBSD Foundation
    Differential revision:  https://reviews.freebsd.org/D53351
---
 sys/geom/geom_dev.c  |  4 ++++
 sys/geom/geom_disk.c | 10 ++++++++--
 sys/geom/geom_subr.c | 10 ++++++++--
 sys/geom/geom_vfs.c  |  7 ++++++-
 sys/kern/vfs_bio.c   | 15 +++++++++++----
 sys/sys/bio.h        |  7 ++++++-
 sys/sys/buf.h        | 10 +++++++++-
 sys/sys/exterr_cat.h |  2 ++
 8 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c
index db0bc77a752f..a723d06334a0 100644
--- a/sys/geom/geom_dev.c
+++ b/sys/geom/geom_dev.c
@@ -734,6 +734,10 @@ g_dev_done(struct bio *bp2)
 		g_trace(G_T_BIO, "g_dev_done(%p) had error %d",
 		    bp2, bp2->bio_error);
 		bp->bio_flags |= BIO_ERROR;
+		if ((bp2->bio_flags & BIO_EXTERR) != 0) {
+			bp->bio_flags |= BIO_EXTERR;
+			bp->bio_exterr = bp2->bio_exterr;
+		}
 	} else {
 		if (bp->bio_cmd == BIO_READ)
 			KNOTE_UNLOCKED(&sc->sc_selinfo.si_note, NOTE_READ);
diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c
index 9dbf00371dba..b267130d1e0c 100644
--- a/sys/geom/geom_disk.c
+++ b/sys/geom/geom_disk.c
@@ -235,8 +235,14 @@ g_disk_done(struct bio *bp)
 	bp2 = bp->bio_parent;
 	binuptime(&now);
 	mtx_lock(&sc->done_mtx);
-	if (bp2->bio_error == 0)
-		bp2->bio_error = bp->bio_error;
+	if (bp2->bio_error == 0) {
+		if ((bp->bio_flags & BIO_EXTERR) != 0) {
+			bp2->bio_flags |= BIO_EXTERR;
+			bp2->bio_exterr = bp->bio_exterr;
+		} else {
+			bp2->bio_error = bp->bio_error;
+		}
+	}
 	bp2->bio_completed += bp->bio_length - bp->bio_resid;
 
 	if (bp->bio_cmd == BIO_READ)
diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c
index 2a6ce1ab6486..c70d55c6c321 100644
--- a/sys/geom/geom_subr.c
+++ b/sys/geom/geom_subr.c
@@ -1162,8 +1162,14 @@ g_std_done(struct bio *bp)
 	struct bio *bp2;
 
 	bp2 = bp->bio_parent;
-	if (bp2->bio_error == 0)
-		bp2->bio_error = bp->bio_error;
+	if (bp2->bio_error == 0) {
+		if ((bp->bio_flags & BIO_EXTERR) != 0) {
+			bp2->bio_flags |= BIO_EXTERR;
+			bp2->bio_exterr = bp->bio_exterr;
+		} else {
+			bp2->bio_error = bp->bio_error;
+		}
+	}
 	bp2->bio_completed += bp->bio_completed;
 	g_destroy_bio(bp);
 	bp2->bio_inbed++;
diff --git a/sys/geom/geom_vfs.c b/sys/geom/geom_vfs.c
index 9b5e5a84191f..f074ac43d245 100644
--- a/sys/geom/geom_vfs.c
+++ b/sys/geom/geom_vfs.c
@@ -26,9 +26,11 @@
  * SUCH DAMAGE.
  */
 
+#define	EXTERR_CATEGORY	EXTERR_CAT_GEOMVFS
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
+#include <sys/exterrvar.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -156,10 +158,13 @@ g_vfs_done(struct bio *bip)
 			    " suppressing further ENXIO");
 		}
 	}
-	bp->b_error = bip->bio_error;
 	bp->b_ioflags = bip->bio_flags;
 	if (bip->bio_error)
 		bp->b_ioflags |= BIO_ERROR;
+	if ((bp->b_ioflags & BIO_EXTERR) != 0)
+		bp->b_exterr = bip->bio_exterr;
+	else
+		bp->b_error = bip->bio_error;
 	bp->b_resid = bp->b_bcount - bip->bio_completed;
 	g_destroy_bio(bip);
 
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 19c39e42bafa..22b7fe8d059a 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -44,6 +44,7 @@
  * see man buf(9) for more info.
  */
 
+#define	EXTERR_CATEGORY	EXTERR_CAT_VFSBIO
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/asan.h>
@@ -55,6 +56,7 @@
 #include <sys/counter.h>
 #include <sys/devicestat.h>
 #include <sys/eventhandler.h>
+#include <sys/exterrvar.h>
 #include <sys/fail.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
@@ -1775,7 +1777,6 @@ buf_alloc(struct bufdomain *bd)
 	bp->b_blkno = bp->b_lblkno = 0;
 	bp->b_offset = NOOFFSET;
 	bp->b_iodone = 0;
-	bp->b_error = 0;
 	bp->b_resid = 0;
 	bp->b_bcount = 0;
 	bp->b_npages = 0;
@@ -1785,6 +1786,7 @@ buf_alloc(struct bufdomain *bd)
 	bp->b_fsprivate1 = NULL;
 	bp->b_fsprivate2 = NULL;
 	bp->b_fsprivate3 = NULL;
+	exterr_clear(&bp->b_exterr);
 	LIST_INIT(&bp->b_dep);
 
 	return (bp);
@@ -2276,7 +2278,7 @@ breadn_flags(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size,
 		}
 		if ((flags & GB_CVTENXIO) != 0)
 			bp->b_xflags |= BX_CVTENXIO;
-		bp->b_ioflags &= ~BIO_ERROR;
+		bp->b_ioflags &= ~(BIO_ERROR | BIO_EXTERR);
 		if (bp->b_rcred == NOCRED && cred != NOCRED)
 			bp->b_rcred = crhold(cred);
 		vfs_busy_pages(bp, 0);
@@ -2353,7 +2355,7 @@ bufwrite(struct buf *bp)
 	bundirty(bp);
 
 	bp->b_flags &= ~B_DONE;
-	bp->b_ioflags &= ~BIO_ERROR;
+	bp->b_ioflags &= ~(BIO_ERROR | BIO_EXTERR);
 	bp->b_flags |= B_CACHE;
 	bp->b_iocmd = BIO_WRITE;
 
@@ -4520,8 +4522,11 @@ biowait(struct bio *bp, const char *wmesg)
 	while ((bp->bio_flags & BIO_DONE) == 0)
 		msleep(bp, mtxp, PRIBIO, wmesg, 0);
 	mtx_unlock(mtxp);
-	if (bp->bio_error != 0)
+	if (bp->bio_error != 0) {
+		if ((bp->bio_flags & BIO_EXTERR) != 0)
+			return (exterr_set_from(&bp->bio_exterr));
 		return (bp->bio_error);
+	}
 	if (!(bp->bio_flags & BIO_ERROR))
 		return (0);
 	return (EIO);
@@ -4568,6 +4573,8 @@ bufwait(struct buf *bp)
 		return (EINTR);
 	}
 	if (bp->b_ioflags & BIO_ERROR) {
+		if ((bp->b_ioflags & BIO_EXTERR) != 0)
+			exterr_set_from(&bp->b_exterr);
 		return (bp->b_error ? bp->b_error : EIO);
 	} else {
 		return (0);
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index 74d2b03bd180..fa7f19961ebd 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -37,6 +37,7 @@
 #ifndef _SYS_BIO_H_
 #define	_SYS_BIO_H_
 
+#include <sys/_exterr.h>
 #include <sys/queue.h>
 #include <sys/disk_zone.h>
 
@@ -65,6 +66,7 @@
 #define	BIO_TRANSIENT_MAPPING	0x20
 #define	BIO_VLIST	0x40
 #define	BIO_SWAP	0x200	/* Swap-related I/O */
+#define	BIO_EXTERR	0x2000
 #define BIO_SPEEDUP_WRITE	0x4000	/* Resource shortage at upper layers */
 #define BIO_SPEEDUP_TRIM	0x8000	/* Resource shortage at upper layers */
 
@@ -94,7 +96,6 @@ struct bio {
 	struct vm_page **bio_ma;	/* Or unmapped. */
 	int	bio_ma_offset;		/* Offset in the first page of bio_ma. */
 	int	bio_ma_n;		/* Number of pages in bio_ma. */
-	int	bio_error;		/* Errno for BIO_ERROR. */
 	long	bio_resid;		/* Remaining I/O in bytes. */
 	void	(*bio_done)(struct bio *);
 	void	*bio_driver1;		/* Private use by the provider. */
@@ -130,8 +131,12 @@ struct bio {
 
 	/* XXX: these go away when bio chaining is introduced */
 	daddr_t bio_pblkno;               /* physical block number */
+	struct kexterr bio_exterr;
 };
 
+/* Errno for BIO_ERROR. */
+#define	bio_error	bio_exterr.error
+
 struct uio;
 struct devstat;
 
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 064d5cb05214..f08f05e6d50f 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -37,6 +37,7 @@
 #ifndef _SYS_BUF_H_
 #define	_SYS_BUF_H_
 
+#include <sys/_exterr.h>
 #include <sys/bufobj.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
@@ -98,7 +99,6 @@ struct buf {
 	long		b_bcount;
 	void		*b_caller1;
 	caddr_t		b_data;
-	int		b_error;
 	uint16_t	b_iocmd;	/* BIO_* bio_cmd from bio.h */
 	uint16_t	b_ioflags;	/* BIO_* bio_flags from bio.h */
 	off_t		b_iooffset;
@@ -153,10 +153,12 @@ struct buf {
 #elif defined(BUF_TRACKING)
 	const char	*b_io_tracking;
 #endif
+	struct	kexterr b_exterr;
 	struct	vm_page *b_pages[];
 };
 
 #define b_object	b_bufobj->bo_object
+#define	b_error		b_exterr.error
 
 /*
  * These flags are kept in b_flags.
@@ -390,6 +392,12 @@ struct buf {
 	_lockmgr_disown(&(bp)->b_lock, LOCK_FILE, LOCK_LINE)
 #endif
 
+#define	BUF_EXTERR_FROM_CURTHR(bp)	/* bp <- curthread's exterr */	\
+	((bp)->b_exterr = curthread->td_kexterr)
+
+#define	BUF_EXTERR_TO_CURTHR(bp)	/* curthread's exterr <- bp */	\
+	(curthread->td_kexterr = (bp)->b_exterr)
+
 #endif /* _KERNEL */
 
 struct buf_queue_head {
diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h
index 43f31e1d5dd6..34a4b9f86694 100644
--- a/sys/sys/exterr_cat.h
+++ b/sys/sys/exterr_cat.h
@@ -21,6 +21,8 @@
 #define	EXTERR_CAT_BRIDGE	7
 #define	EXTERR_CAT_SWAP		8
 #define	EXTERR_CAT_VFSSYSCALL	9
+#define	EXTERR_CAT_VFSBIO	10
+#define	EXTERR_CAT_GEOMVFS	11
 
 #endif