git: 792ad4676ba9 - stable/14 - zfsd: Use vdev prop values for fault/degrade thresholds

From: Alan Somers <asomers_at_FreeBSD.org>
Date: Thu, 09 May 2024 20:19:57 UTC
The branch stable/14 has been updated by asomers:

URL: https://cgit.FreeBSD.org/src/commit/?id=792ad4676ba9ff95ea30e2a74590a65b723cac1d

commit 792ad4676ba9ff95ea30e2a74590a65b723cac1d
Author:     Alan Somers <asomers@FreeBSD.org>
AuthorDate: 2024-03-05 17:55:55 +0000
Commit:     Alan Somers <asomers@FreeBSD.org>
CommitDate: 2024-05-09 17:47:00 +0000

    zfsd: Use vdev prop values for fault/degrade thresholds
    
    ZED uses vdev props for setting disk fault/degrade thresholds; this
    patch enables zfsd to use the same vdev props for the same tasks.
    
    OpenZFS on Linux uses vdev props for ZED's disk fault/degrade
    thresholds. Originally the supported thresholds covered io and
    checksum events; this was recently extended to handle slow io events
    as well, see
    https://github.com/openzfs/zfs/commit/cbe882298e4ddc3917dfaf239eca475fe06d62d4
    
    This patch enables zfsd to use the same vdev props that ZED uses.
    After this patch is merged, both OSes will use the same vdev props to
    set retirement thresholds.
    
    It's worth noting that the threshold defaults differ between the two
    OSes. I've kept the existing defaults inside zfsd and DID NOT match
    them to what ZED does.
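    
    As an illustration (not part of this commit; the pool name "tank" and
    disk name "da0" below are made up), the thresholds can be inspected
    and tuned per leaf vdev with zpool(8) using the props documented in
    vdevprops(7):
    
        # Show the current fault/degrade thresholds for one leaf vdev
        zpool get checksum_n,checksum_t,io_n,io_t,slow_io_n,slow_io_t tank da0
        # Degrade da0 after 10 checksum errors within a 120 second window
        zpool set checksum_n=10 tank da0
        zpool set checksum_t=120 tank da0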
    
    Differential Revision: https://reviews.freebsd.org/D44043
    Relnotes:       yes
    Reviewed by:    asomers, allanjude
    Sponsored by:   Axcient
    Submitted by:   Alek Pinchuk <apinchuk@axcient.com>
    
    (cherry picked from commit 89f4f91dbfdcabe65bc7476bc5f13dfb837870fe)
    
    zfsd: fix unit tests after 89f4f91dbfdcabe65bc7476bc5f13dfb837870fe
    
    Reported by:    markj
    Sponsored by:   Axcient
    Reviewed by:    Alek Pinchuk <pinchuk.alek@gmail.com>
    Differential Revision: https://reviews.freebsd.org/D44744
    
    (cherry picked from commit 25038e8de6b4e5f2ffca821565b50a633eea499a)
---
 cddl/usr.sbin/zfsd/case_file.cc           | 98 ++++++++++++++++++++++++-------
 cddl/usr.sbin/zfsd/case_file.h            | 22 ++++---
 cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc |  3 +
 cddl/usr.sbin/zfsd/vdev.h                 |  2 +-
 cddl/usr.sbin/zfsd/zfsd.8                 | 58 ++++++++++++++----
 5 files changed, 139 insertions(+), 44 deletions(-)

diff --git a/cddl/usr.sbin/zfsd/case_file.cc b/cddl/usr.sbin/zfsd/case_file.cc
index 39f89fbbf7c8..f9fd84da7277 100644
--- a/cddl/usr.sbin/zfsd/case_file.cc
+++ b/cddl/usr.sbin/zfsd/case_file.cc
@@ -53,6 +53,7 @@
 #include <syslog.h>
 #include <unistd.h>
 
+#include <libzutil.h>
 #include <libzfs.h>
 
 #include <list>
@@ -91,7 +92,6 @@ using DevdCtl::ParseException;
 
 CaseFileList  CaseFile::s_activeCases;
 const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
-const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
 
 //- CaseFile Static Public Methods ---------------------------------------------
 CaseFile *
@@ -255,6 +255,7 @@ CaseFile::RefreshVdevState()
 
 	m_vdevState    = vd.State();
 	m_vdevPhysPath = vd.PhysicalPath();
+	m_vdevName = vd.Name(casePool, false);
 	return (true);
 }
 
@@ -610,15 +611,55 @@ CaseFile::ActivateSpare() {
 	return (Replace(vdev_type, devPath, /*isspare*/true));
 }
 
+/* Does the argument event refer to a checksum error? */
+static bool
+IsChecksumEvent(const Event* const event)
+{
+	return ("ereport.fs.zfs.checksum" == event->Value("type"));
+}
+
+/* Does the argument event refer to an IO error? */
+static bool
+IsIOEvent(const Event* const event)
+{
+	return ("ereport.fs.zfs.io" == event->Value("type"));
+}
+
+/* Does the argument event refer to an IO delay? */
+static bool
+IsDelayEvent(const Event* const event)
+{
+	return ("ereport.fs.zfs.delay" == event->Value("type"));
+}
+
 void
 CaseFile::RegisterCallout(const Event &event)
 {
 	timeval now, countdown, elapsed, timestamp, zero, remaining;
+	/**
+	 * The time ZFSD waits before promoting a tentative event
+	 * into a permanent event.
+	 */
+	int sec = -1;
+	if (IsChecksumEvent(&event))
+		sec = CaseFile::GetVdevProp(VDEV_PROP_CHECKSUM_T);
+	else if (IsIOEvent(&event))
+		sec = CaseFile::GetVdevProp(VDEV_PROP_IO_T);
+	else if (IsDelayEvent(&event))
+		sec = CaseFile::GetVdevProp(VDEV_PROP_SLOW_IO_T);
+
+	if (sec == -1)
+		sec = 60; /* default */
+
+	timeval removeGracePeriod = {
+	    sec, /*sec*/
+	    0 /*usec*/
+	};
 
 	gettimeofday(&now, 0);
 	timestamp = event.GetTimestamp();
 	timersub(&now, &timestamp, &elapsed);
-	timersub(&s_removeGracePeriod, &elapsed, &countdown);
+	timersub(&removeGracePeriod, &elapsed, &countdown);
 	/*
 	 * If countdown is <= zero, Reset the timer to the
 	 * smallest positive time value instead
@@ -827,6 +868,10 @@ CaseFile::CaseFile(const Vdev &vdev)
 	guidString << m_poolGUID;
 	m_poolGUIDString = guidString.str();
 
+	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
+	m_vdevName = vdev.Name(zhp, false);
+
 	s_activeCases.push_back(this);
 
 	syslog(LOG_INFO, "Creating new CaseFile:\n");
@@ -1158,43 +1203,54 @@ CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
 	return (retval);
 }
 
-/* Does the argument event refer to a checksum error? */
-static bool
-IsChecksumEvent(const Event* const event)
+/* Lookup the vdev prop. Used for checksum, IO, or slow IO props */
+int
+CaseFile::GetVdevProp(vdev_prop_t vdev_prop) const
 {
-	return ("ereport.fs.zfs.checksum" == event->Value("type"));
-}
+	char val[ZFS_MAXPROPLEN];
+	zprop_source_t srctype;
+	DevdCtl::Guid poolGUID = PoolGUID();
+	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
+	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
 
-/* Does the argument event refer to an IO error? */
-static bool
-IsIOEvent(const Event* const event)
-{
-	return ("ereport.fs.zfs.io" == event->Value("type"));
-}
+	char *prop_str = (char *) vdev_prop_to_name(vdev_prop);
+	if (zhp == NULL || zpool_get_vdev_prop(zhp, m_vdevName.c_str(),
+	    vdev_prop, prop_str, val, sizeof (val), &srctype, B_FALSE) != 0)
+		return (-1);
 
-/* Does the argument event refer to an IO delay? */
-static bool
-IsDelayEvent(const Event* const event)
-{
-	return ("ereport.fs.zfs.delay" == event->Value("type"));
+	/* we'll get "-" from libzfs for a prop that is not set */
+	if (zfs_isnumber(val) == B_FALSE)
+		return (-1);
+
+	return (atoi(val));
 }
 
 bool
 CaseFile::ShouldDegrade() const
 {
+	int checksum_n = GetVdevProp(VDEV_PROP_CHECKSUM_N);
+	if (checksum_n == -1)
+		checksum_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
 	return (std::count_if(m_events.begin(), m_events.end(),
-			      IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
+			      IsChecksumEvent) > checksum_n);
 }
 
 bool
 CaseFile::ShouldFault() const
 {
 	bool should_fault_for_io, should_fault_for_delay;
+	int io_n = GetVdevProp(VDEV_PROP_IO_N);
+	int slow_io_n = GetVdevProp(VDEV_PROP_SLOW_IO_N);
+
+	if (io_n == -1)
+		io_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
+	if (slow_io_n == -1)
+		slow_io_n = DEFAULT_ZFS_FAULT_SLOW_IO_COUNT;
 
 	should_fault_for_io = std::count_if(m_events.begin(), m_events.end(),
-			      IsIOEvent) > ZFS_DEGRADE_IO_COUNT;
+			      IsIOEvent) > io_n;
 	should_fault_for_delay = std::count_if(m_events.begin(), m_events.end(),
-			      IsDelayEvent) > ZFS_FAULT_DELAY_COUNT;
+			      IsDelayEvent) > slow_io_n;
 
 	return (should_fault_for_io || should_fault_for_delay);
 }
diff --git a/cddl/usr.sbin/zfsd/case_file.h b/cddl/usr.sbin/zfsd/case_file.h
index 9566b1586ef5..199918c4fead 100644
--- a/cddl/usr.sbin/zfsd/case_file.h
+++ b/cddl/usr.sbin/zfsd/case_file.h
@@ -235,18 +235,27 @@ public:
 	 */
 	int IsSpare();
 
+	/**
+	 * \brief Get case vdev's specified property
+	 */
+	int GetVdevProp(vdev_prop_t) const;
+
 protected:
 	enum {
+		/*
+		 * Use these defaults if we can't get the corresponding vdev
+		 * prop or if the prop is not set
+		 */
 		/**
 		 * The number of soft errors on a vdev required
 		 * to transition a vdev from healthy to degraded
-		 * status.
+		 * status
 		 */
-		ZFS_DEGRADE_IO_COUNT = 50,
+		DEFAULT_ZFS_DEGRADE_IO_COUNT = 50,
 		/**
 		 * The number of delay errors on a vdev required to fault it
 		 */
-		ZFS_FAULT_DELAY_COUNT = 8,
+		DEFAULT_ZFS_FAULT_SLOW_IO_COUNT = 8,
 	};
 
 	static CalloutFunc_t OnGracePeriodEnded;
@@ -379,12 +388,6 @@ protected:
 	 */
 	static const string  s_caseFilePath;
 
-	/**
-	 * \brief The time ZFSD waits before promoting a tentative event
-	 *        into a permanent event.
-	 */
-	static const timeval s_removeGracePeriod;
-
 	/**
 	 * \brief A list of soft error events counted against the health of
 	 *        a vdev.
@@ -404,6 +407,7 @@ protected:
 	string		   m_poolGUIDString;
 	string		   m_vdevGUIDString;
 	string		   m_vdevPhysPath;
+	string		   m_vdevName;
 	int		   m_is_spare;
 
 	/**
diff --git a/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc b/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc
index d76abb54c9ed..f1e925b0b4ef 100644
--- a/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc
+++ b/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc
@@ -134,6 +134,7 @@ public:
 	MOCK_CONST_METHOD0(PoolGUID, Guid());
 	MOCK_CONST_METHOD0(State, vdev_state());
 	MOCK_CONST_METHOD0(PhysicalPath, string());
+	MOCK_CONST_METHOD2(Name, string(zpool_handle_t * zhp, bool verbose));
 };
 
 MockVdev::MockVdev(nvlist_t *vdevConfig)
@@ -431,6 +432,8 @@ protected:
 		m_vdev = new MockVdev(m_vdevConfig);
 		ON_CALL(*m_vdev, GUID())
 		    .WillByDefault(::testing::Return(Guid(123)));
+		ON_CALL(*m_vdev, Name(::testing::_, ::testing::_))
+		    .WillByDefault(::testing::Return(string("/dev/da999")));
 		ON_CALL(*m_vdev, PoolGUID())
 		    .WillByDefault(::testing::Return(Guid(456)));
 		ON_CALL(*m_vdev, State())
diff --git a/cddl/usr.sbin/zfsd/vdev.h b/cddl/usr.sbin/zfsd/vdev.h
index ace5d5a009fa..42278a3d7229 100644
--- a/cddl/usr.sbin/zfsd/vdev.h
+++ b/cddl/usr.sbin/zfsd/vdev.h
@@ -130,7 +130,7 @@ public:
 	nvlist_t		*Config()	const;
 	Vdev			 Parent();
 	Vdev			 RootVdev();
-	std::string		 Name(zpool_handle_t *, bool verbose)	const;
+	virtual std::string	 Name(zpool_handle_t *, bool verbose)	const;
 	bool			 IsSpare();
 	bool			 IsAvailableSpare()	const;
 	bool			 IsActiveSpare()	const;
diff --git a/cddl/usr.sbin/zfsd/zfsd.8 b/cddl/usr.sbin/zfsd/zfsd.8
index 75a3333e6f9e..d6b0e1d4bd22 100644
--- a/cddl/usr.sbin/zfsd/zfsd.8
+++ b/cddl/usr.sbin/zfsd/zfsd.8
@@ -23,7 +23,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd April 18, 2020
+.Dd February 20, 2024
 .Dt ZFSD 8
 .Os
 .Sh NAME
@@ -55,7 +55,7 @@ directly.
 Instead, they control its behavior indirectly through zpool configuration.
 There are two ways to influence
 .Nm :
-assigning hotspares and setting pool properties.
+assigning hot spares and setting pool properties.
 Currently, only the
 .Em autoreplace
 property has any effect.
@@ -69,7 +69,7 @@ will attempt to resolve the following types of fault:
 .It device removal
 When a leaf vdev disappears,
 .Nm
-will activate any available hotspare.
+will activate any available hot spare.
 .It device arrival
 When a new GEOM device appears,
 .Nm
@@ -77,40 +77,71 @@ will attempt to read its ZFS label, if any.
 If it matches a previously removed vdev on an active pool,
 .Nm
 will online it.
-Once resilvering completes, any active hotspare will detach automatically.
+Once resilvering completes, any active hot spare will detach automatically.
 .Pp
 If the new device has no ZFS label but its physical path matches the
 physical path of a previously removed vdev on an active pool, and that
 pool has the autoreplace property set, then
 .Nm
 will replace the missing vdev with the newly arrived device.
-Once resilvering completes, any active hotspare will detach automatically.
+Once resilvering completes, any active hot spare will detach automatically.
 .It vdev degrade or fault events
 If a vdev becomes degraded or faulted,
 .Nm
-will activate any available hotspare.
+will activate any available hot spare.
 .It I/O errors
-If a leaf vdev generates more than 50 I/O errors in a 60 second period, then
+By default, if a leaf vdev generates more than 50 I/O errors in a 60 second
+period, then
+.Nm
+will mark that vdev as
+.Em FAULTED .
+ZFS will no longer issue any I/Os to it.
+.Nm
+will activate a hot spare if one is available. The defaults can be changed by
+setting the
+.Em io_n
+and/or
+.Em io_t
+vdev properties. See
+.Xr vdevprops 7
+for details.
+.It I/O delays
+By default, if a leaf vdev generates more than 8 delayed I/O events in a 60
+second period, then
 .Nm
 will mark that vdev as
 .Em FAULTED .
 ZFS will no longer issue any I/Os to it.
 .Nm
-will activate a hotspare if one is available.
+will activate a hot spare if one is available. The defaults can be changed by
+setting the
+.Em slow_io_n
+and/or
+.Em slow_io_t
+vdev properties. See
+.Xr vdevprops 7
+for details.
 .It Checksum errors
-If a leaf vdev generates more than 50 checksum errors in a 60 second
-period, then
+By default, if a leaf vdev generates more than 50 checksum errors in a 60
+second period, then
 .Nm
 will mark that vdev as
 .Em DEGRADED .
-ZFS will still use it, but zfsd will activate a spare anyway.
+ZFS will still use it, but zfsd will also activate a hot spare if one is
+available. The defaults can be changed by setting the
+.Em checksum_n
+and/or
+.Em checksum_t
+vdev properties. See
+.Xr vdevprops 7
+for details.
 .It Spare addition
-If the system administrator adds a hotspare to a pool that is already degraded,
+If the system administrator adds a hot spare to a pool that is already degraded,
 .Nm
 will activate the spare.
 .It Resilver complete
 .Nm
-will detach any hotspare once a permanent replacement finishes resilvering.
+will detach any hot spare once a permanent replacement finishes resilvering.
 .It Physical path change
 If the physical path of an existing disk changes,
 .Nm
@@ -134,6 +165,7 @@ then reads them back in when next it starts up.
 .El
 .Sh SEE ALSO
 .Xr devctl 4 ,
+.Xr vdevprops 7 ,
 .Xr zpool 8
 .Sh HISTORY
 .Nm