svn commit: r300906 - in head: cddl/usr.sbin cddl/usr.sbin/zfsd cddl/usr.sbin/zfsd/tests etc/defaults etc/mtree etc/rc.d lib lib/libdevdctl lib/libdevdctl/tests share/mk sys/cddl/contrib/opensolari...

Alan Somers asomers at FreeBSD.org
Sat May 28 17:43:42 UTC 2016


Author: asomers
Date: Sat May 28 17:43:40 2016
New Revision: 300906
URL: https://svnweb.freebsd.org/changeset/base/300906

Log:
  zfsd(8), the ZFS fault management daemon
  
  Add zfsd, which deals with hard drive faults in ZFS pools. It manages
  hotspares and replacements in drive slots that publish physical paths.
  
  cddl/usr.sbin/zfsd
  	Add zfsd(8) and its unit tests
  
  cddl/usr.sbin/Makefile
  	Add zfsd to the build
  
  lib/libdevdctl
  	A C++ library that helps devd clients process events
  
  lib/Makefile
  share/mk/bsd.libnames.mk
  share/mk/src.libnames.mk
  	Add libdevdctl to the build. It's a private library, unusable by
  	out-of-tree software.
  
  etc/defaults/rc.conf
  	By default, set zfsd_enable to NO
  
  etc/mtree/BSD.include.dist
  	Add a directory for libdevdctl's include files
  
  etc/mtree/BSD.tests.dist
  	Add a directory for zfsd's unit tests
  
  etc/mtree/BSD.var.dist
  	Add /var/db/zfsd/cases, where zfsd stores case files while it's shut
  	down.
  
  etc/rc.d/Makefile
  etc/rc.d/zfsd
  	Add zfsd's rc script
  
  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
  	Fix the resource.fs.zfs.statechange message. It had a number of
  	problems:
  
  	It was only being emitted on a transition to the HEALTHY state.
  	That made it impossible for zfsd to take actions based on drives
  	getting sicker.
  
  	It compared the new state to vdev_prevstate, which is the state that
  	the vdev had the last time it was opened.  That doesn't make sense,
  	because a vdev can change state multiple times without being
  	reopened.
  
  	vdev_set_state contains logic that will change the device's new
  	state based on various conditions.  However, the statechange event
  	was being posted _before_ that logic took effect.  Now it's being
  	posted after.
  
  Submitted by:	gibbs, asomers, mav, allanjude
  Reviewed by:	mav, delphij
  Relnotes:	yes
  Sponsored by:	Spectra Logic Corp, iX Systems
  Differential Revision:	https://reviews.freebsd.org/D6564

Added:
  head/cddl/usr.sbin/zfsd/
  head/cddl/usr.sbin/zfsd/Makefile   (contents, props changed)
  head/cddl/usr.sbin/zfsd/Makefile.common   (contents, props changed)
  head/cddl/usr.sbin/zfsd/callout.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/callout.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/case_file.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/case_file.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/tests/
  head/cddl/usr.sbin/zfsd/tests/Makefile   (contents, props changed)
  head/cddl/usr.sbin/zfsd/tests/libmocks.c   (contents, props changed)
  head/cddl/usr.sbin/zfsd/tests/libmocks.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/tests/zfsd_unittest.supp   (contents, props changed)
  head/cddl/usr.sbin/zfsd/vdev.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/vdev.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/vdev_iterator.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/vdev_iterator.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd.8   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd_event.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd_event.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd_exception.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd_exception.h   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zfsd_main.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zpool_list.cc   (contents, props changed)
  head/cddl/usr.sbin/zfsd/zpool_list.h   (contents, props changed)
  head/etc/rc.d/zfsd   (contents, props changed)
  head/lib/libdevdctl/
  head/lib/libdevdctl/Makefile   (contents, props changed)
  head/lib/libdevdctl/consumer.cc   (contents, props changed)
  head/lib/libdevdctl/consumer.h   (contents, props changed)
  head/lib/libdevdctl/event.cc   (contents, props changed)
  head/lib/libdevdctl/event.h   (contents, props changed)
  head/lib/libdevdctl/event_factory.cc   (contents, props changed)
  head/lib/libdevdctl/event_factory.h   (contents, props changed)
  head/lib/libdevdctl/exception.cc   (contents, props changed)
  head/lib/libdevdctl/exception.h   (contents, props changed)
  head/lib/libdevdctl/guid.cc   (contents, props changed)
  head/lib/libdevdctl/guid.h   (contents, props changed)
  head/lib/libdevdctl/tests/
  head/lib/libdevdctl/tests/Makefile   (contents, props changed)
  head/lib/libdevdctl/tests/libdevdctl_unittest.cc   (contents, props changed)
Modified:
  head/cddl/usr.sbin/Makefile
  head/etc/defaults/rc.conf
  head/etc/mtree/BSD.include.dist
  head/etc/mtree/BSD.tests.dist
  head/etc/mtree/BSD.var.dist
  head/etc/rc.d/Makefile
  head/lib/Makefile
  head/share/mk/bsd.libnames.mk
  head/share/mk/src.libnames.mk
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c

Modified: head/cddl/usr.sbin/Makefile
==============================================================================
--- head/cddl/usr.sbin/Makefile	Sat May 28 16:38:09 2016	(r300905)
+++ head/cddl/usr.sbin/Makefile	Sat May 28 17:43:40 2016	(r300906)
@@ -7,6 +7,7 @@ SUBDIR=	${_dtrace} \
 	${_plockstat} \
 	${_tests} \
 	${_zdb} \
+	${_zfsd} \
 	${_zhack}
 
 .if ${MK_TESTS} != "no"
@@ -18,6 +19,9 @@ _tests=	tests
 _zdb=	zdb
 _zhack=	zhack
 .endif
+. if ${MK_CXX} != "no"
+_zfsd=	zfsd
+. endif
 .endif
 
 .if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386"

Added: head/cddl/usr.sbin/zfsd/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/Makefile	Sat May 28 17:43:40 2016	(r300906)
@@ -0,0 +1,13 @@
+# $FreeBSD$
+
+SRCDIR=${.CURDIR}/../../..
+.include "Makefile.common"
+
+PROG_CXX=	zfsd
+MAN=		zfsd.8
+
+.include <bsd.prog.mk>
+
+# The unittests require devel/googletest and devel/googlemock from ports.
+# Don't automatically build them.
+SUBDIR=

Added: head/cddl/usr.sbin/zfsd/Makefile.common
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/Makefile.common	Sat May 28 17:43:40 2016	(r300906)
@@ -0,0 +1,42 @@
+# $FreeBSD$
+
+SRCS=		callout.cc		\
+		case_file.cc		\
+		zfsd_event.cc		\
+		vdev.cc			\
+		vdev_iterator.cc	\
+		zfsd.cc			\
+		zfsd_exception.cc	\
+		zpool_list.cc		\
+		zfsd_main.cc
+
+WARNS?=		3
+
+# Ignore warnings about Solaris specific pragmas.
+IGNORE_PRAGMA=  YES
+
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzpool/common
+INCFLAGS+= -I${SRCDIR}/cddl/compat/opensolaris/include
+INCFLAGS+= -I${SRCDIR}/cddl/compat/opensolaris/lib/libumem
+INCFLAGS+= -I${SRCDIR}/sys/cddl/compat/opensolaris
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/head
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libuutil/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libumem/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzfs_core/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzfs/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libnvpair
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/common/zfs
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common/sys
+
+CFLAGS= -g -DNEED_SOLARIS_BOOLEAN ${INCFLAGS}
+
+DPADD=  ${LIBDEVDCTL} ${LIBZFS} ${LIBZFS_CORE} ${LIBUTIL} ${LIBGEOM} \
+	${LIBBSDXML} ${LIBSBUF} ${LIBNVPAIR} ${LIBUUTIL}
+LIBADD=  devdctl zfs zfs_core util geom bsdxml sbuf nvpair uutil
+
+cscope:
+	find ${.CURDIR} -type f -a \( -name "*.[ch]" -o -name "*.cc" \) \
+	     > ${.CURDIR}/cscope.files
+	cd ${.CURDIR} && cscope -buq ${INCFLAGS}

Added: head/cddl/usr.sbin/zfsd/callout.cc
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/callout.cc	Sat May 28 17:43:40 2016	(r300906)
@@ -0,0 +1,219 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file callout.cc
+ *
+ * \brief Implementation of the Callout class - multi-client
+ *        timer services built on top of the POSIX interval timer.
+ */
+
+#include <sys/time.h>
+
+#include <signal.h>
+#include <syslog.h>
+
+#include <climits>
+#include <list>
+#include <map>
+#include <string>
+
+#include <devdctl/guid.h>
+#include <devdctl/event.h>
+#include <devdctl/event_factory.h>
+#include <devdctl/consumer.h>
+#include <devdctl/exception.h>
+
+#include "callout.h"
+#include "vdev_iterator.h"
+#include "zfsd.h"
+#include "zfsd_exception.h"
+
+/* Pending callouts, soonest to expire at the front (see Reset()). */
+std::list<Callout *> Callout::s_activeCallouts;
+/* Set by AlarmSignalHandler(); tested and cleared by ExpireCallouts(). */
+bool		     Callout::s_alarmFired(false);
+
+/*
+ * Install AlarmSignalHandler() as the SIGALRM handler so that
+ * expirations of the ITIMER_REAL timer armed by Reset() and
+ * ExpireCallouts() are noticed.
+ */
+void
+Callout::Init()
+{
+	signal(SIGALRM,  Callout::AlarmSignalHandler);
+}
+
+/*
+ * Cancel this callout if it is pending.
+ *
+ * The callout is unlinked from the active list and its relative
+ * interval is folded into the entry that followed it, so that every
+ * later callout keeps its expiration time.
+ *
+ * NOTE(review): the interval timer is not re-armed here, even when the
+ * head of the list is removed -- confirm that this is intended.
+ *
+ * \return  true if the callout was pending and has been cancelled,
+ *          false if it was not pending.
+ */
+bool
+Callout::Stop()
+{
+	if (!IsPending())
+		return (false);
+
+	/* Locate this callout in the active list. */
+	std::list<Callout *>::iterator pos(s_activeCallouts.begin());
+	while (pos != s_activeCallouts.end() && *pos != this)
+		pos++;
+
+	if (pos != s_activeCallouts.end()) {
+		std::list<Callout *>::iterator successor(
+		    s_activeCallouts.erase(pos));
+
+		if (successor != s_activeCallouts.end()) {
+			Callout *follower(*successor);
+
+			/*
+			 * The follower's interval was relative to us;
+			 * make it relative to our predecessor instead.
+			 */
+			timeradd(&follower->m_interval, &m_interval,
+				 &follower->m_interval);
+		}
+	}
+
+	m_pending = false;
+	return (true);
+}
+
+/*
+ * (Re)schedule this callout to invoke func(arg) once interval has
+ * elapsed.
+ *
+ * Any pending instance of this callout is stopped first.  The callout
+ * is then inserted into the active list, which stores each entry's
+ * interval relative to the entry before it.  If the callout ends up at
+ * the front of the list, the ITIMER_REAL timer is armed for it.
+ *
+ * \throws ZfsdException if interval is zero.
+ *
+ * \return  true if a pending instance of this callout was cancelled.
+ */
+bool
+Callout::Reset(const timeval &interval, CalloutFunc_t *func, void *arg)
+{
+	bool cancelled(false);
+
+	if (!timerisset(&interval))
+		throw ZfsdException("Callout::Reset: interval of 0");
+
+	cancelled = Stop();
+
+	m_interval = interval;
+	m_func     = func;
+	m_arg      = arg;
+	m_pending  = true;
+
+	/* Walk the list, converting m_interval into a relative interval. */
+	std::list<Callout *>::iterator it(s_activeCallouts.begin());
+	for (; it != s_activeCallouts.end(); it++) {
+
+		if (timercmp(&(*it)->m_interval, &m_interval, <=)) {
+			/*
+			 * Decrease our interval by those that come
+			 * before us.
+			 */
+			timersub(&m_interval, &(*it)->m_interval, &m_interval);
+		} else {
+			/*
+			 * Account for the time between the newly
+			 * inserted event and those that follow.
+			 */
+			timersub(&(*it)->m_interval, &m_interval,
+				 &(*it)->m_interval);
+			break;
+		}
+	}
+	/* Insert before the first entry that expires after us. */
+	s_activeCallouts.insert(it, this);
+
+
+	/* Only the head of the list drives the (one-shot) interval timer. */
+	if (s_activeCallouts.front() == this) {
+		itimerval timerval = { {0, 0}, m_interval };
+
+		setitimer(ITIMER_REAL, &timerval, NULL);
+	}
+
+	return (cancelled);
+}
+
+/*
+ * SIGALRM handler (installed by Init()).  Records that the interval
+ * timer fired and wakes the daemon's event loop; the callbacks
+ * themselves are run later by ExpireCallouts().
+ */
+void
+Callout::AlarmSignalHandler(int)
+{
+	s_alarmFired = true;
+	ZfsDaemon::WakeEventLoop();
+}
+
+/*
+ * Run the callbacks of all callouts that are due, then re-arm the
+ * interval timer for the next pending callout, if there is one.
+ * Does nothing unless AlarmSignalHandler() has recorded a SIGALRM
+ * since the last call.
+ */
+void
+Callout::ExpireCallouts()
+{
+	if (!s_alarmFired)
+		return;
+
+	s_alarmFired = false;
+	if (s_activeCallouts.empty()) {
+		/* Callout removal/SIGALRM race was lost. */
+		return;
+	}
+
+	/*
+	 * Expire the first callout (the one we used to set the
+	 * interval timer) as well as any callouts following that
+	 * expire at the same time (have a zero interval from
+	 * the callout before it).
+	 */
+	do {
+		Callout *cur(s_activeCallouts.front());
+		s_activeCallouts.pop_front();
+		/*
+		 * Unlink and clear the pending flag before invoking the
+		 * callback, so that the callback may Reset() this callout.
+		 */
+		cur->m_pending = false;
+		cur->m_func(cur->m_arg);
+	} while (!s_activeCallouts.empty()
+	      && timerisset(&s_activeCallouts.front()->m_interval) == 0);
+
+	/* Arm the one-shot timer for the new head of the list. */
+	if (!s_activeCallouts.empty()) {
+		Callout *next(s_activeCallouts.front());
+		itimerval timerval = { { 0, 0 }, next->m_interval };
+
+		setitimer(ITIMER_REAL, &timerval, NULL);
+	}
+}
+
+/*
+ * Calculate the time remaining until this callout expires.
+ *
+ * \return  If the callout is not pending, { INT_MAX, 999999 }.
+ *          Otherwise the time left on the interval timer plus the
+ *          relative intervals of every callout after the head of the
+ *          active list, up to and including this one.
+ */
+timeval
+Callout::TimeRemaining() const
+{
+	/*
+	 * Outline: Add the m_interval for each callout in s_activeCallouts
+	 * ahead of this, except for the first callout.  Add to that the result
+	 * of getitimer (That's because the first callout stores its original
+	 * interval setting while the timer is ticking).
+	 */
+	itimerval timervalToAlarm = { { 0, 0 }, { 0, 0 } };
+	timeval timeToExpiry;
+
+	if (!IsPending()) {
+		timeToExpiry.tv_sec = INT_MAX;
+		timeToExpiry.tv_usec = 999999;	/*maximum normalized value*/
+		return (timeToExpiry);
+	}
+
+	/* Start with the time left before the head callout expires. */
+	getitimer(ITIMER_REAL, &timervalToAlarm);
+	timeToExpiry = timervalToAlarm.it_value;
+
+	/*
+	 * Walk from the head towards this callout, adding the relative
+	 * interval of each entry after the head.  If this callout is the
+	 * head itself, the timer value alone is the answer; previously the
+	 * head was skipped unconditionally, so the loop never found it and
+	 * summed the intervals of every callout in the list.
+	 */
+	std::list<Callout *>::iterator it(s_activeCallouts.begin());
+	while (it != s_activeCallouts.end() && *it != this) {
+		if (++it == s_activeCallouts.end())
+			break;
+		timeradd(&timeToExpiry, &(*it)->m_interval, &timeToExpiry);
+	}
+	return (timeToExpiry);
+}

Added: head/cddl/usr.sbin/zfsd/callout.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/callout.h	Sat May 28 17:43:40 2016	(r300906)
@@ -0,0 +1,185 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file callout.h
+ *
+ * \brief Interface for timer based callback services.
+ *
+ * Header requirements:
+ *
+ *     #include <sys/time.h>
+ *
+ *     #include <list>
+ */
+
+#ifndef _CALLOUT_H_
+#define _CALLOUT_H_
+
+/**
+ * \brief Type of the function callback from a Callout.
+ */
+typedef void CalloutFunc_t(void *);
+
+/**
+ * \brief Interface to a schedulable one-shot timer with the granularity
+ *        of the system clock (see setitimer(2)).
+ *
+ * Determination of callback expiration is triggered by the SIGALRM
+ * signal.  Callout callbacks are always delivered from Zfsd's event
+ * processing loop.
+ *
+ * Periodic actions can be triggered via the Callout mechanisms by
+ * resetting the Callout from within its callback.
+ */
+class Callout
+{
+public:
+
+	/**
+	 * Initialize the Callout subsystem by installing the SIGALRM
+	 * handler.
+	 */
+	static void Init();
+
+	/**
+	 * Function called (via SIGALRM) when our interval
+	 * timer expires.
+	 */
+	static void AlarmSignalHandler(int);
+
+	/**
+	 * Execute callbacks for all callouts that have the same
+	 * expiration time as the first callout in the list.
+	 */
+	static void ExpireCallouts();
+
+	/** Constructor.  The new callout is not pending. */
+	Callout();
+
+	/**
+	 * Returns true if callout has not been stopped,
+	 * or deactivated since the last time the callout was
+	 * reset.
+	 */
+	bool IsActive() const;
+
+	/**
+	 * Returns true if callout is still waiting to expire.
+	 */
+	bool IsPending() const;
+
+	/**
+	 * Disestablish a callout.
+	 *
+	 * \return  true if the callout was pending and has been cancelled,
+	 *          false if it was not pending.
+	 */
+	bool Stop();
+
+	/**
+	 * \brief Establish or change a timeout.
+	 *
+	 * \param interval  Timeval indicating the time which must elapse
+	 *                  before this callout fires.  Must be non-zero.
+	 * \param func      Pointer to the callback function
+	 * \param arg       Argument pointer to pass to callback function
+	 *
+	 * \return  Cancellation status.
+	 *             true:  The previous callback was pending and therefore
+	 *                    was cancelled.
+	 *             false: The callout was not pending at the time of this
+	 *                    reset request.
+	 *          In all cases, a new callout is established.
+	 *
+	 * \throws ZfsdException if interval is zero.
+	 */
+	bool  Reset(const timeval &interval, CalloutFunc_t *func, void *arg);
+
+	/**
+	 * \brief Calculate the remaining time until this Callout's timer
+	 *        expires.
+	 *
+	 * The return value will be slightly greater than the actual time to
+	 * expiry.
+	 *
+	 * If the callout is not pending, returns a timeval with tv_sec set
+	 * to INT_MAX and tv_usec set to 999999.
+	 */
+	timeval TimeRemaining() const;
+
+private:
+	/**
+	 * All active callouts sorted by expiration time.  The callout
+	 * with the nearest expiration time is at the head of the list.
+	 */
+	static std::list<Callout *> s_activeCallouts;
+
+	/**
+	 * The interval timer has expired.  This variable is set from
+	 * signal handler context and tested from Zfsd::EventLoop()
+	 * context via ExpireCallouts().
+	 */
+	static bool                 s_alarmFired;
+
+	/**
+	 * Time, relative to others in the active list, until
+	 * this callout is fired.
+	 */
+	timeval                     m_interval;
+
+	/** Callback function argument. */
+	void                       *m_arg;
+
+	/**
+	 * The callback function associated with this timer
+	 * entry.
+	 */
+	CalloutFunc_t              *m_func;
+
+	/**
+	 * State of this callout: true from Reset() until the callout
+	 * either fires or is stopped.
+	 */
+	bool                        m_pending;
+};
+
+//- Callout public const methods ----------------------------------------------
+/**
+ * \return  true while this callout is scheduled, i.e. it has been
+ *          Reset() and has neither fired nor been stopped since.
+ */
+inline bool
+Callout::IsPending() const
+{
+	return (m_pending);
+}
+
+//- Callout public methods ----------------------------------------------------
+/**
+ * Construct an idle callout: no callback, no argument, a zero
+ * interval, and not pending.
+ */
+inline
+Callout::Callout()
+ : m_arg(0),
+   m_func(NULL),
+   m_pending(false)
+{
+	timerclear(&m_interval);
+}
+
+#endif /* _CALLOUT_H_ */

Added: head/cddl/usr.sbin/zfsd/case_file.cc
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/case_file.cc	Sat May 28 17:43:40 2016	(r300906)
@@ -0,0 +1,1104 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ */
+
+/**
+ * \file case_file.cc
+ *
+ * We keep case files for any leaf vdev that is not in the optimal state.
+ * However, we only serialize to disk those events that need to be preserved
+ * across reboots.  For now, this is just a log of soft errors which we
+ * accumulate in order to mark a device as degraded.
+ */
+#include <sys/cdefs.h>
+#include <sys/time.h>
+
+#include <sys/fs/zfs.h>
+
+#include <dirent.h>
+#include <iomanip>
+#include <fstream>
+#include <functional>
+#include <sstream>
+#include <syslog.h>
+#include <unistd.h>
+
+#include <libzfs.h>
+
+#include <list>
+#include <map>
+#include <string>
+
+#include <devdctl/guid.h>
+#include <devdctl/event.h>
+#include <devdctl/event_factory.h>
+#include <devdctl/exception.h>
+#include <devdctl/consumer.h>
+
+#include "callout.h"
+#include "vdev_iterator.h"
+#include "zfsd_event.h"
+#include "case_file.h"
+#include "vdev.h"
+#include "zfsd.h"
+#include "zfsd_exception.h"
+#include "zpool_list.h"
+
+__FBSDID("$FreeBSD$");
+
+/*============================ Namespace Control =============================*/
+using std::auto_ptr;
+using std::hex;
+using std::ifstream;
+using std::stringstream;
+using std::setfill;
+using std::setw;
+
+using DevdCtl::Event;
+using DevdCtl::EventBuffer;
+using DevdCtl::EventFactory;
+using DevdCtl::EventList;
+using DevdCtl::Guid;
+using DevdCtl::ParseException;
+
+/*--------------------------------- CaseFile ---------------------------------*/
+//- CaseFile Static Data -------------------------------------------------------
+
+/* All currently open cases; searched by Find() and drained by PurgeAll(). */
+CaseFileList  CaseFile::s_activeCases;
+/* Directory that DeSerialize() scans for serialized case files. */
+const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
+/* NOTE(review): 60 second grace period; its use is not visible in this chunk. */
+const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
+
+//- CaseFile Static Public Methods ---------------------------------------------
+/*
+ * Look up the active case for the vdev identified by a pool GUID and
+ * vdev GUID pair.
+ *
+ * \return  The matching case, or NULL if no active case exists.
+ */
+CaseFile *
+CaseFile::Find(Guid poolGUID, Guid vdevGUID)
+{
+	CaseFileList::iterator it(s_activeCases.begin());
+
+	while (it != s_activeCases.end()) {
+		CaseFile *candidate(*it);
+		const bool mismatch(candidate->PoolGUID() != poolGUID
+				 || candidate->VdevGUID() != vdevGUID);
+
+		/*
+		 * There is at most one active case per vdev, so the
+		 * first match is the only match.
+		 */
+		if (!mismatch)
+			return (candidate);
+		it++;
+	}
+	return (NULL);
+}
+
+/*
+ * Look up the active case whose vdev has the given physical path.
+ *
+ * If several cases share the path, a warning is logged and the last
+ * one in the active list is returned.
+ *
+ * \return  The matching case, or NULL if there is none.
+ */
+CaseFile *
+CaseFile::Find(const string &physPath)
+{
+	CaseFile *match = NULL;
+	CaseFileList::iterator it(s_activeCases.begin());
+
+	while (it != s_activeCases.end()) {
+		CaseFile *candidate(*it);
+
+		it++;
+		if (candidate->PhysicalPath() != physPath)
+			continue;
+
+		/* A second hit means two cases claim the same slot. */
+		if (match != NULL) {
+			syslog(LOG_WARNING, "Multiple casefiles found for "
+			    "physical path %s.  "
+			    "This is most likely a bug in zfsd",
+			    physPath.c_str());
+		}
+		match = candidate;
+	}
+	return (match);
+}
+
+
+/*
+ * Hand the event to every active case that belongs to the given pool.
+ */
+void
+CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
+{
+	CaseFileList::iterator cursor(s_activeCases.begin());
+
+	while (cursor != s_activeCases.end()) {
+		CaseFile *current(*cursor);
+
+		/*
+		 * Step past the current entry before re-evaluating it;
+		 * presumably ReEvaluate() can close the case and thereby
+		 * remove it from the list.
+		 */
+		cursor++;
+		if (poolGUID == current->PoolGUID())
+			current->ReEvaluate(event);
+	}
+}
+
+/*
+ * Return the active case for the given vdev, opening a new case if
+ * the vdev does not have one yet.
+ */
+CaseFile &
+CaseFile::Create(Vdev &vdev)
+{
+	CaseFile *existing(Find(vdev.PoolGUID(), vdev.GUID()));
+
+	if (existing != NULL)
+		return (*existing);
+
+	return (*new CaseFile(vdev));
+}
+
+/*
+ * Load every case file that DeSerializeSelector accepts from the case
+ * file directory.  A directory that cannot be scanned is silently
+ * ignored.
+ */
+void
+CaseFile::DeSerialize()
+{
+	struct dirent **entries;
+	const int found(scandir(s_caseFilePath.c_str(), &entries,
+				DeSerializeSelector, /*compar*/NULL));
+
+	if (found == -1)
+		return;
+
+	/* scandir() allocates each entry and the array holding them. */
+	for (int idx = 0; idx < found; idx++) {
+		struct dirent *entry(entries[idx]);
+
+		DeSerializeFile(entry->d_name);
+		free(entry);
+	}
+	free(entries);
+}
+
+/*
+ * Log every active case.
+ */
+void
+CaseFile::LogAll()
+{
+	CaseFileList::iterator it(s_activeCases.begin());
+
+	while (it != s_activeCases.end()) {
+		(*it)->Log();
+		it++;
+	}
+}
+
+/*
+ * Serialize and then destroy every active case.
+ */
+void
+CaseFile::PurgeAll()
+{
+	/*
+	 * Each case is written out before it is deleted so that it can
+	 * be reread and revalidated during BuildCaseFiles.  The loop
+	 * terminates because CaseFiles remove themselves from the
+	 * active list on destruction.
+	 */
+	while (!s_activeCases.empty()) {
+		CaseFile *doomed(s_activeCases.front());
+
+		doomed->Serialize();
+		delete doomed;
+	}
+}
+
+//- CaseFile Public Methods ----------------------------------------------------
+/*
+ * Update the cached vdev state and physical path from the current
+ * pool configuration.
+ *
+ * \return  false if the pool or the vdev can no longer be found (the
+ *          cached values are left untouched), true otherwise.
+ */
+bool
+CaseFile::RefreshVdevState()
+{
+	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+
+	if (zpl.empty())
+		return (false);
+
+	zpool_handle_t *casePool(zpl.front());
+	if (casePool == NULL)
+		return (false);
+
+	Vdev caseVdev(casePool, CaseVdev(casePool));
+	if (caseVdev.DoesNotExist())
+		return (false);
+
+	m_vdevState    = caseVdev.State();
+	m_vdevPhysPath = caseVdev.PhysicalPath();
+	return (true);
+}
+
+/*
+ * Re-evaluate this case in light of a newly arrived device.
+ *
+ * If the device is the very vdev this case tracks, it is onlined.
+ * Otherwise, if the pool has autoreplace set and the device appeared
+ * at this case's physical path, the device is labeled and used to
+ * replace the case's vdev.
+ *
+ * \param devPath   Device path of the arrived device.
+ * \param physPath  Physical path of the arrived device.
+ * \param vdev      Vdev label information read from the device, or
+ *                  NULL if there is none.
+ *
+ * \return  true if the device was consumed by this case.
+ */
+bool
+CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
+{
+	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
+
+	if (pool == NULL || !RefreshVdevState()) {
+		/*
+		 * The pool or vdev for this case file is no longer
+		 * part of the configuration.  This can happen
+		 * if we process a device arrival notification
+		 * before seeing the ZFS configuration change
+		 * event.
+		 */
+		syslog(LOG_INFO,
+		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
+		       "Closing\n",
+		       PoolGUIDString().c_str(),
+		       VdevGUIDString().c_str());
+		Close();
+
+		/*
+		 * Since this event was not used to close this
+		 * case, do not report it as consumed.
+		 */
+		return (/*consumed*/false);
+	}
+
+	if (VdevState() > VDEV_STATE_CANT_OPEN) {
+		/*
+		 * For now, newly discovered devices only help for
+		 * devices that are missing.  In the future, we might
+		 * use a newly inserted spare to replace a degraded
+		 * or faulted device.
+		 */
+		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
+		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
+		return (/*consumed*/false);
+	}
+
+	if (vdev != NULL
+	 && vdev->PoolGUID() == m_poolGUID
+	 && vdev->GUID() == m_vdevGUID) {
+
+		/*
+		 * Do not claim success (or consume the event) unless
+		 * the online request actually succeeded.
+		 */
+		if (zpool_vdev_online(pool, vdev->GUIDString().c_str(),
+				      ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
+				      &m_vdevState) != 0) {
+			syslog(LOG_ERR,
+			       "Failed to online vdev(%s/%s:%s): %s: %s\n",
+			       zpool_get_name(pool), vdev->GUIDString().c_str(),
+			       devPath.c_str(),
+			       libzfs_error_action(g_zfsHandle),
+			       libzfs_error_description(g_zfsHandle));
+			return (/*consumed*/false);
+		}
+
+		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
+		       zpool_get_name(pool), vdev->GUIDString().c_str(),
+		       devPath.c_str(),
+		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+
+		/*
+		 * Check the vdev state post the online action to see
+		 * if we can retire this case.
+		 */
+		CloseIfSolved();
+
+		return (/*consumed*/true);
+	}
+
+	/*
+	 * If the auto-replace policy is enabled, and we have physical
+	 * path information, try a physical path replacement.
+	 */
+	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
+		syslog(LOG_INFO,
+		       "CaseFile(%s:%s:%s): AutoReplace not set.  "
+		       "Ignoring device insertion.\n",
+		       PoolGUIDString().c_str(),
+		       VdevGUIDString().c_str(),
+		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+		return (/*consumed*/false);
+	}
+
+	if (PhysicalPath().empty()) {
+		syslog(LOG_INFO,
+		       "CaseFile(%s:%s:%s): No physical path information.  "
+		       "Ignoring device insertion.\n",
+		       PoolGUIDString().c_str(),
+		       VdevGUIDString().c_str(),
+		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+		return (/*consumed*/false);
+	}
+
+	if (physPath != PhysicalPath()) {
+		syslog(LOG_INFO,
+		       "CaseFile(%s:%s:%s): Physical path mismatch.  "
+		       "Ignoring device insertion.\n",
+		       PoolGUIDString().c_str(),
+		       VdevGUIDString().c_str(),
+		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+		return (/*consumed*/false);
+	}
+
+	/* Write a label on the newly inserted disk. */
+	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
+		syslog(LOG_ERR,
+		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
+		       zpool_get_name(pool), VdevGUIDString().c_str(),
+		       libzfs_error_action(g_zfsHandle),
+		       libzfs_error_description(g_zfsHandle));
+		return (/*consumed*/false);
+	}
+
+	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
+	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
+	    devPath.c_str());
+	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
+}
+
+/*
+ * Re-evaluate this case in light of a ZFS event.
+ *
+ * Dispatch is on the event's "type" and "class" values:
+ *  - misc.fs.zfs.vdev_remove / misc.fs.zfs.pool_destroy: the case is
+ *    closed and the event consumed.
+ *  - misc.fs.zfs.config_sync: the vdev state is refreshed and, if it
+ *    is below HEALTHY, spare activation is attempted.
+ *  - resource.fs.zfs.removed: tentative events are purged, spare
+ *    activation is attempted, and a system rescan is requested.
+ *  - resource.fs.zfs.statechange: spare activation is attempted for
+ *    FAULTED, DEGRADED, or CANT_OPEN vdevs; always consumed.
+ *  - ereport.fs.zfs.io / ereport.fs.zfs.checksum: a copy of the event
+ *    is queued as tentative and a callout is registered for it.
+ *
+ * NOTE(review): every path that calls Close() returns immediately
+ * without touching members; presumably Close() destroys this object --
+ * confirm before adding code after those calls.
+ *
+ * \return  true if the event was consumed or the case was closed
+ *          because it is solved.
+ */
+bool
+CaseFile::ReEvaluate(const ZfsEvent &event)
+{
+	bool consumed(false);
+
+	if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
+		/*
+		 * The Vdev we represent has been removed from the
+		 * configuration.  This case is no longer of value.
+		 */
+		Close();
+
+		return (/*consumed*/true);
+	} else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
+		/* This Pool has been destroyed.  Discard the case */
+		Close();
+
+		return (/*consumed*/true);
+	} else if (event.Value("type") == "misc.fs.zfs.config_sync") {
+		RefreshVdevState();
+		if (VdevState() < VDEV_STATE_HEALTHY)
+			consumed = ActivateSpare();
+	}
+
+
+	if (event.Value("class") == "resource.fs.zfs.removed") {
+		bool spare_activated;
+
+		if (!RefreshVdevState()) {
+			/*
+			 * The pool or vdev for this case file is no longer
+			 * part of the configuration.  This can happen
+			 * if we process a device arrival notification
+			 * before seeing the ZFS configuration change
+			 * event.
+			 */
+			syslog(LOG_INFO,
+			       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
+			       "unconfigured.  Closing\n",
+			       PoolGUIDString().c_str(),
+			       VdevGUIDString().c_str());
+			/*
+			 * Close the case now so we won't waste cycles in the
+			 * system rescan
+			 */
+			Close();
+
+			/*
+			 * Since this event was not used to close this
+			 * case, do not report it as consumed.
+			 */
+			return (/*consumed*/false);
+		}
+
+		/*
+		 * Discard any tentative I/O error events for
+		 * this case.  They were most likely caused by the
+		 * hot-unplug of this device.
+		 */
+		PurgeTentativeEvents();
+
+		/* Try to activate spares if they are available */
+		spare_activated = ActivateSpare();
+
+		/*
+		 * Rescan the drives in the system to see if a recent
+		 * drive arrival can be used to solve this case.
+		 */
+		ZfsDaemon::RequestSystemRescan();
+
+		/*
+		 * Consume the event if we successfully activated a spare.
+		 * Otherwise, leave it in the unconsumed events list so that the
+		 * future addition of a spare to this pool might be able to
+		 * close the case
+		 */
+		consumed = spare_activated;
+	} else if (event.Value("class") == "resource.fs.zfs.statechange") {
+		RefreshVdevState();
+		/*
+		 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
+		 * activate a hotspare.  Otherwise, ignore the event
+		 */
+		if (VdevState() == VDEV_STATE_FAULTED ||
+		    VdevState() == VDEV_STATE_DEGRADED ||
+		    VdevState() == VDEV_STATE_CANT_OPEN)
+			(void) ActivateSpare();
+		consumed = true;
+	}
+	else if (event.Value("class") == "ereport.fs.zfs.io" ||
+	         event.Value("class") == "ereport.fs.zfs.checksum") {
+
+		/* Queue a copy as tentative and register a callout for it. */
+		m_tentativeEvents.push_front(event.DeepCopy());
+		RegisterCallout(event);
+		consumed = true;
+	}
+
+	/* The event may have left the case solved; retire it if so. */
+	bool closed(CloseIfSolved());
+
+	return (consumed || closed);
+}
+
+
+bool
+CaseFile::ActivateSpare() {
+	nvlist_t	*config, *nvroot;
+	nvlist_t       **spares;
+	char		*devPath, *vdev_type;
+	const char	*poolname;
+	u_int		 nspares, i;
+	int		 error;
+
+	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+	zpool_handle_t	*zhp(zpl.empty() ? NULL : zpl.front());
+	if (zhp == NULL) {
+		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
+		       "for pool_guid %"PRIu64".", (uint64_t)m_poolGUID);
+		return (false);
+	}
+	poolname = zpool_get_name(zhp);
+	config = zpool_get_config(zhp, NULL);
+	if (config == NULL) {
+		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
+		       "config for pool %s", poolname);
+		return (false);
+	}
+	error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
+	if (error != 0){
+		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
+		       "tree for pool %s", poolname);
+		return (false);
+	}
+	nspares = 0;
+	nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
+				   &nspares);
+	if (nspares == 0) {
+		/* The pool has no spares configured */
+		syslog(LOG_INFO, "CaseFile::ActivateSpare: "
+		       "No spares available for pool %s", poolname);
+		return (false);
+	}
+	for (i = 0; i < nspares; i++) {
+		uint64_t    *nvlist_array;
+		vdev_stat_t *vs;
+		uint_t	     nstats;
+
+		if (nvlist_lookup_uint64_array(spares[i],
+		    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
+			syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
+			       "find vdev stats for pool %s, spare %d",
+			       poolname, i);
+			return (false);
+		}
+		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
+
+		if ((vs->vs_aux != VDEV_AUX_SPARED)
+		 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
+			/* We found a usable spare */
+			break;
+		}
+	}
+
+	if (i == nspares) {
+		/* No available spares were found */
+		return (false);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list