svn commit: r300906 - in head: cddl/usr.sbin cddl/usr.sbin/zfsd cddl/usr.sbin/zfsd/tests etc/defaults etc/mtree etc/rc.d lib lib/libdevdctl lib/libdevdctl/tests share/mk sys/cddl/contrib/opensolari...
Alan Somers
asomers at FreeBSD.org
Sat May 28 17:43:42 UTC 2016
Author: asomers
Date: Sat May 28 17:43:40 2016
New Revision: 300906
URL: https://svnweb.freebsd.org/changeset/base/300906
Log:
zfsd(8), the ZFS fault management daemon
Add zfsd, which deals with hard drive faults in ZFS pools. It manages
hotspares and replacements in drive slots that publish physical paths.
cddl/usr.sbin/zfsd
Add zfsd(8) and its unit tests
cddl/usr.sbin/Makefile
Add zfsd to the build
lib/libdevdctl
A C++ library that helps devd clients process events
lib/Makefile
share/mk/bsd.libnames.mk
share/mk/src.libnames.mk
Add libdevdctl to the build. It's a private library, unusable by
out-of-tree software.
etc/defaults/rc.conf
By default, set zfsd_enable to NO
etc/mtree/BSD.include.dist
Add a directory for libdevdctl's include files
etc/mtree/BSD.tests.dist
Add a directory for zfsd's unit tests
etc/mtree/BSD.var.dist
Add /var/db/zfsd/cases, where zfsd stores case files while it's shut
down.
etc/rc.d/Makefile
etc/rc.d/zfsd
Add zfsd's rc script
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
Fix the resource.fs.zfs.statechange message. It had a number of
problems:
It was only being emitted on a transition to the HEALTHY state.
That made it impossible for zfsd to take actions based on drives
getting sicker.
It compared the new state to vdev_prevstate, which is the state that
the vdev had the last time it was opened. That doesn't make sense,
because a vdev can change state multiple times without being
reopened.
vdev_set_state contains logic that will change the device's new
state based on various conditions. However, the statechange event
was being posted _before_ that logic took effect. Now it's being
posted after.
Submitted by: gibbs, asomers, mav, allanjude
Reviewed by: mav, delphij
Relnotes: yes
Sponsored by: Spectra Logic Corp, iX Systems
Differential Revision: https://reviews.freebsd.org/D6564
Added:
head/cddl/usr.sbin/zfsd/
head/cddl/usr.sbin/zfsd/Makefile (contents, props changed)
head/cddl/usr.sbin/zfsd/Makefile.common (contents, props changed)
head/cddl/usr.sbin/zfsd/callout.cc (contents, props changed)
head/cddl/usr.sbin/zfsd/callout.h (contents, props changed)
head/cddl/usr.sbin/zfsd/case_file.cc (contents, props changed)
head/cddl/usr.sbin/zfsd/case_file.h (contents, props changed)
head/cddl/usr.sbin/zfsd/tests/
head/cddl/usr.sbin/zfsd/tests/Makefile (contents, props changed)
head/cddl/usr.sbin/zfsd/tests/libmocks.c (contents, props changed)
head/cddl/usr.sbin/zfsd/tests/libmocks.h (contents, props changed)
head/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc (contents, props changed)
head/cddl/usr.sbin/zfsd/tests/zfsd_unittest.supp (contents, props changed)
head/cddl/usr.sbin/zfsd/vdev.cc (contents, props changed)
head/cddl/usr.sbin/zfsd/vdev.h (contents, props changed)
head/cddl/usr.sbin/zfsd/vdev_iterator.cc (contents, props changed)
head/cddl/usr.sbin/zfsd/vdev_iterator.h (contents, props changed)
head/cddl/usr.sbin/zfsd/zfsd.8 (contents, props changed)
head/cddl/usr.sbin/zfsd/zfsd.cc (contents, props changed)
head/cddl/usr.sbin/zfsd/zfsd.h (contents, props changed)
head/cddl/usr.sbin/zfsd/zfsd_event.cc (contents, props changed)
head/cddl/usr.sbin/zfsd/zfsd_event.h (contents, props changed)
head/cddl/usr.sbin/zfsd/zfsd_exception.cc (contents, props changed)
head/cddl/usr.sbin/zfsd/zfsd_exception.h (contents, props changed)
head/cddl/usr.sbin/zfsd/zfsd_main.cc (contents, props changed)
head/cddl/usr.sbin/zfsd/zpool_list.cc (contents, props changed)
head/cddl/usr.sbin/zfsd/zpool_list.h (contents, props changed)
head/etc/rc.d/zfsd (contents, props changed)
head/lib/libdevdctl/
head/lib/libdevdctl/Makefile (contents, props changed)
head/lib/libdevdctl/consumer.cc (contents, props changed)
head/lib/libdevdctl/consumer.h (contents, props changed)
head/lib/libdevdctl/event.cc (contents, props changed)
head/lib/libdevdctl/event.h (contents, props changed)
head/lib/libdevdctl/event_factory.cc (contents, props changed)
head/lib/libdevdctl/event_factory.h (contents, props changed)
head/lib/libdevdctl/exception.cc (contents, props changed)
head/lib/libdevdctl/exception.h (contents, props changed)
head/lib/libdevdctl/guid.cc (contents, props changed)
head/lib/libdevdctl/guid.h (contents, props changed)
head/lib/libdevdctl/tests/
head/lib/libdevdctl/tests/Makefile (contents, props changed)
head/lib/libdevdctl/tests/libdevdctl_unittest.cc (contents, props changed)
Modified:
head/cddl/usr.sbin/Makefile
head/etc/defaults/rc.conf
head/etc/mtree/BSD.include.dist
head/etc/mtree/BSD.tests.dist
head/etc/mtree/BSD.var.dist
head/etc/rc.d/Makefile
head/lib/Makefile
head/share/mk/bsd.libnames.mk
head/share/mk/src.libnames.mk
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
Modified: head/cddl/usr.sbin/Makefile
==============================================================================
--- head/cddl/usr.sbin/Makefile Sat May 28 16:38:09 2016 (r300905)
+++ head/cddl/usr.sbin/Makefile Sat May 28 17:43:40 2016 (r300906)
@@ -7,6 +7,7 @@ SUBDIR= ${_dtrace} \
${_plockstat} \
${_tests} \
${_zdb} \
+ ${_zfsd} \
${_zhack}
.if ${MK_TESTS} != "no"
@@ -18,6 +19,9 @@ _tests= tests
_zdb= zdb
_zhack= zhack
.endif
+. if ${MK_CXX} != "no"
+_zfsd= zfsd
+. endif
.endif
.if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386"
Added: head/cddl/usr.sbin/zfsd/Makefile
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/Makefile Sat May 28 17:43:40 2016 (r300906)
@@ -0,0 +1,13 @@
+# $FreeBSD$
+
+SRCDIR=${.CURDIR}/../../..
+.include "Makefile.common"
+
+PROG_CXX= zfsd
+MAN= zfsd.8
+
+.include <bsd.prog.mk>
+
+# The unittests require devel/googletest and devel/googlemock from ports.
+# Don't automatically build them.
+SUBDIR=
Added: head/cddl/usr.sbin/zfsd/Makefile.common
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/Makefile.common Sat May 28 17:43:40 2016 (r300906)
@@ -0,0 +1,42 @@
+# $FreeBSD$
+
+SRCS= callout.cc \
+ case_file.cc \
+ zfsd_event.cc \
+ vdev.cc \
+ vdev_iterator.cc \
+ zfsd.cc \
+ zfsd_exception.cc \
+ zpool_list.cc \
+ zfsd_main.cc
+
+WARNS?= 3
+
+# Ignore warnings about Solaris specific pragmas.
+IGNORE_PRAGMA= YES
+
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzpool/common
+INCFLAGS+= -I${SRCDIR}/cddl/compat/opensolaris/include
+INCFLAGS+= -I${SRCDIR}/cddl/compat/opensolaris/lib/libumem
+INCFLAGS+= -I${SRCDIR}/sys/cddl/compat/opensolaris
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/head
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libuutil/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libumem/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzfs_core/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzfs/common
+INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libnvpair
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/common/zfs
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
+INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common/sys
+
+CFLAGS= -g -DNEED_SOLARIS_BOOLEAN ${INCFLAGS}
+
+DPADD= ${LIBDEVDCTL} ${LIBZFS} ${LIBZFS_CORE} ${LIBUTIL} ${LIBGEOM} \
+ ${LIBBSDXML} ${LIBSBUF} ${LIBNVPAIR} ${LIBUUTIL}
+LIBADD= devdctl zfs zfs_core util geom bsdxml sbuf nvpair uutil
+
+cscope:
+ find ${.CURDIR} -type f -a \( -name "*.[ch]" -o -name "*.cc" \) \
+ > ${.CURDIR}/cscope.files
+ cd ${.CURDIR} && cscope -buq ${INCFLAGS}
Added: head/cddl/usr.sbin/zfsd/callout.cc
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/callout.cc Sat May 28 17:43:40 2016 (r300906)
@@ -0,0 +1,219 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file callout.cc
+ *
+ * \brief Implementation of the Callout class - multi-client
+ * timer services built on top of the POSIX interval timer.
+ */
+
+#include <sys/time.h>
+
+#include <signal.h>
+#include <syslog.h>
+
+#include <climits>
+#include <list>
+#include <map>
+#include <string>
+
+#include <devdctl/guid.h>
+#include <devdctl/event.h>
+#include <devdctl/event_factory.h>
+#include <devdctl/consumer.h>
+#include <devdctl/exception.h>
+
+#include "callout.h"
+#include "vdev_iterator.h"
+#include "zfsd.h"
+#include "zfsd_exception.h"
+
+std::list<Callout *> Callout::s_activeCallouts;
+bool Callout::s_alarmFired(false);
+
+/**
+ * Install Callout::AlarmSignalHandler as the process's SIGALRM handler.
+ */
+void
+Callout::Init()
+{
+	::signal(SIGALRM, &Callout::AlarmSignalHandler);
+}
+
+/*
+ * Remove this callout from the active list, if it is pending.
+ *
+ * Returns false without side effects when the callout is not pending.
+ * Otherwise the callout is unlinked from s_activeCallouts, its interval is
+ * added to the entry that followed it (entries store intervals relative to
+ * their predecessor), the pending flag is cleared, and true is returned.
+ *
+ * NOTE(review): the interval timer is not touched here.  When the removed
+ * entry was the head of the list, the timer that was programmed for it
+ * appears to keep running, and ExpireCallouts() expires whatever is at the
+ * head when the alarm arrives -- confirm the successor cannot fire early.
+ */
+bool
+Callout::Stop()
+{
+ if (!IsPending())
+ return (false);
+
+ for (std::list<Callout *>::iterator it(s_activeCallouts.begin());
+ it != s_activeCallouts.end(); it++) {
+ if (*it != this)
+ continue;
+
+ it = s_activeCallouts.erase(it);
+ if (it != s_activeCallouts.end()) {
+
+ /*
+ * Maintain correct interval for the
+ * callouts that follow the just removed
+ * entry.
+ */
+ timeradd(&(*it)->m_interval, &m_interval,
+ &(*it)->m_interval);
+ }
+ break;
+ }
+ m_pending = false;
+ return (true);
+}
+
+/*
+ * (Re)schedule this callout so that func(arg) is invoked after interval.
+ *
+ * Any pending instance of this callout is stopped first.  The callout is
+ * then inserted into s_activeCallouts, whose entries each store an interval
+ * relative to the entry before them, and the interval timer is programmed
+ * if this callout ends up at the head of the list.
+ *
+ * Throws ZfsdException if interval is zero.
+ * Returns true if a pending instance was cancelled by this call.
+ */
+bool
+Callout::Reset(const timeval &interval, CalloutFunc_t *func, void *arg)
+{
+ bool cancelled(false);
+
+ if (!timerisset(&interval))
+ throw ZfsdException("Callout::Reset: interval of 0");
+
+ cancelled = Stop();
+
+ m_interval = interval;
+ m_func = func;
+ m_arg = arg;
+ m_pending = true;
+
+ /*
+ * Walk the list to find the insertion point, converting m_interval
+ * from an absolute delay into one relative to the preceding entry.
+ */
+ std::list<Callout *>::iterator it(s_activeCallouts.begin());
+ for (; it != s_activeCallouts.end(); it++) {
+
+ if (timercmp(&(*it)->m_interval, &m_interval, <=)) {
+ /*
+ * Decrease our interval by those that come
+ * before us.
+ */
+ timersub(&m_interval, &(*it)->m_interval, &m_interval);
+ } else {
+ /*
+ * Account for the time between the newly
+ * inserted event and those that follow.
+ */
+ timersub(&(*it)->m_interval, &m_interval,
+ &(*it)->m_interval);
+ break;
+ }
+ }
+ s_activeCallouts.insert(it, this);
+
+
+ /* Only the head of the list drives the one-shot interval timer. */
+ if (s_activeCallouts.front() == this) {
+ itimerval timerval = { {0, 0}, m_interval };
+
+ setitimer(ITIMER_REAL, &timerval, NULL);
+ }
+
+ return (cancelled);
+}
+
+/*
+ * SIGALRM handler (installed by Callout::Init()).  It only records that the
+ * alarm fired and wakes the event loop; the callbacks themselves are run
+ * later by ExpireCallouts(), which checks s_alarmFired.
+ */
+void
+Callout::AlarmSignalHandler(int)
+{
+ s_alarmFired = true;
+ ZfsDaemon::WakeEventLoop();
+}
+
+/*
+ * If the alarm has fired since the last call, run the callbacks of the
+ * callout at the head of the active list and of any callouts directly
+ * behind it that have a zero relative interval, then program the interval
+ * timer for the next remaining callout, if there is one.
+ */
+void
+Callout::ExpireCallouts()
+{
+ if (!s_alarmFired)
+ return;
+
+ s_alarmFired = false;
+ if (s_activeCallouts.empty()) {
+ /* Callout removal/SIGALRM race was lost. */
+ return;
+ }
+
+ /*
+ * Expire the first callout (the one we used to set the
+ * interval timer) as well as any callouts following that
+ * expire at the same time (have a zero interval from
+ * the callout before it).
+ */
+ do {
+ Callout *cur(s_activeCallouts.front());
+ s_activeCallouts.pop_front();
+ /* Unlink and clear the pending flag before running the callback. */
+ cur->m_pending = false;
+ cur->m_func(cur->m_arg);
+ } while (!s_activeCallouts.empty()
+ && timerisset(&s_activeCallouts.front()->m_interval) == 0);
+
+ if (!s_activeCallouts.empty()) {
+ Callout *next(s_activeCallouts.front());
+ itimerval timerval = { { 0, 0 }, next->m_interval };
+
+ setitimer(ITIMER_REAL, &timerval, NULL);
+ }
+}
+
+/**
+ * \brief Estimate the time remaining until this callout expires.
+ *
+ * Outline: start with the time left on the interval timer as reported by
+ * getitimer (the first callout in s_activeCallouts stores its original
+ * interval setting while the timer is ticking, so its m_interval is not
+ * used).  Then add the relative m_interval of every callout after the
+ * first, up to and including this one.
+ *
+ * \return  The estimated time to expiry.  If the callout is not pending,
+ *          a timeval of { INT_MAX, 999999 } is returned.
+ */
+timeval
+Callout::TimeRemaining() const
+{
+	itimerval timervalToAlarm = { { 0, 0 }, { 0, 0 } };
+	timeval timeToExpiry;
+
+	if (!IsPending()) {
+		timeToExpiry.tv_sec = INT_MAX;
+		timeToExpiry.tv_usec = 999999;	/* maximum normalized value */
+		return (timeToExpiry);
+	}
+
+	/* Time until the head of the list expires. */
+	getitimer(ITIMER_REAL, &timervalToAlarm);
+	timeToExpiry = timervalToAlarm.it_value;
+
+	/*
+	 * If this callout is the head of the list, the timer value is the
+	 * whole answer.  Walking the rest of the list in that case would
+	 * never find this callout and would wrongly add up every interval.
+	 */
+	std::list<Callout *>::iterator it(s_activeCallouts.begin());
+	if (it == s_activeCallouts.end() || *it == this)
+		return (timeToExpiry);
+
+	/* Skip the head; accumulate relative intervals up to this callout. */
+	for (++it; it != s_activeCallouts.end(); ++it) {
+		timeradd(&timeToExpiry, &(*it)->m_interval,
+		    &timeToExpiry);
+		if (*it == this)
+			break;
+	}
+	return (timeToExpiry);
+}
Added: head/cddl/usr.sbin/zfsd/callout.h
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/callout.h Sat May 28 17:43:40 2016 (r300906)
@@ -0,0 +1,185 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs (Spectra Logic Corporation)
+ *
+ * $FreeBSD$
+ */
+
+/**
+ * \file callout.h
+ *
+ * \brief Interface for timer based callback services.
+ *
+ * Header requirements:
+ *
+ * #include <sys/time.h>
+ *
+ * #include <list>
+ */
+
+#ifndef _CALLOUT_H_
+#define _CALLOUT_H_
+
+/**
+ * \brief Type of the function callback from a Callout.
+ */
+typedef void CalloutFunc_t(void *);
+
+/**
+ * \brief Interface to a schedulable one-shot timer with the granularity
+ * of the system clock (see setitimer(2)).
+ *
+ * Determination of callback expiration is triggered by the SIGALRM
+ * signal. Callout callbacks are always delivered from Zfsd's event
+ * processing loop.
+ *
+ * Periodic actions can be triggered via the Callout mechanisms by
+ * resetting the Callout from within its callback.
+ */
+class Callout
+{
+public:
+
+ /**
+ * Initialize the Callout subsystem.
+ */
+ static void Init();
+
+ /**
+ * Function called (via SIGALRM) when our interval
+ * timer expires.
+ */
+ static void AlarmSignalHandler(int);
+
+ /**
+ * Execute callbacks for all callouts that have the same
+ * expiration time as the first callout in the list.
+ */
+ static void ExpireCallouts();
+
+ /** Constructor. */
+ Callout();
+
+ /**
+ * Returns true if callout has not been stopped,
+ * or deactivated since the last time the callout was
+ * reset.
+ *
+ * NOTE(review): no definition of IsActive() is visible next to
+ * the other Callout members -- confirm one exists before
+ * calling it.
+ */
+ bool IsActive() const;
+
+ /**
+ * Returns true if callout is still waiting to expire.
+ */
+ bool IsPending() const;
+
+ /**
+ * Disestablish a callout.
+ */
+ bool Stop();
+
+ /**
+ * \brief Establish or change a timeout.
+ *
+ * \param interval Timeval indicating the time which must elapse
+ * before this callout fires.
+ * \param func Pointer to the callback function
+ * \param arg Argument pointer to pass to callback function
+ *
+ * \return Cancellation status.
+ * true: The previous callback was pending and therefore
+ * was cancelled.
+ * false: The callout was not pending at the time of this
+ * reset request.
+ * In all cases, a new callout is established.
+ */
+ bool Reset(const timeval &interval, CalloutFunc_t *func, void *arg);
+
+ /**
+ * \brief Calculate the remaining time until this Callout's timer
+ * expires.
+ *
+ * The return value will be slightly greater than the actual time to
+ * expiry.
+ *
+ * If the callout is not pending, returns a timeval whose tv_sec
+ * is INT_MAX.
+ */
+ timeval TimeRemaining() const;
+
+private:
+ /**
+ * All active callouts sorted by expiration time. The callout
+ * with the nearest expiration time is at the head of the list.
+ */
+ static std::list<Callout *> s_activeCallouts;
+
+ /**
+ * The interval timer has expired. This variable is set from
+ * signal handler context and tested from Zfsd::EventLoop()
+ * context via ExpireCallouts().
+ */
+ static bool s_alarmFired;
+
+ /**
+ * Time, relative to others in the active list, until
+ * this callout is fired.
+ */
+ timeval m_interval;
+
+ /** Callback function argument. */
+ void *m_arg;
+
+ /**
+ * The callback function associated with this timer
+ * entry.
+ */
+ CalloutFunc_t *m_func;
+
+ /** State of this callout. */
+ bool m_pending;
+};
+
+//- Callout public const methods ----------------------------------------------
+/** Report the pending flag: true while the callout is waiting to expire. */
+inline bool
+Callout::IsPending() const
+{
+	return m_pending;
+}
+
+//- Callout public methods ----------------------------------------------------
+/** Construct an idle callout: no callback, no argument, zero interval. */
+inline
+Callout::Callout()
+	: m_arg(NULL),
+	  m_func(NULL),
+	  m_pending(false)
+{
+	m_interval.tv_sec = 0;
+	m_interval.tv_usec = 0;
+}
+
+#endif /* CALLOUT_H_ */
Added: head/cddl/usr.sbin/zfsd/case_file.cc
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/cddl/usr.sbin/zfsd/case_file.cc Sat May 28 17:43:40 2016 (r300906)
@@ -0,0 +1,1104 @@
+/*-
+ * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs (Spectra Logic Corporation)
+ */
+
+/**
+ * \file case_file.cc
+ *
+ * We keep case files for any leaf vdev that is not in the optimal state.
+ * However, we only serialize to disk those events that need to be preserved
+ * across reboots. For now, this is just a log of soft errors which we
+ * accumulate in order to mark a device as degraded.
+ */
+#include <sys/cdefs.h>
+#include <sys/time.h>
+
+#include <sys/fs/zfs.h>
+
+#include <dirent.h>
+#include <iomanip>
+#include <fstream>
+#include <functional>
+#include <sstream>
+#include <syslog.h>
+#include <unistd.h>
+
+#include <libzfs.h>
+
+#include <list>
+#include <map>
+#include <string>
+
+#include <devdctl/guid.h>
+#include <devdctl/event.h>
+#include <devdctl/event_factory.h>
+#include <devdctl/exception.h>
+#include <devdctl/consumer.h>
+
+#include "callout.h"
+#include "vdev_iterator.h"
+#include "zfsd_event.h"
+#include "case_file.h"
+#include "vdev.h"
+#include "zfsd.h"
+#include "zfsd_exception.h"
+#include "zpool_list.h"
+
+__FBSDID("$FreeBSD$");
+
+/*============================ Namespace Control =============================*/
+using std::auto_ptr;
+using std::hex;
+using std::ifstream;
+using std::stringstream;
+using std::setfill;
+using std::setw;
+
+using DevdCtl::Event;
+using DevdCtl::EventBuffer;
+using DevdCtl::EventFactory;
+using DevdCtl::EventList;
+using DevdCtl::Guid;
+using DevdCtl::ParseException;
+
+/*--------------------------------- CaseFile ---------------------------------*/
+//- CaseFile Static Data -------------------------------------------------------
+
+CaseFileList CaseFile::s_activeCases;
+const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
+const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
+
+//- CaseFile Static Public Methods ---------------------------------------------
+/**
+ * Look up the active case whose pool and vdev GUIDs both match.
+ *
+ * \return  The matching case, or NULL if no active case matches.
+ */
+CaseFile *
+CaseFile::Find(Guid poolGUID, Guid vdevGUID)
+{
+	for (CaseFileList::iterator it(s_activeCases.begin());
+	     it != s_activeCases.end(); ++it) {
+		CaseFile *candidate(*it);
+		const bool mismatch(candidate->PoolGUID() != poolGUID
+		    || candidate->VdevGUID() != vdevGUID);
+
+		/* Only one active case is carried per vdev: first hit wins. */
+		if (!mismatch)
+			return (candidate);
+	}
+	return (NULL);
+}
+
+/**
+ * Look up the active case whose physical path equals physPath.
+ *
+ * Every active case is examined.  If more than one matches, a warning is
+ * logged for each additional match and the last match is returned.
+ *
+ * \return  The matching case, or NULL if no active case matches.
+ */
+CaseFile *
+CaseFile::Find(const string &physPath)
+{
+	CaseFile *match(NULL);
+
+	for (CaseFileList::iterator it(s_activeCases.begin());
+	     it != s_activeCases.end(); ++it) {
+		CaseFile *candidate(*it);
+
+		if (candidate->PhysicalPath() != physPath)
+			continue;
+
+		if (match != NULL) {
+			syslog(LOG_WARNING, "Multiple casefiles found for "
+			    "physical path %s. "
+			    "This is most likely a bug in zfsd",
+			    physPath.c_str());
+		}
+		match = candidate;
+	}
+	return (match);
+}
+
+
+/**
+ * Re-evaluate, against the given event, every active case that belongs to
+ * the pool identified by poolGUID.
+ */
+void
+CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
+{
+	CaseFileList::iterator it(s_activeCases.begin());
+
+	while (it != s_activeCases.end()) {
+		/*
+		 * Advance past the current entry before re-evaluating it,
+		 * in case re-evaluation removes that entry from the list.
+		 */
+		CaseFile *current(*it++);
+
+		if (poolGUID == current->PoolGUID())
+			current->ReEvaluate(event);
+	}
+}
+
+/**
+ * Return the active case for the given vdev, allocating a new CaseFile
+ * when Find() reports that none exists yet.
+ */
+CaseFile &
+CaseFile::Create(Vdev &vdev)
+{
+	CaseFile *existing(Find(vdev.PoolGUID(), vdev.GUID()));
+
+	if (existing != NULL)
+		return (*existing);
+
+	return (*new CaseFile(vdev));
+}
+
+/**
+ * Scan the case file directory and hand each entry accepted by
+ * DeSerializeSelector to DeSerializeFile().  A failed scan is silently
+ * ignored.
+ */
+void
+CaseFile::DeSerialize()
+{
+	struct dirent **entries;
+	const int entryCount(scandir(s_caseFilePath.c_str(), &entries,
+	    DeSerializeSelector, /*compar*/NULL));
+
+	if (entryCount == -1)
+		return;
+
+	/* Each entry, and the array itself, was allocated by scandir. */
+	for (int idx = 0; idx < entryCount; ++idx) {
+		struct dirent *entry(entries[idx]);
+
+		DeSerializeFile(entry->d_name);
+		free(entry);
+	}
+	free(entries);
+}
+
+/**
+ * Invoke Log() on every active case, in list order.
+ */
+void
+CaseFile::LogAll()
+{
+	CaseFileList::iterator it(s_activeCases.begin());
+
+	while (it != s_activeCases.end()) {
+		(*it)->Log();
+		++it;
+	}
+}
+
+/**
+ * Serialize and then destroy every active case.
+ */
+void
+CaseFile::PurgeAll()
+{
+	/*
+	 * Each case is written out before it is deleted so that it can be
+	 * reread and revalidated during BuildCaseFiles.  The loop ends
+	 * because CaseFiles remove themselves from this list on destruction.
+	 */
+	while (!s_activeCases.empty()) {
+		CaseFile *victim(s_activeCases.front());
+
+		victim->Serialize();
+		delete victim;
+	}
+}
+
+//- CaseFile Public Methods ----------------------------------------------------
+/**
+ * Re-read this case's vdev state and physical path from its pool.
+ *
+ * \return  false if the pool cannot be found or the vdev does not exist
+ *          (the cached values are left untouched); true once m_vdevState
+ *          and m_vdevPhysPath have been updated.
+ */
+bool
+CaseFile::RefreshVdevState()
+{
+	ZpoolList pools(ZpoolList::ZpoolByGUID, &m_poolGUID);
+
+	if (pools.empty())
+		return (false);
+
+	zpool_handle_t *casePool(pools.front());
+	if (casePool == NULL)
+		return (false);
+
+	Vdev current(casePool, CaseVdev(casePool));
+	if (current.DoesNotExist())
+		return (false);
+
+	m_vdevState = current.State();
+	m_vdevPhysPath = current.PhysicalPath();
+	return (true);
+}
+
+/**
+ * \brief Re-evaluate this case in light of a device arrival.
+ *
+ * If the arriving device carries the label of this case's own vdev, the
+ * vdev is brought back online.  Otherwise, when the pool's autoreplace
+ * property is set and the device's physical path matches the one recorded
+ * for this case, the device is labeled and used to replace the vdev.
+ *
+ * \param devPath   Device path of the arriving device; passed to
+ *                  zpool_label_disk() and Replace().
+ * \param physPath  Physical path of the arriving device; compared with
+ *                  this case's PhysicalPath().
+ * \param vdev      Vdev describing the arriving device, or NULL.
+ *
+ * \return  true if the arrival was consumed by this case, false otherwise.
+ *          The case may have been closed when this returns.
+ */
+bool
+CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
+{
+	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
+
+	if (pool == NULL || !RefreshVdevState()) {
+		/*
+		 * The pool or vdev for this case file is no longer
+		 * part of the configuration.  This can happen
+		 * if we process a device arrival notification
+		 * before seeing the ZFS configuration change
+		 * event.
+		 */
+		syslog(LOG_INFO,
+		    "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. "
+		    "Closing\n",
+		    PoolGUIDString().c_str(),
+		    VdevGUIDString().c_str());
+		Close();
+
+		/*
+		 * Since this event was not used to close this
+		 * case, do not report it as consumed.
+		 */
+		return (/*consumed*/false);
+	}
+
+	if (VdevState() > VDEV_STATE_CANT_OPEN) {
+		/*
+		 * For now, newly discovered devices only help for
+		 * devices that are missing.  In the future, we might
+		 * use a newly inserted spare to replace a degraded
+		 * or faulted device.
+		 */
+		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
+		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
+		return (/*consumed*/false);
+	}
+
+	if (vdev != NULL
+	 && vdev->PoolGUID() == m_poolGUID
+	 && vdev->GUID() == m_vdevGUID) {
+
+		/*
+		 * The arriving device is this case's own vdev.  Only report
+		 * success, and only consume the event, if the online request
+		 * actually succeeded.
+		 */
+		if (zpool_vdev_online(pool, vdev->GUIDString().c_str(),
+		    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
+		    &m_vdevState) != 0) {
+			syslog(LOG_ERR,
+			    "Failed to online vdev(%s/%s:%s): %s: %s\n",
+			    zpool_get_name(pool), vdev->GUIDString().c_str(),
+			    devPath.c_str(),
+			    libzfs_error_action(g_zfsHandle),
+			    libzfs_error_description(g_zfsHandle));
+			return (/*consumed*/false);
+		}
+
+		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
+		    zpool_get_name(pool), vdev->GUIDString().c_str(),
+		    devPath.c_str(),
+		    zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+
+		/*
+		 * Check the vdev state post the online action to see
+		 * if we can retire this case.
+		 */
+		CloseIfSolved();
+
+		return (/*consumed*/true);
+	}
+
+	/*
+	 * If the auto-replace policy is enabled, and we have physical
+	 * path information, try a physical path replacement.
+	 */
+	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
+		syslog(LOG_INFO,
+		    "CaseFile(%s:%s:%s): AutoReplace not set.  "
+		    "Ignoring device insertion.\n",
+		    PoolGUIDString().c_str(),
+		    VdevGUIDString().c_str(),
+		    zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+		return (/*consumed*/false);
+	}
+
+	if (PhysicalPath().empty()) {
+		syslog(LOG_INFO,
+		    "CaseFile(%s:%s:%s): No physical path information.  "
+		    "Ignoring device insertion.\n",
+		    PoolGUIDString().c_str(),
+		    VdevGUIDString().c_str(),
+		    zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+		return (/*consumed*/false);
+	}
+
+	if (physPath != PhysicalPath()) {
+		syslog(LOG_INFO,
+		    "CaseFile(%s:%s:%s): Physical path mismatch.  "
+		    "Ignoring device insertion.\n",
+		    PoolGUIDString().c_str(),
+		    VdevGUIDString().c_str(),
+		    zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+		return (/*consumed*/false);
+	}
+
+	/* Write a label on the newly inserted disk. */
+	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
+		syslog(LOG_ERR,
+		    "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
+		    zpool_get_name(pool), VdevGUIDString().c_str(),
+		    libzfs_error_action(g_zfsHandle),
+		    libzfs_error_description(g_zfsHandle));
+		return (/*consumed*/false);
+	}
+
+	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
+	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
+	    devPath.c_str());
+	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
+}
+
+/*
+ * Re-evaluate this case in light of a ZFS event.
+ *
+ * The event's "type" value is examined first: vdev_remove and pool_destroy
+ * close the case and return immediately, while config_sync refreshes the
+ * vdev state and may activate a spare.  The event's "class" value is then
+ * examined for removal, state change, and I/O or checksum error reports.
+ *
+ * Returns true if the event was consumed or if the case was closed by
+ * CloseIfSolved(); false otherwise.
+ */
+bool
+CaseFile::ReEvaluate(const ZfsEvent &event)
+{
+ bool consumed(false);
+
+ if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
+ /*
+ * The Vdev we represent has been removed from the
+ * configuration. This case is no longer of value.
+ */
+ Close();
+
+ return (/*consumed*/true);
+ } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
+ /* This Pool has been destroyed. Discard the case */
+ Close();
+
+ return (/*consumed*/true);
+ } else if (event.Value("type") == "misc.fs.zfs.config_sync") {
+ /*
+ * NOTE(review): the result of RefreshVdevState() is not
+ * checked here, so VdevState() below may report the
+ * previously cached state if the refresh failed -- confirm
+ * this is intended.
+ */
+ RefreshVdevState();
+ if (VdevState() < VDEV_STATE_HEALTHY)
+ consumed = ActivateSpare();
+ }
+
+
+ if (event.Value("class") == "resource.fs.zfs.removed") {
+ bool spare_activated;
+
+ if (!RefreshVdevState()) {
+ /*
+ * The pool or vdev for this case file is no longer
+ * part of the configuration. This can happen
+ * if we process a device arrival notification
+ * before seeing the ZFS configuration change
+ * event.
+ */
+ syslog(LOG_INFO,
+ "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
+ "unconfigured. Closing\n",
+ PoolGUIDString().c_str(),
+ VdevGUIDString().c_str());
+ /*
+ * Close the case now so we won't waste cycles in the
+ * system rescan
+ */
+ Close();
+
+ /*
+ * Since this event was not used to close this
+ * case, do not report it as consumed.
+ */
+ return (/*consumed*/false);
+ }
+
+ /*
+ * Discard any tentative I/O error events for
+ * this case. They were most likely caused by the
+ * hot-unplug of this device.
+ */
+ PurgeTentativeEvents();
+
+ /* Try to activate spares if they are available */
+ spare_activated = ActivateSpare();
+
+ /*
+ * Rescan the drives in the system to see if a recent
+ * drive arrival can be used to solve this case.
+ */
+ ZfsDaemon::RequestSystemRescan();
+
+ /*
+ * Consume the event if we successfully activated a spare.
+ * Otherwise, leave it in the unconsumed events list so that the
+ * future addition of a spare to this pool might be able to
+ * close the case
+ */
+ consumed = spare_activated;
+ } else if (event.Value("class") == "resource.fs.zfs.statechange") {
+ RefreshVdevState();
+ /*
+ * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
+ * activate a hotspare. Otherwise, ignore the event
+ */
+ if (VdevState() == VDEV_STATE_FAULTED ||
+ VdevState() == VDEV_STATE_DEGRADED ||
+ VdevState() == VDEV_STATE_CANT_OPEN)
+ (void) ActivateSpare();
+ consumed = true;
+ }
+ else if (event.Value("class") == "ereport.fs.zfs.io" ||
+ event.Value("class") == "ereport.fs.zfs.checksum") {
+
+ /*
+ * Keep a copy of the error report as a tentative event and
+ * register a callout for it.
+ */
+ m_tentativeEvents.push_front(event.DeepCopy());
+ RegisterCallout(event);
+ consumed = true;
+ }
+
+ /* Whatever the event was, retire the case if it is now solved. */
+ bool closed(CloseIfSolved());
+
+ return (consumed || closed);
+}
+
+
+bool
+CaseFile::ActivateSpare() {
+ nvlist_t *config, *nvroot;
+ nvlist_t **spares;
+ char *devPath, *vdev_type;
+ const char *poolname;
+ u_int nspares, i;
+ int error;
+
+ ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+ zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
+ if (zhp == NULL) {
+ syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
+ "for pool_guid %"PRIu64".", (uint64_t)m_poolGUID);
+ return (false);
+ }
+ poolname = zpool_get_name(zhp);
+ config = zpool_get_config(zhp, NULL);
+ if (config == NULL) {
+ syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
+ "config for pool %s", poolname);
+ return (false);
+ }
+ error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
+ if (error != 0){
+ syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
+ "tree for pool %s", poolname);
+ return (false);
+ }
+ nspares = 0;
+ nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
+ &nspares);
+ if (nspares == 0) {
+ /* The pool has no spares configured */
+ syslog(LOG_INFO, "CaseFile::ActivateSpare: "
+ "No spares available for pool %s", poolname);
+ return (false);
+ }
+ for (i = 0; i < nspares; i++) {
+ uint64_t *nvlist_array;
+ vdev_stat_t *vs;
+ uint_t nstats;
+
+ if (nvlist_lookup_uint64_array(spares[i],
+ ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
+ syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
+ "find vdev stats for pool %s, spare %d",
+ poolname, i);
+ return (false);
+ }
+ vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
+
+ if ((vs->vs_aux != VDEV_AUX_SPARED)
+ && (vs->vs_state == VDEV_STATE_HEALTHY)) {
+ /* We found a usable spare */
+ break;
+ }
+ }
+
+ if (i == nspares) {
+ /* No available spares were found */
+ return (false);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list