svn commit: r247405 - in head: sys/dev/watchdog sys/sys usr.sbin/watchdogd
Alfred Perlstein
alfred at FreeBSD.org
Wed Feb 27 19:03:33 UTC 2013
Author: alfred
Date: Wed Feb 27 19:03:31 2013
New Revision: 247405
URL: http://svnweb.freebsd.org/changeset/base/247405
Log:
watchdogd(8) and watchdog(4) enhancements.
The following support was added to watchdog(4):
- Support to query the outstanding timeout.
- Support to set a software pre-timeout function watchdog with an 'action'
- Support to set a software only watchdog with a configurable 'action'
'action' can be a mask specifying a single operation or a combination of:
log(9), printf(9), panic(9) and/or kdb_enter(9).
Support for the following was added to watchdogd(8):
- Support to utilize the new additions to watchdog(4).
- Support to warn if a watchdog script runs for too long.
- Support for "dry run" where we do not actually arm the watchdog,
but only report on our timing.
Sponsored by: iXsystems, Inc.
MFC after: 1 month
Modified:
head/sys/dev/watchdog/watchdog.c
head/sys/sys/watchdog.h
head/usr.sbin/watchdogd/watchdogd.8
head/usr.sbin/watchdogd/watchdogd.c
Modified: head/sys/dev/watchdog/watchdog.c
==============================================================================
--- head/sys/dev/watchdog/watchdog.c Wed Feb 27 18:47:01 2013 (r247404)
+++ head/sys/dev/watchdog/watchdog.c Wed Feb 27 19:03:31 2013 (r247405)
@@ -1,5 +1,8 @@
/*-
* Copyright (c) 2004 Poul-Henning Kamp
+ * Copyright (c) 2013 iXsystems.com,
+ * author: Alfred Perlstein <alfred at freebsd.org>
+ *
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,21 +32,40 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/types.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/syslog.h>
#include <sys/watchdog.h>
#include <sys/bus.h>
#include <machine/bus.h>
+#include <sys/syscallsubr.h> /* kern_clock_gettime() */
+
+static int wd_set_pretimeout(int newtimeout, int disableiftoolong);
+static void wd_timeout_cb(void *arg);
+
+static struct callout wd_pretimeo_handle;
+static int wd_pretimeout;
+static int wd_pretimeout_act = WD_SOFT_LOG;
+
+static struct callout wd_softtimeo_handle;
+static int wd_softtimer; /* true = use softtimer instead of hardware
+ watchdog */
+static int wd_softtimeout_act = WD_SOFT_LOG; /* action for the software timeout */
+
static struct cdev *wd_dev;
-static volatile u_int wd_last_u;
+static volatile u_int wd_last_u; /* last timeout value set by kern_do_pat */
-static int
-kern_do_pat(u_int utim)
+static int wd_lastpat_valid = 0;
+static time_t wd_lastpat = 0; /* when the watchdog was last patted */
+
+int
+wdog_kern_pat(u_int utim)
{
int error;
@@ -51,11 +73,20 @@ kern_do_pat(u_int utim)
return (EINVAL);
if ((utim & WD_LASTVAL) != 0) {
+ /*
+ * if WD_LASTVAL is set, fill in the bits for timeout
+ * from the saved value in wd_last_u.
+ */
MPASS((wd_last_u & ~WD_INTERVAL) == 0);
utim &= ~WD_LASTVAL;
utim |= wd_last_u;
- } else
+ } else {
+ /*
+ * Otherwise save the new interval.
+ * This can be zero (to disable the watchdog)
+ */
wd_last_u = (utim & WD_INTERVAL);
+ }
if ((utim & WD_INTERVAL) == WD_TO_NEVER) {
utim = 0;
@@ -65,18 +96,49 @@ kern_do_pat(u_int utim)
/* Assume no watchdog available; watchdog flags success */
error = EOPNOTSUPP;
}
- EVENTHANDLER_INVOKE(watchdog_list, utim, &error);
+ if (wd_softtimer) {
+ if (utim == 0) {
+ callout_stop(&wd_softtimeo_handle);
+ } else {
+ (void) callout_reset(&wd_softtimeo_handle,
+ hz*utim, wd_timeout_cb, "soft");
+ }
+ error = 0;
+ } else {
+ EVENTHANDLER_INVOKE(watchdog_list, utim, &error);
+ }
+ wd_set_pretimeout(wd_pretimeout, true);
+ /*
+ * If we were able to arm/strobe the watchdog, then
+ * update the last time it was strobed for WDIOC_GETTIMELEFT
+ */
+ if (!error) {
+ struct timespec ts;
+
+ error = kern_clock_gettime(curthread /* XXX */,
+ CLOCK_MONOTONIC_FAST, &ts);
+ if (!error) {
+ wd_lastpat = ts.tv_sec;
+ wd_lastpat_valid = 1;
+ }
+ }
return (error);
}
static int
-wd_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
- int flags __unused, struct thread *td)
+wd_valid_act(int act)
+{
+
+	/* An action mask is acceptable only if no bit outside WD_SOFT_MASK is set. */
+	return ((act & ~(WD_SOFT_MASK)) == 0);
+}
+
+static int
+wd_ioctl_patpat(caddr_t data)
{
u_int u;
- if (cmd != WDIOCPATPAT)
- return (ENOIOCTL);
u = *(u_int *)data;
if (u & ~(WD_ACTIVE | WD_PASSIVE | WD_LASTVAL | WD_INTERVAL))
return (EINVAL);
@@ -89,24 +151,162 @@ wd_ioctl(struct cdev *dev __unused, u_lo
return (ENOSYS); /* XXX Not implemented yet */
u &= ~(WD_ACTIVE | WD_PASSIVE);
- return (kern_do_pat(u));
+ return (wdog_kern_pat(u));
}
-u_int
-wdog_kern_last_timeout(void)
+/*
+ * Backend for the WDIOC_GETTIMELEFT ioctl.
+ *
+ * Reads CLOCK_MONOTONIC_FAST and stores in *remainp the number of seconds
+ * between the last successful pat (wd_lastpat) and now.
+ *
+ * Returns 0 on success, the kern_clock_gettime() error if the clock could
+ * not be read, or ENOENT if no pat has been recorded yet.
+ *
+ * NOTE(review): the stored value is "now - last pat", i.e. time elapsed
+ * since the pat; confirm whether callers expect time remaining instead.
+ */
+static int
+wd_get_time_left(struct thread *td, time_t *remainp)
{
+ struct timespec ts;
+ int error;
- return (wd_last_u);
+ error = kern_clock_gettime(td, CLOCK_MONOTONIC_FAST, &ts);
+ if (error)
+ return (error);
+ /* wd_lastpat is meaningless until wdog_kern_pat() has succeeded once. */
+ if (!wd_lastpat_valid)
+ return (ENOENT);
+ *remainp = ts.tv_sec - wd_lastpat;
+ return (0);
}
-int
-wdog_kern_pat(u_int utim)
+/*
+ * Callout handler shared by the software watchdog and the pre-timeout.
+ * 'arg' is the label string handed to callout_reset() ("soft" or
+ * "pre-timeout"); it is only used to tag the messages emitted below.
+ *
+ * NOTE(review): every action test reads wd_pretimeout_act, including when
+ * the software watchdog callout fires; wd_softtimeout_act is not consulted
+ * here.  Confirm whether that is intended.
+ */
+static void
+wd_timeout_cb(void *arg)
{
+	const char *type = arg;
- if (utim & ~(WD_LASTVAL | WD_INTERVAL))
- return (EINVAL);
+#ifdef DDB
+	if ((wd_pretimeout_act & WD_SOFT_DDB)) {
+		char kdb_why[80];
+
+		/* Bound the write by the destination buffer itself. */
+		snprintf(kdb_why, sizeof(kdb_why), "watchdog %s timeout", type);
+		kdb_backtrace();
+		kdb_enter(KDB_WHY_WATCHDOG, kdb_why);
+	}
+#endif
+	if ((wd_pretimeout_act & WD_SOFT_LOG))
+		log(LOG_EMERG, "watchdog %s-timeout, WD_SOFT_LOG", type);
+	if ((wd_pretimeout_act & WD_SOFT_PRINTF))
+		printf("watchdog %s-timeout, WD_SOFT_PRINTF\n", type);
+	/* panic() is tested last so the log/printf actions above run first. */
+	if ((wd_pretimeout_act & WD_SOFT_PANIC))
+		panic("watchdog %s-timeout, WD_SOFT_PANIC set", type);
+}
- return (kern_do_pat(utim));
+/*
+ * Arm, re-arm or disarm the pre-timeout callout.
+ *
+ * newtimeout:       how far ahead of the main watchdog timeout the
+ *                   pre-timeout should fire; 0 disables it.
+ * disableiftoolong: when true, a newtimeout that is not below the current
+ *                   watchdog timeout silently disables the pre-timeout;
+ *                   when false the same condition yields EINVAL.
+ *
+ * Returns 0 on success or EINVAL.
+ *
+ * NOTE(review): the callout delay is hz * (last timeout - newtimeout);
+ * confirm that wdog_kern_last_timeout() and newtimeout use the same unit.
+ */
+static int
+wd_set_pretimeout(int newtimeout, int disableiftoolong)
+{
+	u_int cur_timeout;
+
+	cur_timeout = wdog_kern_last_timeout();
+
+	/* A pre-timeout at or beyond the main timeout is never armed. */
+	if (newtimeout >= cur_timeout) {
+		if (!disableiftoolong)
+			return EINVAL;
+		newtimeout = 0;
+	}
+
+	if (newtimeout != 0) {
+		/* The value is sane: (re)schedule the callout and record it. */
+		(void) callout_reset(&wd_pretimeo_handle,
+		    hz*(cur_timeout - newtimeout), wd_timeout_cb,
+		    "pre-timeout");
+		wd_pretimeout = newtimeout;
+		return 0;
+	}
+
+	/* Zero means "off": forget the setting and cancel any pending callout. */
+	wd_pretimeout = 0;
+	callout_stop(&wd_pretimeo_handle);
+	return 0;
+}
+
+/*
+ * ioctl entry point for the watchdog device.
+ *
+ * Dispatches on 'cmd'; 'data' points at the ioctl argument, which each
+ * case reads or writes as an int/u_int.  Returns 0 on success, an errno
+ * value on failure, or ENOIOCTL for a command this driver does not handle.
+ */
+static int
+wd_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
+ int flags __unused, struct thread *td)
+{
+ u_int u;
+ time_t timeleft;
+ int error;
+
+ error = 0;
+
+ switch (cmd) {
+ case WDIOC_SETSOFT:
+ /* Select between the software callout and the watchdog_list handlers. */
+ u = *(int *)data;
+ /* do nothing? */
+ if (u == wd_softtimer)
+ break;
+ /* If there is a pending timeout disallow this ioctl */
+ if (wd_last_u != 0) {
+ error = EINVAL;
+ break;
+ }
+ wd_softtimer = u;
+ break;
+ case WDIOC_SETSOFTTIMEOUTACT:
+ /* Store the action mask only if it contains nothing but WD_SOFT_* bits. */
+ u = *(int *)data;
+ if (wd_valid_act(u)) {
+ wd_softtimeout_act = u;
+ } else {
+ error = EINVAL;
+ }
+ break;
+ case WDIOC_SETPRETIMEOUTACT:
+ /* Same validation as above, for the pre-timeout action mask. */
+ u = *(int *)data;
+ if (wd_valid_act(u)) {
+ wd_pretimeout_act = u;
+ } else {
+ error = EINVAL;
+ }
+ break;
+ case WDIOC_GETPRETIMEOUT:
+ *(int *)data = (int)wd_pretimeout;
+ break;
+ case WDIOC_SETPRETIMEOUT:
+ /* 'false': an out-of-range value is rejected rather than disabling. */
+ error = wd_set_pretimeout(*(int *)data, false);
+ break;
+ case WDIOC_GETTIMELEFT:
+ error = wd_get_time_left(td, &timeleft);
+ if (error)
+ break;
+ /* The time_t result is narrowed to the int the ioctl carries. */
+ *(int *)data = (int)timeleft;
+ break;
+ case WDIOC_SETTIMEOUT:
+ u = *(u_int *)data;
+ error = wdog_kern_pat(u);
+ break;
+ case WDIOC_GETTIMEOUT:
+ u = wdog_kern_last_timeout();
+ *(u_int *)data = u;
+ break;
+ case WDIOCPATPAT:
+ error = wd_ioctl_patpat(data);
+ break;
+ default:
+ error = ENOIOCTL;
+ break;
+ }
+ return (error);
+}
+
+/*
+ * Return the interval most recently saved in wd_last_u by wdog_kern_pat().
+ * This is NOT the time from NOW until the timeout fires; it is the interval
+ * value that was passed in via WDIOCPATPAT/WDIOC_SETTIMEOUT.
+ */
+u_int
+wdog_kern_last_timeout(void)
+{
+
+ return (wd_last_u);
}
static struct cdevsw wd_cdevsw = {
@@ -120,10 +320,16 @@ watchdog_modevent(module_t mod __unused,
{
switch(type) {
case MOD_LOAD:
+ callout_init(&wd_pretimeo_handle, true);
+ callout_init(&wd_softtimeo_handle, true);
wd_dev = make_dev(&wd_cdevsw, 0,
UID_ROOT, GID_WHEEL, 0600, _PATH_WATCHDOG);
return 0;
case MOD_UNLOAD:
+ callout_stop(&wd_pretimeo_handle);
+ callout_stop(&wd_softtimeo_handle);
+ callout_drain(&wd_pretimeo_handle);
+ callout_drain(&wd_softtimeo_handle);
destroy_dev(wd_dev);
return 0;
case MOD_SHUTDOWN:
Modified: head/sys/sys/watchdog.h
==============================================================================
--- head/sys/sys/watchdog.h Wed Feb 27 18:47:01 2013 (r247404)
+++ head/sys/sys/watchdog.h Wed Feb 27 19:03:31 2013 (r247405)
@@ -1,5 +1,8 @@
/*-
* Copyright (c) 2003 Poul-Henning Kamp
+ * Copyright (c) 2013 iXsystems.com,
+ * author: Alfred Perlstein <alfred at freebsd.org>
+ *
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,7 +35,18 @@
#define _PATH_WATCHDOG "fido"
-#define WDIOCPATPAT _IOW('W', 42, u_int)
+#define WDIOCPATPAT _IOW('W', 42, u_int) /* pat the watchdog */
+#define WDIOC_SETTIMEOUT _IOW('W', 43, int) /* set/reset the timer */
+#define WDIOC_GETTIMEOUT _IOR('W', 44, int) /* get total timeout */
+#define WDIOC_GETTIMELEFT _IOR('W', 45, int) /* get time left */
+#define WDIOC_GETPRETIMEOUT _IOR('W', 46, int) /* get the pre-timeout */
+#define WDIOC_SETPRETIMEOUT _IOW('W', 47, int) /* set the pre-timeout */
+/* set the action when a pre-timeout occurs see: WD_SOFT_* */
+#define WDIOC_SETPRETIMEOUTACT _IOW('W', 48, int)
+
+/* use software watchdog instead of hardware */
+#define WDIOC_SETSOFT _IOW('W', 49, int)
+#define WDIOC_SETSOFTTIMEOUTACT _IOW('W', 50, int)
#define WD_ACTIVE 0x8000000
/*
@@ -76,6 +90,15 @@
#define WD_TO_8SEC 33
#define WD_TO_16SEC 34
#define WD_TO_32SEC 35
+#define WD_TO_64SEC 36
+#define WD_TO_128SEC 37
+
+/* action on pre-timeout trigger */
+#define WD_SOFT_PANIC 0x01 /* panic */
+#define WD_SOFT_DDB 0x02 /* enter debugger */
+#define WD_SOFT_LOG 0x04 /* log(9) */
+#define WD_SOFT_PRINTF 0x08 /* printf(9) */
+#define WD_SOFT_MASK 0x0f /* all of the above */
#ifdef _KERNEL
Modified: head/usr.sbin/watchdogd/watchdogd.8
==============================================================================
--- head/usr.sbin/watchdogd/watchdogd.8 Wed Feb 27 18:47:01 2013 (r247404)
+++ head/usr.sbin/watchdogd/watchdogd.8 Wed Feb 27 19:03:31 2013 (r247405)
@@ -1,3 +1,5 @@
+.\" Copyright (c) 2013 iXsystems.com,
+.\" author: Alfred Perlstein <alfred at freebsd.org>
.\" Copyright (c) 2004 Poul-Henning Kamp <phk at FreeBSD.org>
.\" Copyright (c) 2003 Sean M. Kelly <smkelly at FreeBSD.org>
.\" All rights reserved.
@@ -25,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd September 2, 2006
+.Dd February 27, 2013
.Dt WATCHDOGD 8
.Os
.Sh NAME
@@ -33,11 +35,17 @@
.Nd watchdog daemon
.Sh SYNOPSIS
.Nm
-.Op Fl d
+.Op Fl dnw
+.Op Fl -debug
+.Op Fl -softtimeout
+.Op Fl -softtimeout-action Ar action
+.Op Fl -pretimeout Ar timeout
+.Op Fl -pretimeout-action Ar action
.Op Fl e Ar cmd
.Op Fl I Ar file
.Op Fl s Ar sleep
.Op Fl t Ar timeout
+.Op Fl T Ar script_timeout
.Sh DESCRIPTION
The
.Nm
@@ -62,6 +70,13 @@ is not specified, the daemon will perfor
check instead.
.Pp
The
+.Fl n
+argument 'dry-run' will cause watchdog not to arm the system watchdog and
+instead only run the watchdog function and report on failures.
+This is useful for developing new watchdogd scripts as the system will not
+reboot if there are problems with the script.
+.Pp
+The
.Fl s Ar sleep
argument can be used to control the sleep period between each execution
of the check and defaults to one second.
@@ -78,6 +93,16 @@ If this occurs,
will no longer execute and thus the kernel's watchdog routines will take
action after a configurable timeout.
.Pp
+The
+.Fl T Ar script_timeout
+specifies the threshold (in seconds) at which the watchdogd will complain
+that its script has run for too long.
+If unset
+.Ar script_timeout
+defaults to the value specified by the
+.Fl s Ar sleep
+option.
+.Pp
Upon receiving the
.Dv SIGTERM
or
@@ -90,17 +115,85 @@ will terminate.
The
.Nm
utility recognizes the following runtime options:
-.Bl -tag -width ".Fl I Ar file"
+.Bl -tag -width ".Fl -softtimeout-action Ar action "
.It Fl I Ar file
Write the process ID of the
.Nm
utility in the specified file.
-.It Fl d
+.It Fl d Fl -debug
Do not fork.
When this option is specified,
.Nm
will not fork into the background at startup.
+.Pp
+.It Fl w
+Complain when the watchdog script takes too long.
+This flag will cause watchdogd to complain when the time taken to
+execute the watchdog script exceeds the threshold given by the 'sleep' option.
+.Pp
+.It Fl -pretimeout Ar timeout
+Set a "pretimeout" watchdog.
+An action is attempted "timeout" seconds before the watchdog would fire.
+The action is set by the --pretimeout-action flag.
+The default is just to log a message (WD_SOFT_LOG) via
+.Xr log 9 .
+.Pp
+.It Fl -pretimeout-action Ar action
+Set the timeout action for the pretimeout. See the section
+.Sx Timeout Actions .
+.Pp
+.It Fl -softtimeout
+Instead of arming the various hardware watchdogs, only use a basic software
+watchdog. The default action is just to
+.Xr log 9
+a message (WD_SOFT_LOG).
+.Pp
+.It Fl -softtimeout-action Ar action
+Set the timeout action for the softtimeout. See the section
+.Sx Timeout Actions .
+.Pp
.El
+.Sh Timeout Actions
+The following timeout actions are available via the
+.Fl -pretimeout-action
+and
+.Fl -softtimeout-action
+flags:
+.Bl -tag -width ".Ar printf "
+.It Ar panic
+Call
+.Xr panic 9
+when the timeout is reached.
+.Pp
+.It Ar ddb
+Enter the kernel debugger via
+.Xr kdb_enter 9
+when the timeout is reached.
+.Pp
+.It Ar log
+Log a message using
+.Xr log 9
+when the timeout is reached.
+.Pp
+.It Ar printf
+call the kernel
+.Xr printf 9
+to display a message to the console and
+.Xr dmesg 8
+buffer.
+.Pp
+.El
+Actions can be combined in a comma-separated list, for example:
+.Ar log,printf
+which would call both
+.Xr printf 9
+and
+.Xr log 9 ,
+sending messages both to
+.Xr dmesg 8
+and to the kernel
+.Xr log 4
+device for
+.Xr syslog 8 .
.Sh FILES
.Bl -tag -width ".Pa /var/run/watchdogd.pid" -compact
.It Pa /var/run/watchdogd.pid
@@ -125,3 +218,6 @@ and
.Pp
Some contributions made by
.An Jeff Roberson Aq jeff at FreeBSD.org .
+.Pp
+The pretimeout and softtimeout action system was added by
+.An Alfred Perlstein Aq alfred at freebsd.org .
Modified: head/usr.sbin/watchdogd/watchdogd.c
==============================================================================
--- head/usr.sbin/watchdogd/watchdogd.c Wed Feb 27 18:47:01 2013 (r247404)
+++ head/usr.sbin/watchdogd/watchdogd.c Wed Feb 27 19:03:31 2013 (r247405)
@@ -1,5 +1,8 @@
/*-
* Copyright (c) 2003-2004 Sean M. Kelly <smkelly at FreeBSD.org>
+ * Copyright (c) 2013 iXsystems.com,
+ * author: Alfred Perlstein <alfred at freebsd.org>
+ *
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -50,8 +53,11 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <strings.h>
#include <sysexits.h>
+#include <syslog.h>
#include <unistd.h>
+#include <getopt.h>
+
static void parseargs(int, char *[]);
static void sighandler(int);
static void watchdog_loop(void);
@@ -63,13 +69,39 @@ static void usage(void);
static int debugging = 0;
static int end_program = 0;
static const char *pidfile = _PATH_VARRUN "watchdogd.pid";
-static u_int timeout = WD_TO_16SEC;
+static u_int timeout = WD_TO_128SEC;
+static u_int pretimeout = 0;
static u_int passive = 0;
static int is_daemon = 0;
+static int is_dry_run = 0; /* do not arm the watchdog, only
+ report on timing of the watch
+ program */
+static int do_timedog = 0;
+static int do_syslog = 0;
static int fd = -1;
static int nap = 1;
+static int carp_thresh_seconds = -1;
static char *test_cmd = NULL;
+static const char *getopt_shortopts;
+
+static int pretimeout_set;
+static int pretimeout_act;
+static int pretimeout_act_set;
+
+static int softtimeout_set;
+static int softtimeout_act;
+static int softtimeout_act_set;
+
+static struct option longopts[] = {
+ { "debug", no_argument, &debugging, 1 },
+ { "pretimeout", required_argument, &pretimeout_set, 1 },
+ { "pretimeout-action", required_argument, &pretimeout_act_set, 1 },
+ { "softtimeout", no_argument, &softtimeout_set, 1 },
+ { "softtimeout-action", required_argument, &softtimeout_act_set, 1 },
+ { NULL, 0, NULL, 0}
+};
+
/*
* Ask malloc() to map minimum-sized chunks of virtual address space at a time,
* so that mlockall() won't needlessly wire megabytes of unused memory into the
@@ -93,12 +125,18 @@ main(int argc, char *argv[])
parseargs(argc, argv);
+ if (do_syslog) {
+ openlog("watchdogd", LOG_CONS|LOG_NDELAY|LOG_PERROR,
+ LOG_DAEMON);
+
+ }
+
rtp.type = RTP_PRIO_REALTIME;
rtp.prio = 0;
if (rtprio(RTP_SET, 0, &rtp) == -1)
err(EX_OSERR, "rtprio");
- if (watchdog_init() == -1)
+ if (!is_dry_run && watchdog_init() == -1)
errx(EX_SOFTWARE, "unable to initialize watchdog");
if (is_daemon) {
@@ -108,6 +146,7 @@ main(int argc, char *argv[])
pfh = pidfile_open(pidfile, 0600, &otherpid);
if (pfh == NULL) {
if (errno == EEXIST) {
+ watchdog_onoff(0);
errx(EX_SOFTWARE, "%s already running, pid: %d",
getprogname(), otherpid);
}
@@ -164,6 +203,9 @@ static int
watchdog_init(void)
{
+ if (is_dry_run)
+ return 0;
+
fd = open("/dev/" _PATH_WATCHDOG, O_RDWR);
if (fd >= 0)
return (0);
@@ -172,26 +214,98 @@ watchdog_init(void)
}
/*
+ * If we are doing timing, then get the time.
+ */
+static int
+watchdog_getuptime(struct timespec *tp)
+{
+	int rc = 0;
+
+	/* Only sample the clock when script timing (do_timedog) is enabled. */
+	if (do_timedog) {
+		rc = clock_gettime(CLOCK_UPTIME_FAST, tp);
+		if (rc != 0)
+			warn("clock_gettime");
+	}
+	return (rc);
+}
+
+/*
+ * Complain (via warnx(3), and also syslog(3) when do_syslog is set) if the
+ * interval between tp_start and tp_end reached carp_thresh_seconds.
+ *
+ * Returns the whole seconds elapsed between the two timestamps, or 0 when
+ * timing is disabled (do_timedog unset).
+ */
+static long
+watchdog_check_dogfunction_time(struct timespec *tp_start,
+    struct timespec *tp_end)
+{
+	struct timeval tv_start, tv_end, tv;
+	const char *cmd_prefix, *cmd;
+	int sec;
+
+	if (!do_timedog)
+		return (0);
+
+	TIMESPEC_TO_TIMEVAL(&tv_start, tp_start);
+	TIMESPEC_TO_TIMEVAL(&tv_end, tp_end);
+	timersub(&tv_end, &tv_start, &tv);
+	sec = tv.tv_sec;
+	/* Under the threshold: nothing to report. */
+	if (sec < carp_thresh_seconds)
+		return (sec);
+
+	/* Describe what was timed: the user's command or the built-in check. */
+	if (test_cmd) {
+		cmd_prefix = "Watchdog program";
+		cmd = test_cmd;
+	} else {
+		cmd_prefix = "Watchdog operation";
+		cmd = "stat(\"/etc\", &sb)";
+	}
+	if (do_syslog)
+		syslog(LOG_CRIT, "%s: '%s' took too long: "
+		    "%d.%06ld seconds >= %d seconds threshold",
+		    cmd_prefix, cmd, sec, (long)tv.tv_usec,
+		    carp_thresh_seconds);
+	warnx("%s: '%s' took too long: "
+	    "%d.%06ld seconds >= %d seconds threshold",
+	    cmd_prefix, cmd, sec, (long)tv.tv_usec, carp_thresh_seconds);
+	return (sec);
+}
+
+
+/*
* Main program loop which is iterated every second.
*/
static void
watchdog_loop(void)
{
+ struct timespec ts_start, ts_end;
struct stat sb;
- int failed;
+ long waited;
+ int error, failed;
while (end_program != 2) {
failed = 0;
+ error = watchdog_getuptime(&ts_start);
+ if (error) {
+ end_program = 1;
+ goto try_end;
+ }
+
if (test_cmd != NULL)
failed = system(test_cmd);
else
failed = stat("/etc", &sb);
+ error = watchdog_getuptime(&ts_end);
+ if (error) {
+ end_program = 1;
+ goto try_end;
+ }
+
+ waited = watchdog_check_dogfunction_time(&ts_start, &ts_end);
+
if (failed == 0)
watchdog_patpat(timeout|WD_ACTIVE);
- sleep(nap);
+ if (nap - waited > 0)
+ sleep(nap - waited);
+try_end:
if (end_program != 0) {
if (watchdog_onoff(0) == 0) {
end_program = 2;
@@ -211,6 +325,9 @@ static int
watchdog_patpat(u_int t)
{
+ if (is_dry_run)
+ return 0;
+
return ioctl(fd, WDIOCPATPAT, &t);
}
@@ -221,11 +338,62 @@ watchdog_patpat(u_int t)
static int
watchdog_onoff(int onoff)
{
+ int error;
- if (onoff)
+ /* fake successful watchdog op if a dry run */
+ if (is_dry_run)
+ return 0;
+
+ if (onoff) {
+ /*
+ * Call the WDIOC_SETSOFT regardless of softtimeout_set
+ * because we'll need to turn it off if someone had turned
+ * it on.
+ */
+ error = ioctl(fd, WDIOC_SETSOFT, &softtimeout_set);
+ if (error) {
+ warn("setting WDIOC_SETSOFT %d", softtimeout_set);
+ return (error);
+ }
+ /*
+ * Arm the watchdog before applying the optional settings below;
+ * from here on any failure goes through 'failsafe' to disarm it.
+ */
+ error = watchdog_patpat((timeout|WD_ACTIVE));
+ if (error) {
+ warn("watchdog_patpat failed");
+ goto failsafe;
+ }
+ /* Each optional setting is sent only if given on the command line. */
+ if (softtimeout_act_set) {
+ error = ioctl(fd, WDIOC_SETSOFTTIMEOUTACT,
+ &softtimeout_act);
+ if (error) {
+ warn("setting WDIOC_SETSOFTTIMEOUTACT %d",
+ softtimeout_act);
+ goto failsafe;
+ }
+ }
+ if (pretimeout_set) {
+ error = ioctl(fd, WDIOC_SETPRETIMEOUT, &pretimeout);
+ if (error) {
+ warn("setting WDIOC_SETPRETIMEOUT %d",
+ pretimeout);
+ goto failsafe;
+ }
+ }
+ if (pretimeout_act_set) {
+ error = ioctl(fd, WDIOC_SETPRETIMEOUTACT,
+ &pretimeout_act);
+ if (error) {
+ warn("setting WDIOC_SETPRETIMEOUTACT %d",
+ pretimeout_act);
+ goto failsafe;
+ }
+ }
+ /* pat one more time for good measure */
return watchdog_patpat((timeout|WD_ACTIVE));
- else
+ } else {
return watchdog_patpat(0);
+ }
+ /* Reached only on a configuration error: disarm, then report that error. */
+failsafe:
+ watchdog_patpat(0);
+ return (error);
}
/*
@@ -235,27 +403,132 @@ static void
usage(void)
{
+ /*
+ * Print the synopsis for whichever personality is running (daemon or
+ * command-line tool) and exit with EX_USAGE.  Long options are shown
+ * with two leading dashes, matching the names in longopts[].
+ */
if (is_daemon)
- fprintf(stderr, "usage: watchdogd [-d] [-e cmd] [-I file] [-s sleep] [-t timeout]\n");
+ fprintf(stderr, "usage:\n"
+" watchdogd [-dnw] [-e cmd] [-I file] [-s sleep] [-t timeout]\n"
+" [-T script_timeout]\n"
+" [--debug]\n"
+" [--pretimeout seconds] [--pretimeout-action action]\n"
+" [--softtimeout] [--softtimeout-action action]\n"
+);
else
fprintf(stderr, "usage: watchdog [-d] [-t timeout]\n");
exit(EX_USAGE);
}
+/*
+ * Parse a strictly positive integer option argument.
+ *
+ * opt:      short option letter, used in the error text when longopt is NULL
+ * longopt:  long option name, or NULL for a short option
+ * myoptarg: the argument text; parsed by strtol(3) with base 0
+ *
+ * Returns the parsed value.  On bad input it does not return: it exits
+ * through errx(3) with EX_USAGE.
+ */
+static long
+fetchtimeout(int opt, const char *longopt, const char *myoptarg)
+{
+	const char *errstr;
+	char *p;
+	long rv;
+
+	errstr = NULL;
+	p = NULL;
+	errno = 0;
+	rv = strtol(myoptarg, &p, 0);
+	/*
+	 * Decide "not a number" first: strtol() returns 0 for garbage or
+	 * empty input, which must not be misreported as a range problem.
+	 */
+	if (p == myoptarg || (p != NULL && *p != '\0') || errno != 0)
+		errstr = "is not a number";
+	else if (rv <= 0)
+		errstr = "must be greater than zero";
+	if (errstr) {
+		if (longopt)
+			errx(EX_USAGE, "--%s argument %s", longopt, errstr);
+		else
+			errx(EX_USAGE, "-%c argument %s", opt, errstr);
+	}
+	return (rv);
+}
+
+/* Maps one action keyword accepted on the command line to its WD_SOFT_* bit. */
+struct act_tbl {
+ const char *at_act; /* keyword as typed by the user */
+ int at_value; /* corresponding WD_SOFT_* flag */
+};
+
+/* Known actions; the { NULL, 0 } entry terminates the table for its walkers. */
+struct act_tbl act_tbl[] = {
+ { "panic", WD_SOFT_PANIC },
+ { "ddb", WD_SOFT_DDB },
+ { "log", WD_SOFT_LOG },
+ { "printf", WD_SOFT_PRINTF },
+ { NULL, 0 }
+};
+
+/*
+ * Report an unrecognised action keyword for long option 'lopt', listing
+ * every keyword in act_tbl, then hand off to usage().
+ */
+static void
+timeout_act_error(const char *lopt, const char *badact)
+{
+	const struct act_tbl *ent;
+	char *list, *prev;
+
+	/* Grow "a, b, c" one keyword at a time, freeing the previous string. */
+	list = NULL;
+	for (ent = act_tbl; ent->at_act != NULL; ent++) {
+		prev = list;
+		if (asprintf(&list, "%s%s%s",
+		    prev != NULL ? prev : "",
+		    prev != NULL ? ", " : "",
+		    ent->at_act) == -1)
+			err(EX_OSERR, "malloc");
+		free(prev);
+	}
+	warnx("bad --%s argument '%s' must be one of (%s).",
+	    lopt, badact, list);
+	usage();
+}
+
+/*
+ * Convert a comma separated list of action keywords into the OR of their
+ * WD_SOFT_* flags for the ioctl.  An unknown keyword is reported through
+ * timeout_act_error(); 'lopt' names the long option for that message.
+ */
+static int
+timeout_act_str2int(const char *lopt, const char *acts)
+{
+	const struct act_tbl *ent;
+	char *copy, *cursor, *tok;
+	int mask;
+
+	/* strsep() modifies its input, so work on a private copy. */
+	copy = strdup(acts);
+	if (copy == NULL)
+		err(EX_OSERR, "malloc");
+	cursor = copy;
+	mask = 0;
+	for (tok = strsep(&cursor, ","); tok != NULL;
+	    tok = strsep(&cursor, ",")) {
+		/* Stop at the matching entry, or at the NULL terminator. */
+		for (ent = act_tbl; ent->at_act != NULL; ent++) {
+			if (strcmp(tok, ent->at_act) == 0)
+				break;
+		}
+		if (ent->at_act == NULL)
+			timeout_act_error(lopt, tok);
+		else
+			mask |= ent->at_value;
+	}
+	free(copy);
+	return mask;
+}
+
/*
* Handle the few command line arguments supported.
*/
static void
parseargs(int argc, char *argv[])
{
+ int longindex;
int c;
char *p;
+ const char *lopt;
double a;
+ /*
+ * if we end with a 'd' aka 'watchdogd' then we are the daemon program,
+ * otherwise run as a command line utility.
+ */
c = strlen(argv[0]);
if (argv[0][c - 1] == 'd')
is_daemon = 1;
- while ((c = getopt(argc, argv,
- is_daemon ? "I:de:s:t:?" : "dt:?")) != -1) {
+
+ if (is_daemon)
+ getopt_shortopts = "I:de:ns:t:ST:w?";
+ else
+ getopt_shortopts = "dt:?";
+
+ while ((c = getopt_long(argc, argv, getopt_shortopts, longopts,
+ &longindex)) != -1) {
switch (c) {
case 'I':
pidfile = optarg;
@@ -266,17 +539,19 @@ parseargs(int argc, char *argv[])
case 'e':
test_cmd = strdup(optarg);
break;
+ case 'n':
+ is_dry_run = 1;
+ break;
#ifdef notyet
case 'p':
passive = 1;
break;
#endif
case 's':
- p = NULL;
- errno = 0;
- nap = strtol(optarg, &p, 0);
- if ((p != NULL && *p != '\0') || errno != 0)
- errx(EX_USAGE, "-s argument is not a number");
+ nap = fetchtimeout(c, NULL, optarg);
+ break;
+ case 'S':
+ do_syslog = 1;
break;
case 't':
p = NULL;
@@ -286,6 +561,7 @@ parseargs(int argc, char *argv[])
errx(EX_USAGE, "-t argument is not a number");
if (a < 0)
errx(EX_USAGE, "-t argument must be positive");
+
if (a == 0)
timeout = WD_TO_NEVER;
else
@@ -294,12 +570,39 @@ parseargs(int argc, char *argv[])
printf("Timeout is 2^%d nanoseconds\n",
timeout);
break;
+ case 'T':
+ /* Short option: pass NULL so errors name "-T", not "--NULL". */
+ carp_thresh_seconds = fetchtimeout(c, NULL, optarg);
+ break;
+ case 'w':
+ do_timedog = 1;
+ break;
+ case 0:
+ lopt = longopts[longindex].name;
+ if (!strcmp(lopt, "pretimeout")) {
+ pretimeout = fetchtimeout(0, lopt, optarg);
+ } else if (!strcmp(lopt, "pretimeout-action")) {
+ pretimeout_act = timeout_act_str2int(lopt,
+ optarg);
+ } else if (!strcmp(lopt, "softtimeout-action")) {
+ softtimeout_act = timeout_act_str2int(lopt,
+ optarg);
+ } else {
+ /* warnx("bad option at index %d: %s", optind,
+ argv[optind]);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list