ichwd: option to globally disable SMI

Andriy Gapon avg at icyb.net.ua
Mon Apr 6 07:28:07 PDT 2009


Our ichwd driver attempts to disable TCO SMI generation to avoid potentially
unhelpful SMI handler's handling of watchdog timeout.

Recent ICH versions allow TCO SMI bit to be locked down (typically by BIOS), so
that it can not be changed later. ichwd driver at present doesn't check result of
changing the bit, so it doesn't report a failure.
If the bit is locked and SMI handler is indeed unhelpful the only remaining
possibility is to try to disable SMI completely. This bit too can be locked, but
there is a chance that it is not.

I am attaching a patch that makes ichwd check and report result of SMI bit
operations and also try to disable SMI globally if permitted by
hw.ichwd.use_global_smi tunable.

This patch could be useful to you if the following two conditions are met:
1. existing ichwd driver successfully attaches and does not produce any
errors/warnings;
2. kill -9 on watchdogd doesn't result in machine being rebooted after timeout;

I am using this patch on DG33TL motherboard for couple of weeks now. I do not see
any side-effects from disabling SMI and watchdog works as expected.

P.S. I tried to contact Intel's customer support about my problem and I was told
that watchdog is supported by Intel only on Extreme Edition motherboards, which my
motherboard is not. I wonder if anybody here uses FreeBSD with such a motherboard
and ichwd works properly indeed.

-- 
Andriy Gapon
-------------- next part --------------
diff --git a/sys/dev/ichwd/ichwd.c b/sys/dev/ichwd/ichwd.c
index 35f8b5f..a4387a2 100644
--- a/sys/dev/ichwd/ichwd.c
+++ b/sys/dev/ichwd/ichwd.c
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/systm.h>
+#include <sys/sysctl.h>
 #include <sys/bus.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
@@ -72,6 +73,14 @@ __FBSDID("$FreeBSD$");
 
 #include <dev/ichwd/ichwd.h>
 
+
+static int use_global_smi = 0;
+
+SYSCTL_NODE(_hw, OID_AUTO, ichwd, CTLFLAG_RD, 0, "ichwd driver global parameters");
+TUNABLE_INT("hw.ichwd.use_global_smi", &use_global_smi);
+SYSCTL_INT(_hw_ichwd, OID_AUTO, use_global_smi, CTLFLAG_RDTUN, &use_global_smi, 0,
+	   "ichwd may try to change global SMI Enable bit if TCO SMI bit is locked");
+
 static struct ichwd_device ichwd_devices[] = {
 	{ DEVICEID_82801AA,  "Intel 82801AA watchdog timer",	1 },
 	{ DEVICEID_82801AB,  "Intel 82801AB watchdog timer",	1 },
@@ -141,26 +150,108 @@ static devclass_t ichwd_devclass;
 			device_printf(dev, __VA_ARGS__);\
 	} while (0)
 
+/*
+ * Disable the watchdog timeout SMI handler.
+ *
+ * Apparently, some BIOSes install handlers that reset or disable the
+ * watchdog timer instead of resetting the system, so we disable the SMI
+ * (by clearing the SMI_TCO_EN bit of the SMI_EN register) to prevent this
+ * from happening.
+ */
 static __inline void
-ichwd_intr_enable(struct ichwd_softc *sc)
+ichwd_smi_disable(struct ichwd_softc *sc)
 {
-	ichwd_write_smi_4(sc, SMI_EN, ichwd_read_smi_4(sc, SMI_EN) & ~SMI_TCO_EN);
+	uint32_t val;
+
+	val = ichwd_read_smi_4(sc, SMI_EN);
+
+	/* check if either TCO SMI is disabled or SMI is disabled globally */
+	if ((val & (SMI_TCO_EN | SMI_GBL_EN)) != (SMI_TCO_EN | SMI_GBL_EN))
+		return;
+	/* first try to disabled TCO SMI */
+	val &= ~SMI_TCO_EN;
+	ichwd_write_smi_4(sc, SMI_EN, val);
+
+	/* check if TCO SMI got disabled */
+	val = ichwd_read_smi_4(sc, SMI_EN);
+	if ((val & SMI_TCO_EN) == 0)
+		return;
+	printf("could not disable TCO SMI, bit might be locked by BIOS\n");
+
+	/* check if user allowed to fiddle with global SMI settings */
+	if (!use_global_smi)
+		return;
+
+	/* try to disable SMI globally */
+	val &= ~SMI_GBL_EN;
+	ichwd_write_smi_4(sc, SMI_EN, val);
+
+	/* check results */
+	val = ichwd_read_smi_4(sc, SMI_EN);
+	if (val & SMI_GBL_EN)
+		printf("could not disable SMI globally, bit might be locked by BIOS\n");
+	else
+		printf("disabled SMI globally (permitted by use_global_smi tunable)\n");
 }
 
+/*
+ * Enable the watchdog timeout SMI handler.  See above for details.
+ */
 static __inline void
-ichwd_intr_disable(struct ichwd_softc *sc)
+ichwd_smi_enable(struct ichwd_softc *sc)
 {
-	ichwd_write_smi_4(sc, SMI_EN, ichwd_read_smi_4(sc, SMI_EN) | SMI_TCO_EN);
+	uint32_t val;
+
+	val = ichwd_read_smi_4(sc, SMI_EN);
+
+	/* check if both TCO SMI and global SMI are already enabled */
+	if ((val & (SMI_TCO_EN | SMI_GBL_EN)) == (SMI_TCO_EN | SMI_GBL_EN))
+		return;
+
+	/* try to enable TCO SMI and global SMI if permitted */
+	val |= SMI_TCO_EN;
+	if (use_global_smi)
+		val |= SMI_GBL_EN;
+
+	/* bail out if SMI is globally disabled and user doesn't allow to change that */
+	if ((val & SMI_GBL_EN) == 0) {
+		printf("could not enable TCO SMI because SMI is globally disabled\n");
+		return;
+	}
+	ichwd_write_smi_4(sc, SMI_EN, val);
+
+	/* check if we succeeded */
+	val = ichwd_read_smi_4(sc, SMI_EN);
+	if ((val & (SMI_TCO_EN | SMI_GBL_EN)) != (SMI_TCO_EN | SMI_GBL_EN))
+		printf("could not enable SMIs, bits might be locked by BIOS\n");
 }
 
+/*
+ * Reset the watchdog status bits.
+ */
 static __inline void
 ichwd_sts_reset(struct ichwd_softc *sc)
 {
+	/*
+	 * The watchdog status bits are set to 1 by the hardware to
+	 * indicate various conditions.  They can be cleared by software
+	 * by writing a 1, not a 0.
+	 */
 	ichwd_write_tco_2(sc, TCO1_STS, TCO_TIMEOUT);
+	/*
+	 * XXX The datasheet says that TCO_SECOND_TO_STS must be cleared
+	 * before TCO_BOOT_STS, not the other way around.
+	 */
 	ichwd_write_tco_2(sc, TCO2_STS, TCO_BOOT_STS);
 	ichwd_write_tco_2(sc, TCO2_STS, TCO_SECOND_TO_STS);
 }
 
+/*
+ * Enable the watchdog timer by clearing the TCO_TMR_HALT bit in the
+ * TCO1_CNT register.  This is complicated by the need to preserve bit 9
+ * of that same register, and the requirement that all other bits must be
+ * written back as zero.
+ */
 static __inline void
 ichwd_tmr_enable(struct ichwd_softc *sc)
 {
@@ -172,6 +263,9 @@ ichwd_tmr_enable(struct ichwd_softc *sc)
 	ichwd_verbose_printf(sc->device, "timer enabled\n");
 }
 
+/*
+ * Disable the watchdog timer.  See above for details.
+ */
 static __inline void
 ichwd_tmr_disable(struct ichwd_softc *sc)
 {
@@ -183,6 +277,11 @@ ichwd_tmr_disable(struct ichwd_softc *sc)
 	ichwd_verbose_printf(sc->device, "timer disabled\n");
 }
 
+/*
+ * Reload the watchdog timer: writing anything to any of the lower five
+ * bits of the TCO_RLD register reloads the timer from the last value
+ * written to TCO_TMR.
+ */
 static __inline void
 ichwd_tmr_reload(struct ichwd_softc *sc)
 {
@@ -194,6 +293,10 @@ ichwd_tmr_reload(struct ichwd_softc *sc)
 	ichwd_verbose_printf(sc->device, "timer reloaded\n");
 }
 
+/*
+ * Set the initial timeout value.  Note that this must always be followed
+ * by a reload.
+ */
 static __inline void
 ichwd_tmr_set(struct ichwd_softc *sc, unsigned int timeout)
 {
@@ -220,8 +323,8 @@ ichwd_tmr_set(struct ichwd_softc *sc, unsigned int timeout)
 		uint16_t tmr_val16 = ichwd_read_tco_2(sc, TCO_TMR2);
 
 		tmr_val16 &= 0xfc00;
-		if (timeout > 0x0bff)
-			timeout = 0x0bff;
+		if (timeout > 0x03ff)
+			timeout = 0x03ff;
 		tmr_val16 |= timeout;
 		ichwd_write_tco_2(sc, TCO_TMR2, tmr_val16);
 	}
@@ -262,7 +365,8 @@ ichwd_clear_noreboot(struct ichwd_softc *sc)
 }
 
 /*
- * Watchdog event handler.
+ * Watchdog event handler - called by the framework to enable or disable
+ * the watchdog or change the initial timeout value.
  */
 static void
 ichwd_event(void *arg, unsigned int cmd, int *error)
@@ -426,6 +530,13 @@ ichwd_attach(device_t dev)
 	device_printf(dev, "%s (ICH%d or equivalent)\n",
 	    device_get_desc(dev), sc->ich_version);
 
+	/*
+	 * XXX we should check the status registers (specifically, the
+	 * TCO_SECOND_TO_STS bit in the TCO2_STS register) to see if we
+	 * just came back from a watchdog-induced reset, and let the user
+	 * know.
+	 */
+
 	/* reset the watchdog status registers */
 	ichwd_sts_reset(sc);
 
@@ -435,8 +546,8 @@ ichwd_attach(device_t dev)
 	/* register the watchdog event handler */
 	sc->ev_tag = EVENTHANDLER_REGISTER(watchdog_list, ichwd_event, sc, 0);
 
-	/* enable watchdog timeout interrupts */
-	ichwd_intr_enable(sc);
+	/* disable the SMI handler */
+	ichwd_smi_disable(sc);
 
 	return (0);
  fail:
@@ -466,8 +577,8 @@ ichwd_detach(device_t dev)
 	if (sc->active)
 		ichwd_tmr_disable(sc);
 
-	/* disable watchdog timeout interrupts */
-	ichwd_intr_disable(sc);
+	/* enable the SMI handler */
+	ichwd_smi_enable(sc);
 
 	/* deregister event handler */
 	if (sc->ev_tag != NULL)
diff --git a/sys/dev/ichwd/ichwd.h b/sys/dev/ichwd/ichwd.h
index 51ed26d..8e2bdba 100644
--- a/sys/dev/ichwd/ichwd.h
+++ b/sys/dev/ichwd/ichwd.h
@@ -131,9 +131,9 @@ struct ichwd_softc {
 #define TCO1_CNT		0x08 /* TCO Control 1 */
 #define TCO2_CNT		0x08 /* TCO Control 2 */
 
-/* bit definitions for SMI_EN and SMI_STS */
+/* SMI-related bit definitions */
+#define SMI_GBL_EN		0x0001
 #define SMI_TCO_EN		0x2000
-#define SMI_TCO_STS		0x2000
 
 /* timer value mask for TCO_RLD and TCO_TMR */
 #define TCO_TIMER_MASK		0x1f
-------------- next part --------------
diff --git a/sys/dev/ichwd/ichwd.c b/sys/dev/ichwd/ichwd.c
index 71662a5..a4387a2 100644
--- a/sys/dev/ichwd/ichwd.c
+++ b/sys/dev/ichwd/ichwd.c
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/systm.h>
+#include <sys/sysctl.h>
 #include <sys/bus.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
@@ -72,6 +73,14 @@ __FBSDID("$FreeBSD$");
 
 #include <dev/ichwd/ichwd.h>
 
+
+static int use_global_smi = 0;
+
+SYSCTL_NODE(_hw, OID_AUTO, ichwd, CTLFLAG_RD, 0, "ichwd driver global parameters");
+TUNABLE_INT("hw.ichwd.use_global_smi", &use_global_smi);
+SYSCTL_INT(_hw_ichwd, OID_AUTO, use_global_smi, CTLFLAG_RDTUN, &use_global_smi, 0,
+	   "ichwd may try to change global SMI Enable bit if TCO SMI bit is locked");
+
 static struct ichwd_device ichwd_devices[] = {
 	{ DEVICEID_82801AA,  "Intel 82801AA watchdog timer",	1 },
 	{ DEVICEID_82801AB,  "Intel 82801AB watchdog timer",	1 },
@@ -152,7 +161,37 @@ static devclass_t ichwd_devclass;
 static __inline void
 ichwd_smi_disable(struct ichwd_softc *sc)
 {
-	ichwd_write_smi_4(sc, SMI_EN, ichwd_read_smi_4(sc, SMI_EN) & ~SMI_TCO_EN);
+	uint32_t val;
+
+	val = ichwd_read_smi_4(sc, SMI_EN);
+
+	/* check if either TCO SMI is disabled or SMI is disabled globally */
+	if ((val & (SMI_TCO_EN | SMI_GBL_EN)) != (SMI_TCO_EN | SMI_GBL_EN))
+		return;
+	/* first try to disabled TCO SMI */
+	val &= ~SMI_TCO_EN;
+	ichwd_write_smi_4(sc, SMI_EN, val);
+
+	/* check if TCO SMI got disabled */
+	val = ichwd_read_smi_4(sc, SMI_EN);
+	if ((val & SMI_TCO_EN) == 0)
+		return;
+	printf("could not disable TCO SMI, bit might be locked by BIOS\n");
+
+	/* check if user allowed to fiddle with global SMI settings */
+	if (!use_global_smi)
+		return;
+
+	/* try to disable SMI globally */
+	val &= ~SMI_GBL_EN;
+	ichwd_write_smi_4(sc, SMI_EN, val);
+
+	/* check results */
+	val = ichwd_read_smi_4(sc, SMI_EN);
+	if (val & SMI_GBL_EN)
+		printf("could not disable SMI globally, bit might be locked by BIOS\n");
+	else
+		printf("disabled SMI globally (permitted by use_global_smi tunable)\n");
 }
 
 /*
@@ -161,7 +200,30 @@ ichwd_smi_disable(struct ichwd_softc *sc)
 static __inline void
 ichwd_smi_enable(struct ichwd_softc *sc)
 {
-	ichwd_write_smi_4(sc, SMI_EN, ichwd_read_smi_4(sc, SMI_EN) | SMI_TCO_EN);
+	uint32_t val;
+
+	val = ichwd_read_smi_4(sc, SMI_EN);
+
+	/* check if both TCO SMI and global SMI are already enabled */
+	if ((val & (SMI_TCO_EN | SMI_GBL_EN)) == (SMI_TCO_EN | SMI_GBL_EN))
+		return;
+
+	/* try to enable TCO SMI and global SMI if permitted */
+	val |= SMI_TCO_EN;
+	if (use_global_smi)
+		val |= SMI_GBL_EN;
+
+	/* bail out if SMI is globally disabled and user doesn't allow to change that */
+	if ((val & SMI_GBL_EN) == 0) {
+		printf("could not enable TCO SMI because SMI is globally disabled\n");
+		return;
+	}
+	ichwd_write_smi_4(sc, SMI_EN, val);
+
+	/* check if we succeeded */
+	val = ichwd_read_smi_4(sc, SMI_EN);
+	if ((val & (SMI_TCO_EN | SMI_GBL_EN)) != (SMI_TCO_EN | SMI_GBL_EN))
+		printf("could not enable SMIs, bits might be locked by BIOS\n");
 }
 
 /*
diff --git a/sys/dev/ichwd/ichwd.h b/sys/dev/ichwd/ichwd.h
index 51ed26d..8e2bdba 100644
--- a/sys/dev/ichwd/ichwd.h
+++ b/sys/dev/ichwd/ichwd.h
@@ -131,9 +131,9 @@ struct ichwd_softc {
 #define TCO1_CNT		0x08 /* TCO Control 1 */
 #define TCO2_CNT		0x08 /* TCO Control 2 */
 
-/* bit definitions for SMI_EN and SMI_STS */
+/* SMI-related bit definitions */
+#define SMI_GBL_EN		0x0001
 #define SMI_TCO_EN		0x2000
-#define SMI_TCO_STS		0x2000
 
 /* timer value mask for TCO_RLD and TCO_TMR */
 #define TCO_TIMER_MASK		0x1f


More information about the freebsd-current mailing list