svn commit: r192050 - in head/sys: amd64/amd64 amd64/include conf i386/i386 i386/include

John Baldwin jhb at FreeBSD.org
Wed May 13 17:53:05 UTC 2009


Author: jhb
Date: Wed May 13 17:53:04 2009
New Revision: 192050
URL: http://svn.freebsd.org/changeset/base/192050

Log:
  Implement simple machine check support for amd64 and i386.
  - For CPUs that only support MCE (the machine check exception) but not MCA
    (i.e. Pentium), all this does is print out the value of the machine check
    registers and then panic when a machine check exception occurs.
  - For CPUs that support MCA (the machine check architecture), the support is
    a bit more involved.
    - First, there is limited support for decoding the CPU-independent MCA
      error codes in the kernel, and the kernel uses this to output a short
      description of any machine check events that occur.
    - When a machine check exception occurs, all of the MCx banks on the
      current CPU are scanned and any events are reported to the console
      before panicking.
    - To catch events for correctable errors, a periodic timer kicks off a
      task which scans the MCx banks on all CPUs.  The frequency of these
      checks is controlled via the "hw.mca.interval" sysctl.
    - Userland can request an immediate scan of the MCx banks by writing
      a non-zero value to "hw.mca.force_scan".
    - If any correctable events are encountered, the appropriate details
      are stored in a 'struct mca_record' (defined in <machine/mca.h>).
      The "hw.mca.count" is a count of such records and each record may
      be queried via the "hw.mca.records" tree by specifying the record
      index (0 .. count - 1) as the next name in the MIB similar to using
      PIDs with the kern.proc.* sysctls.  The idea is to export machine
      check events to userland for more detailed processing.
    - The periodic timer and hw.mca sysctls are only present if the CPU
      supports MCA.
  
  Discussed with:	emaste (briefly)
  MFC after:	1 month

Added:
  head/sys/amd64/amd64/mca.c   (contents, props changed)
  head/sys/amd64/include/mca.h   (contents, props changed)
  head/sys/i386/i386/mca.c   (contents, props changed)
  head/sys/i386/include/mca.h   (contents, props changed)
Modified:
  head/sys/amd64/amd64/machdep.c
  head/sys/amd64/amd64/mp_machdep.c
  head/sys/amd64/amd64/trap.c
  head/sys/amd64/include/specialreg.h
  head/sys/conf/files.amd64
  head/sys/conf/files.i386
  head/sys/i386/i386/machdep.c
  head/sys/i386/i386/mp_machdep.c
  head/sys/i386/i386/trap.c
  head/sys/i386/include/specialreg.h

Modified: head/sys/amd64/amd64/machdep.c
==============================================================================
--- head/sys/amd64/amd64/machdep.c	Wed May 13 17:22:33 2009	(r192049)
+++ head/sys/amd64/amd64/machdep.c	Wed May 13 17:53:04 2009	(r192050)
@@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/intr_machdep.h>
+#include <machine/mca.h>
 #include <machine/md_var.h>
 #include <machine/metadata.h>
 #include <machine/pc/bios.h>
@@ -274,6 +275,7 @@ cpu_startup(dummy)
 	vm_pager_bufferinit();
 
 	cpu_setregs();
+	mca_init();
 }
 
 /*

Added: head/sys/amd64/amd64/mca.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/amd64/amd64/mca.c	Wed May 13 17:53:04 2009	(r192050)
@@ -0,0 +1,530 @@
+/*-
+ * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Support for x86 machine check architecture.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <machine/mca.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+/*
+ * Kernel-private wrapper around one recorded machine check event: the
+ * exported record, a "has been printed" flag, and the list linkage.
+ */
+struct mca_internal {
+	/* Payload copied out to userland by sysctl_mca_records(). */
+	struct mca_record rec;
+	/* Set by mca_scan_cpus() just before it hands the record to mca_log(). */
+	int		logged;
+	/* Linkage on the mca_records tail queue. */
+	STAILQ_ENTRY(mca_internal) link;
+};
+
+static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture");
+
+/* Root of the hw.mca sysctl tree; created in mca_setup(). */
+static struct sysctl_oid *mca_sysctl_tree;
+
+static int mca_count;		/* Number of records stored. */
+
+/* Every recorded event, in insertion order; guarded by mca_lock. */
+static STAILQ_HEAD(, mca_internal) mca_records;
+/* Periodic timer; re-armed from mca_periodic_scan(). */
+static struct callout mca_timer;
+/* Scan interval in seconds; multiplied by hz when the timer is armed. */
+static int mca_ticks = 3600;	/* Check hourly by default. */
+/* Task that runs mca_scan_cpus() on the taskqueue_thread queue. */
+static struct task mca_task;
+/* Spin mutex guarding mca_records and updates to mca_count. */
+static struct mtx mca_lock;
+
+/*
+ * Handler for the "interval" sysctl.  Reports the current scan interval
+ * and, on a write, installs the new value provided it is strictly
+ * positive; anything else is rejected with EINVAL.
+ */
+static int
+sysctl_mca_ticks(SYSCTL_HANDLER_ARGS)
+{
+	int interval, rv;
+
+	/* Operate on a copy so a rejected value never reaches mca_ticks. */
+	interval = mca_ticks;
+	rv = sysctl_handle_int(oidp, &interval, 0, req);
+	if (rv != 0)
+		return (rv);
+
+	/* Read-only request: nothing to store. */
+	if (req->newptr == NULL)
+		return (0);
+
+	if (interval < 1)
+		return (EINVAL);
+	mca_ticks = interval;
+	return (0);
+}
+
+/*
+ * Handler for the "records" sysctl node.  The single trailing MIB
+ * component is the index (0 .. mca_count - 1) of the record to fetch;
+ * the matching struct mca_record is copied out to the caller.
+ *
+ * Returns EINVAL when the name does not consist of exactly one component
+ * or the index does not name a stored record; otherwise returns the
+ * result of SYSCTL_OUT().
+ */
+static int
+sysctl_mca_records(SYSCTL_HANDLER_ARGS)
+{
+	int *name = (int *)arg1;
+	u_int namelen = arg2;
+	struct mca_record record;
+	struct mca_internal *rec;
+	int found, i;
+
+	if (namelen != 1)
+		return (EINVAL);
+
+	/* Cheap unlocked rejection; the list walk below is authoritative. */
+	if (name[0] < 0 || name[0] >= mca_count)
+		return (EINVAL);
+
+	/*
+	 * The record is copied out to userland wholesale, so start from a
+	 * zeroed buffer: neither structure padding nor a failed lookup may
+	 * expose stale kernel stack contents.
+	 */
+	bzero(&record, sizeof(record));
+	found = 0;
+	i = 0;
+	mtx_lock_spin(&mca_lock);
+	STAILQ_FOREACH(rec, &mca_records, link) {
+		if (i == name[0]) {
+			record = rec->rec;
+			found = 1;
+			break;
+		}
+		i++;
+	}
+	mtx_unlock_spin(&mca_lock);
+	if (!found)
+		return (EINVAL);
+	return (SYSCTL_OUT(req, &record, sizeof(record)));
+}
+
+/*
+ * Capture the contents of one machine check bank on the current CPU.
+ *
+ * Returns NULL if the bank's status register does not have the valid bit
+ * set, or if no memory is available for a record.  Otherwise the bank's
+ * registers are copied into a newly allocated record, the record is
+ * appended to mca_records, and a pointer to the embedded struct
+ * mca_record is returned.
+ */
+static struct mca_record *
+mca_record_entry(int bank)
+{
+	struct mca_internal *rec;
+	uint64_t status;
+	u_int p[4];
+
+	status = rdmsr(MSR_MC_STATUS(bank));
+	if (!(status & MC_STATUS_VAL))
+		return (NULL);
+
+	/*
+	 * M_ZERO leaves mr_addr and mr_misc at 0 when the matching valid
+	 * bits are clear, and starts 'logged' at 0.
+	 * NOTE(review): this is also reached from mca_intr() via
+	 * mca_scan(1); confirm malloc(9) is safe in that context.
+	 */
+	rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO);
+	if (rec == NULL) {
+		printf("MCA: Unable to allocate space for an event.\n");
+		return (NULL);
+	}
+
+	/* Save exception information. */
+	rec->rec.mr_status = status;
+	if (status & MC_STATUS_ADDRV)
+		rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank));
+	if (status & MC_STATUS_MISCV)
+		rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank));
+	rec->rec.mr_tsc = rdtsc();
+	rec->rec.mr_apic_id = PCPU_GET(apic_id);
+
+	/*
+	 * Clear machine check.  Don't do this for uncorrectable
+	 * errors so that the BIOS can see them.
+	 */
+	if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) {
+		wrmsr(MSR_MC_STATUS(bank), 0);
+		/* Result in p[] is unused; presumably issued only to serialize -- TODO confirm. */
+		do_cpuid(0, p);
+	}
+
+	/* Publish the record; mca_count tracks the length of the list. */
+	mtx_lock_spin(&mca_lock);
+	STAILQ_INSERT_TAIL(&mca_records, rec, link);
+	mca_count++;
+	mtx_unlock_spin(&mca_lock);
+	return (&rec->rec);
+}
+
+/*
+ * Map bits 3:2 of an MCA error code to the one-letter prefix used when
+ * printing TLB and cache errors ("I", "D" or "G"); the remaining
+ * encoding yields "?".
+ */
+static const char *
+mca_error_ttype(uint16_t mca_error)
+{
+	static const char * const ttypes[4] = { "I", "D", "G", "?" };
+
+	return (ttypes[(mca_error >> 2) & 0x3]);
+}
+
+/*
+ * Map the low two bits of an MCA error code to the level string used in
+ * the decoded output: "L0", "L1", "L2" or "LG".
+ */
+static const char *
+mca_error_level(uint16_t mca_error)
+{
+	static const char * const levels[4] = { "L0", "L1", "L2", "LG" };
+
+	/* A two-bit field has exactly four values, so every case is covered. */
+	return (levels[mca_error & 0x3]);
+}
+
+/*
+ * Map bits 7:4 of an MCA error code to the request mnemonic used in the
+ * decoded output.  Encodings above 0x8 yield "???".
+ */
+static const char *
+mca_error_request(uint16_t mca_error)
+{
+	static const char * const requests[] = {
+		"ERR", "RD", "WR", "DRD", "DWR", "IRD", "PREFETCH", "EVICT",
+		"SNOOP"
+	};
+	unsigned int req;
+
+	req = (mca_error >> 4) & 0xf;
+	if (req >= sizeof(requests) / sizeof(requests[0]))
+		return ("???");
+	return (requests[req]);
+}
+
+/*
+ * Dump details about a single machine check.
+ *
+ * Prints one console line holding the recorded APIC ID, the UC/PCC/OVER
+ * flags, and a decoded form of the MCA error code (the status bits
+ * selected by MC_STATUS_MCA_ERROR).  A second line with the address
+ * follows when the status has ADDRV set.
+ */
+static void
+mca_log(struct mca_record *rec)
+{
+	uint16_t mca_error;
+
+	/* The value printed after "CPU" is the APIC ID stored in the record. */
+	printf("MCA: CPU %d ", rec->mr_apic_id);
+	if (rec->mr_status & MC_STATUS_UC)
+		printf("UNCOR ");
+	else
+		printf("COR ");
+	if (rec->mr_status & MC_STATUS_PCC)
+		printf("PCC ");
+	if (rec->mr_status & MC_STATUS_OVER)
+		printf("OVER ");
+	mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
+	switch (mca_error) {
+		/* Simple error codes. */
+	case 0x0000:
+		printf("no error");
+		break;
+	case 0x0001:
+		printf("unclassified error");
+		break;
+	case 0x0002:
+		printf("ucode ROM parity error");
+		break;
+	case 0x0003:
+		printf("external error");
+		break;
+	case 0x0004:
+		printf("FRC error");
+		break;
+	case 0x0400:
+		printf("internal timer error");
+		break;
+	default:
+		/* Any other code in the 0x0400-0x07ff range. */
+		if ((mca_error & 0xfc00) == 0x0400) {
+			printf("internal error %x", mca_error & 0x03ff);
+			break;
+		}
+
+		/* Compound error codes. */
+
+		/*
+		 * Each mask below leaves bit 12 (0x1000) out of the
+		 * comparison, so codes differing only in that bit decode
+		 * the same way.
+		 */
+
+		/* Memory hierarchy error. */
+		if ((mca_error & 0xeffc) == 0x000c) {
+			printf("%s memory error", mca_error_level(mca_error));
+			break;
+		}
+
+		/* TLB error. */
+		if ((mca_error & 0xeff0) == 0x0010) {
+			printf("%sTLB %s error", mca_error_ttype(mca_error),
+			    mca_error_level(mca_error));
+			break;
+		}
+
+		/* Cache error. */
+		if ((mca_error & 0xef00) == 0x0100) {
+			printf("%sCACHE %s %s error",
+			    mca_error_ttype(mca_error),
+			    mca_error_level(mca_error),
+			    mca_error_request(mca_error));
+			break;
+		}
+
+		/* Bus and/or Interconnect error. */
+		if ((mca_error & 0xe800) == 0x0800) {			
+			printf("BUS%s ", mca_error_level(mca_error));
+			/* Bits 10:9 select the participant printed below. */
+			switch ((mca_error & 0x0600) >> 9) {
+			case 0:
+				printf("Source");
+				break;
+			case 1:
+				printf("Responder");
+				break;
+			case 2:
+				printf("Observer");
+				break;
+			default:
+				printf("???");
+				break;
+			}
+			printf(" %s ", mca_error_request(mca_error));
+			/* Bits 3:2 select the target printed below. */
+			switch ((mca_error & 0x000c) >> 2) {
+			case 0:
+				printf("Memory");
+				break;
+			case 2:
+				printf("I/O");
+				break;
+			case 3:
+				printf("Other");
+				break;
+			default:
+				printf("???");
+				break;
+			}
+			/* Bit 8 flags a timeout. */
+			if (mca_error & 0x0100)
+				printf(" timed out");
+			break;
+		}
+
+		printf("unknown error %x", mca_error);
+		break;
+	}
+	printf("\n");
+	if (rec->mr_status & MC_STATUS_ADDRV)
+		printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr);
+}
+
+/*
+ * Scan every machine check bank of the current CPU and record each valid
+ * event via mca_record_entry().  Any non-recoverable errors are reported
+ * immediately via mca_log().  The current thread must be pinned when this
+ * is called.
+ *
+ * The 'mcip' parameter is non-zero when we are being called from the MC
+ * exception handler.  In that case this function returns true if the
+ * system is restartable, i.e. no recorded event had UC, PCC or OVER set.
+ * Otherwise, it returns a count of the number of valid MC records found.
+ */
+static int
+mca_scan(int mcip)
+{
+	struct mca_record *rec;
+	uint64_t mcg_cap, ucmask;
+	int count, i, recoverable;
+
+	count = 0;
+	recoverable = 1;
+	ucmask = MC_STATUS_UC | MC_STATUS_PCC;
+
+	/*
+	 * When handling a MCE#, treat the OVER flag as non-restartable in
+	 * addition to UC and PCC.  The mask has to be extended rather than
+	 * replaced: otherwise an uncorrected or context-corrupting error
+	 * without OVER set would be reported as restartable.
+	 */
+	if (mcip)
+		ucmask |= MC_STATUS_OVER;
+	mcg_cap = rdmsr(MSR_MCG_CAP);
+	for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
+		rec = mca_record_entry(i);
+		if (rec != NULL) {
+			count++;
+			if (rec->mr_status & ucmask) {
+				recoverable = 0;
+				mca_log(rec);
+			}
+		}
+	}
+	return (mcip ? recoverable : count);
+}
+
+/*
+ * Scan the machine check banks on all CPUs by binding to each CPU in
+ * turn.  If any of the CPUs contained new machine check records, log
+ * them to the console.
+ *
+ * Installed as the handler of mca_task by mca_setup(); neither 'context'
+ * nor 'pending' is used.
+ */
+static void
+mca_scan_cpus(void *context, int pending)
+{
+	struct mca_internal *mca;
+	struct thread *td;
+	int count, cpu;
+
+	td = curthread;
+	count = 0;
+	/*
+	 * sched_bind()/sched_unbind() are called with the thread lock
+	 * held; it is dropped only around the scan itself.
+	 */
+	thread_lock(td);
+	for (cpu = 0; cpu <= mp_maxid; cpu++) {
+		if (CPU_ABSENT(cpu))
+			continue;
+		sched_bind(td, cpu);
+		thread_unlock(td);
+		count += mca_scan(0);
+		thread_lock(td);
+		sched_unbind(td);
+	}
+	thread_unlock(td);
+	if (count != 0) {
+		/*
+		 * Print every record not yet marked as logged.  The spin
+		 * lock is released around mca_log() and the walk then
+		 * resumes from the same element; the code in this file
+		 * only ever appends to the list.
+		 * NOTE(review): mca_scan(0) already calls mca_log() for
+		 * UC/PCC records without setting 'logged', so those appear
+		 * to be printed a second time here -- confirm intent.
+		 */
+		mtx_lock_spin(&mca_lock);
+		STAILQ_FOREACH(mca, &mca_records, link) {
+			if (!mca->logged) {
+				mca->logged = 1;
+				mtx_unlock_spin(&mca_lock);
+				mca_log(&mca->rec);
+				mtx_lock_spin(&mca_lock);
+			}
+		}
+		mtx_unlock_spin(&mca_lock);
+	}
+}
+
+/*
+ * Callout handler: queue a scan of all CPUs on the thread taskqueue and
+ * re-arm the timer using the current interval.  'arg' is unused.
+ */
+static void
+mca_periodic_scan(void *arg)
+{
+	int delay;
+
+	taskqueue_enqueue(taskqueue_thread, &mca_task);
+
+	/* mca_ticks holds seconds; scale by hz for the callout. */
+	delay = mca_ticks * hz;
+	callout_reset(&mca_timer, delay, mca_periodic_scan, NULL);
+}
+
+/*
+ * Handler for the "force_scan" sysctl.  Always reads back as 0; when a
+ * non-zero value is written, a scan of all CPUs is queued right away.
+ */
+static int
+sysctl_mca_scan(SYSCTL_HANDLER_ARGS)
+{
+	int request, rv;
+
+	request = 0;
+	rv = sysctl_handle_int(oidp, &request, 0, req);
+	if (rv == 0 && request != 0)
+		taskqueue_enqueue(taskqueue_thread, &mca_task);
+	return (rv);
+}
+
+/*
+ * SYSINIT hook: arm the periodic scan timer for the first time, but only
+ * when the CPU advertises MCA.  'dummy' is unused.
+ */
+static void
+mca_startup(void *dummy)
+{
+
+	if ((cpu_feature & CPUID_MCA) != 0)
+		callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan,
+		    NULL);
+}
+SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL);
+
+/*
+ * One-time initialisation of the shared MCA state: the spin lock, the
+ * record list, the scan task, the periodic callout and the hw.mca sysctl
+ * tree.  Called from mca_init() on the CPU whose cpuid is 0, and only
+ * when the CPU advertises MCA.
+ */
+static void
+mca_setup(void)
+{
+
+	mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
+	STAILQ_INIT(&mca_records);
+	TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL);
+	callout_init(&mca_timer, CALLOUT_MPSAFE);
+	/* hw.mca: container node for everything below. */
+	mca_sysctl_tree = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw),
+	    OID_AUTO, "mca", CTLFLAG_RW, NULL, "MCA container");
+	/* hw.mca.count: read-only view of mca_count. */
+	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
+	    "count", CTLFLAG_RD, &mca_count, 0, "Record count");
+	/* hw.mca.interval: validated by sysctl_mca_ticks(). */
+	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
+	    "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks,
+	    0, sysctl_mca_ticks, "I",
+	    "Periodic interval in seconds to scan for machine checks");
+	/* hw.mca.records.<index>: served by sysctl_mca_records(). */
+	SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
+	    "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records");
+	/* hw.mca.force_scan: a non-zero write queues mca_task. */
+	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
+	    "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
+	    sysctl_mca_scan, "I", "Force an immediate scan for machine checks");
+}
+
+/*
+ * Enable machine check reporting on the current CPU.
+ *
+ * Must be executed on each CPU.  Does nothing when the CPU lacks MCE.
+ * When MCA is present, the shared state is set up once (on cpuid 0),
+ * the banks are enabled and their status registers cleared.  In every
+ * MCE-capable case CR4.MCE is set last.
+ */
+void
+mca_init(void)
+{
+	uint64_t mcg_cap;
+	int i;
+
+	/* MCE is required. */
+	if (!(cpu_feature & CPUID_MCE))
+		return;
+
+	if (cpu_feature & CPUID_MCA) {
+		/* Shared lock, list, task and sysctls are created exactly once. */
+		if (PCPU_GET(cpuid) == 0)
+			mca_setup();
+
+		/* Stay on this CPU while its MSRs are programmed. */
+		sched_pin();
+		mcg_cap = rdmsr(MSR_MCG_CAP);
+		if (mcg_cap & MCG_CAP_CTL_P)
+			/* Enable MCA features. */
+			wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
+
+		for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
+			/*
+			 * Enable logging of all errors.  For P6
+			 * processors, MC0_CTL is always enabled.
+			 *
+			 * XXX: Better CPU test needed here?
+			 */
+			if (!(i == 0 && (cpu_id & 0xf00) == 0x600))
+				wrmsr(MSR_MC_CTL(i), 0xffffffffffffffffUL);
+
+			/*
+			 * When the 0xf00 field of cpu_id is 0xf, save
+			 * whatever the bank already holds before the
+			 * status is wiped below.
+			 */
+			/* XXX: Better CPU test needed here. */
+			if ((cpu_id & 0xf00) == 0xf00)
+				mca_record_entry(i);
+
+			/* Clear all errors. */
+			wrmsr(MSR_MC_STATUS(i), 0);
+		}
+		sched_unpin();
+	}
+
+	/* Turn on delivery of the machine check exception. */
+	load_cr4(rcr4() | CR4_MCE);
+}
+
+/*
+ * Machine check exception entry point.
+ *
+ * Returns non-zero when execution may resume and zero when the caller
+ * should treat the exception as fatal.  CPUs without MCA always get
+ * zero, after the legacy Pentium registers have been printed.
+ */
+int
+mca_intr(void)
+{
+	uint64_t gstatus;
+	int restartable;
+
+	if ((cpu_feature & CPUID_MCA) == 0) {
+		/* Pentium-style MCE only: dump the two registers and give up. */
+		printf("MC Type: 0x%lx  Address: 0x%lx\n",
+		    rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR));
+		return (0);
+	}
+
+	/* Record every bank and learn whether any event rules out a restart. */
+	restartable = mca_scan(1);
+
+	/* Without RIPV set the exception is not restartable either. */
+	gstatus = rdmsr(MSR_MCG_STATUS);
+	if ((gstatus & MCG_STATUS_RIPV) == 0)
+		restartable = 0;
+
+	/* Write the status back with MCIP cleared. */
+	wrmsr(MSR_MCG_STATUS, gstatus & ~MCG_STATUS_MCIP);
+	return (restartable);
+}

Modified: head/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- head/sys/amd64/amd64/mp_machdep.c	Wed May 13 17:22:33 2009	(r192049)
+++ head/sys/amd64/amd64/mp_machdep.c	Wed May 13 17:53:04 2009	(r192050)
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/clock.h>
 #include <machine/cputypes.h>
 #include <machine/cpufunc.h>
+#include <machine/mca.h>
 #include <machine/md_var.h>
 #include <machine/mp_watchdog.h>
 #include <machine/pcb.h>
@@ -667,6 +668,8 @@ init_secondary(void)
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	PCPU_SET(curthread, PCPU_GET(idlethread));
 
+	mca_init();
+
 	mtx_lock_spin(&ap_boot_mtx);
 
 	/* Init local apic for irq's */

Modified: head/sys/amd64/amd64/trap.c
==============================================================================
--- head/sys/amd64/amd64/trap.c	Wed May 13 17:22:33 2009	(r192049)
+++ head/sys/amd64/amd64/trap.c	Wed May 13 17:53:04 2009	(r192050)
@@ -88,6 +88,7 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/cpu.h>
 #include <machine/intr_machdep.h>
+#include <machine/mca.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #ifdef SMP
@@ -266,6 +267,12 @@ trap(struct trapframe *frame)
 		goto out;
 #endif
 
+	if (type == T_MCHK) {
+		if (!mca_intr())
+			trap_fatal(frame, 0);
+		goto out;
+	}
+
 #ifdef KDTRACE_HOOKS
 	/*
 	 * A trap can occur while DTrace executes a probe. Before

Added: head/sys/amd64/include/mca.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/amd64/include/mca.h	Wed May 13 17:53:04 2009	(r192050)
@@ -0,0 +1,48 @@
+/*-
+ * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __MACHINE_MCA_H__
+#define	__MACHINE_MCA_H__
+
+/*
+ * One machine check event, in the layout handed to userland by the
+ * hw.mca.records sysctl.
+ */
+struct mca_record {
+	uint64_t	mr_status;	/* Bank status MSR value. */
+	uint64_t	mr_addr;	/* Bank address MSR; 0 unless ADDRV. */
+	uint64_t	mr_misc;	/* Bank misc MSR; 0 unless MISCV. */
+	uint64_t	mr_tsc;		/* rdtsc() when captured. */
+	int		mr_apic_id;	/* APIC ID of the capturing CPU. */
+};
+
+#ifdef _KERNEL
+
+void	mca_init(void);
+int	mca_intr(void);
+
+#endif
+
+#endif /* !__MACHINE_MCA_H__ */

Modified: head/sys/amd64/include/specialreg.h
==============================================================================
--- head/sys/amd64/include/specialreg.h	Wed May 13 17:22:33 2009	(r192049)
+++ head/sys/amd64/include/specialreg.h	Wed May 13 17:53:04 2009	(r192050)
@@ -346,6 +346,34 @@
 #define	DIR1			0xff
 
 /*
+ * Machine Check register constants.
+ */
+#define	MCG_CAP_COUNT		0x000000ff
+#define	MCG_CAP_CTL_P		0x00000100
+#define	MCG_CAP_EXT_P		0x00000200
+#define	MCG_CAP_TES_P		0x00000800
+#define	MCG_CAP_EXT_CNT		0x00ff0000
+#define	MCG_STATUS_RIPV		0x00000001
+#define	MCG_STATUS_EIPV		0x00000002
+#define	MCG_STATUS_MCIP		0x00000004
+#define	MCG_CTL_ENABLE		0xffffffffffffffffUL
+#define	MCG_CTL_DISABLE		0x0000000000000000UL
+#define	MSR_MC_CTL(x)		(MSR_MC0_CTL + (x) * 4)
+#define	MSR_MC_STATUS(x)	(MSR_MC0_STATUS + (x) * 4)
+#define	MSR_MC_ADDR(x)		(MSR_MC0_ADDR + (x) * 4)
+#define	MSR_MC_MISC(x)		(MSR_MC0_MISC + (x) * 4)
+#define	MC_STATUS_MCA_ERROR	0x000000000000ffffUL
+#define	MC_STATUS_MODEL_ERROR	0x00000000ffff0000UL
+#define	MC_STATUS_OTHER_INFO	0x01ffffff00000000UL
+#define	MC_STATUS_PCC		0x0200000000000000UL
+#define	MC_STATUS_ADDRV		0x0400000000000000UL
+#define	MC_STATUS_MISCV		0x0800000000000000UL
+#define	MC_STATUS_EN		0x1000000000000000UL
+#define	MC_STATUS_UC		0x2000000000000000UL
+#define	MC_STATUS_OVER		0x4000000000000000UL
+#define	MC_STATUS_VAL		0x8000000000000000UL
+
+/*
  * The following four 3-byte registers control the non-cacheable regions.
  * These registers must be written as three separate bytes.
  *

Modified: head/sys/conf/files.amd64
==============================================================================
--- head/sys/conf/files.amd64	Wed May 13 17:22:33 2009	(r192049)
+++ head/sys/conf/files.amd64	Wed May 13 17:53:04 2009	(r192050)
@@ -110,6 +110,7 @@ amd64/amd64/legacy.c		standard
 amd64/amd64/local_apic.c	standard
 amd64/amd64/locore.S		standard	no-obj
 amd64/amd64/machdep.c		standard
+amd64/amd64/mca.c		standard
 amd64/amd64/mem.c		optional	mem
 amd64/amd64/minidump_machdep.c	standard
 amd64/amd64/mp_machdep.c	optional	smp

Modified: head/sys/conf/files.i386
==============================================================================
--- head/sys/conf/files.i386	Wed May 13 17:22:33 2009	(r192049)
+++ head/sys/conf/files.i386	Wed May 13 17:53:04 2009	(r192050)
@@ -278,6 +278,7 @@ i386/xen/locore.s		optional xen	no-obj
 i386/i386/longrun.c		optional cpu_enable_longrun
 i386/i386/machdep.c		standard
 i386/xen/xen_machdep.c		optional xen
+i386/i386/mca.c			standard
 i386/i386/mem.c			optional mem
 i386/i386/minidump_machdep.c	standard
 i386/i386/mp_clock.c		optional smp

Modified: head/sys/i386/i386/machdep.c
==============================================================================
--- head/sys/i386/i386/machdep.c	Wed May 13 17:22:33 2009	(r192049)
+++ head/sys/i386/i386/machdep.c	Wed May 13 17:53:04 2009	(r192050)
@@ -113,6 +113,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/intr_machdep.h>
+#include <machine/mca.h>
 #include <machine/md_var.h>
 #include <machine/metadata.h>
 #include <machine/pc/bios.h>
@@ -320,6 +321,7 @@ cpu_startup(dummy)
 #ifndef XEN
 	cpu_setregs();
 #endif
+	mca_init();
 }
 
 /*

Added: head/sys/i386/i386/mca.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/i386/i386/mca.c	Wed May 13 17:53:04 2009	(r192050)
@@ -0,0 +1,530 @@
+/*-
+ * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Support for x86 machine check architecture.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <machine/mca.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+/*
+ * Kernel-private wrapper around one recorded machine check event: the
+ * exported record, a "has been printed" flag, and the list linkage.
+ */
+struct mca_internal {
+	/* Payload copied out to userland by sysctl_mca_records(). */
+	struct mca_record rec;
+	/* NOTE(review): no visible code in this truncated copy sets this. */
+	int		logged;
+	/* Linkage on the mca_records tail queue. */
+	STAILQ_ENTRY(mca_internal) link;
+};
+
+static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture");
+
+/* NOTE(review): presumably the root of the hw.mca sysctl tree; its setup is not visible in this truncated copy. */
+static struct sysctl_oid *mca_sysctl_tree;
+
+static int mca_count;		/* Number of records stored. */
+
+/* Every recorded event, in insertion order; guarded by mca_lock. */
+static STAILQ_HEAD(, mca_internal) mca_records;
+/* NOTE(review): presumably the periodic scan timer; its users are not visible here. */
+static struct callout mca_timer;
+/* Scan interval, validated by sysctl_mca_ticks(). */
+static int mca_ticks = 3600;	/* Check hourly by default. */
+/* NOTE(review): presumably the deferred scan task; its users are not visible here. */
+static struct task mca_task;
+/* Spin mutex guarding mca_records and updates to mca_count. */
+static struct mtx mca_lock;
+
+/*
+ * Sysctl handler for the scan interval.  Reports the current value and,
+ * on a write, installs the new value provided it is strictly positive;
+ * anything else is rejected with EINVAL.
+ */
+static int
+sysctl_mca_ticks(SYSCTL_HANDLER_ARGS)
+{
+	int interval, rv;
+
+	/* Operate on a copy so a rejected value never reaches mca_ticks. */
+	interval = mca_ticks;
+	rv = sysctl_handle_int(oidp, &interval, 0, req);
+	if (rv != 0)
+		return (rv);
+
+	/* Read-only request: nothing to store. */
+	if (req->newptr == NULL)
+		return (0);
+
+	if (interval < 1)
+		return (EINVAL);
+	mca_ticks = interval;
+	return (0);
+}
+
+/*
+ * Sysctl handler that returns one stored record.  The single trailing
+ * MIB component is the index (0 .. mca_count - 1) of the record to
+ * fetch; the matching struct mca_record is copied out to the caller.
+ *
+ * Returns EINVAL when the name does not consist of exactly one component
+ * or the index does not name a stored record; otherwise returns the
+ * result of SYSCTL_OUT().
+ */
+static int
+sysctl_mca_records(SYSCTL_HANDLER_ARGS)
+{
+	int *name = (int *)arg1;
+	u_int namelen = arg2;
+	struct mca_record record;
+	struct mca_internal *rec;
+	int found, i;
+
+	if (namelen != 1)
+		return (EINVAL);
+
+	/* Cheap unlocked rejection; the list walk below is authoritative. */
+	if (name[0] < 0 || name[0] >= mca_count)
+		return (EINVAL);
+
+	/*
+	 * The record is copied out to userland wholesale, so start from a
+	 * zeroed buffer: neither structure padding nor a failed lookup may
+	 * expose stale kernel stack contents.
+	 */
+	bzero(&record, sizeof(record));
+	found = 0;
+	i = 0;
+	mtx_lock_spin(&mca_lock);
+	STAILQ_FOREACH(rec, &mca_records, link) {
+		if (i == name[0]) {
+			record = rec->rec;
+			found = 1;
+			break;
+		}
+		i++;
+	}
+	mtx_unlock_spin(&mca_lock);
+	if (!found)
+		return (EINVAL);
+	return (SYSCTL_OUT(req, &record, sizeof(record)));
+}
+
+/*
+ * Capture the contents of one machine check bank on the current CPU.
+ *
+ * Returns NULL if the bank's status register does not have the valid bit
+ * set, or if no memory is available for a record.  Otherwise the bank's
+ * registers are copied into a newly allocated record, the record is
+ * appended to mca_records, and a pointer to the embedded struct
+ * mca_record is returned.
+ */
+static struct mca_record *
+mca_record_entry(int bank)
+{
+	struct mca_internal *rec;
+	uint64_t status;
+	u_int p[4];
+
+	status = rdmsr(MSR_MC_STATUS(bank));
+	if (!(status & MC_STATUS_VAL))
+		return (NULL);
+
+	/*
+	 * M_ZERO leaves mr_addr and mr_misc at 0 when the matching valid
+	 * bits are clear, and starts 'logged' at 0.
+	 * NOTE(review): confirm malloc(9) is safe in every context this
+	 * is called from (callers are not visible in this truncated copy).
+	 */
+	rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO);
+	if (rec == NULL) {
+		printf("MCA: Unable to allocate space for an event.\n");
+		return (NULL);
+	}
+
+	/* Save exception information. */
+	rec->rec.mr_status = status;
+	if (status & MC_STATUS_ADDRV)
+		rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank));
+	if (status & MC_STATUS_MISCV)
+		rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank));
+	rec->rec.mr_tsc = rdtsc();
+	rec->rec.mr_apic_id = PCPU_GET(apic_id);
+
+	/*
+	 * Clear machine check.  Don't do this for uncorrectable
+	 * errors so that the BIOS can see them.
+	 */
+	if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) {
+		wrmsr(MSR_MC_STATUS(bank), 0);
+		/* Result in p[] is unused; presumably issued only to serialize -- TODO confirm. */
+		do_cpuid(0, p);
+	}
+
+	/* Publish the record; mca_count tracks the length of the list. */
+	mtx_lock_spin(&mca_lock);
+	STAILQ_INSERT_TAIL(&mca_records, rec, link);
+	mca_count++;
+	mtx_unlock_spin(&mca_lock);
+	return (&rec->rec);
+}
+
+/*
+ * Map bits 3:2 of an MCA error code to the one-letter prefix used when
+ * printing TLB and cache errors ("I", "D" or "G"); the remaining
+ * encoding yields "?".
+ */
+static const char *
+mca_error_ttype(uint16_t mca_error)
+{
+	static const char * const ttypes[4] = { "I", "D", "G", "?" };
+
+	return (ttypes[(mca_error >> 2) & 0x3]);
+}
+
+/*
+ * Map the low two bits of an MCA error code to the level string used in
+ * the decoded output: "L0", "L1", "L2" or "LG".
+ */
+static const char *
+mca_error_level(uint16_t mca_error)
+{
+	static const char * const levels[4] = { "L0", "L1", "L2", "LG" };
+
+	/* A two-bit field has exactly four values, so every case is covered. */
+	return (levels[mca_error & 0x3]);
+}
+
+/*
+ * Map bits 7:4 of an MCA error code to the request mnemonic used in the
+ * decoded output.  Encodings above 0x8 yield "???".
+ */
+static const char *
+mca_error_request(uint16_t mca_error)
+{
+	static const char * const requests[] = {
+		"ERR", "RD", "WR", "DRD", "DWR", "IRD", "PREFETCH", "EVICT",
+		"SNOOP"
+	};
+	unsigned int req;
+
+	req = (mca_error >> 4) & 0xf;
+	if (req >= sizeof(requests) / sizeof(requests[0]))
+		return ("???");
+	return (requests[req]);
+}
+
+/* Dump details about a single machine check. */
+static void
+mca_log(struct mca_record *rec)
+{
+	uint16_t mca_error;
+
+	printf("MCA: CPU %d ", rec->mr_apic_id);
+	if (rec->mr_status & MC_STATUS_UC)
+		printf("UNCOR ");
+	else
+		printf("COR ");
+	if (rec->mr_status & MC_STATUS_PCC)
+		printf("PCC ");
+	if (rec->mr_status & MC_STATUS_OVER)
+		printf("OVER ");
+	mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
+	switch (mca_error) {
+		/* Simple error codes. */
+	case 0x0000:
+		printf("no error");
+		break;
+	case 0x0001:
+		printf("unclassified error");
+		break;
+	case 0x0002:
+		printf("ucode ROM parity error");
+		break;
+	case 0x0003:
+		printf("external error");
+		break;
+	case 0x0004:
+		printf("FRC error");
+		break;
+	case 0x0400:
+		printf("internal timer error");
+		break;
+	default:
+		if ((mca_error & 0xfc00) == 0x0400) {
+			printf("internal error %x", mca_error & 0x03ff);
+			break;
+		}
+
+		/* Compound error codes. */
+
+		/* Memory hierarchy error. */
+		if ((mca_error & 0xeffc) == 0x000c) {
+			printf("%s memory error", mca_error_level(mca_error));
+			break;
+		}
+
+		/* TLB error. */
+		if ((mca_error & 0xeff0) == 0x0010) {
+			printf("%sTLB %s error", mca_error_ttype(mca_error),
+			    mca_error_level(mca_error));
+			break;
+		}
+
+		/* Cache error. */
+		if ((mca_error & 0xef00) == 0x0100) {
+			printf("%sCACHE %s %s error",
+			    mca_error_ttype(mca_error),
+			    mca_error_level(mca_error),
+			    mca_error_request(mca_error));

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list