svn commit: r205690 - in stable/7/sys: amd64/amd64 amd64/include i386/i386 i386/include

John Baldwin jhb at FreeBSD.org
Fri Mar 26 13:50:19 UTC 2010


Author: jhb
Date: Fri Mar 26 13:50:19 2010
New Revision: 205690
URL: http://svn.freebsd.org/changeset/base/205690

Log:
  MFC 205214:
  - Extend the machine check record structure to include several fields useful
    for parsing model-specific and other fields in machine check events
    including the global machine check capabilities and status registers,
    CPU identification, and the FreeBSD CPU ID.
  - Report these added fields in the console log of a machine check so that
    a record structure can be reconstituted from the console messages.
  - Parse new architectural errors including memory controller errors.

Modified:
  stable/7/sys/amd64/amd64/mca.c
  stable/7/sys/amd64/include/mca.h
  stable/7/sys/amd64/include/specialreg.h
  stable/7/sys/i386/i386/mca.c
  stable/7/sys/i386/include/mca.h
  stable/7/sys/i386/include/specialreg.h
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/amd64/amd64/mca.c
==============================================================================
--- stable/7/sys/amd64/amd64/mca.c	Fri Mar 26 13:49:46 2010	(r205689)
+++ stable/7/sys/amd64/amd64/mca.c	Fri Mar 26 13:50:19 2010	(r205690)
@@ -177,19 +177,46 @@ mca_error_request(uint16_t mca_error)
 	return ("???");
 }
 
+static const char *
+mca_error_mmtype(uint16_t mca_error)
+{
+	/* Map bits 6:4 of the MCA error code to a short mnemonic string. */
+	switch ((mca_error & 0x70) >> 4) {
+	case 0x0:
+		return ("GEN");
+	case 0x1:
+		return ("RD");
+	case 0x2:
+		return ("WR");
+	case 0x3:
+		return ("AC");
+	case 0x4:
+		return ("MS");
+	}
+	return ("???");	/* Field values 0x5-0x7 have no mnemonic here. */
+}
+
 /* Dump details about a single machine check. */
 static void __nonnull(1)
 mca_log(const struct mca_record *rec)
 {
 	uint16_t mca_error;
 
-	printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
+	printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
 	    (long long)rec->mr_status);
-	printf("MCA: CPU %d ", rec->mr_apic_id);
+	printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+	    (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
+	printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
+	    rec->mr_cpu_id, rec->mr_apic_id);
+	printf("MCA: CPU %d ", rec->mr_cpu);
 	if (rec->mr_status & MC_STATUS_UC)
 		printf("UNCOR ");
-	else
+	else {
 		printf("COR ");
+		if (rec->mr_mcg_cap & MCG_CAP_TES_P)
+			printf("(%lld) ", ((long long)rec->mr_status &
+			    MC_STATUS_COR_COUNT) >> 38);
+	}
 	if (rec->mr_status & MC_STATUS_PCC)
 		printf("PCC ");
 	if (rec->mr_status & MC_STATUS_OVER)
@@ -212,6 +239,9 @@ mca_log(const struct mca_record *rec)
 	case 0x0004:
 		printf("FRC error");
 		break;
+	case 0x0005:
+		printf("internal parity error");
+		break;
 	case 0x0400:
 		printf("internal timer error");
 		break;
@@ -236,6 +266,17 @@ mca_log(const struct mca_record *rec)
 			break;
 		}
 
+		/* Memory controller error. */
+		if ((mca_error & 0xef80) == 0x0080) {
+			printf("%s channel ", mca_error_mmtype(mca_error));
+			if ((mca_error & 0x000f) != 0x000f)
+				printf("%d", mca_error & 0x000f);
+			else
+				printf("??");
+			printf(" memory error");
+			break;
+		}
+		
 		/* Cache error. */
 		if ((mca_error & 0xef00) == 0x0100) {
 			printf("%sCACHE %s %s error",
@@ -313,6 +354,11 @@ mca_check_status(int bank, struct mca_re
 		rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
 	rec->mr_tsc = rdtsc();
 	rec->mr_apic_id = PCPU_GET(apic_id);
+	rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
+	rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
+	rec->mr_cpu_id = cpu_id;
+	rec->mr_cpu_vendor_id = cpu_vendor_id;
+	rec->mr_cpu = PCPU_GET(cpuid);
 
 	/*
 	 * Clear machine check.  Don't do this for uncorrectable

Modified: stable/7/sys/amd64/include/mca.h
==============================================================================
--- stable/7/sys/amd64/include/mca.h	Fri Mar 26 13:49:46 2010	(r205689)
+++ stable/7/sys/amd64/include/mca.h	Fri Mar 26 13:50:19 2010	(r205690)
@@ -37,6 +37,11 @@ struct mca_record {
 	uint64_t	mr_tsc;
 	int		mr_apic_id;
 	int		mr_bank;
+	uint64_t	mr_mcg_cap;
+	uint64_t	mr_mcg_status;
+	int		mr_cpu_id;
+	int		mr_cpu_vendor_id;
+	int		mr_cpu;
 };
 
 #ifdef _KERNEL

Modified: stable/7/sys/amd64/include/specialreg.h
==============================================================================
--- stable/7/sys/amd64/include/specialreg.h	Fri Mar 26 13:49:46 2010	(r205689)
+++ stable/7/sys/amd64/include/specialreg.h	Fri Mar 26 13:50:19 2010	(r205690)
@@ -260,6 +260,7 @@
 #define	MSR_MTRR16kBase		0x258
 #define	MSR_MTRR4kBase		0x268
 #define	MSR_PAT			0x277
+#define	MSR_MC0_CTL2		0x280
 #define	MSR_MTRRdefType		0x2ff
 #define	MSR_MC0_CTL		0x400
 #define	MSR_MC0_STATUS		0x401
@@ -345,8 +346,10 @@
 #define	MCG_CAP_COUNT		0x000000ff
 #define	MCG_CAP_CTL_P		0x00000100
 #define	MCG_CAP_EXT_P		0x00000200
+#define	MCG_CAP_CMCI_P		0x00000400
 #define	MCG_CAP_TES_P		0x00000800
 #define	MCG_CAP_EXT_CNT		0x00ff0000
+#define	MCG_CAP_SER_P		0x01000000
 #define	MCG_STATUS_RIPV		0x00000001
 #define	MCG_STATUS_EIPV		0x00000002
 #define	MCG_STATUS_MCIP		0x00000004
@@ -356,9 +359,14 @@
 #define	MSR_MC_STATUS(x)	(MSR_MC0_STATUS + (x) * 4)
 #define	MSR_MC_ADDR(x)		(MSR_MC0_ADDR + (x) * 4)
 #define	MSR_MC_MISC(x)		(MSR_MC0_MISC + (x) * 4)
+#define	MSR_MC_CTL2(x)		(MSR_MC0_CTL2 + (x))	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_MCA_ERROR	0x000000000000ffff
 #define	MC_STATUS_MODEL_ERROR	0x00000000ffff0000
 #define	MC_STATUS_OTHER_INFO	0x01ffffff00000000
+#define	MC_STATUS_COR_COUNT	0x001fffc000000000	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_TES_STATUS	0x0060000000000000	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_AR		0x0080000000000000	/* If MCG_CAP_CMCI_P */
+#define	MC_STATUS_S		0x0100000000000000	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_PCC		0x0200000000000000
 #define	MC_STATUS_ADDRV		0x0400000000000000
 #define	MC_STATUS_MISCV		0x0800000000000000
@@ -366,6 +374,10 @@
 #define	MC_STATUS_UC		0x2000000000000000
 #define	MC_STATUS_OVER		0x4000000000000000
 #define	MC_STATUS_VAL		0x8000000000000000
+#define	MC_MISC_RA_LSB		0x000000000000003f	/* If MCG_CAP_SER_P */
+#define	MC_MISC_ADDRESS_MODE	0x00000000000001c0	/* If MCG_CAP_SER_P */
+#define	MC_CTL2_THRESHOLD	0x0000000000003fff
+#define	MC_CTL2_CMCI_EN		0x0000000040000000
 
 /*
  * The following four 3-byte registers control the non-cacheable regions.

Modified: stable/7/sys/i386/i386/mca.c
==============================================================================
--- stable/7/sys/i386/i386/mca.c	Fri Mar 26 13:49:46 2010	(r205689)
+++ stable/7/sys/i386/i386/mca.c	Fri Mar 26 13:50:19 2010	(r205690)
@@ -177,19 +177,46 @@ mca_error_request(uint16_t mca_error)
 	return ("???");
 }
 
+static const char *
+mca_error_mmtype(uint16_t mca_error)
+{
+	/* Map bits 6:4 of the MCA error code to a short mnemonic string. */
+	switch ((mca_error & 0x70) >> 4) {
+	case 0x0:
+		return ("GEN");
+	case 0x1:
+		return ("RD");
+	case 0x2:
+		return ("WR");
+	case 0x3:
+		return ("AC");
+	case 0x4:
+		return ("MS");
+	}
+	return ("???");	/* Field values 0x5-0x7 have no mnemonic here. */
+}
+
 /* Dump details about a single machine check. */
 static void __nonnull(1)
 mca_log(const struct mca_record *rec)
 {
 	uint16_t mca_error;
 
-	printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
+	printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
 	    (long long)rec->mr_status);
-	printf("MCA: CPU %d ", rec->mr_apic_id);
+	printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+	    (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
+	printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
+	    rec->mr_cpu_id, rec->mr_apic_id);
+	printf("MCA: CPU %d ", rec->mr_cpu);
 	if (rec->mr_status & MC_STATUS_UC)
 		printf("UNCOR ");
-	else
+	else {
 		printf("COR ");
+		if (rec->mr_mcg_cap & MCG_CAP_TES_P)
+			printf("(%lld) ", ((long long)rec->mr_status &
+			    MC_STATUS_COR_COUNT) >> 38);
+	}
 	if (rec->mr_status & MC_STATUS_PCC)
 		printf("PCC ");
 	if (rec->mr_status & MC_STATUS_OVER)
@@ -212,6 +239,9 @@ mca_log(const struct mca_record *rec)
 	case 0x0004:
 		printf("FRC error");
 		break;
+	case 0x0005:
+		printf("internal parity error");
+		break;
 	case 0x0400:
 		printf("internal timer error");
 		break;
@@ -236,6 +266,17 @@ mca_log(const struct mca_record *rec)
 			break;
 		}
 
+		/* Memory controller error. */
+		if ((mca_error & 0xef80) == 0x0080) {
+			printf("%s channel ", mca_error_mmtype(mca_error));
+			if ((mca_error & 0x000f) != 0x000f)
+				printf("%d", mca_error & 0x000f);
+			else
+				printf("??");
+			printf(" memory error");
+			break;
+		}
+		
 		/* Cache error. */
 		if ((mca_error & 0xef00) == 0x0100) {
 			printf("%sCACHE %s %s error",
@@ -313,6 +354,11 @@ mca_check_status(int bank, struct mca_re
 		rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
 	rec->mr_tsc = rdtsc();
 	rec->mr_apic_id = PCPU_GET(apic_id);
+	rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
+	rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
+	rec->mr_cpu_id = cpu_id;
+	rec->mr_cpu_vendor_id = cpu_vendor_id;
+	rec->mr_cpu = PCPU_GET(cpuid);
 
 	/*
 	 * Clear machine check.  Don't do this for uncorrectable

Modified: stable/7/sys/i386/include/mca.h
==============================================================================
--- stable/7/sys/i386/include/mca.h	Fri Mar 26 13:49:46 2010	(r205689)
+++ stable/7/sys/i386/include/mca.h	Fri Mar 26 13:50:19 2010	(r205690)
@@ -37,6 +37,11 @@ struct mca_record {
 	uint64_t	mr_tsc;
 	int		mr_apic_id;
 	int		mr_bank;
+	uint64_t	mr_mcg_cap;
+	uint64_t	mr_mcg_status;
+	int		mr_cpu_id;
+	int		mr_cpu_vendor_id;
+	int		mr_cpu;
 };
 
 #ifdef _KERNEL

Modified: stable/7/sys/i386/include/specialreg.h
==============================================================================
--- stable/7/sys/i386/include/specialreg.h	Fri Mar 26 13:49:46 2010	(r205689)
+++ stable/7/sys/i386/include/specialreg.h	Fri Mar 26 13:50:19 2010	(r205690)
@@ -266,6 +266,7 @@
 #define	MSR_MTRR16kBase		0x258
 #define	MSR_MTRR4kBase		0x268
 #define	MSR_PAT			0x277
+#define	MSR_MC0_CTL2		0x280
 #define	MSR_MTRRdefType		0x2ff
 #define	MSR_MC0_CTL		0x400
 #define	MSR_MC0_STATUS		0x401
@@ -414,8 +415,10 @@
 #define	MCG_CAP_COUNT		0x000000ff
 #define	MCG_CAP_CTL_P		0x00000100
 #define	MCG_CAP_EXT_P		0x00000200
+#define	MCG_CAP_CMCI_P		0x00000400
 #define	MCG_CAP_TES_P		0x00000800
 #define	MCG_CAP_EXT_CNT		0x00ff0000
+#define	MCG_CAP_SER_P		0x01000000
 #define	MCG_STATUS_RIPV		0x00000001
 #define	MCG_STATUS_EIPV		0x00000002
 #define	MCG_STATUS_MCIP		0x00000004
@@ -425,9 +428,14 @@
 #define	MSR_MC_STATUS(x)	(MSR_MC0_STATUS + (x) * 4)
 #define	MSR_MC_ADDR(x)		(MSR_MC0_ADDR + (x) * 4)
 #define	MSR_MC_MISC(x)		(MSR_MC0_MISC + (x) * 4)
+#define	MSR_MC_CTL2(x)		(MSR_MC0_CTL2 + (x))	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_MCA_ERROR	0x000000000000ffff
 #define	MC_STATUS_MODEL_ERROR	0x00000000ffff0000
 #define	MC_STATUS_OTHER_INFO	0x01ffffff00000000
+#define	MC_STATUS_COR_COUNT	0x001fffc000000000	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_TES_STATUS	0x0060000000000000	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_AR		0x0080000000000000	/* If MCG_CAP_CMCI_P */
+#define	MC_STATUS_S		0x0100000000000000	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_PCC		0x0200000000000000
 #define	MC_STATUS_ADDRV		0x0400000000000000
 #define	MC_STATUS_MISCV		0x0800000000000000
@@ -435,6 +443,10 @@
 #define	MC_STATUS_UC		0x2000000000000000
 #define	MC_STATUS_OVER		0x4000000000000000
 #define	MC_STATUS_VAL		0x8000000000000000
+#define	MC_MISC_RA_LSB		0x000000000000003f	/* If MCG_CAP_SER_P */
+#define	MC_MISC_ADDRESS_MODE	0x00000000000001c0	/* If MCG_CAP_SER_P */
+#define	MC_CTL2_THRESHOLD	0x0000000000003fff
+#define	MC_CTL2_CMCI_EN		0x0000000040000000
 
 /*
  * The following four 3-byte registers control the non-cacheable regions.


More information about the svn-src-stable mailing list