git: 3bdba24c7460 - main - mca: Decode new Intel status bits.

From: Alexander Motin <mav_at_FreeBSD.org>
Date: Wed, 08 Dec 2021 17:03:37 UTC
The branch main has been updated by mav:

URL: https://cgit.FreeBSD.org/src/commit/?id=3bdba24c74604b1bb27623cd8304476bbbed69d1

commit 3bdba24c74604b1bb27623cd8304476bbbed69d1
Author:     Alexander Motin <mav@FreeBSD.org>
AuthorDate: 2021-12-08 17:01:48 +0000
Commit:     Alexander Motin <mav@FreeBSD.org>
CommitDate: 2021-12-08 17:03:28 +0000

    mca: Decode new Intel status bits.
    
    MFC after:      1 week
---
 sys/x86/include/specialreg.h | 11 +++++
 sys/x86/x86/mca.c            | 96 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 103 insertions(+), 4 deletions(-)

diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
index 22672d50efed..ddac2b9ea2b2 100644
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -608,6 +608,7 @@
 #define	MSR_MC4_STATUS		0x411
 #define	MSR_MC4_ADDR		0x412
 #define	MSR_MC4_MISC		0x413
+#define	MSR_MCG_EXT_CTL		0x4d0
 #define	MSR_RAPL_POWER_UNIT	0x606
 #define	MSR_PKG_ENERGY_STATUS	0x611
 #define	MSR_DRAM_ENERGY_STATUS	0x619
@@ -770,6 +771,7 @@
 #define	IA32_FEATURE_CONTROL_LOCK	0x01	/* lock bit */
 #define	IA32_FEATURE_CONTROL_SMX_EN	0x02	/* enable VMX inside SMX */
 #define	IA32_FEATURE_CONTROL_VMX_EN	0x04	/* enable VMX outside SMX */
+#define	IA32_FEATURE_CONTROL_LMCE_EN	0x100000 /* enable local MCE */
 
 /* MSR IA32_MISC_ENABLE */
 #define	IA32_MISC_EN_FASTSTR	0x0000000000000001ULL
@@ -948,9 +950,13 @@
 #define	MCG_CAP_TES_P		0x00000800
 #define	MCG_CAP_EXT_CNT		0x00ff0000
 #define	MCG_CAP_SER_P		0x01000000
+#define	MCG_CAP_EMC_P		0x02000000
+#define	MCG_CAP_ELOG_P		0x04000000
+#define	MCG_CAP_LMCE_P		0x08000000
 #define	MCG_STATUS_RIPV		0x00000001
 #define	MCG_STATUS_EIPV		0x00000002
 #define	MCG_STATUS_MCIP		0x00000004
+#define	MCG_STATUS_LMCS		0x00000008		/* if MCG_CAP_LMCE_P */
 #define	MCG_CTL_ENABLE		0xffffffffffffffff
 #define	MCG_CTL_DISABLE		0x0000000000000000
 #define	MSR_MC_CTL(x)		(MSR_MC0_CTL + (x) * 4)
@@ -974,6 +980,11 @@
 #define	MC_STATUS_VAL		0x8000000000000000
 #define	MC_MISC_RA_LSB		0x000000000000003f	/* If MCG_CAP_SER_P */
 #define	MC_MISC_ADDRESS_MODE	0x00000000000001c0	/* If MCG_CAP_SER_P */
+#define	MC_MISC_PCIE_RID	0x00000000ffff0000	/* BUS | SLOT | FUNC */
+#define	MC_MISC_PCIE_FUNC	0x0000000000070000	/* bits 18:16 */
+#define	MC_MISC_PCIE_SLOT	0x0000000000f80000	/* bits 23:19 */
+#define	MC_MISC_PCIE_BUS	0x00000000ff000000	/* bits 31:24 */
+#define	MC_MISC_PCIE_SEG	0x000000ff00000000	/* bits 39:32 */
 #define	MC_CTL2_THRESHOLD	0x0000000000007fff
 #define	MC_CTL2_CMCI_EN		0x0000000040000000
 #define	MC_AMDNB_BANK		4
diff --git a/sys/x86/x86/mca.c b/sys/x86/x86/mca.c
index 1b34fc398068..2f4fca0ca062 100644
--- a/sys/x86/x86/mca.c
+++ b/sys/x86/x86/mca.c
@@ -227,6 +227,26 @@ cmci_supported(uint64_t mcg_cap)
 	return ((mcg_cap & MCG_CAP_CMCI_P) != 0);
 }
 
+static inline bool
+tes_supported(uint64_t mcg_cap)
+{
+
+	/*
+	 * AMD documents the MCG_CAP_TES_P bit as reserved, so it is
+	 * only honored on Intel CPUs; every other vendor reports no
+	 * TES support regardless of the bit's value.
+	 */
+	return (cpu_vendor_id == CPU_VENDOR_INTEL &&
+	    (mcg_cap & MCG_CAP_TES_P) != 0);
+}
+
+static inline bool
+ser_supported(uint64_t mcg_cap)
+{
+	/* SER needs MCG_CAP_SER_P set and TES support as well. */
+	return ((mcg_cap & MCG_CAP_SER_P) != 0 && tes_supported(mcg_cap));
+}
+
 static int
 sysctl_positive_int(SYSCTL_HANDLER_ARGS)
 {
@@ -352,6 +372,25 @@ mca_error_mmtype(uint16_t mca_error)
 	return ("???");
 }
 
+static const char *
+mca_addres_mode(uint64_t mca_misc)
+{
+	/* Names for the address mode field, indexed by its value. */
+	static const char * const modes[] = {
+		"Segment Offset",
+		"Linear Address",
+		"Physical Address",
+		"Memory Address",
+		"???",
+		"???",
+		"???",
+		"Generic",
+	};
+
+	/* MC_MISC_ADDRESS_MODE is a 3-bit mask, so the index is 0..7. */
+	return (modes[(mca_misc & MC_MISC_ADDRESS_MODE) >> 6]);
+}
+
 static int
 mca_mute(const struct mca_record *rec)
 {
@@ -403,9 +442,32 @@ mca_log(const struct mca_record *rec)
 		if (cmci_supported(rec->mr_mcg_cap))
 			printf("(%lld) ", ((long long)rec->mr_status &
 			    MC_STATUS_COR_COUNT) >> 38);
+		if (tes_supported(rec->mr_mcg_cap)) {
+			/*
+			 * Print one label per TES status value.  Each case
+			 * must break, otherwise a Green status would fall
+			 * through and be reported as "(Green) (Yellow) ".
+			 */
+			switch ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) {
+			case 0x1:
+				printf("(Green) ");
+				break;
+			case 0x2:
+				printf("(Yellow) ");
+				break;
+			}
+		}
 	}
+	if (rec->mr_status & MC_STATUS_EN)
+		printf("EN ");
 	if (rec->mr_status & MC_STATUS_PCC)
 		printf("PCC ");
+	if (ser_supported(rec->mr_mcg_cap)) {
+		if (rec->mr_status & MC_STATUS_S)
+			printf("S ");
+		if (rec->mr_status & MC_STATUS_AR)
+			printf("AR ");
+	}
 	if (rec->mr_status & MC_STATUS_OVER)
 		printf("OVER ");
 	mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
@@ -429,9 +484,23 @@ mca_log(const struct mca_record *rec)
 	case 0x0005:
 		printf("internal parity error");
 		break;
+	case 0x0006:
+		printf("SMM handler code access violation");
+		break;
 	case 0x0400:
 		printf("internal timer error");
 		break;
+	case 0x0e0b:
+		printf("generic I/O error");
+		if (rec->mr_cpu_vendor_id == CPU_VENDOR_INTEL &&
+		    (rec->mr_status & MC_STATUS_MISCV)) {
+			printf(" (pci%d:%d:%d:%d)",
+			    (int)((rec->mr_misc & MC_MISC_PCIE_SEG) >> 32),
+			    (int)((rec->mr_misc & MC_MISC_PCIE_BUS) >> 24),
+			    (int)((rec->mr_misc & MC_MISC_PCIE_SLOT) >> 19),
+			    (int)((rec->mr_misc & MC_MISC_PCIE_FUNC) >> 16));
+		}
+		break;
 	default:
 		if ((mca_error & 0xfc00) == 0x0400) {
 			printf("internal error %x", mca_error & 0x03ff);
@@ -463,7 +532,7 @@ mca_log(const struct mca_record *rec)
 			printf(" memory error");
 			break;
 		}
-		
+
 		/* Cache error. */
 		if ((mca_error & 0xef00) == 0x0100) {
 			printf("%sCACHE %s %s error",
@@ -473,8 +542,19 @@ mca_log(const struct mca_record *rec)
 			break;
 		}
 
+		/* Extended memory error. */
+		if ((mca_error & 0xef80) == 0x0280) {
+			printf("%s channel ", mca_error_mmtype(mca_error));
+			if ((mca_error & 0x000f) != 0x000f)
+				printf("%d", mca_error & 0x000f);
+			else
+				printf("??");
+			printf(" extended memory error");
+			break;
+		}
+
 		/* Bus and/or Interconnect error. */
-		if ((mca_error & 0xe800) == 0x0800) {			
+		if ((mca_error & 0xe800) == 0x0800) {
 			printf("BUS%s ", mca_error_level(mca_error));
 			switch ((mca_error & 0x0600) >> 9) {
 			case 0:
@@ -514,8 +594,16 @@ mca_log(const struct mca_record *rec)
 		break;
 	}
 	printf("\n");
-	if (rec->mr_status & MC_STATUS_ADDRV)
-		printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr);
+	if (rec->mr_status & MC_STATUS_ADDRV) {
+		printf("MCA: Address 0x%llx", (long long)rec->mr_addr);
+		if (ser_supported(rec->mr_mcg_cap) &&
+		    (rec->mr_status & MC_STATUS_MISCV)) {
+			printf(" (Mode: %s, LSB: %d)",
+			    mca_addres_mode(rec->mr_misc),
+			    (int)(rec->mr_misc & MC_MISC_RA_LSB));
+		}
+		printf("\n");
+	}
 	if (rec->mr_status & MC_STATUS_MISCV)
 		printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
 }