svn commit: r204907 - in head/sys/amd64: amd64 include

Alan Cox <alc@FreeBSD.org>
Tue Mar 9 03:30:32 UTC 2010


Author: alc
Date: Tue Mar  9 03:30:31 2010
New Revision: 204907
URL: http://svn.freebsd.org/changeset/base/204907

Log:
  Implement AMD's recommended workaround for Erratum 383 on Family 10h
  processors.  With this workaround, superpage promotion can be re-enabled
  under virtualization.  Moreover, machine check exceptions can safely be
  enabled when FreeBSD is running natively on Family 10h processors.
  
  Most of the credit should go to Andriy Gapon for diagnosing the error and
  working with Borislav Petkov at AMD to document it.  Andriy also reviewed
  and tested my patches.
  
  Discussed with:	jhb
  MFC after:	3 weeks

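For readers skimming the diff below, here is a rough standalone restatement of how the patch chooses between the two workarounds. The helper name and boolean inputs are illustrative only; the real logic lives in mca_init() (mca.c) and pmap_init() (pmap.c), and the capability checks those functions make (hw.mca.enabled, CPUID_MCE, CPUID_MCA) are omitted here for brevity.

#include <stdbool.h>
#include <stdio.h>

/*
 * Sketch of the Erratum 383 policy added by this commit.
 * "use_recommended" corresponds to setting workaround_erratum383;
 * "mask_l1tp" corresponds to setting bit 5 in MC0_CTL_MASK/MC1_CTL_MASK.
 */
static void
erratum383_policy(bool amd_family_10h, bool vm_guest, bool l1tp_logging,
    bool *use_recommended, bool *mask_l1tp)
{
	*use_recommended = false;
	*mask_l1tp = false;
	if (!amd_family_10h)
		return;
	if (l1tp_logging)
		*use_recommended = true;	/* mca_init(): safe PDE updates */
	else
		*mask_l1tp = true;		/* mca_init(): stop reporting L1TP errors */
	if (vm_guest)
		*use_recommended = true;	/* pmap_init(): assume the VMM enabled MCA */
}

int
main(void)
{
	bool rec, mask;

	erratum383_policy(true, false, true, &rec, &mask);
	printf("native, default:            recommended=%d mask_l1tp=%d\n", rec, mask);
	erratum383_policy(true, false, false, &rec, &mask);
	printf("native, hw.mca.amd10h_L1TP=0: recommended=%d mask_l1tp=%d\n", rec, mask);
	erratum383_policy(true, true, true, &rec, &mask);
	printf("VM guest:                   recommended=%d mask_l1tp=%d\n", rec, mask);
	return (0);
}
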
Modified:
  head/sys/amd64/amd64/mca.c
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/md_var.h
  head/sys/amd64/include/specialreg.h

Modified: head/sys/amd64/amd64/mca.c
==============================================================================
--- head/sys/amd64/amd64/mca.c	Tue Mar  9 02:28:20 2010	(r204906)
+++ head/sys/amd64/amd64/mca.c	Tue Mar  9 03:30:31 2010	(r204907)
@@ -65,6 +65,15 @@ TUNABLE_INT("hw.mca.enabled", &mca_enabl
 SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
     "Administrative toggle for machine check support");
 
+static int amd10h_L1TP = 1;
+TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
+SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
+    "Administrative toggle for logging of level one TLB parity (L1TP) errors");
+
+int workaround_erratum383;
+SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
+    "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+
 static STAILQ_HEAD(, mca_internal) mca_records;
 static struct callout mca_timer;
 static int mca_ticks = 3600;	/* Check hourly by default. */
@@ -481,7 +490,7 @@ void
 mca_init(void)
 {
 	uint64_t mcg_cap;
-	uint64_t ctl;
+	uint64_t ctl, mask;
 	int skip;
 	int i;
 
@@ -489,6 +498,15 @@ mca_init(void)
 	if (!mca_enabled || !(cpu_feature & CPUID_MCE))
 		return;
 
+	/*
+	 * On AMD Family 10h processors, unless logging of level one TLB
+	 * parity (L1TP) errors is disabled, enable the recommended workaround
+	 * for Erratum 383.
+	 */
+	if (cpu_vendor_id == CPU_VENDOR_AMD &&
+	    CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
+		workaround_erratum383 = 1;
+
 	if (cpu_feature & CPUID_MCA) {
 		if (PCPU_GET(cpuid) == 0)
 			mca_setup();
@@ -499,6 +517,22 @@ mca_init(void)
 			/* Enable MCA features. */
 			wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
 
+		/*
+		 * Disable logging of level one TLB parity (L1TP) errors by
+		 * the data and instruction caches as an alternative
+		 * workaround for AMD Family 10h Erratum 383.  Unlike the
+		 * recommended workaround, there is no performance penalty to
+		 * this workaround.  However, L1TP errors will go unreported.
+		 */
+		if (cpu_vendor_id == CPU_VENDOR_AMD &&
+		    CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
+			mask = rdmsr(MSR_MC0_CTL_MASK);
+			if ((mask & (1UL << 5)) == 0)
+				wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
+			mask = rdmsr(MSR_MC1_CTL_MASK);
+			if ((mask & (1UL << 5)) == 0)
+				wrmsr(MSR_MC1_CTL_MASK, mask | (1UL << 5));
+		}
 		for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
 			/* By default enable logging of all errors. */
 			ctl = 0xffffffffffffffffUL;

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Tue Mar  9 02:28:20 2010	(r204906)
+++ head/sys/amd64/amd64/pmap.c	Tue Mar  9 03:30:31 2010	(r204907)
@@ -7,7 +7,7 @@
  * All rights reserved.
  * Copyright (c) 2003 Peter Wemm
  * All rights reserved.
- * Copyright (c) 2005-2008 Alan L. Cox <alc at cs.rice.edu>
+ * Copyright (c) 2005-2010 Alan L. Cox <alc at cs.rice.edu>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
@@ -255,6 +255,9 @@ static void pmap_remove_entry(struct pma
 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+    pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
 
 static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
@@ -686,13 +689,13 @@ pmap_init(void)
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	/*
-	 * Disable large page mappings by default if the kernel is running in
-	 * a virtual machine on an AMD Family 10h processor.  This is a work-
-	 * around for Erratum 383.
+	 * If the kernel is running in a virtual machine on an AMD Family 10h
+	 * processor, then it must assume that MCA is enabled by the virtual
+	 * machine monitor.
 	 */
 	if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
 	    CPUID_TO_FAMILY(cpu_id) == 0x10)
-		pg_ps_enabled = 0;
+		workaround_erratum383 = 1;
 
 	/*
 	 * Are large page mappings enabled?
@@ -848,6 +851,42 @@ pmap_cache_bits(int mode, boolean_t is_p
 		cache_bits |= PG_NC_PWT;
 	return (cache_bits);
 }
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB.  Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+	u_long cr4;
+
+	if ((newpde & PG_PS) == 0)
+		/* Demotion: flush a specific 2MB page mapping. */
+		invlpg(va);
+	else if ((newpde & PG_G) == 0)
+		/*
+		 * Promotion: flush every 4KB page mapping from the TLB
+		 * because there are too many to flush individually.
+		 */
+		invltlb();
+	else {
+		/*
+		 * Promotion: flush every 4KB page mapping from the TLB,
+		 * including any global (PG_G) mappings.
+		 */
+		cr4 = rcr4();
+		load_cr4(cr4 & ~CR4_PGE);
+		/*
+		 * Although preemption at this point could be detrimental to
+		 * performance, it would not lead to an error.
+		 */
+		load_cr4(cr4);
+	}
+}
 #ifdef SMP
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
@@ -944,6 +983,69 @@ pmap_invalidate_cache(void)
 	smp_cache_flush();
 	sched_unpin();
 }
+
+struct pde_action {
+	cpumask_t store;	/* processor that updates the PDE */
+	cpumask_t invalidate;	/* processors that invalidate their TLB */
+	vm_offset_t va;
+	pd_entry_t *pde;
+	pd_entry_t newpde;
+};
+
+static void
+pmap_update_pde_action(void *arg)
+{
+	struct pde_action *act = arg;
+
+	if (act->store == PCPU_GET(cpumask))
+		pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+	struct pde_action *act = arg;
+
+	if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+		pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes.  This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+	struct pde_action act;
+	cpumask_t active, cpumask;
+
+	sched_pin();
+	cpumask = PCPU_GET(cpumask);
+	if (pmap == kernel_pmap)
+		active = all_cpus;
+	else
+		active = pmap->pm_active;
+	if ((active & PCPU_GET(other_cpus)) != 0) {
+		act.store = cpumask;
+		act.invalidate = active;
+		act.va = va;
+		act.pde = pde;
+		act.newpde = newpde;
+		smp_rendezvous_cpus(cpumask | active,
+		    smp_no_rendevous_barrier, pmap_update_pde_action,
+		    pmap_update_pde_teardown, &act);
+	} else {
+		pde_store(pde, newpde);
+		if ((active & cpumask) != 0)
+			pmap_update_pde_invalidate(va, newpde);
+	}
+	sched_unpin();
+}
 #else /* !SMP */
 /*
  * Normal, non-SMP, invalidation functions.
@@ -981,6 +1083,15 @@ pmap_invalidate_cache(void)
 
 	wbinvd();
 }
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+	pde_store(pde, newpde);
+	if (pmap == kernel_pmap || pmap->pm_active)
+		pmap_update_pde_invalidate(va, newpde);
+}
 #endif /* !SMP */
 
 static void
@@ -2361,7 +2472,10 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t 
 	 * processor changing the setting of PG_A and/or PG_M between
 	 * the read above and the store below. 
 	 */
-	pde_store(pde, newpde);	
+	if (workaround_erratum383)
+		pmap_update_pde(pmap, va, pde, newpde);
+	else
+		pde_store(pde, newpde);
 
 	/*
 	 * Invalidate a stale recursive mapping of the page table page.
@@ -2977,7 +3091,10 @@ setpte:
 	/*
 	 * Map the superpage.
 	 */
-	pde_store(pde, PG_PS | newpde);
+	if (workaround_erratum383)
+		pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+	else
+		pde_store(pde, PG_PS | newpde);
 
 	pmap_pde_promotions++;
 	CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx"

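The new pmap_update_pde_invalidate() above picks one of three flushes based on the new PDE: invlpg for a demotion, invltlb for a promotion of non-global mappings, and a CR4.PGE toggle when the promoted mapping is global, since that is what forces global entries out of the TLB. A minimal sketch of that decision follows; the constants are stand-ins for the kernel's PG_PS and PG_G bits, and the helper name is invented for illustration.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the page-table bits tested above. */
#define	PDE_PS	0x080	/* 2MB page (PG_PS) */
#define	PDE_G	0x100	/* global mapping (PG_G) */

/* Which flush pmap_update_pde_invalidate() would choose for a new PDE. */
static const char *
flush_for(uint64_t newpde)
{
	if ((newpde & PDE_PS) == 0)
		return ("invlpg: demotion, drop the one stale 2MB mapping");
	if ((newpde & PDE_G) == 0)
		return ("invltlb: promotion, too many 4KB mappings to drop one by one");
	return ("toggle CR4.PGE: promotion of a global mapping, flush everything");
}

int
main(void)
{
	printf("%s\n", flush_for(0));
	printf("%s\n", flush_for(PDE_PS));
	printf("%s\n", flush_for(PDE_PS | PDE_G));
	return (0);
}
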
Modified: head/sys/amd64/include/md_var.h
==============================================================================
--- head/sys/amd64/include/md_var.h	Tue Mar  9 02:28:20 2010	(r204906)
+++ head/sys/amd64/include/md_var.h	Tue Mar  9 03:30:31 2010	(r204907)
@@ -61,6 +61,7 @@ extern	char	sigcode[];
 extern	int	szsigcode;
 extern	uint64_t *vm_page_dump;
 extern	int	vm_page_dump_size;
+extern	int	workaround_erratum383;
 extern	int	_udatasel;
 extern	int	_ucodesel;
 extern	int	_ucode32sel;

Modified: head/sys/amd64/include/specialreg.h
==============================================================================
--- head/sys/amd64/include/specialreg.h	Tue Mar  9 02:28:20 2010	(r204906)
+++ head/sys/amd64/include/specialreg.h	Tue Mar  9 03:30:31 2010	(r204907)
@@ -494,6 +494,8 @@
 #define	MSR_TOP_MEM	0xc001001a	/* boundary for ram below 4G */
 #define	MSR_TOP_MEM2	0xc001001d	/* boundary for ram above 4G */
 #define	MSR_K8_UCODE_UPDATE	0xc0010020	/* update microcode */
+#define	MSR_MC0_CTL_MASK	0xc0010044
+#define	MSR_MC1_CTL_MASK	0xc0010045
 
 /* VIA ACE crypto featureset: for via_feature_rng */
 #define	VIA_HAS_RNG		1	/* cpu has RNG */

