svn commit: r205573 - in head/sys/i386: i386 include

Alan Cox alc at FreeBSD.org
Wed Mar 24 03:07:36 UTC 2010


Author: alc
Date: Wed Mar 24 03:07:35 2010
New Revision: 205573
URL: http://svn.freebsd.org/changeset/base/205573

Log:
  Adapt r204907 and r205402, the amd64 implementation of the workaround for
  AMD Family 10h Erratum 383, to i386.
  
  Enable machine check exceptions by default, just like r204913 for amd64.
  
  Enable superpage promotion only if the processor actually supports large
  pages, i.e., PG_PS.
  
  MFC after:	2 weeks

Modified:
  head/sys/i386/i386/mca.c
  head/sys/i386/i386/pmap.c
  head/sys/i386/include/md_var.h
  head/sys/i386/include/specialreg.h
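
Background for the hunks that follow: Erratum 383 on AMD Family 10h processors concerns the case where the TLB transiently holds both a small-page and a large-page translation for the same virtual address, as can happen during superpage promotion or demotion; the processor may report that state as a machine check even though it is not a real hardware error (see the comment on pmap_update_pde() in the pmap.c hunk). Condensed from the mca.c and pmap.c hunks below, the recommended workaround is enabled roughly as in the following sketch. This is a paraphrase for orientation only, with a hypothetical helper name; the real logic lives inline in mca_init() and pmap_init().

/* Paraphrase of the new gating logic; not additional kernel code. */
static void
erratum383_gate(void)		/* hypothetical name */
{
	/*
	 * Native AMD Family 10h: use the safe PDE-update path unless the
	 * administrator chose the alternative of suppressing L1TP error
	 * logging (hw.mca.amd10h_L1TP=0); see mca_init() below.
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD &&
	    CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
		workaround_erratum383 = 1;

	/*
	 * AMD Family 10h guest: assume the hypervisor has MCA enabled, so
	 * the safe PDE-update path is always used; see pmap_init() below.
	 */
	if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
	    CPUID_TO_FAMILY(cpu_id) == 0x10)
		workaround_erratum383 = 1;
}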

Modified: head/sys/i386/i386/mca.c
==============================================================================
--- head/sys/i386/i386/mca.c	Wed Mar 24 02:02:02 2010	(r205572)
+++ head/sys/i386/i386/mca.c	Wed Mar 24 03:07:35 2010	(r205573)
@@ -60,11 +60,20 @@ static int mca_count;		/* Number of reco
 
 SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture");
 
-static int mca_enabled = 0;
+static int mca_enabled = 1;
 TUNABLE_INT("hw.mca.enabled", &mca_enabled);
 SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
     "Administrative toggle for machine check support");
 
+static int amd10h_L1TP = 1;
+TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
+SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
+    "Administrative toggle for logging of level one TLB parity (L1TP) errors");
+
+int workaround_erratum383;
+SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
+    "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+
 static STAILQ_HEAD(, mca_internal) mca_records;
 static struct callout mca_timer;
 static int mca_ticks = 3600;	/* Check hourly by default. */
@@ -527,7 +536,7 @@ void
 mca_init(void)
 {
 	uint64_t mcg_cap;
-	uint64_t ctl;
+	uint64_t ctl, mask;
 	int skip;
 	int i;
 
@@ -535,6 +544,15 @@ mca_init(void)
 	if (!mca_enabled || !(cpu_feature & CPUID_MCE))
 		return;
 
+	/*
+	 * On AMD Family 10h processors, unless logging of level one TLB
+	 * parity (L1TP) errors is disabled, enable the recommended workaround
+	 * for Erratum 383.
+	 */
+	if (cpu_vendor_id == CPU_VENDOR_AMD &&
+	    CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
+		workaround_erratum383 = 1;
+
 	if (cpu_feature & CPUID_MCA) {
 		if (PCPU_GET(cpuid) == 0)
 			mca_setup();
@@ -545,6 +563,19 @@ mca_init(void)
 			/* Enable MCA features. */
 			wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
 
+		/*
+		 * Disable logging of level one TLB parity (L1TP) errors by
+		 * the data cache as an alternative workaround for AMD Family
+		 * 10h Erratum 383.  Unlike the recommended workaround, there
+		 * is no performance penalty to this workaround.  However,
+		 * L1TP errors will go unreported.
+		 */
+		if (cpu_vendor_id == CPU_VENDOR_AMD &&
+		    CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
+			mask = rdmsr(MSR_MC0_CTL_MASK);
+			if ((mask & (1UL << 5)) == 0)
+				wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
+		}
 		for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
 			/* By default enable logging of all errors. */
 			ctl = 0xffffffffffffffffUL;
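
A usage note on the new read-only sysctl: whether the recommended workaround ended up enabled can be checked from userland through hw.mca.erratum383, added in the hunk above. A minimal, hypothetical check program (not part of this commit) might look like:

/* Hypothetical userland check of the hw.mca.erratum383 sysctl added above. */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
	int enabled;
	size_t len = sizeof(enabled);

	if (sysctlbyname("hw.mca.erratum383", &enabled, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("Erratum 383 workaround is %s\n",
	    enabled ? "enabled" : "disabled");
	return (0);
}

The hw.mca.enabled and hw.mca.amd10h_L1TP knobs are CTLFLAG_RDTUN, so they are meant to be set as loader tunables before boot rather than changed at runtime.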

Modified: head/sys/i386/i386/pmap.c
==============================================================================
--- head/sys/i386/i386/pmap.c	Wed Mar 24 02:02:02 2010	(r205572)
+++ head/sys/i386/i386/pmap.c	Wed Mar 24 03:07:35 2010	(r205573)
@@ -5,7 +5,7 @@
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
- * Copyright (c) 2005-2008 Alan L. Cox <alc at cs.rice.edu>
+ * Copyright (c) 2005-2010 Alan L. Cox <alc at cs.rice.edu>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
@@ -297,6 +297,7 @@ static void pmap_insert_pt_page(pmap_t p
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
+static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
 static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
@@ -315,6 +316,9 @@ static void pmap_remove_entry(struct pma
 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m);
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+    pd_entry_t newpde);
+static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
 
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
 
@@ -380,6 +384,13 @@ pmap_bootstrap(vm_paddr_t firstaddr)
 	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 	LIST_INIT(&allpmaps);
+
+	/*
+	 * Request a spin mutex so that changes to allpmaps cannot be
+	 * preempted by smp_rendezvous_cpus().  Otherwise,
+	 * pmap_update_pde_kernel() could access allpmaps while it is
+	 * being changed.
+	 */
 	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
@@ -692,19 +703,21 @@ pmap_init(void)
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	/*
-	 * Disable large page mappings by default if the kernel is running in
-	 * a virtual machine on an AMD Family 10h processor.  This is a work-
-	 * around for Erratum 383.
+	 * If the kernel is running in a virtual machine on an AMD Family 10h
+	 * processor, then it must assume that MCA is enabled by the virtual
+	 * machine monitor.
 	 */
 	if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
 	    CPUID_TO_FAMILY(cpu_id) == 0x10)
-		pg_ps_enabled = 0;
+		workaround_erratum383 = 1;
 
 	/*
-	 * Are large page mappings enabled?
+	 * Are large page mappings supported and enabled?
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
-	if (pg_ps_enabled) {
+	if (pseflag == 0)
+		pg_ps_enabled = 0;
+	else if (pg_ps_enabled) {
 		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
 		    ("pmap_init: can't assign to pagesizes[1]"));
 		pagesizes[1] = NBPDR;
@@ -850,6 +863,66 @@ pmap_cache_bits(int mode, boolean_t is_p
 		cache_bits |= PG_NC_PWT;
 	return (cache_bits);
 }
+
+/*
+ * The caller is responsible for maintaining TLB consistency.
+ */
+static void
+pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
+{
+	pd_entry_t *pde;
+	pmap_t pmap;
+	boolean_t PTD_updated;
+
+	PTD_updated = FALSE;
+	mtx_lock_spin(&allpmaps_lock);
+	LIST_FOREACH(pmap, &allpmaps, pm_list) {
+		if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
+		    PG_FRAME))
+			PTD_updated = TRUE;
+		pde = pmap_pde(pmap, va);
+		pde_store(pde, newpde);
+	}
+	mtx_unlock_spin(&allpmaps_lock);
+	KASSERT(PTD_updated,
+	    ("pmap_kenter_pde: current page table is not in allpmaps"));
+}
+
+/*
+ * After changing the page size for the specified virtual address in the page
+ * table, flush the corresponding entries from the processor's TLB.  Only the
+ * calling processor's TLB is affected.
+ *
+ * The calling thread must be pinned to a processor.
+ */
+static void
+pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+{
+	u_long cr4;
+
+	if ((newpde & PG_PS) == 0)
+		/* Demotion: flush a specific 2MB page mapping. */
+		invlpg(va);
+	else if ((newpde & PG_G) == 0)
+		/*
+		 * Promotion: flush every 4KB page mapping from the TLB
+		 * because there are too many to flush individually.
+		 */
+		invltlb();
+	else {
+		/*
+		 * Promotion: flush every 4KB page mapping from the TLB,
+		 * including any global (PG_G) mappings.
+		 */
+		cr4 = rcr4();
+		load_cr4(cr4 & ~CR4_PGE);
+		/*
+		 * Although preemption at this point could be detrimental to
+		 * performance, it would not lead to an error.
+		 */
+		load_cr4(cr4);
+	}
+}
 #ifdef SMP
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
@@ -946,6 +1019,92 @@ pmap_invalidate_cache(void)
 	smp_cache_flush();
 	sched_unpin();
 }
+
+struct pde_action {
+	cpumask_t store;	/* processor that updates the PDE */
+	cpumask_t invalidate;	/* processors that invalidate their TLB */
+	vm_offset_t va;
+	pd_entry_t *pde;
+	pd_entry_t newpde;
+};
+
+static void
+pmap_update_pde_kernel(void *arg)
+{
+	struct pde_action *act = arg;
+	pd_entry_t *pde;
+	pmap_t pmap;
+
+	if (act->store == PCPU_GET(cpumask))
+		/*
+		 * Elsewhere, this operation requires allpmaps_lock for
+		 * synchronization.  Here, it does not because it is being
+		 * performed in the context of an all_cpus rendezvous.
+		 */
+		LIST_FOREACH(pmap, &allpmaps, pm_list) {
+			pde = pmap_pde(pmap, act->va);
+			pde_store(pde, act->newpde);
+		}
+}
+
+static void
+pmap_update_pde_user(void *arg)
+{
+	struct pde_action *act = arg;
+
+	if (act->store == PCPU_GET(cpumask))
+		pde_store(act->pde, act->newpde);
+}
+
+static void
+pmap_update_pde_teardown(void *arg)
+{
+	struct pde_action *act = arg;
+
+	if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+		pmap_update_pde_invalidate(act->va, act->newpde);
+}
+
+/*
+ * Change the page size for the specified virtual address in a way that
+ * prevents any possibility of the TLB ever having two entries that map the
+ * same virtual address using different page sizes.  This is the recommended
+ * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
+ * machine check exception for a TLB state that is improperly diagnosed as a
+ * hardware error.
+ */
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+	struct pde_action act;
+	cpumask_t active, cpumask;
+
+	sched_pin();
+	cpumask = PCPU_GET(cpumask);
+	if (pmap == kernel_pmap)
+		active = all_cpus;
+	else
+		active = pmap->pm_active;
+	if ((active & PCPU_GET(other_cpus)) != 0) {
+		act.store = cpumask;
+		act.invalidate = active;
+		act.va = va;
+		act.pde = pde;
+		act.newpde = newpde;
+		smp_rendezvous_cpus(cpumask | active,
+		    smp_no_rendevous_barrier, pmap == kernel_pmap ?
+		    pmap_update_pde_kernel : pmap_update_pde_user,
+		    pmap_update_pde_teardown, &act);
+	} else {
+		if (pmap == kernel_pmap)
+			pmap_kenter_pde(va, newpde);
+		else
+			pde_store(pde, newpde);
+		if ((active & cpumask) != 0)
+			pmap_update_pde_invalidate(va, newpde);
+	}
+	sched_unpin();
+}
 #else /* !SMP */
 /*
  * Normal, non-SMP, 486+ invalidation functions.
@@ -983,6 +1142,18 @@ pmap_invalidate_cache(void)
 
 	wbinvd();
 }
+
+static void
+pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+	if (pmap == kernel_pmap)
+		pmap_kenter_pde(va, newpde);
+	else
+		pde_store(pde, newpde);
+	if (pmap == kernel_pmap || pmap->pm_active)
+		pmap_update_pde_invalidate(va, newpde);
+}
 #endif /* !SMP */
 
 void
@@ -1856,12 +2027,9 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTL
 void
 pmap_growkernel(vm_offset_t addr)
 {
-	struct pmap *pmap;
 	vm_paddr_t ptppaddr;
 	vm_page_t nkpg;
 	pd_entry_t newpdir;
-	pt_entry_t *pde;
-	boolean_t updated_PTD;
 
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 	if (kernel_vm_end == 0) {
@@ -1903,18 +2071,7 @@ pmap_growkernel(vm_offset_t addr)
 		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 		pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir;
 
-		updated_PTD = FALSE;
-		mtx_lock_spin(&allpmaps_lock);
-		LIST_FOREACH(pmap, &allpmaps, pm_list) {
-			if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
-			    PG_FRAME))
-				updated_PTD = TRUE;
-			pde = pmap_pde(pmap, kernel_vm_end);
-			pde_store(pde, newpdir);
-		}
-		mtx_unlock_spin(&allpmaps_lock);
-		KASSERT(updated_PTD,
-		    ("pmap_growkernel: current page table is not in allpmaps"));
+		pmap_kenter_pde(kernel_vm_end, newpdir);
 		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 			kernel_vm_end = kernel_map->max_offset;
@@ -2358,7 +2515,6 @@ static boolean_t
 pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde, oldpde;
-	pmap_t allpmaps_entry;
 	pt_entry_t *firstpte, newpte;
 	vm_paddr_t mptepa;
 	vm_page_t free, mpte;
@@ -2464,25 +2620,11 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t 
 	 * processor changing the setting of PG_A and/or PG_M between
 	 * the read above and the store below. 
 	 */
-	if (pmap == kernel_pmap) {
-		/*
-		 * A harmless race exists between this loop and the bcopy()
-		 * in pmap_pinit() that initializes the kernel segment of
-		 * the new page table directory.  Specifically, that bcopy()
-		 * may copy the new PDE from the PTD to the new page table
-		 * before this loop updates that new page table.
-		 */
-		mtx_lock_spin(&allpmaps_lock);
-		LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) {
-			pde = pmap_pde(allpmaps_entry, va);
-			KASSERT(*pde == newpde || (*pde & PG_PTE_PROMOTE) ==
-			    (oldpde & PG_PTE_PROMOTE),
-			    ("pmap_demote_pde: pde was %#jx, expected %#jx",
-			    (uintmax_t)*pde, (uintmax_t)oldpde));
-			pde_store(pde, newpde);
-		}
-		mtx_unlock_spin(&allpmaps_lock);
-	} else
+	if (workaround_erratum383)
+		pmap_update_pde(pmap, va, pde, newpde);
+	else if (pmap == kernel_pmap)
+		pmap_kenter_pde(va, newpde);
+	else
 		pde_store(pde, newpde);	
 	if (firstpte == PADDR2)
 		mtx_unlock(&PMAP2mutex);
@@ -3001,7 +3143,6 @@ static void
 pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde;
-	pmap_t allpmaps_entry;
 	pt_entry_t *firstpte, oldpte, pa, *pte;
 	vm_offset_t oldpteva;
 	vm_page_t mpte;
@@ -3105,14 +3246,11 @@ setpte:
 	/*
 	 * Map the superpage.
 	 */
-	if (pmap == kernel_pmap) {
-		mtx_lock_spin(&allpmaps_lock);
-		LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) {
-			pde = pmap_pde(allpmaps_entry, va);
-			pde_store(pde, PG_PS | newpde);
-		}
-		mtx_unlock_spin(&allpmaps_lock);
-	} else
+	if (workaround_erratum383)
+		pmap_update_pde(pmap, va, pde, PG_PS | newpde);
+	else if (pmap == kernel_pmap)
+		pmap_kenter_pde(va, PG_PS | newpde);
+	else
 		pde_store(pde, PG_PS | newpde);
 
 	pmap_pde_promotions++;
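
On the pg_ps_enabled change earlier in this file: pseflag is assumed to be nonzero (PG_PS) only when the processor advertises PSE support and large pages have been enabled in CR4; the code that establishes it lives elsewhere in the i386 startup path and is not part of this diff. A rough sketch of the assumed relationship:

/*
 * Assumed, not shown in this diff: pseflag ends up as PG_PS only when the
 * CPU supports large pages, so a zero pseflag in pmap_init() means
 * superpage promotion must stay off regardless of the tunable.
 */
if (cpu_feature & CPUID_PSE) {
	load_cr4(rcr4() | CR4_PSE);	/* enable large-page translations */
	pseflag = PG_PS;		/* PDEs may now use the PS bit */
}

With that, the new "if (pseflag == 0) pg_ps_enabled = 0;" test makes the vm.pmap.pg_ps_enabled tunable a no-op on processors without PG_PS, which is the third item in the log message.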

Modified: head/sys/i386/include/md_var.h
==============================================================================
--- head/sys/i386/include/md_var.h	Wed Mar 24 02:02:02 2010	(r205572)
+++ head/sys/i386/include/md_var.h	Wed Mar 24 03:07:35 2010	(r205573)
@@ -73,6 +73,7 @@ extern	int	szosigcode;
 #endif
 extern	uint32_t *vm_page_dump;
 extern	int	vm_page_dump_size;
+extern	int	workaround_erratum383;
 
 typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
 struct	thread;

Modified: head/sys/i386/include/specialreg.h
==============================================================================
--- head/sys/i386/include/specialreg.h	Wed Mar 24 02:02:02 2010	(r205572)
+++ head/sys/i386/include/specialreg.h	Wed Mar 24 03:07:35 2010	(r205573)
@@ -551,6 +551,7 @@
 /* AMD64 MSR's */
 #define	MSR_EFER	0xc0000080	/* extended features */
 #define	MSR_K8_UCODE_UPDATE	0xc0010020	/* update microcode */
+#define	MSR_MC0_CTL_MASK	0xc0010044
 
 /* VIA ACE crypto featureset: for via_feature_rng */
 #define	VIA_HAS_RNG		1	/* cpu has RNG */

