git: 5272c66a00c5 - stable/13 - hwpmc: Fix amd/arm64/armv7/uncore sampling overflow race

From: Jessica Clarke <jrtc27_at_FreeBSD.org>
Date: Tue, 25 Jan 2022 01:40:20 UTC
The branch stable/13 has been updated by jrtc27:

URL: https://cgit.FreeBSD.org/src/commit/?id=5272c66a00c510b332c6477bbeacaa0179f96ff3

commit 5272c66a00c510b332c6477bbeacaa0179f96ff3
Author:     Jessica Clarke <jrtc27@FreeBSD.org>
AuthorDate: 2022-01-10 14:30:05 +0000
Commit:     Jessica Clarke <jrtc27@FreeBSD.org>
CommitDate: 2022-01-25 00:00:01 +0000

    hwpmc: Fix amd/arm64/armv7/uncore sampling overflow race
    
    If a counter overflows and counts slightly past zero just as we read
    it on switch out then, if using sampling mode, we will negate the
    small wrapped value we read to give a huge reload count, and if we
    later switch that context back in we will validate that value against
    pm_reloadcount and panic an INVARIANTS kernel with:
    
      panic: [pmc,1470] pmcval outside of expected range cpu=2 ri=16 pmcval=fffff292 pm_reloadcount=10000
    
    or similar. Presumably in a non-INVARIANTS kernel we will instead
    just use the provided value as the reload count, which would lead to
    the overflow not happening for a very long time (e.g. 78 hours for a
    48-bit counter incrementing at an average rate of 1GHz).
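
    To make the race concrete, here is a minimal standalone sketch, not
    the hwpmc code itself; the 48-bit width, the value_to_reload_count
    helper and the 1 GHz rate are purely illustrative assumptions:

      #include <stdint.h>
      #include <stdio.h>

      #define PMC_WIDTH 48
      #define PMC_MASK  ((1ULL << PMC_WIDTH) - 1)

      /* Reload count: events remaining until the counter next overflows. */
      static uint64_t
      value_to_reload_count(uint64_t v)
      {
              v &= PMC_MASK;
              return (((1ULL << PMC_WIDTH) - v) & PMC_MASK);
      }

      int
      main(void)
      {
              /* The counter wrapped past zero by 4 events before the read. */
              uint64_t reload = value_to_reload_count(4);

              /* Prints roughly 2^48 events, i.e. about 78.2 hours. */
              printf("reload = 0x%jx events, %.1f hours at 1 GHz\n",
                  (uintmax_t)reload, reload / 1e9 / 3600.0);
              return (0);
      }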
    
    Instead, clamp the reload count to 0 (which corresponds precisely to the
    value we would compute if it had just overflowed and no more), which
    will result in hwpmc using the full original reload count again. This is
    the approach used by core for Intel (for both fixed and programmable
    counters).
    
    As part of this, armv7 and arm64 are made conceptually simpler:
    rather than skipping updates to the overflow count for sampling mode
    counters so that it is always kept at 0, those special cases are
    removed so the count is always maintained, and the concatenation of
    it and the hardware counter can always be viewed as a 64-bit counter,
    which also makes these architectures look more like the others.
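
    In this view the logical counter is simply pps_overflowcnt glued on
    top of the hardware register. A minimal sketch of the idea follows;
    the 32-bit hardware width and the helper name are assumptions of
    this example rather than the driver's own code:

      /*
       * pps_overflowcnt supplies the upper bits and the 32-bit hardware
       * event counter the lower bits, so together they behave as one
       * monotonically increasing 64-bit counter.
       */
      static uint64_t
      logical_counter(uint64_t overflowcnt, uint32_t hwcounter)
      {
              return ((overflowcnt << 32) | hwcounter);
      }

    Every overflow, whether taken in the interrupt handler or noticed on
    read, now bumps pps_overflowcnt by one unconditionally, which is
    exactly what the arm64 and armv7 hunks below do.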
    
    Whilst here, fix an instance of UB (shifting a 1 into the sign bit) for
    amd in its sign-extension code.
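
    Concretely, the difference is only where the left shift happens; a
    minimal sketch (sext48 is an illustrative name, not hwpmc code):

      #include <stdint.h>

      /* Sign-extend a 48-bit counter value to 64 bits. */
      static int64_t
      sext48(uint64_t tmp)
      {
              /*
               * ((int64_t)tmp << 16) >> 16 is UB: when bit 47 of tmp is
               * set, the left shift moves a 1 into the sign bit of a
               * signed type. Shifting the unsigned value first, then
               * casting and relying on the (implementation-defined, in
               * practice arithmetic) signed right shift is well behaved.
               */
              return ((int64_t)(tmp << 16) >> 16);
      }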
    
    Reviewed by:    andrew, mhorne, kib
    MFC after:      1 week
    Differential Revision:  https://reviews.freebsd.org/D33654
    
    (cherry picked from commit e74c7ffcb11b6ac879167249adc23a1f9ee5aab6)
---
 sys/dev/hwpmc/hwpmc_amd.c    | 15 ++++++++++++---
 sys/dev/hwpmc/hwpmc_arm64.c  | 25 ++++++++++++++++---------
 sys/dev/hwpmc/hwpmc_armv7.c  | 26 +++++++++++++++++---------
 sys/dev/hwpmc/hwpmc_uncore.c |  4 ++++
 4 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/sys/dev/hwpmc/hwpmc_amd.c b/sys/dev/hwpmc/hwpmc_amd.c
index a95615926bc3..f0b202af8038 100644
--- a/sys/dev/hwpmc/hwpmc_amd.c
+++ b/sys/dev/hwpmc/hwpmc_amd.c
@@ -431,9 +431,18 @@ amd_read_pmc(int cpu, int ri, pmc_value_t *v)
 	tmp = rdmsr(pd->pm_perfctr); /* RDMSR serializes */
 	PMCDBG2(MDP,REA,2,"amd-read (pre-munge) id=%d -> %jd", ri, tmp);
 	if (PMC_IS_SAMPLING_MODE(mode)) {
-		/* Sign extend 48 bit value to 64 bits. */
-		tmp = (pmc_value_t) (((int64_t) tmp << 16) >> 16);
-		tmp = AMD_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
+		/*
+		 * Clamp value to 0 if the counter just overflowed,
+		 * otherwise the returned reload count would wrap to a
+		 * huge value.
+		 */
+		if ((tmp & (1ULL << 47)) == 0)
+			tmp = 0;
+		else {
+			/* Sign extend 48 bit value to 64 bits. */
+			tmp = (pmc_value_t) ((int64_t)(tmp << 16) >> 16);
+			tmp = AMD_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
+		}
 	}
 	*v = tmp;
 
diff --git a/sys/dev/hwpmc/hwpmc_arm64.c b/sys/dev/hwpmc/hwpmc_arm64.c
index ea433ca191d2..675e93c5771d 100644
--- a/sys/dev/hwpmc/hwpmc_arm64.c
+++ b/sys/dev/hwpmc/hwpmc_arm64.c
@@ -219,8 +219,7 @@ arm64_read_pmc(int cpu, int ri, pmc_value_t *v)
 	if ((READ_SPECIALREG(pmovsclr_el0) & reg) != 0) {
 		/* Clear Overflow Flag */
 		WRITE_SPECIALREG(pmovsclr_el0, reg);
-		if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
-			pm->pm_pcpu_state[cpu].pps_overflowcnt++;
+		pm->pm_pcpu_state[cpu].pps_overflowcnt++;
 
 		/* Reread counter in case we raced. */
 		tmp = arm64_pmcn_read(ri);
@@ -229,10 +228,18 @@ arm64_read_pmc(int cpu, int ri, pmc_value_t *v)
 	intr_restore(s);
 
 	PMCDBG2(MDP, REA, 2, "arm64-read id=%d -> %jd", ri, tmp);
-	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
-		*v = ARMV8_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
-	else
-		*v = tmp;
+	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
+		/*
+		 * Clamp value to 0 if the counter just overflowed,
+		 * otherwise the returned reload count would wrap to a
+		 * huge value.
+		 */
+		if ((tmp & (1ull << 63)) == 0)
+			tmp = 0;
+		else
+			tmp = ARMV8_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
+	}
+	*v = tmp;
 
 	return (0);
 }
@@ -380,10 +387,10 @@ arm64_intr(struct trapframe *tf)
 
 		retval = 1; /* Found an interrupting PMC. */
 
-		if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
-			pm->pm_pcpu_state[cpu].pps_overflowcnt += 1;
+		pm->pm_pcpu_state[cpu].pps_overflowcnt += 1;
+
+		if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 			continue;
-		}
 
 		if (pm->pm_state != PMC_STATE_RUNNING)
 			continue;
diff --git a/sys/dev/hwpmc/hwpmc_armv7.c b/sys/dev/hwpmc/hwpmc_armv7.c
index 84a983bbc69c..eaef95932c60 100644
--- a/sys/dev/hwpmc/hwpmc_armv7.c
+++ b/sys/dev/hwpmc/hwpmc_armv7.c
@@ -191,8 +191,7 @@ armv7_read_pmc(int cpu, int ri, pmc_value_t *v)
 	if ((cp15_pmovsr_get() & reg) != 0) {
 		/* Clear Overflow Flag */
 		cp15_pmovsr_set(reg);
-		if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
-			pm->pm_pcpu_state[cpu].pps_overflowcnt++;
+		pm->pm_pcpu_state[cpu].pps_overflowcnt++;
 
 		/* Reread counter in case we raced. */
 		tmp = armv7_pmcn_read(ri, pm->pm_md.pm_armv7.pm_armv7_evsel);
@@ -201,10 +200,18 @@ armv7_read_pmc(int cpu, int ri, pmc_value_t *v)
 	intr_restore(s);
 
 	PMCDBG2(MDP, REA, 2, "armv7-read id=%d -> %jd", ri, tmp);
-	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
-		*v = ARMV7_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
-	else
-		*v = tmp;
+	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
+		/*
+		 * Clamp value to 0 if the counter just overflowed,
+		 * otherwise the returned reload count would wrap to a
+		 * huge value.
+		 */
+		if ((tmp & (1ull << 63)) == 0)
+			tmp = 0;
+		else
+			tmp = ARMV7_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
+	}
+	*v = tmp;
 
 	return 0;
 }
@@ -362,10 +369,11 @@ armv7_intr(struct trapframe *tf)
 
 		retval = 1; /* Found an interrupting PMC. */
 
-		if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
-			pm->pm_pcpu_state[cpu].pps_overflowcnt += 1;
+		pm->pm_pcpu_state[cpu].pps_overflowcnt += 1;
+
+		if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 			continue;
-		}
+
 		if (pm->pm_state != PMC_STATE_RUNNING)
 			continue;
 
diff --git a/sys/dev/hwpmc/hwpmc_uncore.c b/sys/dev/hwpmc/hwpmc_uncore.c
index 2c638833dcd9..a5e3d9bb2f8a 100644
--- a/sys/dev/hwpmc/hwpmc_uncore.c
+++ b/sys/dev/hwpmc/hwpmc_uncore.c
@@ -175,6 +175,10 @@ uncore_pcpu_fini(struct pmc_mdep *md, int cpu)
 static pmc_value_t
 ucf_perfctr_value_to_reload_count(pmc_value_t v)
 {
+
+	/* If the PMC has overflowed, return a reload count of zero. */
+	if ((v & (1ULL << (uncore_ucf_width - 1))) == 0)
+		return (0);
 	v &= (1ULL << uncore_ucf_width) - 1;
 	return (1ULL << uncore_ucf_width) - v;
 }
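
For completeness, the clamp in ucf_perfctr_value_to_reload_count() can be
exercised standalone; in this sketch uncore_ucf_width is hard-coded to 48
purely for illustration:

    #include <stdint.h>
    #include <stdio.h>

    static const int uncore_ucf_width = 48;    /* illustrative width */

    static uint64_t
    ucf_perfctr_value_to_reload_count(uint64_t v)
    {
            /* If the PMC has overflowed, return a reload count of zero. */
            if ((v & (1ULL << (uncore_ucf_width - 1))) == 0)
                    return (0);
            v &= (1ULL << uncore_ucf_width) - 1;
            return ((1ULL << uncore_ucf_width) - v);
    }

    int
    main(void)
    {
            /* Just wrapped: clamped to 0, so the full period is reused. */
            printf("%ju\n",
                (uintmax_t)ucf_perfctr_value_to_reload_count(3));
            /* Mid-period: 1000 events left until the next overflow. */
            printf("%ju\n",
                (uintmax_t)ucf_perfctr_value_to_reload_count(
                (1ULL << 48) - 1000));
            return (0);
    }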