git: 7f36d7a9505a - main - hwpstate_amd(4): Consistency of cached CPPC_REQUEST value

From: Olivier Certner <olce_at_FreeBSD.org>
Date: Fri, 20 Feb 2026 10:13:24 UTC
The branch main has been updated by olce:

URL: https://cgit.FreeBSD.org/src/commit/?id=7f36d7a9505ab21f67ed806b18fbbe365043ed50

commit 7f36d7a9505ab21f67ed806b18fbbe365043ed50
Author:     Olivier Certner <olce@FreeBSD.org>
AuthorDate: 2026-02-09 16:21:57 +0000
Commit:     Olivier Certner <olce@FreeBSD.org>
CommitDate: 2026-02-20 10:12:48 +0000

    hwpstate_amd(4): Consistency of cached CPPC_REQUEST value
    
    If writing to the CPPC_REQUEST MSR fails, make sure we do not set the
    softc's 'cppc.request' field to the intended new value.  Both
    set_cppc_request_cb() and enable_cppc_cb() were changed to this effect.
    
    In case enable_cppc_cb() could not read CPPC_REQUEST, mark that through
    a new softc flag, HWPFL_CPPC_REQUEST_NOT_READ, so that we do not keep
    and use a wrong value when the content of CPPC_REQUEST is read/written
    through sysctl(9) knobs, but instead retry reading the MSR (this is the
    purpose of the new get_cppc_request() sub-function).
    
    When setting CPPC_REQUEST fails, distinguish the case where the MSR
    could not be read at all from the case where it could not be written,
    by returning EIO and EOPNOTSUPP respectively.  The previous
    return value of EFAULT was confusing as sysctl(3) documents it as
    happening if the passed arguments are invalid.
    
    While here, add an introductory comment before
    sysctl_cppc_dump_handler() clarifying that this function intentionally
    always queries the hardware directly, bypassing any value cached in
    the softc.
    
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D55251
---
 sys/x86/cpufreq/hwpstate_amd.c | 139 ++++++++++++++++++++++++++++++++---------
 1 file changed, 108 insertions(+), 31 deletions(-)

diff --git a/sys/x86/cpufreq/hwpstate_amd.c b/sys/x86/cpufreq/hwpstate_amd.c
index 7e845e172620..bbb8f5b864f8 100644
--- a/sys/x86/cpufreq/hwpstate_amd.c
+++ b/sys/x86/cpufreq/hwpstate_amd.c
@@ -149,7 +149,8 @@ struct hwpstate_setting {
 	int	pstate_id;	/* P-State id */
 };
 
-#define HWPFL_USE_CPPC		(1 << 0)
+#define HWPFL_USE_CPPC			(1 << 0)
+#define HWPFL_CPPC_REQUEST_NOT_READ	(1 << 1)
 
 /*
  * Atomicity is achieved by only modifying a given softc on its associated CPU
@@ -321,6 +322,9 @@ get_cppc_regs_cb(void *args)
 		data->res |= HWP_ERROR_CPPC_REQUEST;
 }
 
+/*
+ * Debug: Read all MSRs (bypassing the softc) and dump them.
+ */
 static int
 sysctl_cppc_dump_handler(SYSCTL_HANDLER_ARGS)
 {
@@ -364,26 +368,58 @@ sysctl_cppc_dump_handler(SYSCTL_HANDLER_ARGS)
 	return (error);
 }
 
+/*
+ * Read CPPC_REQUEST's value in the softc, if not already present.
+ */
+static int
+get_cppc_request(struct hwpstate_softc *const sc)
+{
+	uint64_t val;
+	int error;
+
+	check_cppc_in_use(sc, __func__);
+
+	if ((sc->flags & HWPFL_CPPC_REQUEST_NOT_READ) != 0) {
+		error = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &val);
+		if (error != 0)
+			return (EIO);
+		sc->flags &= ~HWPFL_CPPC_REQUEST_NOT_READ;
+		sc->cppc.request = val;
+	}
+
+	return (0);
+}
 
 struct set_cppc_request_cb {
 	struct hwpstate_softc	*sc;
 	uint64_t		 request;
 	uint64_t		 mask;
-	int			 res; /* 0 or HWP_ERROR_CPPC_REQUEST_WRITE */
+	int			 res; /* 0 or HWP_ERROR_CPPC_REQUEST* */
 };
 
 static void
 set_cppc_request_cb(void *args)
 {
 	struct set_cppc_request_cb *const data = args;
-	uint64_t *const req = &data->sc->cppc.request;
+	uint64_t *const sc_req = &data->sc->cppc.request;
+	uint64_t new_req;
 	int error;
 
-	*req &= ~data->mask;
-	*req |= data->request & data->mask;
+	/* We proceed sequentially, so we'll clear out errors on progress. */
+	data->res = HWP_ERROR_CPPC_REQUEST | HWP_ERROR_CPPC_REQUEST_WRITE;
 
-	error = wrmsr_safe(MSR_AMD_CPPC_REQUEST, *req);
-	data->res = error == 0 ? 0 : HWP_ERROR_CPPC_REQUEST_WRITE;
+	error = get_cppc_request(data->sc);
+	if (error != 0)
+		return;
+	data->res &= ~HWP_ERROR_CPPC_REQUEST;
+
+	new_req = (*sc_req & ~data->mask) | (data->request & data->mask);
+
+	error = wrmsr_safe(MSR_AMD_CPPC_REQUEST, new_req);
+	if (error != 0)
+		return;
+	data->res &= ~HWP_ERROR_CPPC_REQUEST_WRITE;
+	*sc_req = new_req;
 }
 
 static inline void
@@ -396,6 +432,21 @@ set_cppc_request_send_one(struct set_cppc_request_cb *const data, device_t dev)
 	    set_cppc_request_cb, smp_no_rendezvous_barrier, data);
 }
 
+static inline void
+set_cppc_request_update_error(const struct set_cppc_request_cb *const data,
+    int *const error)
+{
+	/* A read error has precedence on a write error. */
+	if (hwp_has_error(data->res, HWP_ERROR_CPPC_REQUEST))
+		*error = EIO;
+	else if (hwp_has_error(data->res, HWP_ERROR_CPPC_REQUEST_WRITE) &&
+	    *error != EIO)
+		*error = EOPNOTSUPP;
+	else if (data->res != 0)
+		/* Fallback case (normally not needed; defensive). */
+		*error = EFAULT;
+}
+
 static int
 set_cppc_request(device_t hwp_dev, uint64_t request, uint64_t mask)
 {
@@ -404,29 +455,35 @@ set_cppc_request(device_t hwp_dev, uint64_t request, uint64_t mask)
 		.mask = mask,
 		/* 'sc' filled by set_cppc_request_send_one(). */
 	};
-	int error;
+	int error = 0;
 
 	if (hwpstate_pkg_ctrl_enable) {
 		const devclass_t dc = devclass_find(HWP_AMD_CLASSNAME);
 		const int units = devclass_get_maxunit(dc);
 
-		error = 0;
 		for (int i = 0; i < units; ++i) {
 			const device_t dev = devclass_get_device(dc, i);
 
 			set_cppc_request_send_one(&data, dev);
-			if (data.res != 0)
-				/* Note the error, but continue. */
-				error = EFAULT;
+			/* Note errors, but always continue. */
+			set_cppc_request_update_error(&data, &error);
 		}
 	} else {
 		set_cppc_request_send_one(&data, hwp_dev);
-		error = data.res != 0 ? EFAULT : 0;
+		set_cppc_request_update_error(&data, &error);
 	}
 
 	return (error);
 }
 
+static void
+get_cppc_request_cb(void *args)
+{
+	struct hwpstate_softc *const sc = args;
+
+	(void)get_cppc_request(sc);
+}
+
 static int
 sysctl_cppc_request_field_handler(SYSCTL_HANDLER_ARGS)
 {
@@ -439,6 +496,16 @@ sysctl_cppc_request_field_handler(SYSCTL_HANDLER_ARGS)
 	/* Sysctl knob does not exist if HWPFL_USE_CPPC is not set. */
 	check_cppc_in_use(sc, __func__);
 
+	if ((sc->flags & HWPFL_CPPC_REQUEST_NOT_READ) != 0) {
+		const u_int cpuid = cpu_get_pcpu(dev)->pc_cpuid;
+
+		smp_rendezvous_cpu(cpuid, smp_no_rendezvous_barrier,
+		    get_cppc_request_cb, smp_no_rendezvous_barrier, sc);
+
+		if ((sc->flags & HWPFL_CPPC_REQUEST_NOT_READ) != 0)
+			return (EIO);
+	}
+
 	val = BITS_VALUE(arg2, sc->cppc.request);
 
 	error = sysctl_handle_int(oidp, &val, 0, req);
@@ -709,6 +776,8 @@ enable_cppc_cb(void *args)
 	data->res = HWP_ERROR_CPPC_ENABLE | HWP_ERROR_CPPC_CAPS |
 	    HWP_ERROR_CPPC_REQUEST | HWP_ERROR_CPPC_REQUEST_WRITE;
 
+	sc->flags |= HWPFL_CPPC_REQUEST_NOT_READ;
+
 	error = wrmsr_safe(MSR_AMD_CPPC_ENABLE, 1);
 	if (error != 0)
 		return;
@@ -719,23 +788,21 @@ enable_cppc_cb(void *args)
 		return;
 	data->res &= ~HWP_ERROR_CPPC_CAPS;
 
-	error = rdmsr_safe(MSR_AMD_CPPC_REQUEST, &sc->cppc.request);
+	error = get_cppc_request(sc);
 	if (error != 0)
 		return;
 	data->res &= ~HWP_ERROR_CPPC_REQUEST;
-	/* The CPPC_REQUEST value before we tweak it. */
 	data->init_request = sc->cppc.request;
 
+	data->request = sc->cppc.request;
 	/*
 	 * In Intel's reference manual, the default value of EPP is 0x80u which
 	 * is the balanced mode. For consistency, we set the same value in AMD's
 	 * CPPC driver.
 	 */
-	SET_BITS_VALUE(sc->cppc.request, AMD_CPPC_REQUEST_EPP_BITS, 0x80);
-
+	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_EPP_BITS, 0x80);
 	/* Enable autonomous mode by setting desired performance to 0. */
-	SET_BITS_VALUE(sc->cppc.request, AMD_CPPC_REQUEST_DES_PERF_BITS, 0);
-
+	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_DES_PERF_BITS, 0);
 	/*
 	 * When MSR_AMD_CPPC_CAPS_1 stays at its reset value (0) before CPPC
 	 * activation (not supposed to happen, but happens in the field), we use
@@ -752,16 +819,16 @@ enable_cppc_cb(void *args)
 		lowest_perf = 0;
 		highest_perf = -1;
 	}
-	SET_BITS_VALUE(sc->cppc.request, AMD_CPPC_REQUEST_MIN_PERF_BITS,
+	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_MIN_PERF_BITS,
 	    lowest_perf);
-	SET_BITS_VALUE(sc->cppc.request, AMD_CPPC_REQUEST_MAX_PERF_BITS,
+	SET_BITS_VALUE(data->request, AMD_CPPC_REQUEST_MAX_PERF_BITS,
 	    highest_perf);
 
-	error = wrmsr_safe(MSR_AMD_CPPC_REQUEST, sc->cppc.request);
+	error = wrmsr_safe(MSR_AMD_CPPC_REQUEST, data->request);
 	if (error != 0)
 		return;
 	data->res &= ~HWP_ERROR_CPPC_REQUEST_WRITE;
-	data->request = sc->cppc.request;
+	sc->cppc.request = data->request;
 }
 
 static int
@@ -784,9 +851,10 @@ enable_cppc(struct hwpstate_softc *sc)
 	device_printf(dev, "CPU%u: CPPC enabled.\n", cpuid);
 
 	/*
-	 * Now that we have enabled CPPC, we can't go back, so we'll attach even
-	 * in case of further malfunction, allowing the user to retry setting
-	 * CPPC_REQUEST via the sysctl knobs.
+	 * Now that we have enabled CPPC, we can't go back (hardware does not
+	 * support doing so), so we'll attach even in case of further
+	 * malfunction, allowing the user to retry retrieving/setting MSRs via
+	 * the sysctl knobs.
 	 */
 
 	sb = sbuf_new(&sbs, NULL, 0, SBUF_AUTOEXTEND);
@@ -802,11 +870,20 @@ enable_cppc(struct hwpstate_softc *sc)
 		print_cppc_no_request(sb);
 	else if (hwpstate_verbose)
 		print_cppc_request(sb, data.init_request);
-	if (hwp_has_error(data.res, HWP_ERROR_CPPC_REQUEST_WRITE))
-		device_printf(dev, "CPU%u: Could not write into "
-		    MSR_AMD_CPPC_REQUEST_NAME "!\n",
-		    cpuid);
-	else if (hwpstate_verbose) {
+	if (hwp_has_error(data.res, HWP_ERROR_CPPC_REQUEST_WRITE)) {
+		const bool request_read = !hwp_has_error(data.res,
+		    HWP_ERROR_CPPC_REQUEST);
+
+		/* This is printed first, as it is not printed into 'sb'. */
+		device_printf(dev, "CPU%u: %s not write into "
+		    MSR_AMD_CPPC_REQUEST_NAME "!\n", cpuid,
+		    request_read ? "Could" : "Did");
+		if (request_read) {
+			sbuf_printf(sb, "CPU%u: Failed when trying to set:",
+			    cpuid);
+			print_cppc_request(sb, data.request);
+		}
+	} else if (hwpstate_verbose) {
 		sbuf_printf(sb, "CPU%u: Tweaked MSR values:\n", cpuid);
 		print_cppc_request(sb, data.request);
 	}