git: 095cbb1bb7f7 - main - hwpstate_amd: Expose nodes as much as possible in legacy pstate

From: ShengYi Hung <aokblast_at_FreeBSD.org>
Date: Wed, 08 Apr 2026 07:48:47 UTC
The branch main has been updated by aokblast:

URL: https://cgit.FreeBSD.org/src/commit/?id=095cbb1bb7f7b4f742b0a500aa2c61a8c97d4b41

commit 095cbb1bb7f7b4f742b0a500aa2c61a8c97d4b41
Author:     ShengYi Hung <aokblast@FreeBSD.org>
AuthorDate: 2026-04-08 07:45:11 +0000
Commit:     ShengYi Hung <aokblast@FreeBSD.org>
CommitDate: 2026-04-08 07:48:34 +0000

    hwpstate_amd: Expose nodes as much as possible in legacy pstate
    
    Reviewed by:    olce
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D55606
---
 sys/x86/cpufreq/hwpstate_amd.c | 156 ++++++++++++++++++++++++-----------------
 1 file changed, 91 insertions(+), 65 deletions(-)

diff --git a/sys/x86/cpufreq/hwpstate_amd.c b/sys/x86/cpufreq/hwpstate_amd.c
index ccf13f06a6d1..4be295075482 100644
--- a/sys/x86/cpufreq/hwpstate_amd.c
+++ b/sys/x86/cpufreq/hwpstate_amd.c
@@ -552,6 +552,20 @@ hwpstate_amd_iscale(int val, int div)
 	return (val);
 }
 
+static void
+hwpstate_pstate_read_limit(int cpu, uint64_t *msr)
+{
+	(void)x86_msr_op(MSR_AMD_10H_11H_LIMIT,
+	    MSR_OP_READ | MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu), 0, msr);
+}
+
+static void
+hwpstate_pstate_read_status(int cpu, uint64_t *msr)
+{
+	(void)x86_msr_op(MSR_AMD_10H_11H_STATUS,
+	    MSR_OP_READ | MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu), 0, msr);
+}
+
 /*
  * Go to Px-state on all cpus, considering the limit register (if so
  * configured).
@@ -561,11 +575,13 @@ hwpstate_goto_pstate(device_t dev, int id)
 {
 	sbintime_t sbt;
 	uint64_t msr;
-	int cpu, i, j, limit;
+	int cpu, j, limit;
+
+	cpu = cpu_get_pcpu(dev)->pc_cpuid;
 
 	if (hwpstate_pstate_limit) {
 		/* get the current pstate limit */
-		msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
+		hwpstate_pstate_read_limit(cpu, &msr);
 		limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
 		if (limit > id) {
 			HWPSTATE_DEBUG(dev, "Restricting requested P%d to P%d "
@@ -574,53 +590,31 @@ hwpstate_goto_pstate(device_t dev, int id)
 		}
 	}
 
-	cpu = curcpu;
 	HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, cpu);
 	/* Go To Px-state */
-	wrmsr(MSR_AMD_10H_11H_CONTROL, id);
-
-	/*
-	 * We are going to the same Px-state on all cpus.
-	 * Probably should take _PSD into account.
-	 */
-	CPU_FOREACH(i) {
-		if (i == cpu)
-			continue;
-
-		/* Bind to each cpu. */
-		thread_lock(curthread);
-		sched_bind(curthread, i);
-		thread_unlock(curthread);
-		HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, i);
-		/* Go To Px-state */
-		wrmsr(MSR_AMD_10H_11H_CONTROL, id);
-	}
+	x86_msr_op(MSR_AMD_10H_11H_CONTROL,
+	    MSR_OP_WRITE | MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu), id, NULL);
 
 	/*
 	 * Verify whether each core is in the requested P-state.
 	 */
 	if (hwpstate_verify) {
-		CPU_FOREACH(i) {
-			thread_lock(curthread);
-			sched_bind(curthread, i);
-			thread_unlock(curthread);
-			/* wait loop (100*100 usec is enough ?) */
-			for (j = 0; j < 100; j++) {
-				/* get the result. not assure msr=id */
-				msr = rdmsr(MSR_AMD_10H_11H_STATUS);
-				if (msr == id)
-					break;
-				sbt = SBT_1MS / 10;
-				tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
-				    sbt >> tc_precexp, 0);
-			}
-			HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n",
-			    (int)msr, i);
-			if (msr != id) {
-				HWPSTATE_DEBUG(dev,
-				    "error: loop is not enough.\n");
-				return (ENXIO);
-			}
+		/* wait loop (100*100 usec is enough ?) */
+		for (j = 0; j < 100; j++) {
+			/* get the result. not assure msr=id */
+
+			hwpstate_pstate_read_status(cpu, &msr);
+			if (msr == id)
+				break;
+			sbt = SBT_1MS / 10;
+			tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
+			    sbt >> tc_precexp, 0);
+		}
+		HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n", (int)msr,
+		    cpu);
+		if (msr != id) {
+			HWPSTATE_DEBUG(dev, "error: loop is not enough.\n");
+			return (ENXIO);
 		}
 	}
 
@@ -670,7 +664,6 @@ hwpstate_get_cppc(device_t dev, struct cf_setting *cf)
 	pc = cpu_get_pcpu(dev);
 	if (pc == NULL)
 		return (ENXIO);
-
 	memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf));
 	cf->dev = dev;
 	if ((ret = cpu_est_clockrate(pc->pc_cpuid, &rate)))
@@ -685,13 +678,14 @@ hwpstate_get_pstate(device_t dev, struct cf_setting *cf)
 	struct hwpstate_softc *sc;
 	struct hwpstate_setting set;
 	uint64_t msr;
+	int cpu;
 
 	sc = device_get_softc(dev);
-	msr = rdmsr(MSR_AMD_10H_11H_STATUS);
+	cpu = cpu_get_pcpu(dev)->pc_cpuid;
+	hwpstate_pstate_read_status(cpu, &msr);
 	if (msr >= sc->cfnum)
 		return (EINVAL);
 	set = sc->hwpstate_settings[msr];
-
 	cf->freq = set.freq;
 	cf->volts = set.volts;
 	cf->power = set.power;
@@ -967,8 +961,10 @@ hwpstate_probe_pstate(device_t dev)
 	device_t perf_dev;
 	int error, type;
 	uint64_t msr;
+	int cpu;
 
 	sc = device_get_softc(dev);
+	cpu = cpu_get_pcpu(dev)->pc_cpuid;
 	/*
 	 * Check if acpi_perf has INFO only flag.
 	 */
@@ -985,15 +981,15 @@ hwpstate_probe_pstate(device_t dev)
 				 */
 				HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n");
 				return (ENXIO);
-			} else {
-				/*
-				 * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW)
-				 * we can get _PSS info from acpi_perf
-				 * without going into ACPI.
-				 */
-				HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n");
-				error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
 			}
+			/*
+			 * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW)
+			 * we can get _PSS info from acpi_perf
+			 * without going into ACPI.
+			 */
+			HWPSTATE_DEBUG(dev,
+			    "going to fetch info from acpi_perf\n");
+			error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
 		}
 	}
 
@@ -1002,7 +998,7 @@ hwpstate_probe_pstate(device_t dev)
 		 * Now we get _PSS info from acpi_perf without error.
 		 * Let's check it.
 		 */
-		msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
+		hwpstate_pstate_read_limit(cpu, &msr);
 		if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) {
 			HWPSTATE_DEBUG(dev, "MSR (%jd) and ACPI _PSS (%d)"
 			    " count mismatch\n", (intmax_t)msr, sc->cfnum);
@@ -1042,15 +1038,8 @@ hwpstate_probe(device_t dev)
 		sc->flags |= HWPFL_USE_CPPC;
 		device_set_desc(dev,
 		    "AMD Collaborative Processor Performance Control (CPPC)");
-	} else {
-		/*
-		 * No CPPC support.  Only keep hwpstate0, it goes well with
-		 * acpi_throttle.
-		 */
-		if (device_get_unit(dev) != 0)
-			return (ENXIO);
+	} else
 		device_set_desc(dev, "Cool`n'Quiet 2.0");
-	}
 
 	sc->dev = dev;
 	if ((sc->flags & HWPFL_USE_CPPC) != 0) {
@@ -1119,22 +1108,59 @@ hwpstate_attach(device_t dev)
 	return (cpufreq_register(dev));
 }
 
+struct hwpstate_pstate_read_settings_cb {
+	struct hwpstate_softc *sc;
+	uint64_t *vals;
+	int err;
+};
+
+static void
+hwpstate_pstate_read_settings_cb(void *args)
+{
+	struct hwpstate_pstate_read_settings_cb *req = args;
+	int i;
+
+	req->err = 0;
+	for (i = 0; i < req->sc->cfnum; i++) {
+		req->err = rdmsr_safe(MSR_AMD_10H_11H_CONFIG + i,
+		    &req->vals[i]);
+		if (req->err != 0)
+			return;
+	}
+}
+
+static int
+hwpstate_pstate_read_settings(struct hwpstate_softc *sc, uint64_t vals[])
+{
+	struct hwpstate_pstate_read_settings_cb req;
+	device_t dev;
+
+	req.sc = sc;
+	req.vals = vals;
+	dev = sc->dev;
+	smp_rendezvous_cpu(cpu_get_pcpu(dev)->pc_cpuid,
+	    smp_no_rendezvous_barrier, hwpstate_pstate_read_settings_cb,
+	    smp_no_rendezvous_barrier, &req);
+	return (req.err);
+}
+
 static int
 hwpstate_get_info_from_msr(device_t dev)
 {
 	struct hwpstate_softc *sc;
 	struct hwpstate_setting *hwpstate_set;
-	uint64_t msr;
+	uint64_t state_settings[AMD_10H_11H_MAX_STATES], msr;
 	int family, i, fid, did;
 
 	family = CPUID_TO_FAMILY(cpu_id);
 	sc = device_get_softc(dev);
 	/* Get pstate count */
-	msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
+	hwpstate_pstate_read_limit(cpu_get_pcpu(dev)->pc_cpuid, &msr);
 	sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr);
 	hwpstate_set = sc->hwpstate_settings;
+	hwpstate_pstate_read_settings(sc, state_settings);
 	for (i = 0; i < sc->cfnum; i++) {
-		msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i);
+		msr = state_settings[i];
 		if ((msr & ((uint64_t)1 << 63)) == 0) {
 			HWPSTATE_DEBUG(dev, "msr is not valid.\n");
 			return (ENXIO);