git: 095cbb1bb7f7 - main - hwpstate_amd: Expose nodes as much as possible in legacy pstate
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 08 Apr 2026 07:48:47 UTC
The branch main has been updated by aokblast:
URL: https://cgit.FreeBSD.org/src/commit/?id=095cbb1bb7f7b4f742b0a500aa2c61a8c97d4b41
commit 095cbb1bb7f7b4f742b0a500aa2c61a8c97d4b41
Author: ShengYi Hung <aokblast@FreeBSD.org>
AuthorDate: 2026-04-08 07:45:11 +0000
Commit: ShengYi Hung <aokblast@FreeBSD.org>
CommitDate: 2026-04-08 07:48:34 +0000
hwpstate_amd: Expose nodes as much as possible in legacy pstate
Reviewed by: olce
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D55606
---
sys/x86/cpufreq/hwpstate_amd.c | 156 ++++++++++++++++++++++++-----------------
1 file changed, 91 insertions(+), 65 deletions(-)
diff --git a/sys/x86/cpufreq/hwpstate_amd.c b/sys/x86/cpufreq/hwpstate_amd.c
index ccf13f06a6d1..4be295075482 100644
--- a/sys/x86/cpufreq/hwpstate_amd.c
+++ b/sys/x86/cpufreq/hwpstate_amd.c
@@ -552,6 +552,20 @@ hwpstate_amd_iscale(int val, int div)
return (val);
}
+static void
+hwpstate_pstate_read_limit(int cpu, uint64_t *msr)
+{
+ (void)x86_msr_op(MSR_AMD_10H_11H_LIMIT,
+ MSR_OP_READ | MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu), 0, msr);
+}
+
+static void
+hwpstate_pstate_read_status(int cpu, uint64_t *msr)
+{
+ (void)x86_msr_op(MSR_AMD_10H_11H_STATUS,
+ MSR_OP_READ | MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu), 0, msr);
+}
+
/*
* Go to Px-state on all cpus, considering the limit register (if so
* configured).
@@ -561,11 +575,13 @@ hwpstate_goto_pstate(device_t dev, int id)
{
sbintime_t sbt;
uint64_t msr;
- int cpu, i, j, limit;
+ int cpu, j, limit;
+
+ cpu = cpu_get_pcpu(dev)->pc_cpuid;
if (hwpstate_pstate_limit) {
/* get the current pstate limit */
- msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
+ hwpstate_pstate_read_limit(cpu, &msr);
limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
if (limit > id) {
HWPSTATE_DEBUG(dev, "Restricting requested P%d to P%d "
@@ -574,53 +590,31 @@ hwpstate_goto_pstate(device_t dev, int id)
}
}
- cpu = curcpu;
HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, cpu);
/* Go To Px-state */
- wrmsr(MSR_AMD_10H_11H_CONTROL, id);
-
- /*
- * We are going to the same Px-state on all cpus.
- * Probably should take _PSD into account.
- */
- CPU_FOREACH(i) {
- if (i == cpu)
- continue;
-
- /* Bind to each cpu. */
- thread_lock(curthread);
- sched_bind(curthread, i);
- thread_unlock(curthread);
- HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, i);
- /* Go To Px-state */
- wrmsr(MSR_AMD_10H_11H_CONTROL, id);
- }
+ x86_msr_op(MSR_AMD_10H_11H_CONTROL,
+ MSR_OP_WRITE | MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu), id, NULL);
/*
* Verify whether each core is in the requested P-state.
*/
if (hwpstate_verify) {
- CPU_FOREACH(i) {
- thread_lock(curthread);
- sched_bind(curthread, i);
- thread_unlock(curthread);
- /* wait loop (100*100 usec is enough ?) */
- for (j = 0; j < 100; j++) {
- /* get the result. not assure msr=id */
- msr = rdmsr(MSR_AMD_10H_11H_STATUS);
- if (msr == id)
- break;
- sbt = SBT_1MS / 10;
- tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
- sbt >> tc_precexp, 0);
- }
- HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n",
- (int)msr, i);
- if (msr != id) {
- HWPSTATE_DEBUG(dev,
- "error: loop is not enough.\n");
- return (ENXIO);
- }
+ /* wait loop (100*100 usec is enough ?) */
+ for (j = 0; j < 100; j++) {
+ /* get the result. not assure msr=id */
+
+ hwpstate_pstate_read_status(cpu, &msr);
+ if (msr == id)
+ break;
+ sbt = SBT_1MS / 10;
+ tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
+ sbt >> tc_precexp, 0);
+ }
+ HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n", (int)msr,
+ cpu);
+ if (msr != id) {
+ HWPSTATE_DEBUG(dev, "error: loop is not enough.\n");
+ return (ENXIO);
}
}
@@ -670,7 +664,6 @@ hwpstate_get_cppc(device_t dev, struct cf_setting *cf)
pc = cpu_get_pcpu(dev);
if (pc == NULL)
return (ENXIO);
-
memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf));
cf->dev = dev;
if ((ret = cpu_est_clockrate(pc->pc_cpuid, &rate)))
@@ -685,13 +678,14 @@ hwpstate_get_pstate(device_t dev, struct cf_setting *cf)
struct hwpstate_softc *sc;
struct hwpstate_setting set;
uint64_t msr;
+ int cpu;
sc = device_get_softc(dev);
- msr = rdmsr(MSR_AMD_10H_11H_STATUS);
+ cpu = cpu_get_pcpu(dev)->pc_cpuid;
+ hwpstate_pstate_read_status(cpu, &msr);
if (msr >= sc->cfnum)
return (EINVAL);
set = sc->hwpstate_settings[msr];
-
cf->freq = set.freq;
cf->volts = set.volts;
cf->power = set.power;
@@ -967,8 +961,10 @@ hwpstate_probe_pstate(device_t dev)
device_t perf_dev;
int error, type;
uint64_t msr;
+ int cpu;
sc = device_get_softc(dev);
+ cpu = cpu_get_pcpu(dev)->pc_cpuid;
/*
* Check if acpi_perf has INFO only flag.
*/
@@ -985,15 +981,15 @@ hwpstate_probe_pstate(device_t dev)
*/
HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n");
return (ENXIO);
- } else {
- /*
- * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW)
- * we can get _PSS info from acpi_perf
- * without going into ACPI.
- */
- HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n");
- error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
}
+ /*
+ * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW)
+ * we can get _PSS info from acpi_perf
+ * without going into ACPI.
+ */
+ HWPSTATE_DEBUG(dev,
+ "going to fetch info from acpi_perf\n");
+ error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
}
}
@@ -1002,7 +998,7 @@ hwpstate_probe_pstate(device_t dev)
* Now we get _PSS info from acpi_perf without error.
* Let's check it.
*/
- msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
+ hwpstate_pstate_read_limit(cpu, &msr);
if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) {
HWPSTATE_DEBUG(dev, "MSR (%jd) and ACPI _PSS (%d)"
" count mismatch\n", (intmax_t)msr, sc->cfnum);
@@ -1042,15 +1038,8 @@ hwpstate_probe(device_t dev)
sc->flags |= HWPFL_USE_CPPC;
device_set_desc(dev,
"AMD Collaborative Processor Performance Control (CPPC)");
- } else {
- /*
- * No CPPC support. Only keep hwpstate0, it goes well with
- * acpi_throttle.
- */
- if (device_get_unit(dev) != 0)
- return (ENXIO);
+ } else
device_set_desc(dev, "Cool`n'Quiet 2.0");
- }
sc->dev = dev;
if ((sc->flags & HWPFL_USE_CPPC) != 0) {
@@ -1119,22 +1108,59 @@ hwpstate_attach(device_t dev)
return (cpufreq_register(dev));
}
+struct hwpstate_pstate_read_settings_cb {
+ struct hwpstate_softc *sc;
+ uint64_t *vals;
+ int err;
+};
+
+static void
+hwpstate_pstate_read_settings_cb(void *args)
+{
+ struct hwpstate_pstate_read_settings_cb *req = args;
+ int i;
+
+ req->err = 0;
+ for (i = 0; i < req->sc->cfnum; i++) {
+ req->err = rdmsr_safe(MSR_AMD_10H_11H_CONFIG + i,
+ &req->vals[i]);
+ if (req->err != 0)
+ return;
+ }
+}
+
+static int
+hwpstate_pstate_read_settings(struct hwpstate_softc *sc, uint64_t vals[])
+{
+ struct hwpstate_pstate_read_settings_cb req;
+ device_t dev;
+
+ req.sc = sc;
+ req.vals = vals;
+ dev = sc->dev;
+ smp_rendezvous_cpu(cpu_get_pcpu(dev)->pc_cpuid,
+ smp_no_rendezvous_barrier, hwpstate_pstate_read_settings_cb,
+ smp_no_rendezvous_barrier, &req);
+ return (req.err);
+}
+
static int
hwpstate_get_info_from_msr(device_t dev)
{
struct hwpstate_softc *sc;
struct hwpstate_setting *hwpstate_set;
- uint64_t msr;
+ uint64_t state_settings[AMD_10H_11H_MAX_STATES], msr;
int family, i, fid, did;
family = CPUID_TO_FAMILY(cpu_id);
sc = device_get_softc(dev);
/* Get pstate count */
- msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
+ hwpstate_pstate_read_limit(cpu_get_pcpu(dev)->pc_cpuid, &msr);
sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr);
hwpstate_set = sc->hwpstate_settings;
+ hwpstate_pstate_read_settings(sc, state_settings);
for (i = 0; i < sc->cfnum; i++) {
- msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i);
+ msr = state_settings[i];
if ((msr & ((uint64_t)1 << 63)) == 0) {
HWPSTATE_DEBUG(dev, "msr is not valid.\n");
return (ENXIO);