PATCH - update TSC freq when cpufreq changes it

Nate Lawson nate at root.org
Tue Feb 27 23:06:07 UTC 2007


Attached is a patch that uses eventhandlers to update the TSC freq.
This is important because DELAY() uses the TSC directly (on i386 and amd64),
but the rate calculated at boot becomes stale once cpufreq changes the
CPU frequency.

It maintains the current behavior that cpufreq transitions are denied if TSC
is the active timecounter.  The API consists of a pre-transition and a
post-transition eventhandler, both invoked by the cpufreq core.  The pre
handler is passed the next state (including freq, power, etc.) and can
store a non-zero status value in the output arg to indicate it wants to
reject the transition.  The post handler is also passed the next state,
along with the result of the transition (0 on success).

Once any issues are addressed, I'll update this for amd64, ALTQ, and
possibly PC98.  Non-x86 archs can stick with the current behavior if
they're satisfied or hook the eventhandlers provided to DTRT.

-- 
Nate
-------------- next part --------------
Index: sys/i386/i386/tsc.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/tsc.c,v
retrieving revision 1.206
diff -u -r1.206 tsc.c
--- sys/i386/i386/tsc.c	4 Aug 2006 07:56:35 -0000	1.206
+++ sys/i386/i386/tsc.c	27 Feb 2007 21:57:46 -0000
@@ -30,6 +30,8 @@
 #include "opt_clock.h"
 
 #include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
@@ -52,14 +54,20 @@
 TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
 #endif
 
+static eventhandler_tag evh_pre_tag, evh_post_tag;
+
 static	unsigned tsc_get_timecount(struct timecounter *tc);
+static	void tsc_freq_changing(void *arg, const struct cf_level *level,
+    int *status);
+static	void tsc_freq_changed(void *arg, const struct cf_level *level,
+    int *status);
 
 static struct timecounter tsc_timecounter = {
 	tsc_get_timecount,	/* get_timecount */
 	0,			/* no poll_pps */
- 	~0u,			/* counter_mask */
+	~0u,			/* counter_mask */
 	0,			/* frequency */
-	 "TSC",			/* name */
+	"TSC",			/* name */
 	800,			/* quality (adjusted in code) */
 };
 
@@ -87,8 +95,13 @@
 	if (bootverbose)
 		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
 	set_cputicker(rdtsc, tsc_freq, 1);
-}
 
+	/* Register to find out about changes in CPU frequency. */
+	evh_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
+	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_ANY);
+	evh_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
+	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY);
+}
 
 void
 init_TSC_tc(void)
@@ -128,6 +141,38 @@
 	}
 }
 
+/*
+ * If the TSC timecounter is in use, veto the pending change.  It may be
+ * possible in the future to handle a dynamically-changing timecounter rate.
+ */
+static void
+tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
+{
+	static int once;	/* non-zero after the denial was printed */
+
+	if (*status == 0 && timecounter == &tsc_timecounter) {
+		if (!once) {
+			printf("timecounter TSC must not be in use when "
+			     "changing frequencies; change denied\n");
+			once = 1;
+		}
+		*status = EBUSY;	/* non-zero status rejects the change */
+	}
+}
+
+/* Post-change hook: on success, set TSC freq to the new level's value. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int *status)
+{
+	/* If there was an error during the transition, don't do anything. */
+	if (*status != 0)
+		return;
+
+	/* Total setting is in MHz; widen first to avoid int overflow. */
+	tsc_freq = (uint64_t)level->total_set.freq * 1000000;
+	tsc_timecounter.tc_frequency = tsc_freq;
+}
+
 static int
 sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
 {
Index: sys/kern/kern_cpu.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_cpu.c,v
retrieving revision 1.23
diff -u -r1.23 kern_cpu.c
--- sys/kern/kern_cpu.c	3 Mar 2006 02:06:04 -0000	1.23
+++ sys/kern/kern_cpu.c	27 Feb 2007 21:59:59 -0000
@@ -95,7 +95,6 @@
 
 static int	cpufreq_attach(device_t dev);
 static int	cpufreq_detach(device_t dev);
-static void	cpufreq_evaluate(void *arg);
 static int	cf_set_method(device_t dev, const struct cf_level *level,
 		    int priority);
 static int	cf_get_method(device_t dev, struct cf_level *level);
@@ -127,8 +126,6 @@
 static devclass_t cpufreq_dc;
 DRIVER_MODULE(cpufreq, cpu, cpufreq_driver, cpufreq_dc, 0, 0);
 
-static eventhandler_tag	cf_ev_tag;
-
 static int		cf_lowest_freq;
 static int		cf_verbose;
 TUNABLE_INT("debug.cpufreq.lowest", &cf_lowest_freq);
@@ -176,8 +173,6 @@
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(parent)),
 	    OID_AUTO, "freq_levels", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    cpufreq_levels_sysctl, "A", "CPU frequency levels");
-	cf_ev_tag = EVENTHANDLER_REGISTER(cpufreq_changed, cpufreq_evaluate,
-	    NULL, EVENTHANDLER_PRI_ANY);
 
 	return (0);
 }
@@ -202,18 +197,11 @@
 	numdevs = devclass_get_count(cpufreq_dc);
 	if (numdevs == 1) {
 		CF_DEBUG("final shutdown for %s\n", device_get_nameunit(dev));
-		EVENTHANDLER_DEREGISTER(cpufreq_changed, cf_ev_tag);
 	}
 
 	return (0);
 }
 
-static void
-cpufreq_evaluate(void *arg)
-{
-	/* TODO: Re-evaluate when notified of changes to drivers. */
-}
-
 static int
 cf_set_method(device_t dev, const struct cf_level *level, int priority)
 {
@@ -222,26 +210,16 @@
 	struct cf_saved_freq *saved_freq, *curr_freq;
 	struct pcpu *pc;
 	int cpu_id, error, i;
-	static int once;
 
 	sc = device_get_softc(dev);
 	error = 0;
 	set = NULL;
 	saved_freq = NULL;
 
-	/*
-	 * Check that the TSC isn't being used as a timecounter.
-	 * If it is, then return EBUSY and refuse to change the
-	 * clock speed.
-	 */
-	if (strcmp(timecounter->tc_name, "TSC") == 0) {
-		if (!once) {
-			printf("cpufreq: frequency change with timecounter"
-				" TSC not allowed, see cpufreq(4)\n");
-			once = 1;
-		}
-		return (EBUSY);
-	}
+	/* We are going to change levels so notify the pre-change handler. */
+	EVENTHANDLER_INVOKE(cpufreq_pre_change, level, &error);
+	if (error != 0)
+		return (error);
 
 	CF_MTX_LOCK(&sc->lock);
 
@@ -378,8 +356,15 @@
 
 out:
 	CF_MTX_UNLOCK(&sc->lock);
+
+	/*
+	 * We changed levels (or attempted to) so notify the post-change
+	 * handler of new frequency or error.
+	 */
+	EVENTHANDLER_INVOKE(cpufreq_post_change, level, &error);
 	if (error && set)
 		device_printf(set->dev, "set freq failed, err %d\n", error);
+
 	return (error);
 }
 
Index: sys/sys/cpu.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/cpu.h,v
retrieving revision 1.3
diff -u -r1.3 cpu.h
--- sys/sys/cpu.h	19 Feb 2005 06:13:25 -0000	1.3
+++ sys/sys/cpu.h	27 Feb 2007 19:49:37 -0000
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005 Nate Lawson (SDG)
+ * Copyright (c) 2005-2007 Nate Lawson (SDG)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,8 @@
 #ifndef _SYS_CPU_H_
 #define _SYS_CPU_H_
 
+#include <sys/eventhandler.h>
+
 /*
  * CPU device support.
  */
@@ -118,6 +120,11 @@
 int	cpufreq_register(device_t dev);
 int	cpufreq_unregister(device_t dev);
 
+/* Eventhandlers that are called before and after a change in frequency */
+typedef void (*cpufreq_notify_fn)(void *, const struct cf_level *, int *);
+EVENTHANDLER_DECLARE(cpufreq_pre_change, cpufreq_notify_fn);
+EVENTHANDLER_DECLARE(cpufreq_post_change, cpufreq_notify_fn);
+
 /* Allow values to be +/- a bit since sometimes we have to estimate. */
 #define CPUFREQ_CMP(x, y)	(abs((x) - (y)) < 25)
 


More information about the freebsd-current mailing list