PATCH: cpu freq notifiers and eventhandler register from SYSINIT

Nate Lawson nate at root.org
Tue Mar 20 01:28:49 UTC 2007


Nate Lawson wrote:
> Attached is an updated patch for handling cpu freq changes.  It updates
> tsc_freq and all direct consumers of it.
> 
> I also would like to add something to eventhandler.h.  As I was doing
> this, I really wanted a way to declare an eventhandler tag and call
> eventhandler_register() from a SYSINIT.  I ended up doing that manually
> in a lot of cases where I couldn't register up front because it was too
> early in boot (i.e. identcpu).  Comments on adding this macro, similar
> to TASKQUEUE_DEFINE in taskqueue.h?  Maybe EVENTHANDLER_REGISTER_INIT()?
> 
> I wasn't sure what to do for guprof since it is using TSC at times.  I
> decided to just have it print a warning if the TSC freq changed while
> profiling was active.  Let me know if I should do something else here.
> 
> For altq, it should be correct but please check my change.  I'm not sure
> what to do for packets that are being processed if machclk_freq changes.
>  It might be ok.

Attached is what I think is the pre-final patch.  It uses the new
eventhandler macro EVENTHANDLER_DEFINE(), similar to TASKQUEUE_DEFINE()
to register an eventhandler with a SYSINIT.  It also adds support for
TSC figuring out the max clockrate on boot based on whatever cpufreq
drivers are available.  If drivers are later loaded, this gets updated.
 This gets rid of the "calcru: went backwards" messages.

I've tested that it builds and works fully on i386 with cpufreq.  It
would be nice to get an amd64 test and any comments on the changes.
Also, if anyone can comment on the altq change, that would be nice.
ALTQ assumes 4 GHz clock max, which is kind of a weakness.  I don't fix
that or break it any worse though.

I plan to commit it in 3 days if no objections.

Thanks,
-- 
Nate
-------------- next part --------------
Index: sys/sys/eventhandler.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/eventhandler.h,v
retrieving revision 1.35
diff -u -r1.35 eventhandler.h
--- sys/sys/eventhandler.h	15 Aug 2006 12:10:57 -0000	1.35
+++ sys/sys/eventhandler.h	18 Mar 2007 04:29:51 -0000
@@ -104,6 +104,17 @@
 };									\
 struct __hack
 
+#define EVENTHANDLER_DEFINE(name, func, arg, priority)			\
+	static eventhandler_tag name ## _tag;				\
+	static void name ## _evh_init(void *ctx)			\
+	{								\
+		name ## _tag = EVENTHANDLER_REGISTER(name, func, ctx,	\
+		    priority);						\
+	}								\
+	SYSINIT(name ## _evh_init, SI_SUB_CONFIGURE, SI_ORDER_ANY,	\
+	    name ## _evh_init, arg)					\
+	struct __hack
+
 #define EVENTHANDLER_INVOKE(name, ...)					\
 do {									\
 	struct eventhandler_list *_el;					\
Index: sys/sys/cpu.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/cpu.h,v
retrieving revision 1.3
diff -u -r1.3 cpu.h
--- sys/sys/cpu.h	19 Feb 2005 06:13:25 -0000	1.3
+++ sys/sys/cpu.h	19 Mar 2007 19:21:10 -0000
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005 Nate Lawson (SDG)
+ * Copyright (c) 2005-2007 Nate Lawson (SDG)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,8 @@
 #ifndef _SYS_CPU_H_
 #define _SYS_CPU_H_
 
+#include <sys/eventhandler.h>
+
 /*
  * CPU device support.
  */
@@ -118,6 +120,36 @@
 int	cpufreq_register(device_t dev);
 int	cpufreq_unregister(device_t dev);
 
+/*
+ * Notify the cpufreq core that the number of or values for settings have
+ * changed.
+ */
+int	cpufreq_settings_changed(device_t dev);
+
+/*
+ * Eventhandlers that are called before and after a change in frequency.
+ * The new level and the result of the change (0 is success) are passed in.
+ * If the driver wishes to revoke the change from cpufreq_pre_change, it
+ * stores a non-zero error code in the result parameter and the change will
+ * not be made.  If the post-change eventhandler gets a non-zero result, 
+ * no change was made and the previous level remains in effect.  If a change
+ * is revoked, the post-change eventhandler is still called with the error
+ * value supplied by the revoking driver.  This gives listeners who cached
+ * some data in preparation for a level change a chance to clean up.
+ */
+typedef void (*cpufreq_pre_notify_fn)(void *, const struct cf_level *, int *);
+typedef void (*cpufreq_post_notify_fn)(void *, const struct cf_level *, int);
+EVENTHANDLER_DECLARE(cpufreq_pre_change, cpufreq_pre_notify_fn);
+EVENTHANDLER_DECLARE(cpufreq_post_change, cpufreq_post_notify_fn);
+
+/*
+ * Eventhandler called when the available list of levels changed.
+ * The unit number of the device (i.e. "cpufreq0") whose levels changed
+ * is provided so the listener can retrieve the new list of levels.
+ */
+typedef void (*cpufreq_levels_notify_fn)(void *, int);
+EVENTHANDLER_DECLARE(cpufreq_levels_changed, cpufreq_levels_notify_fn);
+
 /* Allow values to be +/- a bit since sometimes we have to estimate. */
 #define CPUFREQ_CMP(x, y)	(abs((x) - (y)) < 25)
 
Index: sys/kern/kern_cpu.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_cpu.c,v
retrieving revision 1.23
diff -u -r1.23 kern_cpu.c
--- sys/kern/kern_cpu.c	3 Mar 2006 02:06:04 -0000	1.23
+++ sys/kern/kern_cpu.c	19 Mar 2007 20:36:53 -0000
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2004-2005 Nate Lawson (SDG)
+ * Copyright (c) 2004-2007 Nate Lawson (SDG)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,7 @@
 #include <sys/sbuf.h>
 #include <sys/sx.h>
 #include <sys/timetc.h>
+#include <sys/taskqueue.h>
 
 #include "cpufreq_if.h"
 
@@ -73,6 +74,7 @@
 	int				max_mhz;
 	device_t			dev;
 	struct sysctl_ctx_list		sysctl_ctx;
+	struct task			startup_task;
 };
 
 struct cf_setting_array {
@@ -94,8 +96,8 @@
 	} while (0)
 
 static int	cpufreq_attach(device_t dev);
+static void	cpufreq_startup_task(void *ctx, int pending);
 static int	cpufreq_detach(device_t dev);
-static void	cpufreq_evaluate(void *arg);
 static int	cf_set_method(device_t dev, const struct cf_level *level,
 		    int priority);
 static int	cf_get_method(device_t dev, struct cf_level *level);
@@ -127,8 +129,6 @@
 static devclass_t cpufreq_dc;
 DRIVER_MODULE(cpufreq, cpu, cpufreq_driver, cpufreq_dc, 0, 0);
 
-static eventhandler_tag	cf_ev_tag;
-
 static int		cf_lowest_freq;
 static int		cf_verbose;
 TUNABLE_INT("debug.cpufreq.lowest", &cf_lowest_freq);
@@ -176,12 +176,25 @@
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(parent)),
 	    OID_AUTO, "freq_levels", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    cpufreq_levels_sysctl, "A", "CPU frequency levels");
-	cf_ev_tag = EVENTHANDLER_REGISTER(cpufreq_changed, cpufreq_evaluate,
-	    NULL, EVENTHANDLER_PRI_ANY);
+
+	/*
+	 * Queue a one-shot broadcast that levels have changed.
+	 * It will run once the system has completed booting.
+	 */
+	TASK_INIT(&sc->startup_task, 0, cpufreq_startup_task, dev);
+	taskqueue_enqueue(taskqueue_thread, &sc->startup_task);
 
 	return (0);
 }
 
+/* Handle any work to be done for all drivers that attached during boot. */
+static void 
+cpufreq_startup_task(void *ctx, int pending)
+{
+
+	cpufreq_settings_changed((device_t)ctx);
+}
+
 static int
 cpufreq_detach(device_t dev)
 {
@@ -202,18 +215,11 @@
 	numdevs = devclass_get_count(cpufreq_dc);
 	if (numdevs == 1) {
 		CF_DEBUG("final shutdown for %s\n", device_get_nameunit(dev));
-		EVENTHANDLER_DEREGISTER(cpufreq_changed, cf_ev_tag);
 	}
 
 	return (0);
 }
 
-static void
-cpufreq_evaluate(void *arg)
-{
-	/* TODO: Re-evaluate when notified of changes to drivers. */
-}
-
 static int
 cf_set_method(device_t dev, const struct cf_level *level, int priority)
 {
@@ -222,25 +228,17 @@
 	struct cf_saved_freq *saved_freq, *curr_freq;
 	struct pcpu *pc;
 	int cpu_id, error, i;
-	static int once;
 
 	sc = device_get_softc(dev);
 	error = 0;
 	set = NULL;
 	saved_freq = NULL;
 
-	/*
-	 * Check that the TSC isn't being used as a timecounter.
-	 * If it is, then return EBUSY and refuse to change the
-	 * clock speed.
-	 */
-	if (strcmp(timecounter->tc_name, "TSC") == 0) {
-		if (!once) {
-			printf("cpufreq: frequency change with timecounter"
-				" TSC not allowed, see cpufreq(4)\n");
-			once = 1;
-		}
-		return (EBUSY);
+	/* We are going to change levels so notify the pre-change handler. */
+	EVENTHANDLER_INVOKE(cpufreq_pre_change, level, &error);
+	if (error != 0) {
+		EVENTHANDLER_INVOKE(cpufreq_post_change, level, error);
+		return (error);
 	}
 
 	CF_MTX_LOCK(&sc->lock);
@@ -378,8 +376,15 @@
 
 out:
 	CF_MTX_UNLOCK(&sc->lock);
+
+	/*
+	 * We changed levels (or attempted to) so notify the post-change
+	 * handler of new frequency or error.
+	 */
+	EVENTHANDLER_INVOKE(cpufreq_post_change, level, error);
 	if (error && set)
 		device_printf(set->dev, "set freq failed, err %d\n", error);
+
 	return (error);
 }
 
@@ -1021,3 +1026,12 @@
 
 	return (0);
 }
+
+int
+cpufreq_settings_changed(device_t dev)
+{
+
+	EVENTHANDLER_INVOKE(cpufreq_levels_changed,
+	    device_get_unit(device_get_parent(dev)));
+	return (0);
+}
Index: sys/i386/i386/tsc.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/tsc.c,v
retrieving revision 1.206
diff -u -r1.206 tsc.c
--- sys/i386/i386/tsc.c	4 Aug 2006 07:56:35 -0000	1.206
+++ sys/i386/i386/tsc.c	19 Mar 2007 21:11:59 -0000
@@ -30,6 +30,9 @@
 #include "opt_clock.h"
 
 #include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/malloc.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
@@ -41,6 +44,8 @@
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 
+#include "cpufreq_if.h"
+
 uint64_t	tsc_freq;
 int		tsc_is_broken;
 u_int		tsc_present;
@@ -52,14 +57,21 @@
 TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
 #endif
 
+static eventhandler_tag evh_levels_tag, evh_pre_tag, evh_post_tag;
+
+static void tsc_freq_changed(void *arg, const struct cf_level *level,
+    int status);
+static void tsc_freq_changing(void *arg, const struct cf_level *level,
+    int *status);
 static	unsigned tsc_get_timecount(struct timecounter *tc);
+static void tsc_levels_changed(void *arg, int unit);
 
 static struct timecounter tsc_timecounter = {
 	tsc_get_timecount,	/* get_timecount */
 	0,			/* no poll_pps */
- 	~0u,			/* counter_mask */
+	~0u,			/* counter_mask */
 	0,			/* frequency */
-	 "TSC",			/* name */
+	"TSC",			/* name */
 	800,			/* quality (adjusted in code) */
 };
 
@@ -86,9 +98,23 @@
 	tsc_freq = tscval[1] - tscval[0];
 	if (bootverbose)
 		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
+
+	/*
+	 * Inform CPU accounting about our boot-time clock rate.  Once the
+	 * system is finished booting, we will get the real max clock rate
+	 * via tsc_levels_changed().  This is also updated if someone loads
+	 * a cpufreq driver after boot that discovers a new max frequency.
+	 */
 	set_cputicker(rdtsc, tsc_freq, 1);
-}
 
+	/* Register to find out about changes in CPU frequency. */
+	evh_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
+	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
+	evh_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
+	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
+	evh_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
+	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
+}
 
 void
 init_TSC_tc(void)
@@ -128,6 +154,72 @@
 	}
 }
 
+/*
+ * When cpufreq levels change, find out about the (new) max frequency.  We
+ * use this to update CPU accounting in case it got a lower estimate at boot.
+ */
+static void
+tsc_levels_changed(void *arg, int unit)
+{
+	device_t cf_dev;
+	struct cf_level *levels;
+	int count, error;
+	uint64_t max_freq;
+
+	/* Only use values from the first CPU, assuming all are equal. */
+	if (unit != 0)
+		return;
+
+	/* Find the appropriate cpufreq device instance. */
+	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
+	if (cf_dev == NULL) {
+		printf("tsc_levels_changed() called but no cpufreq device?\n");
+		return;
+	}
+
+	/* Get settings from the device and find the max frequency. */
+	count = 64;
+	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
+	if (levels == NULL)
+		return;
+	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
+	if (error == 0 && count != 0) {
+		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
+		set_cputicker(rdtsc, max_freq, 1);
+	} else
+		printf("tsc_levels_changed: no max freq found\n");
+	free(levels, M_TEMP);
+}
+
+/*
+ * If the TSC timecounter is in use, veto the pending change.  It may be
+ * possible in the future to handle a dynamically-changing timecounter rate.
+ */
+static void
+tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
+{
+
+	if (*status != 0 || timecounter != &tsc_timecounter)
+		return;
+
+	printf("timecounter TSC must not be in use when "
+	     "changing frequencies; change denied\n");
+	*status = EBUSY;
+}
+
+/* Update TSC freq with the value indicated by the caller. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+	/* If there was an error during the transition, don't do anything. */
+	if (status != 0)
+		return;
+
+	/* Total setting for this level gives the new frequency in MHz. */
+	tsc_freq = (uint64_t)level->total_set.freq * 1000000;
+	tsc_timecounter.tc_frequency = tsc_freq;
+}
+
 static int
 sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
 {
Index: sys/i386/i386/identcpu.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/identcpu.c,v
retrieving revision 1.172
diff -u -r1.172 identcpu.c
--- sys/i386/i386/identcpu.c	12 Mar 2007 20:27:21 -0000	1.172
+++ sys/i386/i386/identcpu.c	18 Mar 2007 04:33:02 -0000
@@ -45,6 +45,8 @@
 
 #include <sys/param.h>
 #include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
@@ -1077,6 +1079,21 @@
 	write_eflags(eflags);
 }
 
+/* Record the new CPU clock rate (MHz) in hw_clockrate after a change. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+	/* If there was an error during the transition, don't do anything. */
+	if (status != 0)
+		return;
+
+	/* Total setting for this level gives the new frequency in MHz. */
+	hw_clockrate = level->total_set.freq;
+}
+
+EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
+    EVENTHANDLER_PRI_ANY);
+
 /*
  * Final stage of CPU identification. -- Should I check TI?
  */
Index: sys/i386/isa/prof_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/isa/prof_machdep.c,v
retrieving revision 1.29
diff -u -r1.29 prof_machdep.c
--- sys/i386/isa/prof_machdep.c	29 Oct 2006 09:48:44 -0000	1.29
+++ sys/i386/isa/prof_machdep.c	19 Mar 2007 21:15:34 -0000
@@ -33,6 +33,9 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
 #include <sys/gmon.h>
 #include <sys/kernel.h>
 #include <sys/smp.h>
@@ -54,10 +57,14 @@
 #if defined(PERFMON) && defined(I586_PMC_GUPROF)
 static u_int	cputime_clock_pmc_conf = I586_PMC_GUPROF;
 static int	cputime_clock_pmc_init;
 static struct gmonparam saved_gmp;
 #endif
+static int	cputime_prof_active;
+
+static void tsc_freq_changed(void *arg, const struct cf_level *level,
+    int status);
 #endif /* GUPROF */
 
 #ifdef __GNUCLIKE_ASM
 __asm("								\n\
 GM_STATE	=	0					\n\
@@ -72,19 +79,19 @@
 	# Check that we are profiling.  Do it early for speed.	\n\
 	#							\n\
 	cmpl	$GMON_PROF_OFF,_gmonparam+GM_STATE		\n\
- 	je	.mcount_exit					\n\
- 	#							\n\
- 	# __mcount is the same as [.]mcount except the caller	\n\
- 	# hasn't changed the stack except to call here, so the	\n\
+	je	.mcount_exit					\n\
+	#							\n\
+	# __mcount is the same as [.]mcount except the caller	\n\
+	# hasn't changed the stack except to call here, so the	\n\
 	# caller's raddr is above our raddr.			\n\
 	#							\n\
- 	movl	4(%esp),%edx					\n\
- 	jmp	.got_frompc					\n\
- 								\n\
- 	.p2align 4,0x90						\n\
- 	.globl	.mcount						\n\
+	movl	4(%esp),%edx					\n\
+	jmp	.got_frompc					\n\
+								\n\
+	.p2align 4,0x90						\n\
+	.globl	.mcount						\n\
 .mcount:							\n\
- 	.globl	__cyg_profile_func_enter			\n\
+	.globl	__cyg_profile_func_enter			\n\
 __cyg_profile_func_enter:					\n\
 	cmpl	$GMON_PROF_OFF,_gmonparam+GM_STATE		\n\
 	je	.mcount_exit					\n\
@@ -139,7 +146,7 @@
 	.p2align 4,0x90						\n\
 	.globl	.mexitcount					\n\
 .mexitcount:							\n\
- 	.globl	__cyg_profile_func_exit				\n\
+	.globl	__cyg_profile_func_exit				\n\
 __cyg_profile_func_exit:					\n\
 	cmpl	$GMON_PROF_HIRES,_gmonparam+GM_STATE		\n\
 	jne	.mexitcount_exit				\n\
@@ -287,7 +294,7 @@
 	if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
 		cputime_clock = CPUTIME_CLOCK_I8254;
 #if defined(I586_CPU) || defined(I686_CPU)
-		if (tsc_freq != 0 && !tsc_is_broken && mp_ncpus < 2)
+		if (tsc_freq != 0 && !tsc_is_broken && mp_ncpus == 1)
 			cputime_clock = CPUTIME_CLOCK_TSC;
 #endif
 	}
@@ -322,6 +329,7 @@
 	}
 #endif /* PERFMON && I586_PMC_GUPROF */
 #endif /* I586_CPU || I686_CPU */
+	cputime_prof_active = 1;
 	cputime_bias = 0;
 	cputime();
 }
@@ -337,5 +345,22 @@
 		cputime_clock_pmc_init = FALSE;
 	}
 #endif
+	cputime_prof_active = 0;
 }
+
+/* If the cpu frequency changed while profiling, report a warning. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+
+	/* If there was an error during the transition, don't do anything. */
+	if (status != 0)
+		return;
+	if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC)
+		printf("warning: cpu freq changed while profiling active\n");
+}
+
+EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
+    EVENTHANDLER_PRI_ANY);
+
 #endif /* GUPROF */
Index: sys/amd64/amd64/tsc.c
===================================================================
RCS file: /home/ncvs/src/sys/amd64/amd64/tsc.c,v
retrieving revision 1.206
diff -u -r1.206 tsc.c
--- sys/amd64/amd64/tsc.c	11 Feb 2006 09:33:05 -0000	1.206
+++ sys/amd64/amd64/tsc.c	19 Mar 2007 21:10:34 -0000
@@ -30,6 +30,9 @@
 #include "opt_clock.h"
 
 #include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/malloc.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
@@ -41,6 +44,8 @@
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 
+#include "cpufreq_if.h"
+
 uint64_t	tsc_freq;
 int		tsc_is_broken;
 
@@ -51,14 +56,21 @@
 TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
 #endif
 
+static eventhandler_tag evh_levels_tag, evh_pre_tag, evh_post_tag;
+
+static void tsc_freq_changed(void *arg, const struct cf_level *level,
+    int status);
+static void tsc_freq_changing(void *arg, const struct cf_level *level,
+    int *status);
 static	unsigned tsc_get_timecount(struct timecounter *tc);
+static void tsc_levels_changed(void *arg, int unit);
 
 static struct timecounter tsc_timecounter = {
 	tsc_get_timecount,	/* get_timecount */
 	0,			/* no poll_pps */
- 	~0u,			/* counter_mask */
+	~0u,			/* counter_mask */
 	0,			/* frequency */
-	 "TSC",			/* name */
+	"TSC",			/* name */
 	800,			/* quality (adjusted in code) */
 };
 
@@ -77,9 +89,23 @@
 	tsc_freq = tscval[1] - tscval[0];
 	if (bootverbose)
 		printf("TSC clock: %lu Hz\n", tsc_freq);
+
+	/*
+	 * Inform CPU accounting about our boot-time clock rate.  Once the
+	 * system is finished booting, we will get the real max clock rate
+	 * via tsc_levels_changed().  This is also updated if someone loads
+	 * a cpufreq driver after boot that discovers a new max frequency.
+	 */
 	set_cputicker(rdtsc, tsc_freq, 1);
-}
 
+	/* Register to find out about changes in CPU frequency. */
+	evh_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
+	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
+	evh_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
+	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
+	evh_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
+	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
+}
 
 void
 init_TSC_tc(void)
@@ -104,6 +130,72 @@
 	}
 }
 
+/*
+ * When cpufreq levels change, find out about the (new) max frequency.  We
+ * use this to update CPU accounting in case it got a lower estimate at boot.
+ */
+static void
+tsc_levels_changed(void *arg, int unit)
+{
+	device_t cf_dev;
+	struct cf_level *levels;
+	int count, error;
+	uint64_t max_freq;
+
+	/* Only use values from the first CPU, assuming all are equal. */
+	if (unit != 0)
+		return;
+
+	/* Find the appropriate cpufreq device instance. */
+	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
+	if (cf_dev == NULL) {
+		printf("tsc_levels_changed() called but no cpufreq device?\n");
+		return;
+	}
+
+	/* Get settings from the device and find the max frequency. */
+	count = 64;
+	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
+	if (levels == NULL)
+		return;
+	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
+	if (error == 0 && count != 0) {
+		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
+		set_cputicker(rdtsc, max_freq, 1);
+	} else
+		printf("tsc_levels_changed: no max freq found\n");
+	free(levels, M_TEMP);
+}
+
+/*
+ * If the TSC timecounter is in use, veto the pending change.  It may be
+ * possible in the future to handle a dynamically-changing timecounter rate.
+ */
+static void
+tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
+{
+
+	if (*status != 0 || timecounter != &tsc_timecounter)
+		return;
+
+	printf("timecounter TSC must not be in use when "
+	     "changing frequencies; change denied\n");
+	*status = EBUSY;
+}
+
+/* Update TSC freq with the value indicated by the caller. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+	/* If there was an error during the transition, don't do anything. */
+	if (status != 0)
+		return;
+
+	/* Total setting for this level gives the new frequency in MHz. */
+	tsc_freq = (uint64_t)level->total_set.freq * 1000000;
+	tsc_timecounter.tc_frequency = tsc_freq;
+}
+
 static int
 sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
 {
Index: sys/amd64/amd64/identcpu.c
===================================================================
RCS file: /home/ncvs/src/sys/amd64/amd64/identcpu.c,v
retrieving revision 1.150
diff -u -r1.150 identcpu.c
--- sys/amd64/amd64/identcpu.c	12 Mar 2007 20:27:21 -0000	1.150
+++ sys/amd64/amd64/identcpu.c	19 Mar 2007 21:07:23 -0000
@@ -45,6 +45,8 @@
 
 #include <sys/param.h>
 #include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
@@ -404,6 +406,21 @@
 }
 
 
+/* Record the new CPU clock rate (MHz) in hw_clockrate after a change. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+	/* If there was an error during the transition, don't do anything. */
+	if (status != 0)
+		return;
+
+	/* Total setting for this level gives the new frequency in MHz. */
+	hw_clockrate = level->total_set.freq;
+}
+
+EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
+    EVENTHANDLER_PRI_ANY);
+
 /*
  * Final stage of CPU identification. -- Should I check TI?
  */
Index: sys/amd64/amd64/prof_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/amd64/amd64/prof_machdep.c,v
retrieving revision 1.28
diff -u -r1.28 prof_machdep.c
--- sys/amd64/amd64/prof_machdep.c	29 Oct 2006 09:48:44 -0000	1.28
+++ sys/amd64/amd64/prof_machdep.c	19 Mar 2007 21:14:31 -0000
@@ -35,6 +35,9 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
 #include <sys/gmon.h>
 #include <sys/kernel.h>
 #include <sys/smp.h>
@@ -58,10 +61,14 @@
 #if defined(PERFMON) && defined(I586_PMC_GUPROF)
 static u_int	cputime_clock_pmc_conf = I586_PMC_GUPROF;
 static int	cputime_clock_pmc_init;
 static struct gmonparam saved_gmp;
 #endif
+static int	cputime_prof_active;
+
+static void tsc_freq_changed(void *arg, const struct cf_level *level,
+    int status);
 #endif /* GUPROF */
 
 #ifdef __GNUCLIKE_ASM
 __asm("								\n\
 GM_STATE	=	0					\n\
@@ -76,10 +83,10 @@
 	# Check that we are profiling.  Do it early for speed.	\n\
 	#							\n\
 	cmpl	$GMON_PROF_OFF,_gmonparam+GM_STATE		\n\
- 	je	.mcount_exit					\n\
- 	#							\n\
- 	# __mcount is the same as [.]mcount except the caller	\n\
- 	# hasn't changed the stack except to call here, so the	\n\
+	je	.mcount_exit					\n\
+	#							\n\
+	# __mcount is the same as [.]mcount except the caller	\n\
+	# hasn't changed the stack except to call here, so the	\n\
 	# caller's raddr is above our raddr.			\n\
 	#							\n\
 	pushq	%rax						\n\
@@ -90,12 +97,12 @@
 	pushq	%r8						\n\
 	pushq	%r9						\n\
 	movq	7*8+8(%rsp),%rdi				\n\
- 	jmp	.got_frompc					\n\
- 								\n\
- 	.p2align 4,0x90						\n\
- 	.globl	.mcount						\n\
+	jmp	.got_frompc					\n\
+								\n\
+	.p2align 4,0x90						\n\
+	.globl	.mcount						\n\
 .mcount:							\n\
- 	.globl	__cyg_profile_func_enter			\n\
+	.globl	__cyg_profile_func_enter			\n\
 __cyg_profile_func_enter:					\n\
 	cmpl	$GMON_PROF_OFF,_gmonparam+GM_STATE		\n\
 	je	.mcount_exit					\n\
@@ -161,7 +168,7 @@
 	.p2align 4,0x90						\n\
 	.globl	.mexitcount					\n\
 .mexitcount:							\n\
- 	.globl	__cyg_profile_func_exit				\n\
+	.globl	__cyg_profile_func_exit				\n\
 __cyg_profile_func_exit:					\n\
 	cmpl	$GMON_PROF_HIRES,_gmonparam+GM_STATE		\n\
 	jne	.mexitcount_exit				\n\
@@ -314,7 +321,7 @@
 {
 	if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
 		cputime_clock = CPUTIME_CLOCK_I8254;
-		if (tsc_freq != 0 && !tsc_is_broken && mp_ncpus < 2)
+		if (tsc_freq != 0 && !tsc_is_broken && mp_ncpus == 1)
 			cputime_clock = CPUTIME_CLOCK_TSC;
 	}
 	gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT;
@@ -346,6 +353,7 @@
 		}
 	}
 #endif /* PERFMON && I586_PMC_GUPROF */
+	cputime_prof_active = 1;
 	cputime_bias = 0;
 	cputime();
 }
@@ -361,5 +369,22 @@
 		cputime_clock_pmc_init = FALSE;
 	}
 #endif
+	cputime_prof_active = 0;
 }
+
+/* If the cpu frequency changed while profiling, report a warning. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+
+	/* If there was an error during the transition, don't do anything. */
+	if (status != 0)
+		return;
+	if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC)
+		printf("warning: cpu freq changed while profiling active\n");
+}
+
+EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
+    EVENTHANDLER_PRI_ANY);
+
 #endif /* GUPROF */
Index: sys/contrib/altq/altq/altq_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/contrib/altq/altq/altq_subr.c,v
retrieving revision 1.8
diff -u -r1.8 altq_subr.c
--- sys/contrib/altq/altq/altq_subr.c	2 Mar 2006 00:51:39 -0000	1.8
+++ sys/contrib/altq/altq/altq_subr.c	19 Mar 2007 21:04:00 -0000
@@ -74,6 +74,9 @@
 #if __FreeBSD__ < 3
 #include "opt_cpu.h"	/* for FreeBSD-2.2.8 to get i586_ctr_freq */
 #endif
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
 #include <machine/clock.h>
 #endif
 #if defined(__i386__)
@@ -898,6 +901,22 @@
 extern u_int64_t cpu_tsc_freq;
 #endif /* __alpha__ */
 
+#if (__FreeBSD_version > 700030)
+/* Update machclk_freq with the value indicated by the caller. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+	/* If there was an error during the transition, don't do anything. */
+	if (status != 0)
+		return;
+
+	/* freq is in MHz; widen first so the Hz product can't overflow. */
+	machclk_freq = (u_int64_t)level->total_set.freq * 1000000;
+}
+EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
+    EVENTHANDLER_PRI_ANY);
+#endif /* __FreeBSD_version > 700030 */
+
 void
 init_machclk(void)
 {


More information about the freebsd-arch mailing list