New cpufreq framework and drivers

Kevin Oberman oberman at es.net
Wed Feb 2 14:32:55 PST 2005


> Date: Tue, 01 Feb 2005 11:01:49 -0800
> From: Nate Lawson <nate at root.org>
> Sender: owner-freebsd-acpi at freebsd.org
> 
> Eric Anderson wrote:
> > Nate Lawson wrote:
> > 
> >> Nate Lawson wrote:
> >>
> >>> Below is the first patch of cpufreq for wider testing.  It has the 
> >>> framework, cpu pseudodriver updates, and two hardware drivers -- ACPI 
> >>> performance states and SpeedStep-ICH.  It has had a lot of testing on 
> >>> supported hardware but needs wider testing before importing.  Other 
> >>> hardware drivers can be quickly ported to this interface, and I'm 
> >>> happy to assist their maintainers.
> >>>
> >>> http://www.root.org/~nate/freebsd/cpufreq.diff
> >>
> >>
> >>
> >> Sorry, I'm so familiar with the interface that I forgot to mention how 
> >> to use it.  To test, build a new kernel and modules.  Load one or both 
> >> of acpi_perf.ko and cpufreq.ko at boot time.  Type "sysctl dev.cpu" to 
> >> see the new freq and freq_levels output.  If you're a driver 
> >> maintainer, see sys/cpu.h and speedstep_ich.c or acpi_perf.c for an 
> >> example how to provide the driver interface.
> >>
> > 
> > Will this only work on -current, or also 5.3-stable?
> 
> It should work on -stable, modulo any diff fuzz.
> 
> -- 
> Nate
> _______________________________________________
> freebsd-acpi at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-acpi
> To unsubscribe, send any mail to "freebsd-acpi-unsubscribe at freebsd.org"
> 

A bit more than fuzz, I'm afraid, but nothing that should take more than
a little effort; it's not very difficult.

I am attaching diffs against -stable. Since it's your code, I did not
feel like I should be sending them to others. I'll leave it up to you as
to whether to post them. (And, it's possible that I messed something up,
too, although I did test it.) It requires -stable as of 12/8/2004 or newer.
-- 
R. Kevin Oberman, Network Engineer
Energy Sciences Network (ESnet)
Ernest O. Lawrence Berkeley National Laboratory (Berkeley Lab)
E-mail: oberman at es.net			Phone: +1 510 486-8634

-------------- next part --------------
Index: sys/cpu.h
===================================================================
RCS file: sys/cpu.h
diff -N sys/cpu.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/cpu.h	1 Feb 2005 16:54:24 -0000
@@ -0,0 +1,121 @@
+/*-
+ * Copyright (c) 2005 Nate Lawson (SDG)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_CPU_H_
+#define _SYS_CPU_H_
+
+/*
+ * CPU device support.
+ */
+
+#define CPU_IVAR_PCPU		1
+
+static __inline struct pcpu *cpu_get_pcpu(device_t dev)
+{
+	uintptr_t v = 0;
+	BUS_READ_IVAR(device_get_parent(dev), dev, CPU_IVAR_PCPU, &v);
+	return ((struct pcpu *)v);
+}
+
+/*
+ * CPU frequency control interface.
+ */
+
+/* Each driver's CPU frequency setting is exported in this format. */
+struct cf_setting {
+	int	freq;	/* Processor clock in Mhz or percent (in 100ths.) */
+	int	volts;	/* Voltage in mV. */
+	int	power;	/* Power consumed in mW. */
+	int	lat;	/* Transition latency in us. */
+	device_t dev;	/* Driver providing this setting. */
+};
+
+/* Maximum number of settings a given driver can have. */
+#define MAX_SETTINGS		24
+
+/* A combination of settings is a level. */
+struct cf_level {
+	struct cf_setting	total_set;
+	struct cf_setting	abs_set;
+	struct cf_setting	rel_set[MAX_SETTINGS];
+	int			rel_count;
+	TAILQ_ENTRY(cf_level)	link;
+};
+
+TAILQ_HEAD(cf_level_lst, cf_level);
+
+/* Drivers should set all unknown values to this. */
+#define CPUFREQ_VAL_UNKNOWN	(-1)
+
+/*
+ * Every driver offers a type of CPU control.  Absolute levels are mutually
+ * exclusive while relative levels modify the current absolute level.  There
+ * may be multiple absolute and relative drivers available on a given
+ * system.
+ *
+ * For example, consider a system with two absolute drivers that provide
+ * frequency settings of 100, 200 and 300, 400 and a relative driver that
+ * provides settings of 50%, 100%.  The cpufreq core would export frequency
+ * levels of 50, 100, 150, 200, 300, 400.
+ */
+#define CPUFREQ_TYPE_RELATIVE	(1<<0)
+#define CPUFREQ_TYPE_ABSOLUTE	(1<<1)
+
+/*
+ * When setting a level, the caller indicates the priority of this request.
+ * Priorities determine, among other things, whether a level can be
+ * overridden by other callers.  For example, if the user sets a level but
+ * the system thermal driver needs to override it for emergency cooling,
+ * the driver would use a higher priority.  Once the event has passed, the
+ * driver would call cpufreq to resume any previous level.
+ */
+#define CPUFREQ_PRIO_HIGHEST	1000000
+#define CPUFREQ_PRIO_KERN	1000
+#define CPUFREQ_PRIO_USER	100
+#define CPUFREQ_PRIO_LOWEST	0
+
+/*
+ * Register and unregister a driver with the cpufreq core.  Once a driver
+ * is registered, it must support calls to its CPUFREQ_GET, CPUFREQ_GET_LEVEL,
+ * and CPUFREQ_SET methods.  It must also unregister before returning from
+ * its DEVICE_DETACH method.
+ */
+int	cpufreq_register(device_t dev);
+int	cpufreq_unregister(device_t dev);
+
+/* Allow values to be +/- a bit since sometimes we have to estimate. */
+#define CPUFREQ_CMP(x, y)	(abs((x) - (y)) < 25)
+
+/*
+ * Machine-dependent functions.
+ */
+
+/* Estimate the current clock rate for the given CPU id. */
+int	cpu_est_clockrate(int cpu_id, uint64_t *rate);
+
+#endif /* !_SYS_CPU_H_ */
Index: alpha/alpha/machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/alpha/alpha/machdep.c,v
--- alpha/alpha/machdep.c.orig	Wed Feb  2 11:45:02 2005
+++ alpha/alpha/machdep.c	Wed Feb  2 12:40:01 2005
@@ -98,6 +98,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/cpu.h>
 #include <sys/eventhandler.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
@@ -1720,6 +1721,14 @@
 void
 cpu_boot(int howto)
 {
+}
+
+/* Get current clock frequency for the given cpu id. */
+int
+cpu_est_clockrate(int cpu_id, uint64_t *rate)
+{
+
+	return (ENXIO);
 }
 
 /*
Index: amd64/amd64/machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/amd64/amd64/machdep.c,v
--- amd64/amd64/machdep.c.orig	Wed Feb  2 11:45:02 2005
+++ amd64/amd64/machdep.c	Wed Feb  2 12:40:01 2005
@@ -56,8 +56,12 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/sysproto.h>
-#include <sys/signalvar.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/bus.h>
+#include <sys/callout.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
@@ -446,6 +450,44 @@
 void
 cpu_boot(int howto)
 {
+}
+
+/* Get current clock frequency for the given cpu id. */
+int
+cpu_est_clockrate(int cpu_id, uint64_t *rate)
+{
+	uint64_t tsc1, tsc2;
+
+	if (pcpu_find(cpu_id) == NULL || rate == NULL)
+		return (EINVAL);
+
+	/* If we're booting, trust the rate calibrated moments ago. */
+	if (cold) {
+		*rate = tsc_freq;
+		return (0);
+	}
+
+#ifdef SMP
+	/* Schedule ourselves on the indicated cpu. */
+	mtx_lock_spin(&sched_lock);
+	sched_bind(curthread, cpu_id);
+	mtx_unlock_spin(&sched_lock);
+#endif
+
+	/* Calibrate by measuring a short delay. */
+	tsc1 = rdtsc();
+	DELAY(1000);
+	tsc2 = rdtsc();
+
+#ifdef SMP
+	mtx_lock_spin(&sched_lock);
+	sched_unbind(curthread);
+	mtx_unlock_spin(&sched_lock);
+#endif
+
+	tsc_freq = (tsc2 - tsc1) * 1000;
+	*rate = tsc_freq;
+	return (0);
 }
 
 /*
Index: i386/i386/machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/machdep.c,v
--- i386/i386/machdep.c.orig	Wed Feb  2 11:45:02 2005
+++ i386/i386/machdep.c	Wed Feb  2 12:40:01 2005
@@ -56,8 +56,12 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/sysproto.h>
-#include <sys/signalvar.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/bus.h>
+#include <sys/callout.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
@@ -66,21 +70,18 @@
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
+#include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
 #include <sys/reboot.h>
-#include <sys/callout.h>
-#include <sys/msgbuf.h>
 #include <sys/sched.h>
-#include <sys/sysent.h>
+#include <sys/signalvar.h>
 #include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
 #include <sys/ucontext.h>
 #include <sys/vmmeter.h>
-#include <sys/bus.h>
-#include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -105,6 +106,7 @@
 
 #include <net/netisr.h>
 
+#include <machine/clock.h>
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/reg.h>
@@ -1020,6 +1022,46 @@
 void
 cpu_boot(int howto)
 {
+}
+
+/* Get current clock frequency for the given cpu id. */
+int
+cpu_est_clockrate(int cpu_id, uint64_t *rate)
+{
+	uint64_t tsc1, tsc2;
+
+	if (pcpu_find(cpu_id) == NULL || rate == NULL)
+		return (EINVAL);
+	if (!tsc_present)
+		return (EOPNOTSUPP);
+
+	/* If we're booting, trust the rate calibrated moments ago. */
+	if (cold) {
+		*rate = tsc_freq;
+		return (0);
+	}
+
+#ifdef SMP
+	/* Schedule ourselves on the indicated cpu. */
+	mtx_lock_spin(&sched_lock);
+	sched_bind(curthread, cpu_id);
+	mtx_unlock_spin(&sched_lock);
+#endif
+
+	/* Calibrate by measuring a short delay. */
+	tsc1 = rdtsc();
+	DELAY(1000);
+	tsc2 = rdtsc();
+
+#ifdef SMP
+	mtx_lock_spin(&sched_lock);
+	sched_unbind(curthread);
+	mtx_unlock_spin(&sched_lock);
+#endif
+
+	tsc_freq = (tsc2 - tsc1) * 1000;
+	*rate = tsc_freq;
+	return (0);
 }
 
 /*
Index: i386/i386/legacy.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/legacy.c,v
--- i386/i386/legacy.c.orig	Wed Feb  2 12:36:23 2005
+++ i386/i386/legacy.c	Wed Feb  2 12:40:02 2005
@@ -38,6 +38,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
+#include <sys/cpu.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
@@ -137,32 +138,25 @@
 {
 	device_t child;
 	int i;
-	struct pcpu *pc;
 
+	/* First, attach the CPU pseudo-driver. */
+	for (i = 0; i <= mp_maxid; i++)
+		if (!CPU_ABSENT(i)) {
+			child = BUS_ADD_CHILD(dev, 0, "cpu", i);
+			if (child == NULL)
+				panic("legacy_attach cpu");
+			device_probe_and_attach(child);
+		}
+
+#ifndef PC98
 	/*
-	 * First, let our child driver's identify any child devices that
+	 * Second, let our child driver's identify any child devices that
 	 * they can find.  Once that is done attach any devices that we
 	 * found.
 	 */
 	bus_generic_probe(dev);
 	bus_generic_attach(dev);
 
-	/* Attach CPU pseudo-driver. */
-	if (!devclass_get_device(devclass_find("cpu"), 0)) {
-		for (i = 0; i <= mp_maxid; i++)
-			if (!CPU_ABSENT(i)) {
-				pc = pcpu_find(i);
-				KASSERT(pc != NULL, ("pcpu_find failed"));
-				child = BUS_ADD_CHILD(dev, 0, "cpu", i);
-				if (child == NULL)
-					panic("legacy_attach cpu");
-				device_probe_and_attach(child);
-				pc->pc_device = child;
-				device_set_ivars(child, pc);
-			}
-	}
-
-#ifndef PC98
 	/*
 	 * If we didn't see EISA or ISA on a pci bridge, create some
 	 * connection points now so they show up "on motherboard".
@@ -265,6 +259,14 @@
  */
 static int	cpu_read_ivar(device_t dev, device_t child, int index,
 		    uintptr_t *result);
+static device_t cpu_add_child(device_t bus, int order, const char *name,
+		    int unit);
+static struct resource_list *cpu_get_rlist(device_t dev, device_t child);
+
+struct cpu_device {
+	struct resource_list cd_rl;
+	struct pcpu *cd_pcpu;
+};
 
 static device_method_t cpu_methods[] = {
 	/* Device interface */
@@ -276,10 +278,15 @@
 	DEVMETHOD(device_resume,	bus_generic_resume),
 
 	/* Bus interface */
+	DEVMETHOD(bus_add_child,	cpu_add_child),
 	DEVMETHOD(bus_read_ivar,	cpu_read_ivar),
 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
-	DEVMETHOD(bus_alloc_resource,	bus_generic_alloc_resource),
-	DEVMETHOD(bus_release_resource,	bus_generic_release_resource),
+	DEVMETHOD(bus_get_resource_list, cpu_get_rlist),
+	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
+	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
+	DEVMETHOD(bus_alloc_resource,	bus_generic_rl_alloc_resource),
+	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
+	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
 	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
@@ -293,19 +300,62 @@
 	cpu_methods,
 	1,		/* no softc */
 };
-static devclass_t cpu_devclass;
+devclass_t cpu_devclass;
 DRIVER_MODULE(cpu, legacy, cpu_driver, cpu_devclass, 0, 0);
 
+/*
+ * Bus add_child method: create a child of a cpu device and give it a
+ * struct cpu_device (empty resource list plus the pcpu pointer found via
+ * pcpu_find(unit)) as its ivars.  Returns NULL if the allocation or the
+ * device creation fails.
+ */
+static device_t
+cpu_add_child(device_t bus, int order, const char *name, int unit)
+{
+	struct cpu_device *cd;
+	device_t child;
+	struct pcpu *pc;
+
+	if ((cd = malloc(sizeof(*cd), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL)
+		return (NULL);
+
+	resource_list_init(&cd->cd_rl);
+	pc = pcpu_find(unit);
+	KASSERT(pc != NULL, ("pcpu_find failed"));
+	cd->cd_pcpu = pc;
+
+	child = device_add_child_ordered(bus, order, name, unit);
+	if (child != NULL) {
+		pc->pc_device = child;
+		device_set_ivars(child, cd);
+	} else
+		free(cd, M_DEVBUF);
+	return (child);
+}
+
+/* Bus get_resource_list method: return the child's resource list. */
+static struct resource_list *
+cpu_get_rlist(device_t dev, device_t child)
+{
+	struct cpu_device *cpdev;
+
+	cpdev = device_get_ivars(child);
+	return (&cpdev->cd_rl);
+}
+
+/*
+ * Bus read_ivar method: only CPU_IVAR_PCPU is supported.  The struct
+ * cpu_device is stored in the child's ivars by cpu_add_child(), so it must
+ * be looked up on the child, not on the cpu device itself.
+ */
 static int
 cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
 {
-	struct pcpu *pc;
+	struct cpu_device *cpdev;
 
-	if (index != 0)
-		return (ENOENT);
-	pc = device_get_ivars(child);
-	if (pc == NULL)
+	if (index != CPU_IVAR_PCPU)
 		return (ENOENT);
-	*result = (uintptr_t)pc;
+	cpdev = device_get_ivars(child);
+	*result = (uintptr_t)cpdev->cd_pcpu;
 	return (0);
 }
Index: ia64/ia64/machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/ia64/ia64/machdep.c,v
--- ia64/ia64/machdep.c.orig	Wed Feb  2 11:45:03 2005
+++ ia64/ia64/machdep.c	Wed Feb  2 12:40:02 2005
@@ -34,6 +34,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/cpu.h>
 #include <sys/eventhandler.h>
 #include <sys/kdb.h>
 #include <sys/sysproto.h>
@@ -283,6 +284,17 @@
 {
 
 	ia64_efi_runtime->ResetSystem(EfiResetWarm, EFI_SUCCESS, 0, 0);
+}
+
+/* Get current clock frequency for the given cpu id. */
+int
+cpu_est_clockrate(int cpu_id, uint64_t *rate)
+{
+
+	if (pcpu_find(cpu_id) == NULL || rate == NULL)
+		return (EINVAL);
+	*rate = processor_frequency;
+	return (0);
 }
 
 void
Index: sparc64/sparc64/machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/sparc64/sparc64/machdep.c,v
--- sparc64/sparc64/machdep.c.orig	Sat Nov 20 19:47:34 2004
+++ sparc64/sparc64/machdep.c	Wed Feb  2 12:40:02 2005
@@ -44,6 +44,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cons.h>
+#include <sys/cpu.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
@@ -671,6 +672,14 @@
 	cpu_mp_shutdown();
 #endif
 	openfirmware_exit(args);
+}
+
+/* Get current clock frequency for the given cpu id. */
+int
+cpu_est_clockrate(int cpu_id, uint64_t *rate)
+{
+
+	return (ENXIO);
 }
 
 /*
Index: dev/acpica/acpi_cpu.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/acpica/acpi_cpu.c,v
--- dev/acpica/acpi_cpu.c.orig	Sat Nov 20 19:47:24 2004
+++ dev/acpica/acpi_cpu.c	Wed Feb  2 12:40:02 2005
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2003 Nate Lawson (SDG)
+ * Copyright (c) 2003-2005 Nate Lawson (SDG)
  * Copyright (c) 2001 Michael Smith
  * All rights reserved.
  *
@@ -71,7 +71,8 @@
 struct acpi_cpu_softc {
     device_t		 cpu_dev;
     ACPI_HANDLE		 cpu_handle;
-    uint32_t		 acpi_id;	/* ACPI processor id */
+    struct pcpu		*cpu_pcpu;
+    uint32_t		 cpu_acpi_id;	/* ACPI processor id */
     uint32_t		 cpu_p_blk;	/* ACPI P_BLK location */
     uint32_t		 cpu_p_blk_len;	/* P_BLK length (must be 6). */
     struct resource	*cpu_p_cnt;	/* Throttling control register */
@@ -80,6 +81,10 @@
     int			 cpu_prev_sleep;/* Last idle sleep duration. */
 };
 
+struct acpi_cpu_device {
+    struct resource_list        ad_rl;
+};
+
 #define CPU_GET_REG(reg, width) 					\
     (bus_space_read_ ## width(rman_get_bustag((reg)), 			\
 		      rman_get_bushandle((reg)), 0))
@@ -127,6 +132,8 @@
 static u_int		 cpu_cx_stats[MAX_CX_STATES];/* Cx usage history. */
 
 /* Values for sysctl. */
+static struct sysctl_ctx_list acpi_cpu_sysctl_ctx;
+static struct sysctl_oid *acpi_cpu_sysctl_tree;
 static uint32_t		 cpu_throttle_state;
 static uint32_t		 cpu_throttle_max;
 static int		 cpu_cx_lowest;
@@ -137,13 +144,15 @@
 static struct acpi_cpu_softc **cpu_softc;
 ACPI_SERIAL_DECL(cpu, "ACPI CPU");
 
-static struct sysctl_ctx_list	acpi_cpu_sysctl_ctx;
-static struct sysctl_oid	*acpi_cpu_sysctl_tree;
-
 static int	acpi_cpu_probe(device_t dev);
 static int	acpi_cpu_attach(device_t dev);
 static int	acpi_pcpu_get_id(uint32_t idx, uint32_t *acpi_id,
-				 uint32_t *cpu_id);
+		    uint32_t *cpu_id);
+static struct resource_list *acpi_cpu_get_rlist(device_t dev, device_t child);
+static device_t	acpi_cpu_add_child(device_t dev, int order, const char *name,
+		    int unit);
+static int	acpi_cpu_read_ivar(device_t dev, device_t child, int index,
+		    uintptr_t *result);
 static int	acpi_cpu_shutdown(device_t dev);
 static int	acpi_cpu_throttle_probe(struct acpi_cpu_softc *sc);
 static int	acpi_cpu_cx_probe(struct acpi_cpu_softc *sc);
@@ -163,7 +172,24 @@
     /* Device interface */
     DEVMETHOD(device_probe,	acpi_cpu_probe),
     DEVMETHOD(device_attach,	acpi_cpu_attach),
+    DEVMETHOD(device_detach,	bus_generic_detach),
     DEVMETHOD(device_shutdown,	acpi_cpu_shutdown),
+    DEVMETHOD(device_suspend,	bus_generic_suspend),
+    DEVMETHOD(device_resume,	bus_generic_resume),
+
+    /* Bus interface */
+    DEVMETHOD(bus_add_child,	acpi_cpu_add_child),
+    DEVMETHOD(bus_read_ivar,	acpi_cpu_read_ivar),
+    DEVMETHOD(bus_get_resource_list, acpi_cpu_get_rlist),
+    DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
+    DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
+    DEVMETHOD(bus_alloc_resource, bus_generic_rl_alloc_resource),
+    DEVMETHOD(bus_release_resource, bus_generic_rl_release_resource),
+    DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
+    DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
+    DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
+    DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
+    DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
 
     {0, 0}
 };
@@ -174,8 +200,8 @@
     sizeof(struct acpi_cpu_softc),
 };
 
-static devclass_t acpi_cpu_devclass;
-DRIVER_MODULE(cpu, acpi, acpi_cpu_driver, acpi_cpu_devclass, 0, 0);
+extern devclass_t cpu_devclass;
+DRIVER_MODULE(cpu, acpi, acpi_cpu_driver, cpu_devclass, 0, 0);
 MODULE_DEPEND(cpu, acpi, 1, 1, 1);
 
 static int
@@ -265,17 +291,22 @@
 {
     ACPI_BUFFER		   buf;
     ACPI_OBJECT		   *obj;
+    struct pcpu		   *pcpu_data;
     struct acpi_cpu_softc *sc;
     struct acpi_softc	  *acpi_sc;
     ACPI_STATUS		   status;
-    int			   thr_ret, cx_ret;
+    int			   cx_ret, cpu_id, thr_ret;
 
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
     sc = device_get_softc(dev);
     sc->cpu_dev = dev;
     sc->cpu_handle = acpi_get_handle(dev);
-    cpu_softc[acpi_get_magic(dev)] = sc;
+    cpu_id = acpi_get_magic(dev);
+    cpu_softc[cpu_id] = sc;
+    pcpu_data = pcpu_find(cpu_id);
+    pcpu_data->pc_device = dev;
+    sc->cpu_pcpu = pcpu_data;
 
     buf.Pointer = NULL;
     buf.Length = ACPI_ALLOCATE_BUFFER;
@@ -288,7 +319,7 @@
     obj = (ACPI_OBJECT *)buf.Pointer;
     sc->cpu_p_blk = obj->Processor.PblkAddress;
     sc->cpu_p_blk_len = obj->Processor.PblkLength;
-    sc->acpi_id = obj->Processor.ProcId;
+    sc->cpu_acpi_id = obj->Processor.ProcId;
     AcpiOsFree(obj);
     ACPI_DEBUG_PRINT((ACPI_DB_INFO, "acpi_cpu%d: P_BLK at %#x/%d\n",
 		     device_get_unit(dev), sc->cpu_p_blk, sc->cpu_p_blk_len));
@@ -296,8 +327,8 @@
     acpi_sc = acpi_device_get_parent_softc(dev);
     sysctl_ctx_init(&acpi_cpu_sysctl_ctx);
     acpi_cpu_sysctl_tree = SYSCTL_ADD_NODE(&acpi_cpu_sysctl_ctx,
-				SYSCTL_CHILDREN(acpi_sc->acpi_sysctl_tree),
-				OID_AUTO, "cpu", CTLFLAG_RD, 0, "");
+	SYSCTL_CHILDREN(acpi_sc->acpi_sysctl_tree), OID_AUTO, "cpu",
+	CTLFLAG_RD, 0, "");
 
     /*
      * Probe for throttling and Cx state support.
@@ -314,7 +345,11 @@
 	sysctl_ctx_free(&acpi_cpu_sysctl_ctx);
     }
 
-    return_VALUE (0);
+    /* Call identify and then probe/attach for cpu child drivers. */
+    bus_generic_probe(dev);
+    bus_generic_attach(dev);
+
+    return (0);
 }
 
 /*
@@ -353,11 +388,73 @@
     return (ESRCH);
 }
 
+/* Return the resource list kept in the child's ivars, or NULL if none. */
+static struct resource_list *
+acpi_cpu_get_rlist(device_t dev, device_t child)
+{
+    struct acpi_cpu_device *ad;
+
+    ad = device_get_ivars(child);
+    if (ad == NULL)
+	return (NULL);
+    return (&ad->ad_rl);
+}
+
+/*
+ * Bus add_child method: create a child of this cpu device and attach a
+ * freshly initialized resource list to it as ivars.  Returns NULL if the
+ * allocation or the device creation fails.
+ */
+static device_t
+acpi_cpu_add_child(device_t dev, int order, const char *name, int unit)
+{
+    struct acpi_cpu_device  *ad;
+    device_t            child;
+
+    if ((ad = malloc(sizeof(*ad), M_TEMP, M_NOWAIT | M_ZERO)) == NULL)
+        return (NULL);
+
+    resource_list_init(&ad->ad_rl);
+
+    child = device_add_child_ordered(dev, order, name, unit);
+    if (child != NULL)
+        device_set_ivars(child, ad);
+    else
+        free(ad, M_TEMP);	/* Don't leak the ivars on failure. */
+    return (child);
+}
+
+/*
+ * Bus read_ivar method: export the ACPI handle and the pcpu pointer saved
+ * in the cpu device's softc to child drivers; other indices get ENOENT.
+ */
+static int
+acpi_cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
+{
+    struct acpi_cpu_softc *sc;
+
+    sc = device_get_softc(dev);
+    switch (index) {
+    case ACPI_IVAR_HANDLE:
+	*result = (uintptr_t)sc->cpu_handle;
+	break;
+    case CPU_IVAR_PCPU:
+	*result = (uintptr_t)sc->cpu_pcpu;
+	break;
+    default:
+	return (ENOENT);
+    }
+    return (0);
+}
+
 static int
 acpi_cpu_shutdown(device_t dev)
 {
     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 
+    /* Allow children to shutdown first. */
+    bus_generic_shutdown(dev);
+
     /* Disable any entry to the idle function. */
     cpu_cx_count = 0;
 
@@ -668,7 +753,7 @@
     int count, i;
 
     /* Get set of CPU devices */
-    devclass_get_devices(acpi_cpu_devclass, &cpu_devices, &cpu_ndevices);
+    devclass_get_devices(cpu_devclass, &cpu_devices, &cpu_ndevices);
 
     /* Check for quirks via the first CPU device. */
     sc = device_get_softc(cpu_devices[0]);
Index: dev/acpica/acpi_perf.c
===================================================================
RCS file: dev/acpica/acpi_perf.c
diff -N dev/acpica/acpi_perf.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ dev/acpica/acpi_perf.c	1 Feb 2005 16:32:55 -0000
@@ -0,0 +1,420 @@
+/*-
+ * Copyright (c) 2003-2005 Nate Lawson (SDG)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_acpi.h"
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/power.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/sbuf.h>
+#include <sys/pcpu.h>
+
+#include <machine/bus_pio.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/rman.h>
+
+#include "acpi.h"
+#include <dev/acpica/acpivar.h>
+
+#include "cpufreq_if.h"
+
+/*
+ * Support for ACPI processor performance states (Px) according to
+ * section x of the ACPI specification.
+ */
+
+struct acpi_px {
+	uint32_t	 core_freq;
+	uint32_t	 power;
+	uint32_t	 trans_lat;
+	uint32_t	 bm_lat;
+	uint32_t	 ctrl_val;
+	uint32_t	 sts_val;
+};
+
+#define MAX_PX_STATES	 16
+
+struct acpi_perf_softc {
+	device_t	 dev;
+	ACPI_HANDLE	 handle;
+	struct resource	*perf_ctrl;	/* Set new performance state. */
+	struct resource	*perf_status;	/* Check that transition succeeded. */
+	struct acpi_px	*px_states;	/* ACPI perf states. */
+	uint32_t	 px_count;	/* Total number of perf states. */
+	uint32_t	 px_max_avail;	/* Lowest index state available. */
+	int		 px_curr_state;	/* Active state index. */
+	int		 px_rid;
+};
+
+#define PX_GET_REG(reg) 				\
+	(bus_space_read_4(rman_get_bustag((reg)), 	\
+	    rman_get_bushandle((reg)), 0))
+#define PX_SET_REG(reg, val)				\
+	(bus_space_write_4(rman_get_bustag((reg)), 	\
+	    rman_get_bushandle((reg)), 0, (val)))
+
+static void	acpi_perf_identify(driver_t *driver, device_t parent);
+static int	acpi_perf_probe(device_t dev);
+static int	acpi_perf_attach(device_t dev);
+static int	acpi_perf_detach(device_t dev);
+static int	acpi_perf_evaluate(device_t dev);
+static int	acpi_px_to_set(device_t dev, struct acpi_px *px,
+		    struct cf_setting *set);
+static void	acpi_px_available(struct acpi_perf_softc *sc);
+static void	acpi_px_notify(ACPI_HANDLE h, UINT32 notify, void *context);
+static int	acpi_px_settings(device_t dev, struct cf_setting *sets,
+		    int *count, int *type);
+static int	acpi_px_set(device_t dev, const struct cf_setting *set);
+static int	acpi_px_get(device_t dev, struct cf_setting *set);
+
+static device_method_t acpi_perf_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_identify,	acpi_perf_identify),
+	DEVMETHOD(device_probe,		acpi_perf_probe),
+	DEVMETHOD(device_attach,	acpi_perf_attach),
+	DEVMETHOD(device_detach,	acpi_perf_detach),
+
+	/* cpufreq interface */
+	DEVMETHOD(cpufreq_drv_set,	acpi_px_set),
+	DEVMETHOD(cpufreq_drv_get,	acpi_px_get),
+	DEVMETHOD(cpufreq_drv_settings,	acpi_px_settings),
+	{0, 0}
+};
+
+static driver_t acpi_perf_driver = {
+	"acpi_perf",
+	acpi_perf_methods,
+	sizeof(struct acpi_perf_softc),
+};
+
+static devclass_t acpi_perf_devclass;
+DRIVER_MODULE(acpi_perf, cpu, acpi_perf_driver, acpi_perf_devclass, 0, 0);
+MODULE_DEPEND(acpi_perf, acpi, 1, 1, 1);
+
+MALLOC_DEFINE(M_ACPIPERF, "acpi_perf", "ACPI Performance states");
+
+static void
+acpi_perf_identify(driver_t *driver, device_t parent)
+{
+	device_t child;
+	ACPI_HANDLE handle;
+
+	/* Make sure we're not being doubly invoked. */
+	if (device_find_child(parent, "acpi_perf", 0) != NULL)
+		return;
+
+	/* Get the handle for the Processor object and check for perf states. */
+	handle = acpi_get_handle(parent);
+	if (handle == NULL)
+		return;
+	if (ACPI_FAILURE(AcpiEvaluateObject(handle, "_PSS", NULL, NULL)))
+		return;
+	if ((child = BUS_ADD_CHILD(parent, 0, "acpi_perf", 0)) == NULL)
+		device_printf(parent, "acpi_perf: add child failed\n");
+}
+
+static int
+acpi_perf_probe(device_t dev)
+{
+
+	device_set_desc(dev, "ACPI CPU Frequency Control");
+	return (-10);
+}
+
+static int
+acpi_perf_attach(device_t dev)
+{
+	struct acpi_perf_softc *sc;
+
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+	sc->handle = acpi_get_handle(dev);
+	sc->px_max_avail = 0;
+	sc->px_curr_state = CPUFREQ_VAL_UNKNOWN;
+	if (acpi_perf_evaluate(dev) != 0)
+		return (ENXIO);
+	cpufreq_register(dev);
+
+	return (0);
+}
+
+static int
+acpi_perf_detach(device_t dev)
+{
+	/* TODO: teardown registers, remove notify handler. */
+	return (ENXIO);
+}
+
+/* Probe and setup any valid performance states (Px). */
+static int
+acpi_perf_evaluate(device_t dev)
+{
+	struct acpi_perf_softc *sc;
+	ACPI_BUFFER buf;
+	ACPI_OBJECT *pkg, *res;
+	ACPI_STATUS status;
+	int i, j;
+	uint32_t *p;
+
+	/* Get the control values and parameters for each state. */
+	sc = device_get_softc(dev);
+	buf.Pointer = NULL;
+	buf.Length = ACPI_ALLOCATE_BUFFER;
+	status = AcpiEvaluateObject(sc->handle, "_PSS", NULL, &buf);
+	if (ACPI_FAILURE(status))
+		return (ENXIO);
+
+	pkg = (ACPI_OBJECT *)buf.Pointer;
+	if (!ACPI_PKG_VALID(pkg, 1)) {
+		device_printf(dev, "invalid top level _PSS package\n");
+		return (ENXIO);
+	}
+	sc->px_count = pkg->Package.Count;
+
+	sc->px_states = malloc(sc->px_count * sizeof(struct acpi_px),
+	    M_ACPIPERF, M_WAITOK | M_ZERO);
+	if (sc->px_states == NULL)
+		return (ENOMEM);
+
+	/*
+	 * Each state is a package of {CoreFreq, Power, TransitionLatency,
+	 * BusMasterLatency, ControlVal, StatusVal}, sorted from highest
+	 * performance to lowest.
+	 */
+	for (i = 0; i < sc->px_count; i++) {
+		res = &pkg->Package.Elements[i];
+		if (!ACPI_PKG_VALID(res, 6)) {
+			device_printf(dev, "invalid _PSS package\n");
+			continue;
+		}
+		p = &sc->px_states[i].core_freq;
+		for (j = 0; j < 6; j++, p++)
+			acpi_PkgInt32(res, j, p);
+	}
+	AcpiOsFree(buf.Pointer);
+
+	/* Get the control and status registers (one of each). */
+	buf.Pointer = NULL;
+	buf.Length = ACPI_ALLOCATE_BUFFER;
+	status = AcpiEvaluateObject(sc->handle, "_PCT", NULL, &buf);
+	if (ACPI_FAILURE(status)) {
+		free(sc->px_states, M_ACPIPERF);
+		return (ENXIO);
+	}
+
+	/* Check the package of two registers, each a Buffer in GAS format. */
+	pkg = (ACPI_OBJECT *)buf.Pointer;
+	if (!ACPI_PKG_VALID(pkg, 2)) {
+		device_printf(dev, "invalid perf register package\n");
+		return (ENXIO);
+	}
+
+	acpi_PkgGas(sc->dev, pkg, 0, &sc->px_rid, &sc->perf_ctrl);
+	if (sc->perf_ctrl == NULL) {
+		device_printf(dev, "failed to attach PERF_CTL register\n");
+		return (ENXIO);
+	}
+	sc->px_rid++;
+
+	acpi_PkgGas(sc->dev, pkg, 1, &sc->px_rid, &sc->perf_status);
+	if (sc->perf_status == NULL) {
+		device_printf(dev, "failed to attach PERF_STATUS register\n");
+		return (ENXIO);
+	}
+	sc->px_rid++;
+	AcpiOsFree(buf.Pointer);
+
+	/* Get our current limit and register for notifies. */
+	acpi_px_available(sc);
+	AcpiInstallNotifyHandler(sc->handle, ACPI_DEVICE_NOTIFY,
+	    acpi_px_notify, sc);
+
+	return (0);
+}
+
+static void
+acpi_px_notify(ACPI_HANDLE h, UINT32 notify, void *context)
+{
+	struct acpi_perf_softc *softc = context;
+
+	/* Whatever the notify value, re-check the available states. */
+	acpi_px_available(softc);
+
+	/* TODO: Implement notification when frequency changes. */
+}
+
+/*
+ * Find the highest currently-supported performance state by evaluating
+ * _PPC, and step down to it if the cached current state is now too high.
+ * This can be called at runtime (e.g., due to a docking event) at
+ * the request of a Notify on the processor object.
+ */
+static void
+acpi_px_available(struct acpi_perf_softc *sc)
+{
+	ACPI_STATUS status;
+	struct cf_setting set;
+
+	/* A failed _PPC or an index past the state table means no limit. */
+	status = acpi_GetInteger(sc->handle, "_PPC", &sc->px_max_avail);
+	if (ACPI_FAILURE(status) || sc->px_max_avail >= sc->px_count)
+		sc->px_max_avail = 0;
+	else if (sc->px_curr_state != CPUFREQ_VAL_UNKNOWN &&
+	    sc->px_curr_state > sc->px_max_avail) {
+		/* The old state is too high; move to the new max. */
+		acpi_px_to_set(sc->dev,
+		    &sc->px_states[sc->px_max_avail], &set);
+		acpi_px_set(sc->dev, &set);
+	}
+}
+
+static int
+acpi_px_to_set(device_t dev, struct acpi_px *px, struct cf_setting *set)
+{
+
+	/* Translate one ACPI Px state into a generic cpufreq setting. */
+	if (set == NULL || px == NULL)
+		return (EINVAL);
+
+	set->dev = dev;
+	set->volts = CPUFREQ_VAL_UNKNOWN;
+	set->power = px->power;
+	set->freq = px->core_freq;
+	/* XXX Include BM latency too? */
+	set->lat = px->trans_lat;
+	return (0);
+}
+
+static int
+acpi_px_settings(device_t dev, struct cf_setting *sets, int *count, int *type)
+{
+	struct acpi_perf_softc *sc;
+	int src, dst;
+
+	if (count == NULL || sets == NULL)
+		return (EINVAL);
+	sc = device_get_softc(dev);
+	if (*count < sc->px_count - sc->px_max_avail)
+		return (ENOMEM);
+
+	/* Convert every state from index px_max_avail to the table's end. */
+	for (src = sc->px_max_avail, dst = 0; src < sc->px_count; src++, dst++)
+		acpi_px_to_set(dev, &sc->px_states[src], &sets[dst]);
+
+	/* Tell the caller how many were written and that they are absolute. */
+	*count = sc->px_count - sc->px_max_avail;
+	*type = CPUFREQ_TYPE_ABSOLUTE;
+	return (0);
+}
+
+static int
+acpi_px_set(device_t dev, const struct cf_setting *set)
+{
+	struct acpi_perf_softc *sc;
+	int i, status, sts_val, tries;
+
+	if (set == NULL)
+		return (EINVAL);
+	sc = device_get_softc(dev);
+
+	/* Search the currently available states for a matching frequency. */
+	for (i = sc->px_max_avail; i < sc->px_count; i++)
+		if (CPUFREQ_CMP(set->freq, sc->px_states[i].core_freq))
+			break;
+	if (i == sc->px_count)
+		return (EINVAL);
+
+	/* Request the transition by writing the state's control value. */
+	PX_SET_REG(sc->perf_ctrl, sc->px_states[i].ctrl_val);
+
+	/* Try for up to 1 ms to verify the desired state was selected. */
+	sts_val = sc->px_states[i].sts_val;
+	tries = 0;
+	for (;;) {
+		if (tries == 100) {
+			device_printf(dev, "Px transition to %d failed\n",
+			    sc->px_states[i].core_freq);
+			return (ENXIO);
+		}
+		status = PX_GET_REG(sc->perf_status);
+		if (status == sts_val)
+			break;
+		DELAY(10);
+		tries++;
+	}
+	sc->px_curr_state = i;
+	return (0);
+}
+
+static int
+acpi_px_get(device_t dev, struct cf_setting *set)
+{
+	struct acpi_perf_softc *sc;
+	struct pcpu *pc;
+	uint64_t rate;
+	int i;
+
+	if (set == NULL)
+		return (EINVAL);
+	sc = device_get_softc(dev);
+
+	/*
+	 * No cached state yet: estimate the clock rate, scale it down by
+	 * 10^6, and look for a state whose core frequency matches.
+	 */
+	if (sc->px_curr_state == CPUFREQ_VAL_UNKNOWN) {
+		pc = cpu_get_pcpu(dev);
+		if (pc == NULL)
+			return (ENXIO);
+		cpu_est_clockrate(pc->pc_cpuid, &rate);
+		rate /= 1000000;
+		for (i = 0; i < sc->px_count; i++) {
+			if (CPUFREQ_CMP(sc->px_states[i].core_freq, rate)) {
+				sc->px_curr_state = i;
+				break;
+			}
+		}
+
+		/* No match, give up. */
+		if (i == sc->px_count) {
+			set->freq = CPUFREQ_VAL_UNKNOWN;
+			return (0);
+		}
+	}
+
+	/* Report the cached or newly matched state. */
+	acpi_px_to_set(dev, &sc->px_states[sc->px_curr_state], set);
+	return (0);
+}
Index: kern/cpufreq_if.m
===================================================================
RCS file: kern/cpufreq_if.m
diff -N kern/cpufreq_if.m
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ kern/cpufreq_if.m	1 Feb 2005 05:49:04 -0000
@@ -0,0 +1,92 @@
+#
+# Copyright (c) 2004 Nate Lawson
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+#include <sys/bus.h>
+
+INTERFACE cpufreq;
+
+HEADER {
+	struct cf_level;
+	struct cf_setting;
+};
+
+# cpufreq interface methods (implemented by the cpufreq control device)
+
+#
+# Set the current CPU frequency level at the given priority.
+#
+METHOD int set {
+	device_t		dev;
+	const struct cf_level	*level;
+	int			priority;
+};
+
+#
+# Get the currently active level, filling in *level.
+#
+METHOD int get {
+	device_t		dev;
+	struct cf_level		*level;
+};
+
+#
+# Get the currently possible levels from all drivers (count is in/out).
+#
+METHOD int levels {
+	device_t		dev;
+	struct cf_level		*levels;
+	int			*count;
+};
+
+# Individual frequency driver methods (implemented by hardware drivers)
+
+#
+# Set an individual driver's setting.
+#
+METHOD int drv_set {
+	device_t		dev;
+	const struct cf_setting	*set;
+};
+
+#
+# Get an individual driver's current setting, filling in *set.
+#
+METHOD int drv_get {
+	device_t		dev;
+	struct cf_setting	*set;
+};
+
+#
+# Get the settings supported by a driver (count is in/out; type is out).
+#
+METHOD int drv_settings {
+	device_t		dev;
+	struct cf_setting	*sets;
+	int			*count;
+	int			*type;
+};
Index: kern/kern_cpu.c
===================================================================
RCS file: kern/kern_cpu.c
diff -N kern/kern_cpu.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ kern/kern_cpu.c	1 Feb 2005 07:15:16 -0000
@@ -0,0 +1,533 @@
+/*-
+ * Copyright (c) 2004-2005 Nate Lawson (SDG)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/sbuf.h>
+
+#include "cpufreq_if.h"
+
+/*
+ * Common CPU frequency glue code.  Drivers for specific hardware can
+ * attach this interface to allow users to get/set the CPU frequency.
+ */
+
+/*
+ * Number of levels we can handle.  Levels are synthesized from settings
+ * so for N settings there may be N^2 levels.
+ */
+#define CF_MAX_LEVELS	32
+
+struct cpufreq_softc {
+	struct cf_level			curr_level; /* Current level. */
+	int				priority; /* Priority of last set. */
+	struct cf_level_lst		all_levels; /* Known levels, sorted. */
+	device_t			dev; /* This cpufreq device. */
+	struct sysctl_ctx_list		sysctl_ctx; /* Holds our sysctls. */
+};
+
+struct cf_setting_array {
+	struct cf_setting		sets[MAX_SETTINGS];
+	int				count;
+	TAILQ_ENTRY(cf_setting_array)	link;
+};
+
+TAILQ_HEAD(cf_setting_lst, cf_setting_array);
+
+static int	cpufreq_attach(device_t dev);
+static int	cpufreq_detach(device_t dev);
+static void	cpufreq_evaluate(void *arg);
+static int	cf_set_method(device_t dev, const struct cf_level *level,
+		    int priority);
+static int	cf_get_method(device_t dev, struct cf_level *level);
+static int	cf_levels_method(device_t dev, struct cf_level *levels,
+		    int *count);
+static int	cpufreq_insert_abs(struct cf_level_lst *list,
+		    struct cf_setting *sets, int count);
+static int	cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS);
+static int	cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS);
+
+static device_method_t cpufreq_methods[] = {
+	DEVMETHOD(device_probe,		bus_generic_probe),
+	DEVMETHOD(device_attach,	cpufreq_attach),
+	DEVMETHOD(device_detach,	cpufreq_detach),
+
+        DEVMETHOD(cpufreq_set,		cf_set_method),
+        DEVMETHOD(cpufreq_get,		cf_get_method),
+        DEVMETHOD(cpufreq_levels,	cf_levels_method),
+	{0, 0}
+};
+static driver_t cpufreq_driver = {
+	"cpufreq", cpufreq_methods, sizeof(struct cpufreq_softc)
+};
+static devclass_t cpufreq_dc;
+DRIVER_MODULE(cpufreq, cpu, cpufreq_driver, cpufreq_dc, 0, 0);
+
+static eventhandler_tag cf_ev_tag;
+
+static int
+cpufreq_attach(device_t dev)
+{
+	struct cpufreq_softc *sc;
+	device_t cpu;
+
+	/* Start out with an empty level list and an unknown frequency. */
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+	sc->curr_level.total_set.freq = CPUFREQ_VAL_UNKNOWN;
+	TAILQ_INIT(&sc->all_levels);
+	sysctl_ctx_init(&sc->sysctl_ctx);
+
+	/*
+	 * Only initialize one set of sysctls for all CPUs.  In the future,
+	 * if multiple CPUs can have different settings, we can move these
+	 * sysctls to be under every CPU instead of just the first one.
+	 */
+	if (devclass_get_count(cpufreq_dc) > 1)
+		return (0);
+
+	/* The sysctls live in the parent (cpu) device's tree. */
+	cpu = device_get_parent(dev);
+	SYSCTL_ADD_PROC(&sc->sysctl_ctx,
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(cpu)),
+	    OID_AUTO, "freq", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
+	    cpufreq_curr_sysctl, "I", "Current CPU frequency");
+	SYSCTL_ADD_PROC(&sc->sysctl_ctx,
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(cpu)),
+	    OID_AUTO, "freq_levels", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+	    cpufreq_levels_sysctl, "A", "CPU frequency levels");
+	/* Listen for cpufreq_changed events. */
+	cf_ev_tag = EVENTHANDLER_REGISTER(cpufreq_changed, cpufreq_evaluate,
+	    NULL, EVENTHANDLER_PRI_ANY);
+	return (0);
+}
+
+static int
+cpufreq_detach(device_t dev)
+{
+	struct cpufreq_softc *sc;
+
+	sc = device_get_softc(dev);
+	sysctl_ctx_free(&sc->sysctl_ctx);
+
+	/*
+	 * The event handler is shared by all cpufreq devices, so only
+	 * remove it when the last one is detaching.
+	 */
+	if (devclass_get_count(cpufreq_dc) == 1)
+		EVENTHANDLER_DEREGISTER(cpufreq_changed, cf_ev_tag);
+	return (0);
+}
+
+static void
+cpufreq_evaluate(void *arg)
+{
+	/* TODO: Re-evaluate when notified of driver changes; no-op for now. */
+}
+
+static int
+cf_set_method(device_t dev, const struct cf_level *level, int priority)
+{
+	struct cpufreq_softc *sc;
+	const struct cf_setting *aset;
+	int error;
+
+	sc = device_get_softc(dev);
+
+	/* Nothing to do if the total frequency already matches. */
+	if (CPUFREQ_CMP(sc->curr_level.total_set.freq, level->total_set.freq))
+		return (0);
+
+	/*
+	 * First, hand the absolute setting to the driver that owns it.  A
+	 * setting without a driver is skipped; a driver that is no longer
+	 * attached counts as a failure.
+	 */
+	error = 0;
+	aset = &level->abs_set;
+	if (aset->dev != NULL) {
+		if (device_is_attached(aset->dev))
+			error = CPUFREQ_DRV_SET(aset->dev, aset);
+		else
+			error = ENXIO;
+	}
+	if (error != 0) {
+		device_printf(aset->dev, "set freq failed, err %d\n", error);
+		return (error);
+	}
+
+	/* TODO: Next, set any/all relative frequencies via their drivers. */
+
+	/* Record the current level. */
+	sc->curr_level = *level;
+	sc->priority = priority;
+	return (0);
+}
+
+/*
+ * Report the current level in *level, working it out from the drivers or
+ * from an estimated clock rate if it is not cached.  Returns 0 or ENOMEM.
+ */
+static int
+cf_get_method(device_t dev, struct cf_level *level)
+{
+	struct cpufreq_softc *sc;
+	struct cf_level *levels;
+	struct cf_setting *curr_set, set;
+	struct pcpu *pc;
+	device_t *devs;
+	int count, error, i, j, numdevs;
+	uint64_t rate;
+
+	sc = device_get_softc(dev);
+	curr_set = &sc->curr_level.total_set;
+	levels = NULL;
+
+	/* If we already know the current frequency, we're done. */
+	if (curr_set->freq != CPUFREQ_VAL_UNKNOWN)
+		goto out;
+
+	/*
+	 * Ask each attached child of our parent for its current setting and
+	 * match it against the absolute frequency of every known level.
+	 */
+	count = CF_MAX_LEVELS;
+	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
+	if (levels == NULL)
+		return (ENOMEM);
+	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
+	if (error)
+		goto out;
+	error = device_get_children(device_get_parent(dev), &devs, &numdevs);
+	if (error)
+		goto out;
+	for (i = 0; i < numdevs && curr_set->freq == CPUFREQ_VAL_UNKNOWN; i++) {
+		if (!device_is_attached(devs[i]))
+			continue;
+		error = CPUFREQ_DRV_GET(devs[i], &set);
+		if (error)
+			continue;
+		for (j = 0; j < count; j++) {
+			if (CPUFREQ_CMP(set.freq, levels[j].abs_set.freq)) {
+				sc->curr_level = levels[j];
+				break;
+			}
+		}
+	}
+	free(devs, M_TEMP);
+	if (curr_set->freq != CPUFREQ_VAL_UNKNOWN)
+		goto out;
+
+	/* No driver matched, so estimate the rate and match a level. */
+	pc = cpu_get_pcpu(dev);
+	if (pc == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+	cpu_est_clockrate(pc->pc_cpuid, &rate);
+	rate /= 1000000;
+	for (i = 0; i < count; i++) {
+		if (CPUFREQ_CMP(rate, levels[i].total_set.freq)) {
+			sc->curr_level = levels[i];
+			break;
+		}
+	}
+
+out:
+	if (levels)
+		free(levels, M_TEMP);
+	*level = sc->curr_level;
+	return (0);
+}
+
+/*
+ * Rebuild the level list from all drivers; *count is capacity in, count out.
+ */
+static int
+cf_levels_method(device_t dev, struct cf_level *levels, int *count)
+{
+	struct cpufreq_softc *sc;
+	struct cf_level *lev;
+	struct cf_setting *sets;
+	struct pcpu *pc;
+	device_t *devs;
+	int error, i, numdevs, numlevels, set_count, type;
+	uint64_t rate;
+
+	if (levels == NULL || count == NULL)
+		return (EINVAL);
+
+	sc = device_get_softc(dev);
+	error = device_get_children(device_get_parent(dev), &devs, &numdevs);
+	if (error)
+		return (error);
+	sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT);
+	if (sets == NULL) {
+		free(devs, M_TEMP);
+		return (ENOMEM);
+	}
+
+	/* Clear all previous levels. */
+	while ((lev = TAILQ_FIRST(&sc->all_levels)) != NULL) {
+		TAILQ_REMOVE(&sc->all_levels, lev, link);
+		free(lev, M_TEMP);
+	}
+
+	/* Get settings from all cpufreq drivers. */
+	numlevels = 0;
+	for (i = 0; i < numdevs; i++) {
+		if (!device_is_attached(devs[i]))
+			continue;
+		set_count = MAX_SETTINGS;
+		error = CPUFREQ_DRV_SETTINGS(devs[i], sets, &set_count, &type);
+		if (error || set_count == 0)
+			continue;
+		error = cpufreq_insert_abs(&sc->all_levels, sets, set_count);
+		if (error)
+			goto out;
+		numlevels += set_count;
+	}
+
+	/* If there are no absolute levels, create a fake one at 100%. */
+	if (TAILQ_EMPTY(&sc->all_levels)) {
+		bzero(&sets[0], sizeof(*sets));
+		pc = cpu_get_pcpu(dev);
+		if (pc == NULL) {
+			error = ENXIO;
+			goto out;
+		}
+		cpu_est_clockrate(pc->pc_cpuid, &rate);
+		sets[0].freq = rate / 1000000;
+		error = cpufreq_insert_abs(&sc->all_levels, sets, 1);
+		if (error)
+			goto out;
+		numlevels = 1;
+	}
+
+	/* If the caller doesn't have enough space, return the actual count. */
+	if (numlevels > *count) {
+		*count = numlevels;
+		error = E2BIG;
+		goto out;
+	}
+
+	/* TODO: Merge in relative levels; for now total equals absolute. */
+	i = 0;
+	TAILQ_FOREACH(lev, &sc->all_levels, link) {
+		lev->total_set = lev->abs_set;
+		lev->total_set.dev = NULL;
+		levels[i++] = *lev;
+	}
+	*count = i;
+	error = 0;
+
+out:
+	free(devs, M_TEMP);
+	free(sets, M_TEMP);
+	return (error);
+}
+
+/*
+ * Create levels for an array of absolute settings and insert them in
+ * sorted order (highest frequency first) in the specified list.
+ * Returns ENOMEM if a level cannot be allocated, otherwise 0.
+ */
+static int
+cpufreq_insert_abs(struct cf_level_lst *list, struct cf_setting *sets,
+    int count)
+{
+	struct cf_level *level, *search;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		level = malloc(sizeof(*level), M_TEMP, M_NOWAIT | M_ZERO);
+		if (level == NULL)
+			return (ENOMEM);
+		level->abs_set = sets[i];
+
+		/* Walk up from the tail to the first entry not below us. */
+		TAILQ_FOREACH_REVERSE(search, list, cf_level_lst, link) {
+			if (sets[i].freq <= search->abs_set.freq) {
+				TAILQ_INSERT_AFTER(list, search, level, link);
+				break;
+			}
+		}
+		/* Empty list or a new highest frequency: goes at the head. */
+		if (search == NULL)
+			TAILQ_INSERT_HEAD(list, level, link);
+	}
+	return (0);
+}
+
+static int
+cpufreq_curr_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct cpufreq_softc *sc;
+	struct cf_level *levels;
+	int count, error, freq, i;
+
+	sc = oidp->oid_arg1;
+	count = CF_MAX_LEVELS;
+	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
+	if (levels == NULL)
+		return (ENOMEM);
+
+	/* Report the current frequency, using slot 0 as scratch space. */
+	error = CPUFREQ_GET(sc->dev, &levels[0]);
+	if (error == 0) {
+		freq = levels[0].total_set.freq;
+		error = sysctl_handle_int(oidp, &freq, 0, req);
+	}
+	if (error != 0 || req->newptr == NULL)
+		goto done;
+	/* A new value was written: switch to the level that matches it. */
+	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
+	if (error != 0)
+		goto done;
+	for (i = 0; i < count; i++) {
+		if (CPUFREQ_CMP(levels[i].total_set.freq, freq)) {
+			error = CPUFREQ_SET(sc->dev, &levels[i],
+			    CPUFREQ_PRIO_USER);
+			break;
+		}
+	}
+	if (i == count)
+		error = EINVAL;
+
+done:
+	free(levels, M_TEMP);
+	return (error);
+}
+
+/* Sysctl handler: report every level as a list of "freq/power" pairs. */
+static int
+cpufreq_levels_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct cpufreq_softc *sc;
+	struct cf_level *levels;
+	struct sbuf sb;
+	int count, error, i;
+
+	sc = oidp->oid_arg1;
+	/* Allocate first so that a failure here cannot leak the sbuf. */
+	count = CF_MAX_LEVELS;
+	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
+	if (levels == NULL)
+		return (ENOMEM);
+	sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND);
+
+	/* Get settings from the device and generate the output string. */
+	error = CPUFREQ_LEVELS(sc->dev, levels, &count);
+	if (error)
+		goto out;
+	if (count) {
+		for (i = 0; i < count; i++)
+			sbuf_printf(&sb, "%d/%d ", levels[i].total_set.freq,
+			    levels[i].total_set.power);
+	} else
+		sbuf_cpy(&sb, "0");
+	sbuf_trim(&sb);
+	sbuf_finish(&sb);
+	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
+
+out:
+	free(levels, M_TEMP);
+	sbuf_delete(&sb);
+	return (error);
+}
+
+int
+cpufreq_register(device_t dev)
+{
+	device_t cf_dev, cpu0;
+
+	/*
+	 * Only add one cpufreq device (on cpu0) for all control.  Once
+	 * independent multi-cpu control appears, we can assign one cpufreq
+	 * device per cpu.
+	 */
+	if (devclass_get_device(cpufreq_dc, 0) != NULL) {
+		device_printf(dev,
+		    "warning: only one cpufreq device at a time supported\n");
+		return (0);
+	}
+
+	/* Create the control device as a quiet child of cpu unit 0. */
+	cpu0 = devclass_get_device(devclass_find("cpu"), 0);
+	cf_dev = BUS_ADD_CHILD(cpu0, 0, "cpufreq", 0);
+	if (cf_dev == NULL)
+		return (ENOMEM);
+	device_quiet(cf_dev);
+
+	/* Attaching it is what sets up the sysctls. */
+	return (device_probe_and_attach(cf_dev));
+}
+
+int
+cpufreq_unregister(device_t dev)
+{
+	device_t cf_dev, *devs;
+	int cfcount, count, devcount, error, i, type;
+	struct cf_setting set;
+
+	error = device_get_children(device_get_parent(dev), &devs, &devcount);
+	if (error)
+		return (error);
+	cf_dev = devclass_get_device(cpufreq_dc, 0);
+	KASSERT(cf_dev != NULL, ("unregister with no cpufreq dev"));
+
+	/*
+	 * Count the attached children of our parent that are cpufreq
+	 * drivers.  We identify them by calling a method they support.
+	 */
+	cfcount = 0;
+	for (i = 0; i < devcount; i++) {
+		if (!device_is_attached(devs[i]))
+			continue;
+		count = 1;
+		if (CPUFREQ_DRV_SETTINGS(devs[i], &set, &count, &type) == 0)
+			cfcount++;
+	}
+	free(devs, M_TEMP);
+
+	/* If this is the last such driver, remove the control device too. */
+	if (cfcount <= 1)
+		device_delete_child(device_get_parent(cf_dev), cf_dev);
+	return (0);
+}
Index: dev/cpufreq/speedstep_ich.c
===================================================================
RCS file: dev/cpufreq/speedstep_ich.c
diff -N dev/cpufreq/speedstep_ich.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ dev/cpufreq/speedstep_ich.c	1 Feb 2005 16:24:21 -0000
@@ -0,0 +1,372 @@
+/*-
+ * Copyright (c) 2004-2005 Nate Lawson (SDG)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/pcpu.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <dev/pci/pcivar.h>
+#include <machine/clock.h>
+#include <machine/resource.h>
+#include <sys/rman.h>
+
+#include "cpufreq_if.h"
+
+/*
+ * The SpeedStep ICH feature is a chipset-initiated voltage and frequency
+ * transition available on the ICH2M, 3M, and 4M.  It is different from
+ * the newer Pentium-M SpeedStep feature.  It offers only two levels of
+ * frequency/voltage.  Often, the BIOS will select one of the levels via
+ * SMM code during the power-on process (i.e., choose a lower level if the
+ * system is off AC power.)
+ */
+
+struct ichss_softc {
+	device_t	 dev;		/* Our own device. */
+	int		 bm_rid;	/* Bus-mastering control (PM2REG). */
+	struct resource	*bm_reg;	/* I/O port at PMBASE + 0x20. */
+	int		 ctrl_rid;	/* Control/status register. */
+	struct resource	*ctrl_reg;	/* I/O port at PMBASE + 0x50. */
+	struct cf_setting sets[2];	/* By control bit: 0 high, 1 low. */
+};
+
+/* Supported PCI IDs. */
+#define PCI_VENDOR_INTEL	0x8086
+#define PCI_DEV_82801BA		0x244c /* ICH2M */
+#define PCI_DEV_82801CA		0x248c /* ICH3M */
+#define PCI_DEV_82801DB		0x24cc /* ICH4M */
+#define PCI_DEV_82815BA		0x1130 /* Unsupported/buggy part */
+
+/* PCI config registers for finding PMBASE and enabling SpeedStep. */
+#define ICHSS_PMBASE_OFFSET	0x40
+#define ICHSS_PMCFG_OFFSET	0xa0
+
+/* Values and masks. */
+#define ICHSS_ENABLE		(1<<3)	/* Enable SpeedStep control. */
+#define ICHSS_IO_REG		0x1	/* Access register via I/O space. */
+#define ICHSS_PMBASE_MASK	0xff80	/* PMBASE address bits. */
+#define ICHSS_CTRL_BIT		0x1	/* 0 is high speed, 1 is low. */
+#define ICHSS_BM_DISABLE	0x1
+
+/* Offsets from PMBASE for various registers. */
+#define ICHSS_BM_OFFSET		0x20
+#define ICHSS_CTRL_OFFSET	0x50
+
+#define ICH_GET_REG(reg) 				\
+	(bus_space_read_1(rman_get_bustag((reg)), 	\
+	    rman_get_bushandle((reg)), 0))
+#define ICH_SET_REG(reg, val)				\
+	(bus_space_write_1(rman_get_bustag((reg)), 	\
+	    rman_get_bushandle((reg)), 0, (val)))
+
+static int	ichss_pci_probe(device_t dev);
+static int	ichss_probe(device_t dev);
+static int	ichss_attach(device_t dev);
+static int	ichss_detach(device_t dev);
+static int	ichss_settings(device_t dev, struct cf_setting *sets,
+		    int *count, int *type);
+static int	ichss_set(device_t dev, const struct cf_setting *set);
+static int	ichss_get(device_t dev, struct cf_setting *set);
+
+static device_method_t ichss_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		ichss_probe),
+	DEVMETHOD(device_attach,	ichss_attach),
+	DEVMETHOD(device_detach,	ichss_detach),
+
+	/* cpufreq interface */
+	DEVMETHOD(cpufreq_drv_set,	ichss_set),
+	DEVMETHOD(cpufreq_drv_get,	ichss_get),
+	DEVMETHOD(cpufreq_drv_settings,	ichss_settings),
+	{0, 0}
+};
+static driver_t ichss_driver = {
+	"ichss", ichss_methods, sizeof(struct ichss_softc)
+};
+static devclass_t ichss_devclass;
+DRIVER_MODULE(ichss, cpu, ichss_driver, ichss_devclass, 0, 0);
+
+static device_method_t ichss_pci_methods[] = {
+	DEVMETHOD(device_probe,		ichss_pci_probe),
+	{0, 0}
+};
+static driver_t ichss_pci_driver = {
+	"ichss_pci", ichss_pci_methods, 0
+};
+static devclass_t ichss_pci_devclass;
+DRIVER_MODULE(ichss_pci, pci, ichss_pci_driver, ichss_pci_devclass, 0, 0);
+
+#if 0
+#define DPRINT(x...)	printf(x)
+#else
+#define DPRINT(x...)
+#endif
+
+/*
+ * We detect the chipset by looking for its LPC bus ID during the PCI
+ * scan and reading its config registers during the probe.  However,
+ * we add the ichss child under the cpu device since even though the
+ * chipset provides the control, it really affects the cpu only.
+ *
+ * XXX This approach does not work if the module is loaded after boot.
+ */
+static int
+ichss_pci_probe(device_t dev)
+{
+	device_t child, parent;
+	uint32_t pmbase;
+	uint16_t ss_en;
+
+	/*
+	 * TODO: add a quirk to disable if we see the 82815_MC along
+	 * with the 82801BA and revision < 5.
+	 */
+	if (pci_get_vendor(dev) != PCI_VENDOR_INTEL ||
+	    (pci_get_device(dev) != PCI_DEV_82801BA &&
+	    pci_get_device(dev) != PCI_DEV_82801CA &&
+	    pci_get_device(dev) != PCI_DEV_82801DB))
+		return (ENXIO);
+
+	/* Only one CPU is supported for this hardware. */
+	if (devclass_get_device(ichss_devclass, 0))
+		return (ENXIO);
+
+	/* Find the PMBASE register from our PCI config header. */
+	pmbase = pci_read_config(dev, ICHSS_PMBASE_OFFSET, sizeof(pmbase));
+	if ((pmbase & ICHSS_IO_REG) == 0) {
+		printf("ichss: invalid PMBASE memory type\n");
+		return (ENXIO);
+	}
+	pmbase &= ICHSS_PMBASE_MASK;
+	if (pmbase == 0) {
+		printf("ichss: invalid zero PMBASE address\n");
+		return (ENXIO);
+	}
+	DPRINT("ichss: PMBASE is %#x\n", pmbase);
+
+	/* Add a child under the CPU parent once PMBASE is known good. */
+	parent = devclass_get_device(devclass_find("cpu"), 0);
+	KASSERT(parent != NULL, ("cpu parent is NULL"));
+	child = BUS_ADD_CHILD(parent, 0, "ichss", 0);
+	if (child == NULL) {
+		device_printf(parent, "add SpeedStep child failed\n");
+		return (ENXIO);
+	}
+
+	/* Add the bus master arbitration and control registers. */
+	bus_set_resource(child, SYS_RES_IOPORT, 0, pmbase + ICHSS_BM_OFFSET,
+	    1);
+	bus_set_resource(child, SYS_RES_IOPORT, 1, pmbase + ICHSS_CTRL_OFFSET,
+	    1);
+
+	/* Activate SpeedStep control if not already enabled. */
+	ss_en = pci_read_config(dev, ICHSS_PMCFG_OFFSET, sizeof(ss_en));
+	if ((ss_en & ICHSS_ENABLE) == 0) {
+		printf("ichss: enabling SpeedStep support\n");
+		pci_write_config(dev, ICHSS_PMCFG_OFFSET,
+		    ss_en | ICHSS_ENABLE, sizeof(ss_en));
+	}
+
+	/* Attach the new CPU child now. */
+	device_probe_and_attach(child);
+	/* Always fail the PCI probe itself; it only serves as a hook. */
+	return (ENXIO);
+}
+
+static int
+ichss_probe(device_t dev)
+{
+	device_t acpi_perf;
+
+	/* Defer to the ACPI perf driver if it has attached. */
+	acpi_perf = devclass_get_device(devclass_find("acpi_perf"), 0);
+	if (acpi_perf == NULL || !device_is_attached(acpi_perf)) {
+		device_set_desc(dev, "SpeedStep ICH");
+		return (-1000);
+	}
+	return (ENXIO);
+}
+
+static int
+ichss_attach(device_t dev)
+{
+	struct ichss_softc *sc;
+	struct cf_setting *def;
+
+	sc = device_get_softc(dev);
+	sc->dev = dev;
+	sc->bm_rid = 0;
+	sc->bm_reg = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &sc->bm_rid,
+	    RF_ACTIVE);
+	if (sc->bm_reg == NULL) {
+		device_printf(dev, "failed to alloc BM arb register\n");
+		return (ENXIO);
+	}
+	sc->ctrl_rid = 1;
+	sc->ctrl_reg = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
+	    &sc->ctrl_rid, RF_ACTIVE);
+	if (sc->ctrl_reg == NULL) {
+		device_printf(dev, "failed to alloc control register\n");
+		bus_release_resource(dev, SYS_RES_IOPORT, sc->bm_rid,
+		    sc->bm_reg);
+		return (ENXIO);
+	}
+
+	/* Both settings start out identical, with unknown frequencies. */
+	def = &sc->sets[0];
+	def->dev = dev;
+	def->lat = 1000;
+	def->freq = CPUFREQ_VAL_UNKNOWN;
+	def->volts = CPUFREQ_VAL_UNKNOWN;
+	def->power = CPUFREQ_VAL_UNKNOWN;
+	sc->sets[1] = *def;
+	cpufreq_register(dev);
+	return (0);
+}
+
+static int
+ichss_detach(device_t dev)
+{
+	/* TODO: teardown BM and CTRL registers.  Always fails for now. */
+	return (ENXIO);
+}
+
+static int
+ichss_settings(device_t dev, struct cf_setting *sets, int *count, int *type)
+{
+	struct ichss_softc *sc;
+	struct cf_setting set;
+	int first, i;
+
+	if (sets == NULL || count == NULL)
+		return (EINVAL);
+	if (*count < 2) {
+		*count = 2;	/* Tell the caller how many are needed. */
+		return (E2BIG);
+	}
+	sc = device_get_softc(dev);
+
+	/*
+	 * ichss_get() fills in the active level; for a level that is still
+	 * unknown, switch to it so ichss_set() calibrates it, then switch back.
+	 */
+	ichss_get(dev, &set);
+	for (i = 0; i < 2; i++) {
+		if (sc->sets[i].freq == CPUFREQ_VAL_UNKNOWN) {
+			first = (i == 0) ? 1 : 0;	/* The other index. */
+			ichss_set(dev, &sc->sets[i]);
+			ichss_set(dev, &sc->sets[first]);
+		}
+	}
+
+	bcopy(sc->sets, sets, sizeof(sc->sets));
+	*count = 2;
+	*type = CPUFREQ_TYPE_ABSOLUTE;
+
+	return (0);
+}
+
+static int
+ichss_set(device_t dev, const struct cf_setting *set)
+{
+	struct ichss_softc *sc;
+	uint8_t bmval, new_val, old_val, req_val;
+	uint64_t rate;
+
+	/* Map the requested frequency to a control bit value (0 or 1). */
+	sc = device_get_softc(dev);
+	if (CPUFREQ_CMP(set->freq, sc->sets[0].freq))
+		req_val = 0;
+	else if (CPUFREQ_CMP(set->freq, sc->sets[1].freq))
+		req_val = ICHSS_CTRL_BIT;
+	else
+		return (EINVAL);
+	DPRINT("ichss: requested setting %d\n", req_val);
+
+	/* Disable interrupts and save the control register's other bits. */
+	disable_intr();
+	old_val = ICH_GET_REG(sc->ctrl_reg) & ~ICHSS_CTRL_BIT;
+
+	/*
+	 * Disable bus master arbitration, write the new value to the control
+	 * register, and then re-enable bus master arbitration.
+	 */
+	bmval = ICH_GET_REG(sc->bm_reg) | ICHSS_BM_DISABLE;
+	ICH_SET_REG(sc->bm_reg, bmval);
+	ICH_SET_REG(sc->ctrl_reg, old_val | req_val);
+	ICH_SET_REG(sc->bm_reg, bmval & ~ICHSS_BM_DISABLE);
+
+	/* Read the control register back and re-enable interrupts. */
+	new_val = ICH_GET_REG(sc->ctrl_reg);
+	enable_intr();
+
+	/* Check if the desired state was indeed selected. */
+	if (req_val != (new_val & ICHSS_CTRL_BIT)) {
+	    device_printf(sc->dev, "transition to %d failed\n", req_val);
+	    return (ENXIO);
+	}
+
+	/* Now in the new state: estimate its frequency if still unknown. */
+	if (sc->sets[req_val].freq == CPUFREQ_VAL_UNKNOWN) {
+		cpu_est_clockrate(0, &rate);
+		sc->sets[req_val].freq = rate / 1000000;
+		DPRINT("ichss: set calibrated new rate of %d\n",
+		    sc->sets[req_val].freq);
+	}
+
+	return (0);
+}
+
+static int
+ichss_get(device_t dev, struct cf_setting *set)
+{
+	struct ichss_softc *sc;
+	struct cf_setting *cur;
+	uint64_t rate;
+
+	/* The control bit selects which of our two settings is active. */
+	sc = device_get_softc(dev);
+	cur = &sc->sets[ICH_GET_REG(sc->ctrl_reg) & ICHSS_CTRL_BIT];
+
+	/* If we haven't changed settings yet, estimate the current value. */
+	if (cur->freq == CPUFREQ_VAL_UNKNOWN) {
+		cpu_est_clockrate(0, &rate);
+		cur->freq = rate / 1000000;
+		DPRINT("ichss: get calibrated new rate of %d\n", cur->freq);
+	}
+	*set = *cur;
+
+	return (0);
+}
Index: modules/Makefile
===================================================================
RCS file: /home/ncvs/src/sys/modules/Makefile,v
--- modules/Makefile.orig	Tue Jan  4 09:36:38 2005
+++ modules/Makefile	Wed Feb  2 12:40:02 2005
@@ -1,4 +1,4 @@
-# $FreeBSD: src/sys/modules/Makefile,v 1.393.2.6 2004/12/30 00:48:36 obrien Exp $
+# $FreeBSD: src/sys/modules/Makefile,v 1.400 2004/09/10 20:57:45 wpaul Exp $
 
 # pcic -- currently broken and being worked on out of tree.
 # oldcard -- specialized use for debugging only.
@@ -48,6 +48,7 @@
 	coda5 \
 	${_coff} \
 	${_cp} \
+	cpufreq \
 	${_crypto} \
 	${_cryptodev} \
 	${_ctau} \
Index: modules/acpi/Makefile
===================================================================
RCS file: /home/ncvs/src/sys/modules/acpi/Makefile,v
--- modules/acpi/Makefile.orig	Wed Jul 21 10:28:16 2004
+++ modules/acpi/Makefile	Wed Feb  2 12:46:29 2005
@@ -1,5 +1,6 @@
 # $FreeBSD: src/sys/modules/acpi/Makefile,v 1.40 2004/07/21 14:47:54 nyan Exp $
 
-SUBDIR=		acpi acpi_asus acpi_panasonic acpi_toshiba acpi_video
+SUBDIR=		acpi acpi_asus acpi_panasonic acpi_perf \
+       acpi_toshiba acpi_video
 
 .include <bsd.subdir.mk>
Index: modules/acpi/acpi_perf/Makefile
===================================================================
RCS file: modules/acpi/acpi_perf/Makefile
diff -N modules/acpi/acpi_perf/Makefile
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ modules/acpi/acpi_perf/Makefile	31 Jan 2005 06:00:37 -0000
@@ -0,0 +1,11 @@
+# $FreeBSD$
+
+.PATH:		${.CURDIR}/../../../dev/acpica
+CFLAGS+=	-I${.CURDIR}/../../../contrib/dev/acpica
+
+KMOD=		acpi_perf
+WARNS?=		2
+SRCS=		acpi_perf.c
+SRCS+=		acpi_if.h bus_if.h cpufreq_if.h device_if.h opt_acpi.h
+
+.include <bsd.kmod.mk>
Index: conf/files
===================================================================
RCS file: /home/ncvs/src/sys/conf/files,v
--- conf/files.orig	Thu Jan 27 10:06:48 2005
+++ conf/files	Wed Feb  2 12:50:43 2005
@@ -1,4 +1,4 @@
-# $FreeBSD: src/sys/conf/files,v 1.943.2.6 2005/01/25 16:26:25 rik Exp $
+# $FreeBSD: src/sys/conf/files,v 1.953 2004/09/16 20:35:27 glebius Exp $
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
@@ -58,6 +58,7 @@
 kern/device_if.m		standard
 kern/bus_if.m			standard
 kern/clock_if.m			optional genclock
+kern/cpufreq_if.m		standard
 kern/linker_if.m		standard
 cam/cam.c		optional scbus
 cam/cam_periph.c	optional scbus
@@ -1075,6 +1076,7 @@
 kern/kern_condvar.c	standard
 kern/kern_conf.c	standard
 kern/kern_context.c	standard
+kern/kern_cpu.c		standard
 kern/kern_descrip.c	standard
 kern/kern_poll.c	optional device_polling
 kern/kern_environment.c	standard
Index: conf/kmod.mk
===================================================================
RCS file: /home/ncvs/src/sys/conf/kmod.mk,v
--- conf/kmod.mk.orig	Sat Aug 14 16:53:04 2004
+++ conf/kmod.mk	Wed Feb  2 13:09:26 2005
@@ -290,7 +290,7 @@
 .endfor
 .endif
 
-MFILES?= kern/bus_if.m kern/device_if.m dev/iicbus/iicbb_if.m \
+MFILES?= kern/bus_if.m kern/cpufreq_if.m kern/device_if.m dev/iicbus/iicbb_if.m \
     dev/iicbus/iicbus_if.m isa/isa_if.m \
     libkern/iconv_converter_if.m \
     dev/acpica/acpi_if.m dev/eisa/eisa_if.m dev/mii/miibus_if.m \


More information about the freebsd-acpi mailing list