HWPMC changes: sparse CPU numbering and hot plug preliminaries

Joseph Koshy jkoshy at FreeBSD.org
Mon Mar 17 07:13:28 UTC 2008


The following patch implements support for sparse CPU numbering in
HWPMC and lays the groundwork for future hot plugging of CPUs.

Highlights:

*) CPUs are now numbered in the range 0..{PMC_CPU_MAX}-1, where today
  {PMC_CPU_MAX} is derived from `mp_maxid' (it is `mp_maxid' + 1).

*) CPUs are treated as being in one of the following states:

   ABSENT, i.e., not present
   DISABLED, i.e., present but administratively disabled, perhaps
     in preparation to be pulled out.
   ACTIVE, i.e., present and participating in scheduling and capable
     of fielding interrupts.

   There is a set of new predicates in "sys/kern/kern_pmc.c" that the
   module can use.

*) Initialization and teardown have been split into two logical
   parts:

   - Initialization and teardown that needs to be done for all CPUs
     whether 'active' or not.

   - Initialization and teardown for 'active' CPUs.

   The second kind of initialization/teardown happens when a CPU
   changes state; the first is for things to be done at module
   load/unload time.

   Code for the existing PMC 'MD' layers has been changed to the new
   scheme.

*) The asserts have been changed to use the new support functions in
   kern_pmc.c; this reduces reliance on specifics of the kernel's
   implementation (e.g., the direct use of variables like `mp_maxid').

In this new scheme, userland will have to cope with the possibility
that a working PMC may become inaccessible at run time.

The rest of the implementation of hot-plug support depends on how the
base kernel notifies modules of the arrival or departure of a CPU.

An eventhandler callback would be sufficient for CPU arrivals, but CPU
departures are more complex.  For example, we need to distinguish
controlled CPU departures from uncontrolled ones.  The controlled case
is the one where HWPMC code gets a chance to run on the departing CPU
before it leaves.  In the uncontrolled case, all HWPMC can do is clean
up its internal data structures.

Regards,
Koshy

-------------- next part --------------
Index: sys/dev/hwpmc/hwpmc_amd.c
===================================================================
RCS file: /cvs/FreeBSD/src/sys/dev/hwpmc/hwpmc_amd.c,v
retrieving revision 1.14
diff -u -r1.14 hwpmc_amd.c
--- sys/dev/hwpmc/hwpmc_amd.c	7 Dec 2007 08:20:15 -0000	1.14
+++ sys/dev/hwpmc/hwpmc_amd.c	16 Mar 2008 11:11:32 -0000
@@ -39,6 +39,7 @@
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pmc.h>
+#include <sys/pmckern.h>
 #include <sys/smp.h>
 #include <sys/systm.h>
 
@@ -269,7 +270,7 @@
 	const struct pmc_hw *phw;
 	pmc_value_t tmp;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < AMD_NPMCS,
 	    ("[amd,%d] illegal row-index %d", __LINE__, ri));
@@ -324,7 +325,7 @@
 	const struct pmc_hw *phw;
 	enum pmc_mode mode;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < AMD_NPMCS,
 	    ("[amd,%d] illegal row-index %d", __LINE__, ri));
@@ -371,7 +372,7 @@
 
 	PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < AMD_NPMCS,
 	    ("[amd,%d] illegal row-index %d", __LINE__, ri));
@@ -453,7 +454,7 @@
 
 	(void) cpu;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < AMD_NPMCS,
 	    ("[amd,%d] illegal row index %d", __LINE__, ri));
@@ -547,7 +548,7 @@
 
 	(void) pmc;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < AMD_NPMCS,
 	    ("[amd,%d] illegal row-index %d", __LINE__, ri));
@@ -579,7 +580,7 @@
 	struct pmc_hw *phw;
 	const struct amd_descr *pd;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < AMD_NPMCS,
 	    ("[amd,%d] illegal row-index %d", __LINE__, ri));
@@ -628,7 +629,7 @@
 	const struct amd_descr *pd;
 	uint64_t config;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < AMD_NPMCS,
 	    ("[amd,%d] illegal row-index %d", __LINE__, ri));
@@ -680,7 +681,7 @@
 	struct pmc_hw *phw;
 	pmc_value_t v;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] out of range CPU %d", __LINE__, cpu));
 
 	PMCDBG(MDP,INT,1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
@@ -760,7 +761,7 @@
 	const struct amd_descr *pd;
 	struct pmc_hw *phw;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] illegal CPU %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < AMD_NPMCS,
 	    ("[amd,%d] row-index %d out of range", __LINE__, ri));
@@ -829,7 +830,7 @@
 	struct amd_cpu *pcs;
 	struct pmc_hw  *phw;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] insane cpu number %d", __LINE__, cpu));
 
 	PMCDBG(MDP,INI,1,"amd-init cpu=%d", cpu);
@@ -859,38 +860,44 @@
 	return 0;
 }
 
-
 /*
- * processor dependent cleanup prior to the KLD
- * being unloaded
+ * Cleanup actions needed by an active CPU.
  */
 
 static int
-amd_cleanup(int cpu)
+amd_cpu_cleanup(int cpu)
 {
 	int i;
-	uint32_t evsel;
-	struct pmc_cpu *pcs;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] insane cpu number (%d)", __LINE__, cpu));
-
-	PMCDBG(MDP,INI,1,"amd-cleanup cpu=%d", cpu);
+	PMCDBG(MDP,INI,1,"amd-cpu-cleanup cpu=%d", cpu);
 
 	/*
-	 * First, turn off all PMCs on this CPU.
+	 * Turn off all PMCs on this CPU.
 	 */
+	for (i = 0; i < 4; i++)
+		wrmsr(AMD_PMC_EVSEL_0 + i, 0);
 
-	for (i = 0; i < 4; i++) { /* XXX this loop is now not needed */
-		evsel = rdmsr(AMD_PMC_EVSEL_0 + i);
-		evsel &= ~AMD_PMC_ENABLE;
-		wrmsr(AMD_PMC_EVSEL_0 + i, evsel);
-	}
+	return (0);
+}
+
+/*
+ * Common cleanup.
+ */
+
+static int
+amd_cleanup(int cpu)
+{
+	struct pmc_cpu *pcs;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[amd,%d] insane cpu number (%d)", __LINE__, cpu));
+	PMCDBG(MDP,INI,1,"amd-cleanup cpu=%d", cpu);
 
 	/*
-	 * Next, free up allocated space.
+	 * Free up allocated space.
 	 */
-
 	if ((pcs = pmc_pcpu[cpu]) == NULL)
 		return 0;
 
@@ -988,6 +995,8 @@
 
 	pmc_mdep->pmd_init    	   = amd_init;
 	pmc_mdep->pmd_cleanup 	   = amd_cleanup;
+	pmc_mdep->pmd_cpu_init 	   = NULL;
+	pmc_mdep->pmd_cpu_cleanup  = amd_cpu_cleanup;
 	pmc_mdep->pmd_switch_in    = amd_switch_in;
 	pmc_mdep->pmd_switch_out   = amd_switch_out;
 	pmc_mdep->pmd_read_pmc 	   = amd_read_pmc;
Index: sys/dev/hwpmc/hwpmc_mod.c
===================================================================
RCS file: /cvs/FreeBSD/src/sys/dev/hwpmc/hwpmc_mod.c,v
retrieving revision 1.32
diff -u -r1.32 hwpmc_mod.c
--- sys/dev/hwpmc/hwpmc_mod.c	13 Jan 2008 14:44:02 -0000	1.32
+++ sys/dev/hwpmc/hwpmc_mod.c	17 Mar 2008 04:05:50 -0000
@@ -98,8 +98,8 @@
 	KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
 		    __LINE__));						  \
 	atomic_add_int(&pmc_pmcdisp[(R)], -1);				  \
-	KASSERT(pmc_pmcdisp[(R)] >= (-mp_ncpus), ("[pmc,%d] row "	  \
-		"disposition error", __LINE__));			  \
+	KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_active_count()),		  \
+		("[pmc,%d] row disposition error", __LINE__));		  \
 } while (0)
 
 #define	PMC_UNMARK_ROW_STANDALONE(R) do { 				  \
@@ -637,12 +637,12 @@
 static void
 pmc_select_cpu(int cpu)
 {
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[pmc,%d] bad cpu number %d", __LINE__, cpu));
 
-	/* never move to a disabled CPU */
-	KASSERT(pmc_cpu_is_disabled(cpu) == 0, ("[pmc,%d] selecting "
-	    "disabled CPU %d", __LINE__, cpu));
+	/* Never move to an inactive CPU. */
+	KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive "
+	    "CPU %d", __LINE__, cpu));
 
 	PMCDBG(CPU,SEL,2, "select-cpu cpu=%d", cpu);
 	thread_lock(curthread);
@@ -1182,7 +1182,7 @@
 	PMCDBG(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
 	    p->p_pid, p->p_comm, pp);
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[pmc,%d] wierd CPU id %d", __LINE__, cpu));
 
 	pc = pmc_pcpu[cpu];
@@ -1307,7 +1307,7 @@
 	PMCDBG(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
 	    p->p_pid, p->p_comm, pp);
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[pmc,%d wierd CPU id %d", __LINE__, cpu));
 
 	pc = pmc_pcpu[cpu];
@@ -2034,7 +2034,7 @@
 #ifdef	DEBUG
 	volatile int maxloop;
 
-	maxloop = 100 * mp_ncpus;
+	maxloop = 100 * pmc_cpu_max();
 #endif
 
 	/*
@@ -2495,7 +2495,7 @@
 
 	cpu = PMC_TO_CPU(pm);
 
-	if (pmc_cpu_is_disabled(cpu))
+	if (!pmc_cpu_is_active(cpu))
 		return ENXIO;
 
 	pmc_select_cpu(cpu);
@@ -2562,10 +2562,10 @@
 
 	cpu = PMC_TO_CPU(pm);
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[pmc,%d] illegal cpu=%d", __LINE__, cpu));
 
-	if (pmc_cpu_is_disabled(cpu))
+	if (!pmc_cpu_is_active(cpu))
 		return ENXIO;
 
 	pmc_select_cpu(cpu);
@@ -2730,7 +2730,7 @@
 		struct pmc_op_getcpuinfo gci;
 
 		gci.pm_cputype = md->pmd_cputype;
-		gci.pm_ncpu    = mp_ncpus;
+		gci.pm_ncpu    = mp_ncpus; /* XXX: use pmc_cpu_max() */
 		gci.pm_npmc    = md->pmd_npmc;
 		gci.pm_nclass  = md->pmd_nclass;
 		bcopy(md->pmd_classes, &gci.pm_classes,
@@ -2798,12 +2798,12 @@
 		if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0)
 			break;
 
-		if (cpu >= (unsigned int) mp_ncpus) {
+		if (cpu >= pmc_cpu_max()) {
 			error = EINVAL;
 			break;
 		}
 
-		if (pmc_cpu_is_disabled(cpu)) {
+		if (!pmc_cpu_is_active(cpu)) {
 			error = ENXIO;
 			break;
 		}
@@ -2892,12 +2892,12 @@
 
 		cpu = pma.pm_cpu;
 
-		if (cpu < 0 || cpu >= mp_ncpus) {
+		if (cpu < 0 || cpu >= (int) pmc_cpu_max()) {
 			error = EINVAL;
 			break;
 		}
 
-		if (pmc_cpu_is_disabled(cpu)) {
+		if (!pmc_cpu_is_active(cpu)) {
 			error = ENXIO;
 			break;
 		}
@@ -2985,7 +2985,7 @@
 
 		if ((mode != PMC_MODE_SS  &&  mode != PMC_MODE_SC  &&
 		     mode != PMC_MODE_TS  &&  mode != PMC_MODE_TC) ||
-		    (cpu != (u_int) PMC_CPU_ANY && cpu >= (u_int) mp_ncpus)) {
+		    (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) {
 			error = EINVAL;
 			break;
 		}
@@ -3002,10 +3002,10 @@
 		}
 
 		/*
-		 * Check that a disabled CPU is not being asked for.
+		 * Check that an inactive CPU is not being asked for.
 		 */
 
-		if (PMC_IS_SYSTEM_MODE(mode) && pmc_cpu_is_disabled(cpu)) {
+		if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) {
 			error = ENXIO;
 			break;
 		}
@@ -3518,7 +3518,7 @@
 			cpu = PMC_TO_CPU(pm);
 			ri  = PMC_TO_ROWINDEX(pm);
 
-			if (pmc_cpu_is_disabled(cpu)) {
+			if (!pmc_cpu_is_active(cpu)) {
 				error = ENXIO;
 				break;
 			}
@@ -4288,6 +4288,7 @@
 pmc_initialize(void)
 {
 	int cpu, error, n;
+	unsigned int maxcpu;
 	struct pmc_binding pb;
 	struct pmc_sample *ps;
 	struct pmc_samplebuffer *sb;
@@ -4345,21 +4346,38 @@
 	if (md == NULL || md->pmd_init == NULL)
 		return ENOSYS;
 
+	maxcpu = pmc_cpu_max();
+
 	/* allocate space for the per-cpu array */
-	MALLOC(pmc_pcpu, struct pmc_cpu **, mp_ncpus * sizeof(struct pmc_cpu *),
+	MALLOC(pmc_pcpu, struct pmc_cpu **, maxcpu * sizeof(struct pmc_cpu *),
 	    M_PMC, M_WAITOK|M_ZERO);
 
 	/* per-cpu 'saved values' for managing process-mode PMCs */
 	MALLOC(pmc_pcpu_saved, pmc_value_t *,
-	    sizeof(pmc_value_t) * mp_ncpus * md->pmd_npmc, M_PMC, M_WAITOK);
+	    sizeof(pmc_value_t) * maxcpu * md->pmd_npmc, M_PMC, M_WAITOK);
 
-	/* perform cpu dependent initialization */
+	/*
+	 * Perform MD layer initialization.  This initialization has
+	 * two parts:
+	 *
+	 * - Initialization that is needed irrespective of whether a
+	 *   CPU is active or not.
+	 * - Initialization required for active CPUs.
+	 */
 	pmc_save_cpu_binding(&pb);
-	for (cpu = 0; cpu < mp_ncpus; cpu++) {
-		if (pmc_cpu_is_disabled(cpu))
+	for (cpu = 0; cpu < maxcpu; cpu++) {
+		if (md->pmd_init != NULL &&
+		    (error = md->pmd_init(cpu)) != 0)
+			break;
+		/*
+		 * Next, we call the MD initialization code for
+		 * currently `active' CPUs; the MD code can expect to
+		 * run on the CPU it is initializing.
+		 */
+		if (!pmc_cpu_is_active(cpu) || md->pmd_cpu_init == NULL)
 			continue;
 		pmc_select_cpu(cpu);
-		if ((error = md->pmd_init(cpu)) != 0)
+		if ((error = md->pmd_cpu_init(cpu)) != 0)
 			break;
 	}
 	pmc_restore_cpu_binding(&pb);
@@ -4368,9 +4386,7 @@
 		return error;
 
 	/* allocate space for the sample array */
-	for (cpu = 0; cpu < mp_ncpus; cpu++) {
-		if (pmc_cpu_is_disabled(cpu))
-			continue;
+	for (cpu = 0; cpu < maxcpu; cpu++) {
 		MALLOC(sb, struct pmc_samplebuffer *,
 		    sizeof(struct pmc_samplebuffer) +
 		    pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
@@ -4459,6 +4475,7 @@
 pmc_cleanup(void)
 {
 	int cpu;
+	unsigned int maxcpu;
 	struct pmc_ownerhash *ph;
 	struct pmc_owner *po, *tmp;
 	struct pmc_binding pb;
@@ -4539,9 +4556,8 @@
 	    ("[pmc,%d] Global SS count not empty", __LINE__));
 
 	/* free the per-cpu sample buffers */
-	for (cpu = 0; cpu < mp_ncpus; cpu++) {
-		if (pmc_cpu_is_disabled(cpu))
-			continue;
+	maxcpu = pmc_cpu_max();
+	for (cpu = 0; cpu < maxcpu; cpu++) {
 		KASSERT(pmc_pcpu[cpu]->pc_sb != NULL,
 		    ("[pmc,%d] Null cpu sample buffer cpu=%d", __LINE__,
 			cpu));
@@ -4554,14 +4570,19 @@
 	PMCDBG(MOD,INI,3, "%s", "md cleanup");
 	if (md) {
 		pmc_save_cpu_binding(&pb);
-		for (cpu = 0; cpu < mp_ncpus; cpu++) {
+		for (cpu = 0; cpu < maxcpu; cpu++) {
 			PMCDBG(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p",
 			    cpu, pmc_pcpu[cpu]);
-			if (pmc_cpu_is_disabled(cpu))
+			if (pmc_pcpu[cpu] == NULL)
 				continue;
-			pmc_select_cpu(cpu);
-			if (pmc_pcpu[cpu])
-				(void) md->pmd_cleanup(cpu);
+			if (pmc_cpu_is_active(cpu) &&
+			    md->pmd_cpu_cleanup != NULL) {
+				pmc_select_cpu(cpu);
+				(void) md->pmd_cpu_cleanup(cpu);
+			}
+			/* Do cleanup for inactive CPUs if any. */
+			if (md->pmd_cleanup)
+				md->pmd_cleanup(cpu);
 		}
 		FREE(md, M_PMC);
 		md = NULL;
@@ -4602,8 +4623,8 @@
 		error = pmc_initialize();
 		if (error != 0)
 			break;
-		PMCDBG(MOD,INI,1, "syscall=%d ncpus=%d",
-		    pmc_syscall_num, mp_ncpus);
+		PMCDBG(MOD,INI,1, "syscall=%d maxcpu=%d",
+		    pmc_syscall_num, pmc_cpu_max());
 		break;
 
 
Index: sys/dev/hwpmc/hwpmc_piv.c
===================================================================
RCS file: /cvs/FreeBSD/src/sys/dev/hwpmc/hwpmc_piv.c,v
retrieving revision 1.15
diff -u -r1.15 hwpmc_piv.c
--- sys/dev/hwpmc/hwpmc_piv.c	7 Dec 2007 08:20:15 -0000	1.15
+++ sys/dev/hwpmc/hwpmc_piv.c	16 Mar 2008 11:17:53 -0000
@@ -532,8 +532,8 @@
 	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
 		    __LINE__));						\
 	atomic_add_int(&p4_escrdisp[(E)], -1);				\
-	KASSERT(p4_escrdisp[(E)] >= (-mp_ncpus), ("[p4,%d] row "	\
-		"disposition error", __LINE__));			\
+	KASSERT(p4_escrdisp[(E)] >= (-pmc_cpu_active_count()), 		\
+		("[p4,%d] row disposition error", __LINE__));		\
 } while (0)
 
 #define	P4_ESCR_UNMARK_ROW_STANDALONE(E) do {				\
@@ -596,11 +596,11 @@
 	struct p4_logicalcpu *plcs;
 	struct pmc_hw *phw;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p4,%d] insane cpu number %d", __LINE__, cpu));
 
-	PMCDBG(MDP,INI,0, "p4-init cpu=%d logical=%d", cpu,
-	    pmc_cpu_is_logical(cpu) != 0);
+	PMCDBG(MDP,INI,0, "p4-init cpu=%d is-primary=%d", cpu,
+	    pmc_cpu_is_primary(cpu) != 0);
 
 	/*
 	 * The two CPUs in an HT pair share their per-cpu state.
@@ -614,7 +614,7 @@
 	 * secondary.
 	 */
 
-	if (pmc_cpu_is_logical(cpu) && (cpu & 1)) {
+	if (!pmc_cpu_is_primary(cpu) && (cpu & 1)) {
 
 		p4_system_has_htt = 1;
 
@@ -677,9 +677,20 @@
  */
 
 static int
-p4_cleanup(int cpu)
+p4_cpu_cleanup(int cpu)
 {
 	int i;
+
+	/* Turn off all PMCs on a primary CPU */
+	if (!P4_CPU_IS_HTT_SECONDARY(cpu))
+		for (i = 0; i < P4_NPMCS - 1; i++)
+			wrmsr(P4_CCCR_MSR_FIRST + i, 0);
+	return (0);
+}
+
+static int
+p4_cleanup(int cpu)
+{
 	struct p4_cpu *pcs;
 
 	PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu);
@@ -687,11 +698,6 @@
 	if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) == NULL)
 		return 0;
 
-	/* Turn off all PMCs on this CPU */
-	for (i = 0; i < P4_NPMCS - 1; i++)
-		wrmsr(P4_CCCR_MSR_FIRST + i,
-		    rdmsr(P4_CCCR_MSR_FIRST + i) & ~P4_CCCR_ENABLE);
-
 	/*
 	 * If the CPU is physical we need to teardown the
 	 * full MD state.
@@ -761,7 +767,7 @@
 	struct pmc_hw *phw;
 	pmc_value_t tmp;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P4_NPMCS,
 	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
@@ -839,7 +845,7 @@
 	const struct pmc_hw *phw;
 	const struct p4pmc_descr *pd;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[amd,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P4_NPMCS,
 	    ("[amd,%d] illegal row-index %d", __LINE__, ri));
@@ -913,7 +919,7 @@
 	struct p4_cpu *pc;
 	int cfgflags, cpuflag;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P4_NPMCS,
 	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
@@ -1050,7 +1056,7 @@
 	struct p4_event_descr *pevent;
 	const struct p4pmc_descr *pd;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P4_NPMCS,
 	    ("[p4,%d] illegal row-index value %d", __LINE__, ri));
@@ -1297,7 +1303,7 @@
 	struct pmc_hw *phw;
 	struct p4pmc_descr *pd;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P4_NPMCS,
 	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
@@ -1449,7 +1455,7 @@
 	struct p4pmc_descr *pd;
 	pmc_value_t tmp;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P4_NPMCS,
 	    ("[p4,%d] illegal row index %d", __LINE__, ri));
@@ -1722,7 +1728,7 @@
 	struct pmc_hw *phw;
 	const struct p4pmc_descr *pd;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P4_NPMCS,
 	    ("[p4,%d] row-index %d out of range", __LINE__, ri));
@@ -1791,6 +1797,8 @@
 
 		pmc_mdep->pmd_init    	    = p4_init;
 		pmc_mdep->pmd_cleanup 	    = p4_cleanup;
+		pmc_mdep->pmd_cpu_init 	    = NULL;
+		pmc_mdep->pmd_cpu_cleanup   = p4_cpu_cleanup;
 		pmc_mdep->pmd_switch_in     = p4_switch_in;
 		pmc_mdep->pmd_switch_out    = p4_switch_out;
 		pmc_mdep->pmd_read_pmc 	    = p4_read_pmc;
Index: sys/dev/hwpmc/hwpmc_ppro.c
===================================================================
RCS file: /cvs/FreeBSD/src/sys/dev/hwpmc/hwpmc_ppro.c,v
retrieving revision 1.10
diff -u -r1.10 hwpmc_ppro.c
--- sys/dev/hwpmc/hwpmc_ppro.c	7 Dec 2007 08:20:15 -0000	1.10
+++ sys/dev/hwpmc/hwpmc_ppro.c	16 Mar 2008 10:48:06 -0000
@@ -336,7 +336,7 @@
 	struct p6_cpu *pcs;
 	struct pmc_hw *phw;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p6,%d] bad cpu %d", __LINE__, cpu));
 
 	PMCDBG(MDP,INI,0,"p6-init cpu=%d", cpu);
@@ -366,7 +366,7 @@
 {
 	struct pmc_cpu *pcs;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p6,%d] bad cpu %d", __LINE__, cpu));
 
 	PMCDBG(MDP,INI,0,"p6-cleanup cpu=%d", cpu);
@@ -379,6 +379,21 @@
 }
 
 static int
+p6_cpu_cleanup(int cpu)
+{
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[p6,%d] bad cpu %d", __LINE__, cpu));
+
+	PMCDBG(MDP,INI,0,"p6-cpu-cleanup cpu=%d", cpu);
+
+	/* Turn off PMCs. */
+	wrmsr(P6_MSR_EVSEL1, 0);
+	wrmsr(P6_MSR_EVSEL0, 0);
+
+	return (0);
+}
+
+static int
 p6_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
 {
 	(void) pc;
@@ -512,7 +527,7 @@
 
 	(void) cpu;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P6_NPMCS,
 	    ("[p4,%d] illegal row-index value %d", __LINE__, ri));
@@ -616,7 +631,7 @@
 
 	PMCDBG(MDP,REL,1, "p6-release cpu=%d ri=%d pm=%p", cpu, ri, pm);
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p6,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P6_NPMCS,
 	    ("[p6,%d] illegal row-index %d", __LINE__, ri));
@@ -638,7 +653,7 @@
 	struct pmc_hw *phw;
 	const struct p6pmc_descr *pd;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p6,%d] illegal CPU value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P6_NPMCS,
 	    ("[p6,%d] illegal row-index %d", __LINE__, ri));
@@ -682,7 +697,7 @@
 	struct pmc_hw *phw;
 	struct p6pmc_descr *pd;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p6,%d] illegal cpu value %d", __LINE__, cpu));
 	KASSERT(ri >= 0 && ri < P6_NPMCS,
 	    ("[p6,%d] illegal row index %d", __LINE__, ri));
@@ -724,7 +739,7 @@
 	struct pmc_hw *phw;
 	pmc_value_t v;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[p6,%d] CPU %d out of range", __LINE__, cpu));
 
 	retval = 0;
@@ -847,6 +862,8 @@
 
 		pmc_mdep->pmd_init    	    = p6_init;
 		pmc_mdep->pmd_cleanup 	    = p6_cleanup;
+		pmc_mdep->pmd_cpu_init 	    = NULL;
+		pmc_mdep->pmd_cpu_cleanup   = p6_cpu_cleanup;
 		pmc_mdep->pmd_switch_in     = p6_switch_in;
 		pmc_mdep->pmd_switch_out    = p6_switch_out;
 		pmc_mdep->pmd_read_pmc 	    = p6_read_pmc;
Index: sys/kern/kern_pmc.c
===================================================================
RCS file: /cvs/FreeBSD/src/sys/kern/kern_pmc.c,v
retrieving revision 1.7
diff -u -r1.7 kern_pmc.c
--- sys/kern/kern_pmc.c	7 Dec 2007 08:20:16 -0000	1.7
+++ sys/kern/kern_pmc.c	16 Mar 2008 10:50:12 -0000
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2003-2007 Joseph Koshy
+ * Copyright (c) 2003-2008 Joseph Koshy
  * Copyright (c) 2007 The FreeBSD Foundation
  * All rights reserved.
  *
@@ -81,25 +81,95 @@
 SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL);
 
 /*
- * Helper functions
+ * Helper functions.
  */
 
+/*
+ * A note on the CPU numbering scheme used by HWPMC.
+ *
+ * CPUs are denoted using numbers in the range 0..[pmc_cpu_max()-1].
+ * CPUs could be numbered "sparsely" in this range; the pmc_cpu_is_present()
+ * predicate is used to test whether a given CPU exists.  This is a
+ * runtime test in order to support hot-pluggable CPUs.
+ *
+ * A physically present CPU may be administratively disabled or
+ * otherwise unavailable for use by HWPMC.  The pmc_cpu_is_active()
+ * predicate tests for CPU usability.
+ *
+ * On systems with hyperthreaded CPUs, multiple ``CPU''s may share PMC
+ * hardware resources.  For such processors one ``CPU'' is denoted as
+ * the primary owner of the in-CPU PMC resources. The pmc_cpu_is_primary()
+ * predicate is used to distinguish this primary CPU from the others.
+ */
+
+/*
+ * An `active' CPU is one which can be used for PMC operations.  It
+ * should be participating in thread scheduling and should be able to
+ * field interrupts raised by PMC hardware.
+ */
+
+int
+pmc_cpu_is_active(int cpu)
+{
+#ifdef	SMP
+	return (pmc_cpu_is_present(cpu) &&
+	    (hlt_cpus_mask & (1 << cpu)) == 0);
+#else
+	return (1);
+#endif
+}
+
 int
-pmc_cpu_is_disabled(int cpu)
+pmc_cpu_is_present(int cpu)
 {
 #ifdef	SMP
-	return ((hlt_cpus_mask & (1 << cpu)) != 0);
+	return (!CPU_ABSENT(cpu));
 #else
-	return 0;
+	return (1);
 #endif
 }
 
 int
-pmc_cpu_is_logical(int cpu)
+pmc_cpu_is_primary(int cpu)
 {
 #ifdef	SMP
-	return ((logical_cpus_mask & (1 << cpu)) != 0);
+	return ((logical_cpus_mask & (1 << cpu)) == 0);
 #else
-	return 0;
+	return (1);
 #endif
 }
+
+
+/*
+ * Return the maximum CPU number supported by the system.  The return
+ * value is used for scaling internal data structures and for runtime
+ * checks.
+ */
+
+unsigned int
+pmc_cpu_max(void)
+{
+#ifdef	SMP
+	return (mp_maxid+1);
+#else
+	return (1);
+#endif
+}
+
+#ifdef	INVARIANTS
+
+/*
+ * Return the count of CPUs in the `active' state in the system.
+ */
+
+int
+pmc_cpu_active_count(void)
+{
+#ifdef	SMP
+	return (mp_ncpus); /* To be changed along with the base kernel. */
+#else
+	return (1);
+#endif
+}
+
+#endif
Index: sys/sys/pmc.h
===================================================================
RCS file: /cvs/FreeBSD/src/sys/sys/pmc.h,v
retrieving revision 1.14
diff -u -r1.14 pmc.h
--- sys/sys/pmc.h	14 Jan 2008 06:33:41 -0000	1.14
+++ sys/sys/pmc.h	16 Mar 2008 10:25:35 -0000
@@ -871,6 +871,8 @@
 
 	int (*pmd_init)(int _cpu);    /* machine dependent initialization */
 	int (*pmd_cleanup)(int _cpu); /* machine dependent cleanup  */
+	int (*pmd_cpu_init)(int _cpu); /* initialization for active CPUs */
+	int (*pmd_cpu_cleanup)(int _cpu); /* cleanup for active CPUs  */
 
 	/* thread context switch in/out */
 	int (*pmd_switch_in)(struct pmc_cpu *_p, struct pmc_process *_pp);
Index: sys/sys/pmckern.h
===================================================================
RCS file: /cvs/FreeBSD/src/sys/sys/pmckern.h,v
retrieving revision 1.7
diff -u -r1.7 pmckern.h
--- sys/sys/pmckern.h	7 Dec 2007 08:20:17 -0000	1.7
+++ sys/sys/pmckern.h	16 Mar 2008 09:14:06 -0000
@@ -124,8 +124,17 @@
 /* Check if a CPU has recorded samples. */
 #define	PMC_CPU_HAS_SAMPLES(C)	(__predict_false(pmc_cpumask & (1 << (C))))
 
-/* helper functions */
-int	pmc_cpu_is_disabled(int _cpu);
-int	pmc_cpu_is_logical(int _cpu);
+/*
+ * Helper functions.
+ */
+
+int	pmc_cpu_is_active(int _cpu);
+int	pmc_cpu_is_present(int _cpu);
+int	pmc_cpu_is_primary(int _cpu);
+unsigned int pmc_cpu_max(void);
+
+#ifdef	INVARIANTS
+int	pmc_cpu_active_count(void);
+#endif	/* INVARIANTS */
 
 #endif /* _SYS_PMCKERN_H_ */
-------------- next part --------------



More information about the freebsd-arch mailing list