svn commit: r333509 - in head/sys: dev/hwpmc kern sys

Matt Macy mmacy at FreeBSD.org
Sat May 12 01:26:36 UTC 2018


Author: mmacy
Date: Sat May 12 01:26:34 2018
New Revision: 333509
URL: https://svnweb.freebsd.org/changeset/base/333509

Log:
  hwpmc(9): Make pmclog buffer pcpu and update constants
  
  On non-trivial SMP systems, contention on the pmc_owner mutex leads
  to a substantial fraction of the captured samples coming from the
  pmcstat process itself. This change (a) makes the buffers larger to
  reduce contention on the global list and (b) makes the working
  sample buffer per-CPU.
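  
  As a rough illustration of (b) -- a minimal userspace sketch, not
  the hwpmc code itself; every name in it is made up -- the hot path
  now writes into a per-CPU "current" buffer and only takes the pool
  lock when a buffer fills:
  
	/* Sketch: per-CPU log buffers over a locked global pool. */
	#include <pthread.h>
	#include <stddef.h>
	#include <stdlib.h>
  
	#define NCPU	4		/* illustrative sizes */
	#define BUFSZ	4096
  
	struct logbuf {
		struct logbuf	*next;
		char		*ptr;	/* next free byte */
		char		*fence;	/* end of data area */
		char		 data[BUFSZ];
	};
  
	static struct logbuf *freelist;			/* global pool */
	static pthread_mutex_t freelist_mtx = PTHREAD_MUTEX_INITIALIZER;
	static struct logbuf *curbuf[NCPU];		/* per-CPU slots */
  
	static void
	init_pool(int nbufs)
	{
		struct logbuf *lb;
  
		while (nbufs-- > 0) {
			if ((lb = calloc(1, sizeof(*lb))) == NULL)
				abort();
			lb->next = freelist;
			freelist = lb;
		}
	}
  
	/* Cold path: runs only when a per-CPU buffer fills. */
	static struct logbuf *
	get_buffer(void)
	{
		struct logbuf *lb;
  
		pthread_mutex_lock(&freelist_mtx);
		if ((lb = freelist) != NULL)
			freelist = lb->next;
		pthread_mutex_unlock(&freelist_mtx);
		if (lb != NULL) {
			lb->ptr = lb->data;
			lb->fence = lb->data + BUFSZ;
		}
		return (lb);
	}
  
	/* Hot path: reserve len bytes on this CPU, no global lock. */
	static char *
	reserve(int cpu, size_t len)
	{
		struct logbuf *lb = curbuf[cpu];
		char *p;
  
		if (lb == NULL || lb->ptr + len > lb->fence) {
			/* Hand the full buffer to I/O (elided). */
			if ((lb = get_buffer()) == NULL)
				return (NULL);
			curbuf[cpu] = lb;
		}
		p = lb->ptr;
		lb->ptr += len;
		return (p);
	}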
  
  Run pmcstat in the background (default sampling interval of 64k events):
  pmcstat -S UNHALTED_CORE_CYCLES -O /dev/null sleep 600 &
  
  Before:
  make -j96 buildkernel -s >&/dev/null 3336.68s user 24684.10s system 7442% cpu 6:16.50 total
  
  After:
  make -j96 buildkernel -s >&/dev/null 2697.82s user 1347.35s system 6058% cpu 1:06.77 total
  
  For a more realistic overhead measurement, set the sampling rate to
  ~2 kHz on a 2.1 GHz processor:
  pmcstat -n 1050000 -S UNHALTED_CORE_CYCLES -O /dev/null sleep 6000 &
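  
  (2.1e9 cycles/sec divided by 1050000 cycles/sample gives 2000
  samples/sec, i.e. ~2 kHz.)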
  
  Collecting 10 timed runs of `make -j96 buildkernel` under each kernel:
  
  x before
  + after
  
  real time:
      N           Min           Max        Median           Avg        Stddev
  x  10          76.4        127.62        84.845        88.577     15.100031
  +  10         59.71         60.79        60.135        60.179    0.29957192
  Difference at 95.0% confidence
          -28.398 +/- 10.0344
          -32.0602% +/- 7.69825%
          (Student's t, pooled s = 10.6794)
  
  system time:
      N           Min           Max        Median           Avg        Stddev
  x  10       2277.96       6948.53       2949.47      3341.492     1385.2677
  +  10        1038.7       1081.06      1070.555      1064.017      15.85404
  Difference at 95.0% confidence
          -2277.47 +/- 920.425
          -68.1574% +/- 8.77623%
          (Student's t, pooled s = 979.596)
  
  x no pmc
  + pmc running
  real time:
  
  HEAD:
      N           Min           Max        Median           Avg        Stddev
  x  10         58.38         59.15         58.86        58.847    0.22504567
  +  10          76.4        127.62        84.845        88.577     15.100031
  Difference at 95.0% confidence
          29.73 +/- 10.0335
          50.5208% +/- 17.0525%
          (Student's t, pooled s = 10.6785)
  
  patched:
      N           Min           Max        Median           Avg        Stddev
  x  10         58.38         59.15         58.86        58.847    0.22504567
  +  10         59.71         60.79        60.135        60.179    0.29957192
  Difference at 95.0% confidence
          1.332 +/- 0.248939
          2.2635% +/- 0.426506%
          (Student's t, pooled s = 0.264942)
  
  system time:
  
  HEAD:
      N           Min           Max        Median           Avg        Stddev
  x  10       1010.15       1073.31      1025.465      1031.524     18.135705
  +  10       2277.96       6948.53       2949.47      3341.492     1385.2677
  Difference at 95.0% confidence
          2309.97 +/- 920.443
          223.937% +/- 89.3039%
          (Student's t, pooled s = 979.616)
  
  patched:
      N           Min           Max        Median           Avg        Stddev
  x  10       1010.15       1073.31      1025.465      1031.524     18.135705
  +  10        1038.7       1081.06      1070.555      1064.017      15.85404
  Difference at 95.0% confidence
          32.493 +/- 16.0042
          3.15% +/- 1.5794%
          (Student's t, pooled s = 17.0331)
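  
  (The comparisons above are ministat(1) output; with the raw timings
  saved to two files they can be regenerated with e.g.
  `ministat before.txt after.txt` -- the file names here are
  hypothetical.)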
  
  Reviewed by:	jeff@
  Approved by:	sbruno@
  Differential Revision:	https://reviews.freebsd.org/D15155

Modified:
  head/sys/dev/hwpmc/hwpmc_amd.c
  head/sys/dev/hwpmc/hwpmc_core.c
  head/sys/dev/hwpmc/hwpmc_e500.c
  head/sys/dev/hwpmc/hwpmc_intel.c
  head/sys/dev/hwpmc/hwpmc_logging.c
  head/sys/dev/hwpmc/hwpmc_mod.c
  head/sys/dev/hwpmc/hwpmc_mpc7xxx.c
  head/sys/dev/hwpmc/hwpmc_piv.c
  head/sys/dev/hwpmc/hwpmc_ppc970.c
  head/sys/dev/hwpmc/hwpmc_ppro.c
  head/sys/dev/hwpmc/hwpmc_soft.c
  head/sys/kern/kern_pmc.c
  head/sys/sys/pmc.h
  head/sys/sys/pmckern.h

Modified: head/sys/dev/hwpmc/hwpmc_amd.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_amd.c	Fri May 11 22:16:23 2018	(r333508)
+++ head/sys/dev/hwpmc/hwpmc_amd.c	Sat May 12 01:26:34 2018	(r333509)
@@ -694,8 +694,10 @@ amd_intr(int cpu, struct trapframe *tf)
 			wrmsr(evsel, config);
 	}
 
-	atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (retval)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
 	PMCDBG1(MDP,INT,2, "retval=%d", retval);
 	return (retval);
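  
The atomic_add_int() on a shared statistics word becomes a counter(9)
update here and in the other MD interrupt handlers converted below
(core, e500); counter(9) gives each CPU a private 64-bit counter, so
the interrupt path no longer bounces a cache line between CPUs.  The
pattern, as a sketch with hypothetical names (example_ctr and friends):

	/* counter(9): per-CPU 64-bit counters with lock-free updates. */
	#include <sys/param.h>
	#include <sys/counter.h>
	#include <sys/malloc.h>

	static counter_u64_t example_ctr;	/* hypothetical */

	static void
	example_init(void)
	{
		example_ctr = counter_u64_alloc(M_WAITOK);  /* once */
	}

	static void
	example_intr(void)
	{
		counter_u64_add(example_ctr, 1);  /* per-CPU, no atomics */
	}

	static uint64_t
	example_read(void)
	{
		return (counter_u64_fetch(example_ctr));  /* sums all CPUs */
	}

	static void
	example_fini(void)
	{
		counter_u64_free(example_ctr);
	}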

Modified: head/sys/dev/hwpmc/hwpmc_core.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_core.c	Fri May 11 22:16:23 2018	(r333508)
+++ head/sys/dev/hwpmc/hwpmc_core.c	Sat May 12 01:26:34 2018	(r333509)
@@ -2831,8 +2831,10 @@ core_intr(int cpu, struct trapframe *tf)
 	if (found_interrupt)
 		lapic_reenable_pmc();
 
-	atomic_add_int(found_interrupt ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (found_interrupt)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
 	return (found_interrupt);
 }
@@ -2896,6 +2898,7 @@ core2_intr(int cpu, struct trapframe *tf)
 
 		error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
 		    TRAPF_USERMODE(tf));
+
 		if (error)
 			intrenable &= ~flag;
 
@@ -2955,8 +2958,10 @@ core2_intr(int cpu, struct trapframe *tf)
 	if (found_interrupt)
 		lapic_reenable_pmc();
 
-	atomic_add_int(found_interrupt ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (found_interrupt)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
 	return (found_interrupt);
 }

Modified: head/sys/dev/hwpmc/hwpmc_e500.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_e500.c	Fri May 11 22:16:23 2018	(r333508)
+++ head/sys/dev/hwpmc/hwpmc_e500.c	Sat May 12 01:26:34 2018	(r333509)
@@ -616,8 +616,10 @@ e500_intr(int cpu, struct trapframe *tf)
 		e500_write_pmc(cpu, i, pm->pm_sc.pm_reloadcount);
 	}
 
-	atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (retval)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
 	/* Re-enable PERF exceptions. */
 	if (retval)

Modified: head/sys/dev/hwpmc/hwpmc_intel.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_intel.c	Fri May 11 22:16:23 2018	(r333508)
+++ head/sys/dev/hwpmc/hwpmc_intel.c	Sat May 12 01:26:34 2018	(r333509)
@@ -94,6 +94,8 @@ pmc_intel_initialize(void)
 	model = ((cpu_id & 0xF0000) >> 12) | ((cpu_id & 0xF0) >> 4);
 	stepping = cpu_id & 0xF;
 
+	snprintf(pmc_cpuid, sizeof(pmc_cpuid), "GenuineIntel-%d-%02X",
+			 (cpu_id & 0xF00) >> 8, model);
 	switch (cpu_id & 0xF00) {
 #if	defined(__i386__)
 	case 0x500:		/* Pentium family processors */
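  
This publishes a machine-readable CPU identifier before the family
switch runs.  As a worked example (values assumed, not from the
commit): a CPU reporting cpu_id 0x306F2 has family nibble
(cpu_id & 0xF00) >> 8 = 6 and model ((0x30000 >> 12) | (0xF0 >> 4)) =
0x3F, so pmc_cpuid becomes "GenuineIntel-6-3F".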

Modified: head/sys/dev/hwpmc/hwpmc_logging.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_logging.c	Fri May 11 22:16:23 2018	(r333508)
+++ head/sys/dev/hwpmc/hwpmc_logging.c	Sat May 12 01:26:34 2018	(r333509)
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2005-2007 Joseph Koshy
  * Copyright (c) 2007 The FreeBSD Foundation
+ * Copyright (c) 2018 Matthew Macy
  * All rights reserved.
  *
  * Portions of this software were developed by A. Joseph Koshy under
@@ -50,7 +51,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/pmckern.h>
 #include <sys/pmclog.h>
 #include <sys/proc.h>
+#include <sys/sched.h>
 #include <sys/signalvar.h>
+#include <sys/smp.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
@@ -79,31 +82,28 @@ SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFL
  * kern.hwpmc.nbuffer -- number of global log buffers
  */
 
-static int pmc_nlogbuffers = PMC_NLOGBUFFERS;
+static int pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
 #if (__FreeBSD_version < 1100000)
-TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers);
+TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers_pcpu);
 #endif
-SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers, CTLFLAG_RDTUN,
-    &pmc_nlogbuffers, 0, "number of global log buffers");
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers_pcpu, CTLFLAG_RDTUN,
+    &pmc_nlogbuffers_pcpu, 0, "number of log buffers per cpu");
 
 /*
  * Global log buffer list and associated spin lock.
  */
 
-TAILQ_HEAD(, pmclog_buffer) pmc_bufferlist =
-	TAILQ_HEAD_INITIALIZER(pmc_bufferlist);
-static struct mtx pmc_bufferlist_mtx;	/* spin lock */
 static struct mtx pmc_kthread_mtx;	/* sleep lock */
 
-#define	PMCLOG_INIT_BUFFER_DESCRIPTOR(D) do {				\
-		const int __roundup = roundup(sizeof(*D),		\
-			sizeof(uint32_t));				\
-		(D)->plb_fence = ((char *) (D)) +			\
-			 1024*pmclog_buffer_size;			\
-		(D)->plb_base  = (D)->plb_ptr = ((char *) (D)) +	\
-			__roundup;					\
+#define	PMCLOG_INIT_BUFFER_DESCRIPTOR(D, buf, domain) do {						\
+		(D)->plb_fence = ((char *) (buf)) +	1024*pmclog_buffer_size;			\
+		(D)->plb_base  = (D)->plb_ptr = ((char *) (buf));				\
+		(D)->plb_domain = domain; \
 	} while (0)
 
+#define	PMCLOG_RESET_BUFFER_DESCRIPTOR(D) do {			\
+		(D)->plb_ptr  = (D)->plb_base; \
+	} while (0)
 
 /*
  * Log file record constructors.
@@ -114,15 +114,29 @@ static struct mtx pmc_kthread_mtx;	/* sleep lock */
 	 ((L) & 0xFFFF))
 
 /* reserve LEN bytes of space and initialize the entry header */
-#define	_PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do {			\
+#define	_PMCLOG_RESERVE_SAFE(PO,TYPE,LEN,ACTION) do {			\
 		uint32_t *_le;						\
-		int _len = roundup((LEN), sizeof(uint32_t));		\
+		int _len = roundup((LEN), sizeof(uint32_t));	\
 		if ((_le = pmclog_reserve((PO), _len)) == NULL) {	\
 			ACTION;						\
 		}							\
 		*_le = _PMCLOG_TO_HEADER(TYPE,_len);			\
 		_le += 3	/* skip over timestamp */
 
+/* reserve LEN bytes of space and initialize the entry header */
+#define	_PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do {			\
+		uint32_t *_le;						\
+		int _len = roundup((LEN), sizeof(uint32_t));		\
+		spinlock_enter();									\
+		if ((_le = pmclog_reserve((PO), _len)) == NULL) {	\
+			spinlock_exit();								\
+			ACTION;											\
+		}												\
+		*_le = _PMCLOG_TO_HEADER(TYPE,_len);			\
+		_le += 3	/* skip over timestamp */
+
+
+#define	PMCLOG_RESERVE_SAFE(P,T,L)		_PMCLOG_RESERVE_SAFE(P,T,L,return)
 #define	PMCLOG_RESERVE(P,T,L)		_PMCLOG_RESERVE(P,T,L,return)
 #define	PMCLOG_RESERVE_WITH_ERROR(P,T,L) _PMCLOG_RESERVE(P,T,L,		\
 	error=ENOMEM;goto error)
@@ -138,11 +152,21 @@ static struct mtx pmc_kthread_mtx;	/* sleep lock */
 #define	PMCLOG_EMITSTRING(S,L)	do { bcopy((S), _le, (L)); } while (0)
 #define	PMCLOG_EMITNULLSTRING(L) do { bzero(_le, (L)); } while (0)
 
-#define	PMCLOG_DESPATCH(PO)						\
-		pmclog_release((PO));					\
+#define	PMCLOG_DESPATCH_SAFE(PO)						\
+	    pmclog_release((PO));						\
 	} while (0)
 
+#define	PMCLOG_DESPATCH(PO)							\
+	    pmclog_release((PO));						\
+		spinlock_exit();							\
+	} while (0)
 
+#define	PMCLOG_DESPATCH_SYNC(PO)						\
+	    pmclog_schedule_io((PO));						\
+		spinlock_exit();								\
+		} while (0)
+
+
 /*
  * Assertions about the log file format.
  */
@@ -180,8 +204,19 @@ struct pmclog_buffer {
 	char 		*plb_base;
 	char		*plb_ptr;
 	char 		*plb_fence;
-};
+	uint16_t	 plb_domain;
+} __aligned(CACHE_LINE_SIZE);
 
+struct pmc_domain_buffer_header {
+	struct mtx pdbh_mtx;
+	TAILQ_HEAD(, pmclog_buffer) pdbh_head;
+	struct pmclog_buffer *pdbh_plbs;
+	int pdbh_ncpus;
+} __aligned(CACHE_LINE_SIZE);
+
+struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
+
+
 /*
  * Prototypes
  */
@@ -191,12 +226,28 @@ static void pmclog_loop(void *arg);
 static void pmclog_release(struct pmc_owner *po);
 static uint32_t *pmclog_reserve(struct pmc_owner *po, int length);
 static void pmclog_schedule_io(struct pmc_owner *po);
+static void pmclog_schedule_all(struct pmc_owner *po);
 static void pmclog_stop_kthread(struct pmc_owner *po);
 
 /*
  * Helper functions
  */
 
+static inline void
+pmc_plb_rele_unlocked(struct pmclog_buffer *plb)
+{
+	TAILQ_INSERT_HEAD(&pmc_dom_hdrs[plb->plb_domain]->pdbh_head, plb, plb_next);
+}
+
+static inline void
+pmc_plb_rele(struct pmclog_buffer *plb)
+{
+	mtx_lock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx);
+	pmc_plb_rele_unlocked(plb);
+	mtx_unlock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx);
+}
+
+
 /*
  * Get a log buffer
  */
@@ -205,16 +256,16 @@ static int
 pmclog_get_buffer(struct pmc_owner *po)
 {
 	struct pmclog_buffer *plb;
+	int domain;
 
-	mtx_assert(&po->po_mtx, MA_OWNED);
-
-	KASSERT(po->po_curbuf == NULL,
+	KASSERT(po->po_curbuf[curcpu] == NULL,
 	    ("[pmclog,%d] po=%p current buffer still valid", __LINE__, po));
 
-	mtx_lock_spin(&pmc_bufferlist_mtx);
-	if ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL)
-		TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
-	mtx_unlock_spin(&pmc_bufferlist_mtx);
+	domain = PCPU_GET(domain);
+	mtx_lock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx);
+	if ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL)
+		TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
+	mtx_unlock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx);
 
 	PMCDBG2(LOG,GTB,1, "po=%p plb=%p", po, plb);
 
@@ -227,12 +278,12 @@ pmclog_get_buffer(struct pmc_owner *po)
 		    plb->plb_base, plb->plb_fence));
 #endif
 
-	po->po_curbuf = plb;
+	po->po_curbuf[curcpu] = plb;
 
 	/* update stats */
-	atomic_add_int(&pmc_stats.pm_buffer_requests, 1);
+	counter_u64_add(pmc_stats.pm_buffer_requests, 1);
 	if (plb == NULL)
-		atomic_add_int(&pmc_stats.pm_buffer_requests_failed, 1);
+		counter_u64_add(pmc_stats.pm_buffer_requests_failed, 1);
 
 	return (plb ? 0 : ENOMEM);
 }
@@ -421,12 +472,9 @@ pmclog_loop(void *arg)
 		mtx_lock(&pmc_kthread_mtx);
 
 		/* put the used buffer back into the global pool */
-		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
+		PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
 
-		mtx_lock_spin(&pmc_bufferlist_mtx);
-		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
-		mtx_unlock_spin(&pmc_bufferlist_mtx);
-
+		pmc_plb_rele(lb);
 		lb = NULL;
 	}
 
@@ -437,11 +485,9 @@ pmclog_loop(void *arg)
 
 	/* return the current I/O buffer to the global pool */
 	if (lb) {
-		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
+		PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
 
-		mtx_lock_spin(&pmc_bufferlist_mtx);
-		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
-		mtx_unlock_spin(&pmc_bufferlist_mtx);
+		pmc_plb_rele(lb);
 	}
 
 	/*
@@ -460,19 +506,20 @@ pmclog_loop(void *arg)
 static void
 pmclog_release(struct pmc_owner *po)
 {
-	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
+	struct pmclog_buffer *plb;
+
+	plb = po->po_curbuf[curcpu];
+	KASSERT(plb->plb_ptr >= plb->plb_base,
 	    ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
-		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
-	KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+		po, plb->plb_ptr, plb->plb_base));
+	KASSERT(plb->plb_ptr <= plb->plb_fence,
 	    ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__,
-		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));
+		po, plb->plb_ptr, plb->plb_fence));
 
 	/* schedule an I/O if we've filled a buffer */
-	if (po->po_curbuf->plb_ptr >= po->po_curbuf->plb_fence)
+	if (plb->plb_ptr >= plb->plb_fence)
 		pmclog_schedule_io(po);
 
-	mtx_unlock_spin(&po->po_mtx);
-
 	PMCDBG1(LOG,REL,1, "po=%p", po);
 }
 
@@ -492,36 +539,32 @@ pmclog_reserve(struct pmc_owner *po, int length)
 	uintptr_t newptr, oldptr;
 	uint32_t *lh;
 	struct timespec ts;
+	struct pmclog_buffer *plb, **pplb;
 
 	PMCDBG2(LOG,ALL,1, "po=%p len=%d", po, length);
 
 	KASSERT(length % sizeof(uint32_t) == 0,
 	    ("[pmclog,%d] length not a multiple of word size", __LINE__));
 
-	mtx_lock_spin(&po->po_mtx);
-
 	/* No more data when shutdown in progress. */
-	if (po->po_flags & PMC_PO_SHUTDOWN) {
-		mtx_unlock_spin(&po->po_mtx);
+	if (po->po_flags & PMC_PO_SHUTDOWN)
 		return (NULL);
-	}
 
-	if (po->po_curbuf == NULL)
-		if (pmclog_get_buffer(po) != 0) {
-			mtx_unlock_spin(&po->po_mtx);
-			return (NULL);
-		}
+	pplb = &po->po_curbuf[curcpu];
+	if (*pplb == NULL && pmclog_get_buffer(po) != 0)
+		goto fail;
 
-	KASSERT(po->po_curbuf != NULL,
+	KASSERT(*pplb != NULL,
 	    ("[pmclog,%d] po=%p no current buffer", __LINE__, po));
 
-	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base &&
-	    po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+	plb = *pplb;
+	KASSERT(plb->plb_ptr >= plb->plb_base &&
+	    plb->plb_ptr <= plb->plb_fence,
 	    ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
-		__LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
-		po->po_curbuf->plb_fence));
+		__LINE__, po, plb->plb_ptr, plb->plb_base,
+		plb->plb_fence));
 
-	oldptr = (uintptr_t) po->po_curbuf->plb_ptr;
+	oldptr = (uintptr_t) plb->plb_ptr;
 	newptr = oldptr + length;
 
 	KASSERT(oldptr != (uintptr_t) NULL,
@@ -531,8 +574,8 @@ pmclog_reserve(struct pmc_owner *po, int length)
 	 * If we have space in the current buffer, return a pointer to
 	 * available space with the PO structure locked.
 	 */
-	if (newptr <= (uintptr_t) po->po_curbuf->plb_fence) {
-		po->po_curbuf->plb_ptr = (char *) newptr;
+	if (newptr <= (uintptr_t) plb->plb_fence) {
+		plb->plb_ptr = (char *) newptr;
 		goto done;
 	}
 
@@ -542,24 +585,23 @@ pmclog_reserve(struct pmc_owner *po, int length)
 	 */
 	pmclog_schedule_io(po);
 
-	if (pmclog_get_buffer(po) != 0) {
-		mtx_unlock_spin(&po->po_mtx);
-		return (NULL);
-	}
+	if (pmclog_get_buffer(po) != 0)
+		goto fail;
 
-	KASSERT(po->po_curbuf != NULL,
+	plb = *pplb;
+	KASSERT(plb != NULL,
 	    ("[pmclog,%d] po=%p no current buffer", __LINE__, po));
 
-	KASSERT(po->po_curbuf->plb_ptr != NULL,
+	KASSERT(plb->plb_ptr != NULL,
 	    ("[pmclog,%d] null return from pmc_get_log_buffer", __LINE__));
 
-	KASSERT(po->po_curbuf->plb_ptr == po->po_curbuf->plb_base &&
-	    po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+	KASSERT(plb->plb_ptr == plb->plb_base &&
+	    plb->plb_ptr <= plb->plb_fence,
 	    ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
-		__LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
-		po->po_curbuf->plb_fence));
+		__LINE__, po, plb->plb_ptr, plb->plb_base,
+		plb->plb_fence));
 
-	oldptr = (uintptr_t) po->po_curbuf->plb_ptr;
+	oldptr = (uintptr_t) plb->plb_ptr;
 
  done:
 	lh = (uint32_t *) oldptr;
@@ -568,6 +610,8 @@ pmclog_reserve(struct pmc_owner *po, int length)
 	*lh++ = ts.tv_sec & 0xFFFFFFFF;
 	*lh++ = ts.tv_nsec & 0xFFFFFFF;
 	return ((uint32_t *) oldptr);
+ fail:
+	return (NULL);
 }
 
 /*
@@ -579,26 +623,28 @@ pmclog_reserve(struct pmc_owner *po, int length)
 static void
 pmclog_schedule_io(struct pmc_owner *po)
 {
-	KASSERT(po->po_curbuf != NULL,
-	    ("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po));
+	struct pmclog_buffer *plb;
 
-	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
+	plb = po->po_curbuf[curcpu];
+	po->po_curbuf[curcpu] = NULL;
+	KASSERT(plb != NULL,
+	    ("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po));
+	KASSERT(plb->plb_ptr >= plb->plb_base,
 	    ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
-		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
-	KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+		po, plb->plb_ptr, plb->plb_base));
+	KASSERT(plb->plb_ptr <= plb->plb_fence,
 	    ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__,
-		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));
+		po, plb->plb_ptr, plb->plb_fence));
 
 	PMCDBG1(LOG,SIO, 1, "po=%p", po);
 
-	mtx_assert(&po->po_mtx, MA_OWNED);
-
 	/*
 	 * Add the current buffer to the tail of the buffer list and
 	 * wakeup the helper.
 	 */
-	TAILQ_INSERT_TAIL(&po->po_logbuffers, po->po_curbuf, plb_next);
-	po->po_curbuf = NULL;
+	mtx_lock_spin(&po->po_mtx);
+	TAILQ_INSERT_TAIL(&po->po_logbuffers, plb, plb_next);
+	mtx_unlock_spin(&po->po_mtx);
 	wakeup_one(po);
 }
 
@@ -671,7 +717,7 @@ pmclog_configure_log(struct pmc_mdep *md, struct pmc_o
 	    sizeof(struct pmclog_initialize));
 	PMCLOG_EMIT32(PMC_VERSION);
 	PMCLOG_EMIT32(md->pmd_cputype);
-	PMCLOG_DESPATCH(po);
+	PMCLOG_DESPATCH_SYNC(po);
 
 	return (0);
 
@@ -719,19 +765,22 @@ pmclog_deconfigure_log(struct pmc_owner *po)
 	/* return all queued log buffers to the global pool */
 	while ((lb = TAILQ_FIRST(&po->po_logbuffers)) != NULL) {
 		TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
-		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
-		mtx_lock_spin(&pmc_bufferlist_mtx);
-		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
-		mtx_unlock_spin(&pmc_bufferlist_mtx);
+		PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
+		pmc_plb_rele(lb);
 	}
-
-	/* return the 'current' buffer to the global pool */
-	if ((lb = po->po_curbuf) != NULL) {
-		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
-		mtx_lock_spin(&pmc_bufferlist_mtx);
-		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
-		mtx_unlock_spin(&pmc_bufferlist_mtx);
+	for (int i = 0; i < mp_ncpus; i++) {
+		thread_lock(curthread);
+		sched_bind(curthread, i);
+		thread_unlock(curthread);
+		/* return the 'current' buffer to the global pool */
+		if ((lb = po->po_curbuf[curcpu]) != NULL) {
+			PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
+			pmc_plb_rele(lb);
+		}
 	}
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
 
 	/* drop a reference to the fd */
 	if (po->po_file != NULL) {
@@ -752,7 +801,6 @@ int
 pmclog_flush(struct pmc_owner *po)
 {
 	int error;
-	struct pmclog_buffer *lb;
 
 	PMCDBG1(LOG,FLS,1, "po=%p", po);
 
@@ -774,23 +822,45 @@ pmclog_flush(struct pmc_owner *po)
 		goto error;
 	}
 
-	/*
-	 * Schedule the current buffer if any and not empty.
-	 */
-	mtx_lock_spin(&po->po_mtx);
-	lb = po->po_curbuf;
-	if (lb && lb->plb_ptr != lb->plb_base) {
-		pmclog_schedule_io(po);
-	} else
-		error = ENOBUFS;
-	mtx_unlock_spin(&po->po_mtx);
-
+	pmclog_schedule_all(po);
  error:
 	mtx_unlock(&pmc_kthread_mtx);
 
 	return (error);
 }
 
+static void
+pmclog_schedule_one_cond(void *arg)
+{
+	struct pmc_owner *po = arg;
+	struct pmclog_buffer *plb;
+
+	spinlock_enter();
+	/* tell hardclock not to run again */
+	DPCPU_SET(pmc_sampled, 0);
+	plb = po->po_curbuf[curcpu];
+	if (plb && plb->plb_ptr != plb->plb_base)
+		pmclog_schedule_io(po);
+	spinlock_exit();
+}
+
+static void
+pmclog_schedule_all(struct pmc_owner *po)
+{
+	/*
+	 * Schedule the current buffer if any and not empty.
+	 */
+	for (int i = 0; i < mp_ncpus; i++) {
+		thread_lock(curthread);
+		sched_bind(curthread, i);
+		thread_unlock(curthread);
+		pmclog_schedule_one_cond(po);
+	}
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+}
+
 int
 pmclog_close(struct pmc_owner *po)
 {
@@ -804,19 +874,14 @@ pmclog_close(struct pmc_owner *po)
 	/*
 	 * Schedule the current buffer.
 	 */
-	mtx_lock_spin(&po->po_mtx);
-	if (po->po_curbuf)
-		pmclog_schedule_io(po);
-	else
-		wakeup_one(po);
-	mtx_unlock_spin(&po->po_mtx);
+	pmclog_schedule_all(po);
+	wakeup_one(po);
 
 	/*
 	 * Initiate shutdown: no new data queued,
 	 * thread will close file on last block.
 	 */
 	po->po_flags |= PMC_PO_SHUTDOWN;
-
 	mtx_unlock(&pmc_kthread_mtx);
 
 	return (0);
@@ -836,20 +901,20 @@ pmclog_process_callchain(struct pmc *pm, struct pmc_sa
 	    ps->ps_nsamples * sizeof(uintfptr_t);
 	po = pm->pm_owner;
 	flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags);
-	PMCLOG_RESERVE(po, CALLCHAIN, recordlen);
+	PMCLOG_RESERVE_SAFE(po, CALLCHAIN, recordlen);
 	PMCLOG_EMIT32(ps->ps_pid);
 	PMCLOG_EMIT32(pm->pm_id);
 	PMCLOG_EMIT32(flags);
 	for (n = 0; n < ps->ps_nsamples; n++)
 		PMCLOG_EMITADDR(ps->ps_pc[n]);
-	PMCLOG_DESPATCH(po);
+	PMCLOG_DESPATCH_SAFE(po);
 }
 
 void
 pmclog_process_closelog(struct pmc_owner *po)
 {
 	PMCLOG_RESERVE(po,CLOSELOG,sizeof(struct pmclog_closelog));
-	PMCLOG_DESPATCH(po);
+	PMCLOG_DESPATCH_SYNC(po);
 }
 
 void
@@ -913,14 +978,14 @@ pmclog_process_pmcallocate(struct pmc *pm)
 		else
 			PMCLOG_EMITNULLSTRING(PMC_NAME_MAX);
 		pmc_soft_ev_release(ps);
-		PMCLOG_DESPATCH(po);
+		PMCLOG_DESPATCH_SYNC(po);
 	} else {
 		PMCLOG_RESERVE(po, PMCALLOCATE,
 		    sizeof(struct pmclog_pmcallocate));
 		PMCLOG_EMIT32(pm->pm_id);
 		PMCLOG_EMIT32(pm->pm_event);
 		PMCLOG_EMIT32(pm->pm_flags);
-		PMCLOG_DESPATCH(po);
+		PMCLOG_DESPATCH_SYNC(po);
 	}
 }
 
@@ -941,7 +1006,7 @@ pmclog_process_pmcattach(struct pmc *pm, pid_t pid, ch
 	PMCLOG_EMIT32(pm->pm_id);
 	PMCLOG_EMIT32(pid);
 	PMCLOG_EMITSTRING(path, pathlen);
-	PMCLOG_DESPATCH(po);
+	PMCLOG_DESPATCH_SYNC(po);
 }
 
 void
@@ -956,7 +1021,7 @@ pmclog_process_pmcdetach(struct pmc *pm, pid_t pid)
 	PMCLOG_RESERVE(po, PMCDETACH, sizeof(struct pmclog_pmcdetach));
 	PMCLOG_EMIT32(pm->pm_id);
 	PMCLOG_EMIT32(pid);
-	PMCLOG_DESPATCH(po);
+	PMCLOG_DESPATCH_SYNC(po);
 }
 
 /*
@@ -1081,30 +1146,57 @@ pmclog_process_userlog(struct pmc_owner *po, struct pm
 void
 pmclog_initialize()
 {
-	int n;
+	int domain, cpu;
+	struct pcpu *pc;
 	struct pmclog_buffer *plb;
 
-	if (pmclog_buffer_size <= 0) {
+	if (pmclog_buffer_size <= 0 || pmclog_buffer_size > 16*1024) {
 		(void) printf("hwpmc: tunable logbuffersize=%d must be "
-		    "greater than zero.\n", pmclog_buffer_size);
+					  "greater than zero and less than or equal to 16MB.\n",
+					  pmclog_buffer_size);
 		pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
 	}
 
-	if (pmc_nlogbuffers <= 0) {
+	if (pmc_nlogbuffers_pcpu <= 0) {
 		(void) printf("hwpmc: tunable nlogbuffers=%d must be greater "
-		    "than zero.\n", pmc_nlogbuffers);
-		pmc_nlogbuffers = PMC_NLOGBUFFERS;
+					  "than zero.\n", pmc_nlogbuffers_pcpu);
+		pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
 	}
 
-	/* create global pool of log buffers */
-	for (n = 0; n < pmc_nlogbuffers; n++) {
-		plb = malloc(1024 * pmclog_buffer_size, M_PMC,
-		    M_WAITOK|M_ZERO);
-		PMCLOG_INIT_BUFFER_DESCRIPTOR(plb);
-		TAILQ_INSERT_HEAD(&pmc_bufferlist, plb, plb_next);
+	if (pmc_nlogbuffers_pcpu*pmclog_buffer_size > 32*1024) {
+		(void) printf("hwpmc: memory allocated pcpu must be less than 32MB (is %dK).\n",
+					  pmc_nlogbuffers_pcpu*pmclog_buffer_size);
+		pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
+		pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
 	}
-	mtx_init(&pmc_bufferlist_mtx, "pmc-buffer-list", "pmc-leaf",
-	    MTX_SPIN);
+	for (domain = 0; domain < vm_ndomains; domain++) {
+		pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct pmc_domain_buffer_header), M_PMC, domain,
+										M_WAITOK|M_ZERO);
+		mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", "pmc-leaf", MTX_SPIN);
+		TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head);
+	}
+	CPU_FOREACH(cpu) {
+		if (CPU_ABSENT(cpu))
+			continue;
+		pc = pcpu_find(cpu);
+		domain = pc->pc_domain;
+		pmc_dom_hdrs[domain]->pdbh_ncpus++;
+	}
+	for (domain = 0; domain < vm_ndomains; domain++) {
+		int ncpus = pmc_dom_hdrs[domain]->pdbh_ncpus;
+		int total = ncpus*pmc_nlogbuffers_pcpu;
+
+		plb = malloc_domain(sizeof(struct pmclog_buffer)*total, M_PMC, domain, M_WAITOK|M_ZERO);
+		pmc_dom_hdrs[domain]->pdbh_plbs = plb;
+		for (int i = 0; i < total; i++, plb++) {
+			void *buf;
+
+			buf = malloc_domain(1024 * pmclog_buffer_size, M_PMC, domain,
+								M_WAITOK|M_ZERO);
+			PMCLOG_INIT_BUFFER_DESCRIPTOR(plb, buf, domain);
+			pmc_plb_rele_unlocked(plb);
+		}
+	}
 	mtx_init(&pmc_kthread_mtx, "pmc-kthread", "pmc-sleep", MTX_DEF);
 }
 
@@ -1118,12 +1210,17 @@ void
 pmclog_shutdown()
 {
 	struct pmclog_buffer *plb;
+	int domain;
 
 	mtx_destroy(&pmc_kthread_mtx);
-	mtx_destroy(&pmc_bufferlist_mtx);
 
-	while ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL) {
-		TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
-		free(plb, M_PMC);
+	for (domain = 0; domain < vm_ndomains; domain++) {
+		mtx_destroy(&pmc_dom_hdrs[domain]->pdbh_mtx);
+		while ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) {
+			TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
+			free(plb->plb_base, M_PMC);
+		}
+		free(pmc_dom_hdrs[domain]->pdbh_plbs, M_PMC);
+		free(pmc_dom_hdrs[domain], M_PMC);
 	}
 }
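
A note on the drain idiom introduced above in pmclog_deconfigure_log()
and pmclog_schedule_all(): since po_curbuf[] is indexed by curcpu and
written without a global lock, a thread that wants to flush every
CPU's slot migrates itself onto each CPU in turn (condensed from the
diff above):

	for (i = 0; i < mp_ncpus; i++) {
		thread_lock(curthread);
		sched_bind(curthread, i);	/* run on CPU i */
		thread_unlock(curthread);
		/* ... safe to touch po->po_curbuf[curcpu] ... */
	}
	thread_lock(curthread);
	sched_unbind(curthread);		/* allow migration again */
	thread_unlock(curthread);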

Modified: head/sys/dev/hwpmc/hwpmc_mod.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_mod.c	Fri May 11 22:16:23 2018	(r333508)
+++ head/sys/dev/hwpmc/hwpmc_mod.c	Sat May 12 01:26:34 2018	(r333509)
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2003-2008 Joseph Koshy
  * Copyright (c) 2007 The FreeBSD Foundation
+ * Copyright (c) 2018 Matthew Macy
  * All rights reserved.
  *
  * Portions of this software were developed by A. Joseph Koshy under
@@ -138,8 +139,9 @@ static eventhandler_tag	pmc_exit_tag, pmc_fork_tag, pm
     pmc_kld_unload_tag;
 
 /* Module statistics */
-struct pmc_op_getdriverstats pmc_stats;
+struct pmc_driverstats pmc_stats;
 
+
 /* Machine/processor dependent operations */
 static struct pmc_mdep  *md;
 
@@ -235,11 +237,34 @@ static void pmc_generic_cpu_finalize(struct pmc_mdep *
  */
 
 SYSCTL_DECL(_kern_hwpmc);
+SYSCTL_NODE(_kern_hwpmc, OID_AUTO, stats, CTLFLAG_RW, 0, "HWPMC stats");
 
+
+/* Stats. */
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_ignored, CTLFLAG_RW,
+				   &pmc_stats.pm_intr_ignored, "# of interrupts ignored");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_processed, CTLFLAG_RW,
+				   &pmc_stats.pm_intr_processed, "# of interrupts processed");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_bufferfull, CTLFLAG_RW,
+				   &pmc_stats.pm_intr_bufferfull, "# of interrupts where buffer was full");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscalls, CTLFLAG_RW,
+				   &pmc_stats.pm_syscalls, "# of syscalls");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscall_errors, CTLFLAG_RW,
+				   &pmc_stats.pm_syscall_errors, "# of syscall_errors");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests, CTLFLAG_RW,
+				   &pmc_stats.pm_buffer_requests, "# of buffer requests");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests_failed, CTLFLAG_RW,
+				   &pmc_stats.pm_buffer_requests_failed, "# of buffer requests which failed");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW,
+				   &pmc_stats.pm_log_sweeps, "# of ?");
+
 static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
 SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN,
     &pmc_callchaindepth, 0, "depth of call chain records");
 
+char pmc_cpuid[64];
+SYSCTL_STRING(_kern_hwpmc, OID_AUTO, cpuid, CTLFLAG_RD,
+	pmc_cpuid, 0, "cpu version string");
 #ifdef	HWPMC_DEBUG
 struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
 char	pmc_debugstr[PMC_DEBUG_STRSIZE];
@@ -250,6 +275,7 @@ SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags,
     0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags");
 #endif
 
+
 /*
  * kern.hwpmc.hashrows -- determines the number of rows in the
  * of the hash table used to look up threads
@@ -1260,7 +1286,7 @@ pmc_process_csw_in(struct thread *td)
 			continue;
 
 		/* increment PMC runcount */
-		atomic_add_rel_int(&pm->pm_runcount, 1);
+		counter_u64_add(pm->pm_runcount, 1);
 
 		/* configure the HWPMC we are going to use. */
 		pcd = pmc_ri_to_classdep(md, ri, &adjri);
@@ -1311,10 +1337,10 @@ pmc_process_csw_in(struct thread *td)
 
 		/* If a sampling mode PMC, reset stalled state. */
 		if (PMC_TO_MODE(pm) == PMC_MODE_TS)
-			CPU_CLR_ATOMIC(cpu, &pm->pm_stalled);
+			pm->pm_pcpu_state[cpu].pps_stalled = 0;
 
 		/* Indicate that we desire this to run. */
-		CPU_SET_ATOMIC(cpu, &pm->pm_cpustate);
+		pm->pm_pcpu_state[cpu].pps_cpustate = 1;
 
 		/* Start the PMC. */
 		pcd->pcd_start_pmc(cpu, adjri);
@@ -1417,12 +1443,12 @@ pmc_process_csw_out(struct thread *td)
 		 * an interrupt re-enables the PMC after this code has
 		 * already checked the pm_stalled flag.
 		 */
-		CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
-		if (!CPU_ISSET(cpu, &pm->pm_stalled))
+		pm->pm_pcpu_state[cpu].pps_cpustate = 0;
+		if (pm->pm_pcpu_state[cpu].pps_stalled == 0)
 			pcd->pcd_stop_pmc(cpu, adjri);
 
 		/* reduce this PMC's runcount */
-		atomic_subtract_rel_int(&pm->pm_runcount, 1);
+		counter_u64_add(pm->pm_runcount, -1);
 
 		/*
 		 * If this PMC is associated with this process,
@@ -1537,7 +1563,7 @@ pmc_process_mmap(struct thread *td, struct pmckern_map
 	/* Inform owners of all system-wide sampling PMCs. */
 	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
-		pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
+			pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
 
 	if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
 		goto done;
@@ -1993,7 +2019,7 @@ pmc_hook_handler(struct thread *td, int function, void
 		 * had already processed the interrupt).  We don't
 		 * lose the interrupt sample.
 		 */
-		CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask);
+		DPCPU_SET(pmc_sampled, 0);
 		pmc_process_samples(PCPU_GET(cpuid), PMC_HR);
 		pmc_process_samples(PCPU_GET(cpuid), PMC_SR);
 		break;
@@ -2191,7 +2217,8 @@ pmc_allocate_pmc_descriptor(void)
 	struct pmc *pmc;
 
 	pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO);
-
+	pmc->pm_runcount = counter_u64_alloc(M_WAITOK);
+	pmc->pm_pcpu_state = malloc(sizeof(struct pmc_pcpu_state)*mp_ncpus, M_PMC, M_WAITOK|M_ZERO);
 	PMCDBG1(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc);
 
 	return pmc;
@@ -2212,10 +2239,12 @@ pmc_destroy_pmc_descriptor(struct pmc *pm)
 	    ("[pmc,%d] destroying pmc with targets", __LINE__));
 	KASSERT(pm->pm_owner == NULL,
 	    ("[pmc,%d] destroying pmc attached to an owner", __LINE__));
-	KASSERT(pm->pm_runcount == 0,
-	    ("[pmc,%d] pmc has non-zero run count %d", __LINE__,
-		pm->pm_runcount));
+	KASSERT(counter_u64_fetch(pm->pm_runcount) == 0,
+	    ("[pmc,%d] pmc has non-zero run count %ld", __LINE__,
+		 (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 
+	counter_u64_free(pm->pm_runcount);
+	free(pm->pm_pcpu_state, M_PMC);
 	free(pm, M_PMC);
 }
 
@@ -2231,13 +2260,13 @@ pmc_wait_for_pmc_idle(struct pmc *pm)
 	 * Loop (with a forced context switch) till the PMC's runcount
 	 * comes down to zero.
 	 */
-	while (atomic_load_acq_32(&pm->pm_runcount) > 0) {
+	while (counter_u64_fetch(pm->pm_runcount) > 0) {
 #ifdef HWPMC_DEBUG
 		maxloop--;
 		KASSERT(maxloop > 0,
-		    ("[pmc,%d] (ri%d, rc%d) waiting too long for "
+		    ("[pmc,%d] (ri%d, rc%ld) waiting too long for "
 			"pmc to be free", __LINE__,
-			PMC_TO_ROWINDEX(pm), pm->pm_runcount));
+			 PMC_TO_ROWINDEX(pm), (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 #endif
 		pmc_force_context_switch();
 	}
@@ -2295,9 +2324,9 @@ pmc_release_pmc_descriptor(struct pmc *pm)
 		pmc_select_cpu(cpu);
 
 		/* switch off non-stalled CPUs */
-		CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
+		pm->pm_pcpu_state[cpu].pps_cpustate = 0;
 		if (pm->pm_state == PMC_STATE_RUNNING &&
-		    !CPU_ISSET(cpu, &pm->pm_stalled)) {
+			pm->pm_pcpu_state[cpu].pps_stalled == 0) {
 
 			phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
 
@@ -2735,10 +2764,10 @@ pmc_start(struct pmc *pm)
 		 pm->pm_sc.pm_initial)) == 0) {
 		/* If a sampling mode PMC, reset stalled state. */
 		if (PMC_IS_SAMPLING_MODE(mode))
-			CPU_CLR_ATOMIC(cpu, &pm->pm_stalled);
+			pm->pm_pcpu_state[cpu].pps_stalled = 0;
 
 		/* Indicate that we desire this to run. Start it. */
-		CPU_SET_ATOMIC(cpu, &pm->pm_cpustate);
+		pm->pm_pcpu_state[cpu].pps_cpustate = 1;
 		error = pcd->pcd_start_pmc(cpu, adjri);
 	}
 	critical_exit();
@@ -2802,7 +2831,7 @@ pmc_stop(struct pmc *pm)
 	ri = PMC_TO_ROWINDEX(pm);
 	pcd = pmc_ri_to_classdep(md, ri, &adjri);
 
-	CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
+	pm->pm_pcpu_state[cpu].pps_cpustate = 0;
 	critical_enter();
 	if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0)
 		error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial);
@@ -2884,7 +2913,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
 	    pmc_op_to_name[op], arg);
 
 	error = 0;
-	atomic_add_int(&pmc_stats.pm_syscalls, 1);
+	counter_u64_add(pmc_stats.pm_syscalls, 1);
 
 	switch (op) {
 
@@ -3063,8 +3092,16 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
 	case PMC_OP_GETDRIVERSTATS:
 	{
 		struct pmc_op_getdriverstats gms;
-
-		bcopy(&pmc_stats, &gms, sizeof(gms));
+#define CFETCH(a, b, field) a.field = counter_u64_fetch(b.field)
+		CFETCH(gms, pmc_stats, pm_intr_ignored);
+		CFETCH(gms, pmc_stats, pm_intr_processed);
+		CFETCH(gms, pmc_stats, pm_intr_bufferfull);
+		CFETCH(gms, pmc_stats, pm_syscalls);
+		CFETCH(gms, pmc_stats, pm_syscall_errors);
+		CFETCH(gms, pmc_stats, pm_buffer_requests);
+		CFETCH(gms, pmc_stats, pm_buffer_requests_failed);
+		CFETCH(gms, pmc_stats, pm_log_sweeps);
+#undef CFETCH
 		error = copyout(&gms, arg, sizeof(gms));
 	}
 	break;
@@ -4040,7 +4077,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
 		sx_xunlock(&pmc_sx);
 done_syscall:
 	if (error)
-		atomic_add_int(&pmc_stats.pm_syscall_errors, 1);
+		counter_u64_add(pmc_stats.pm_syscall_errors, 1);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

