svn commit: r333690 - in head/sys: dev/hwpmc kern sys

Matt Macy mmacy at FreeBSD.org
Wed May 16 22:29:22 UTC 2018


Author: mmacy
Date: Wed May 16 22:29:20 2018
New Revision: 333690
URL: https://svnweb.freebsd.org/changeset/base/333690

Log:
  hwpmc: Implement per-thread counters for PMC sampling
  
  This implements per-thread counters for PMC sampling. The thread
  descriptors are stored in a list attached to the process descriptor.
  These thread descriptors can store any per-thread information necessary
  for current or future features. For the moment, they just store the counters
  for sampling.
  
  The thread descriptors are created when the process descriptor is created.
  Additionally, thread descriptors are created or freed when threads
  are started or stopped. Because the thread exit function is called in a
  critical section, we can't directly free the thread descriptors. Hence,
  they are freed to a cache, which is also used as a source of allocations
  when needed for new threads.
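  
  As a minimal userland sketch of that pattern (the "entry" type and the
  pool_alloc()/pool_free() names are hypothetical, and the spin mutex
  guarding the real list is omitted for brevity):
  
      /* Hypothetical userland sketch of the descriptor cache pattern. */
      #include <stdlib.h>
      #include <string.h>
      #include <sys/queue.h>
  
      struct entry {
              LIST_ENTRY(entry) link;
              /* per-thread state would live here */
      };
  
      static LIST_HEAD(, entry) pool = LIST_HEAD_INITIALIZER(pool);
  
      /*
       * Return an entry to the cache instead of calling free(); a list
       * insertion is safe in contexts where free() is not.
       */
      static void
      pool_free(struct entry *e)
      {
              memset(e, 0, sizeof(*e));
              LIST_INSERT_HEAD(&pool, e, link);
      }
  
      /* Satisfy allocations from the cache before using malloc(). */
      static struct entry *
      pool_alloc(void)
      {
              struct entry *e;
  
              if ((e = LIST_FIRST(&pool)) != NULL) {
                      LIST_REMOVE(e, link);
                      return (e);
              }
              return (calloc(1, sizeof(*e)));
      }
  
  In the driver, pmc_thread_descriptor_pool_free() and
  pmc_thread_descriptor_pool_alloc() play these roles, with the
  pmc_threadfreelist_mtx spin mutex protecting the list and a grouptask
  trimming it once it grows past kern.hwpmc.threadfreelist_max.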
  
  Approved by:	sbruno
  Obtained from:	jtl
  Sponsored by:	Juniper Networks, Limelight Networks
  Differential Revision:	https://reviews.freebsd.org/D15335

Modified:
  head/sys/dev/hwpmc/hwpmc_mod.c
  head/sys/kern/kern_thr.c
  head/sys/kern/kern_thread.c
  head/sys/sys/pmc.h
  head/sys/sys/pmckern.h

Modified: head/sys/dev/hwpmc/hwpmc_mod.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_mod.c	Wed May 16 22:25:47 2018	(r333689)
+++ head/sys/dev/hwpmc/hwpmc_mod.c	Wed May 16 22:29:20 2018	(r333690)
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
+#include <sys/gtaskqueue.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
@@ -92,6 +93,7 @@ enum pmc_flags {
 	PMC_FLAG_NONE	  = 0x00, /* do nothing */
 	PMC_FLAG_REMOVE   = 0x01, /* atomically remove entry from hash */
 	PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */
+	PMC_FLAG_NOWAIT   = 0x04, /* do not wait for mallocs */
 };
 
 /*
@@ -175,8 +177,22 @@ static LIST_HEAD(pmc_ownerhash, pmc_owner)	*pmc_ownerh
 
 static LIST_HEAD(, pmc_owner)			pmc_ss_owners;
 
+/*
+ * List of free thread entries. This is protected by the
+ * pmc_threadfreelist_mtx spin mutex.
+ */
+static struct mtx pmc_threadfreelist_mtx;	/* spin mutex */
+static LIST_HEAD(, pmc_thread)			pmc_threadfreelist;
+static int pmc_threadfreelist_entries = 0;
+#define	THREADENTRY_SIZE						\
+	(sizeof(struct pmc_thread) +					\
+	    (md->pmd_npmc * sizeof(struct pmc_threadpmcstate)))
 
 /*
+ * Task to free thread descriptors
+ */
+static struct grouptask free_gtask;
+
+/*
  * A map of row indices to classdep structures.
  */
 static struct pmc_classdep **pmc_rowindex_to_classdep;
@@ -191,6 +207,8 @@ static int	pmc_debugflags_parse(char *newstr, char *fe
 #endif
 
 static int	load(struct module *module, int cmd, void *arg);
+static void	pmc_add_thread_descriptors_from_proc(struct proc *p,
+    struct pmc_process *pp);
 static int	pmc_attach_process(struct proc *p, struct pmc *pm);
 static struct pmc *pmc_allocate_pmc_descriptor(void);
 static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p);
@@ -205,12 +223,15 @@ static int	pmc_detach_one_process(struct proc *p, stru
     int flags);
 static void	pmc_destroy_owner_descriptor(struct pmc_owner *po);
 static void	pmc_destroy_pmc_descriptor(struct pmc *pm);
+static void	pmc_destroy_process_descriptor(struct pmc_process *pp);
 static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p);
 static int	pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm);
 static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
     pmc_id_t pmc);
 static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
     uint32_t mode);
+static struct pmc_thread *pmc_find_thread_descriptor(struct pmc_process *pp,
+    struct thread *td, uint32_t mode);
 static void	pmc_force_context_switch(void);
 static void	pmc_link_target_process(struct pmc *pm,
     struct pmc_process *pp);
@@ -225,6 +246,8 @@ static void	pmc_process_fork(void *arg, struct proc *p
     struct proc *p2, int n);
 static void	pmc_process_samples(int cpu, int soft);
 static void	pmc_release_pmc_descriptor(struct pmc *pmc);
+static void	pmc_process_thread_add(struct thread *td);
+static void	pmc_process_thread_delete(struct thread *td);
 static void	pmc_remove_owner(struct pmc_owner *po);
 static void	pmc_remove_process_descriptor(struct pmc_process *pp);
 static void	pmc_restore_cpu_binding(struct pmc_binding *pb);
@@ -233,6 +256,9 @@ static void	pmc_select_cpu(int cpu);
 static int	pmc_start(struct pmc *pm);
 static int	pmc_stop(struct pmc *pm);
 static int	pmc_syscall_handler(struct thread *td, void *syscall_args);
+static struct pmc_thread *pmc_thread_descriptor_pool_alloc(void);
+static void	pmc_thread_descriptor_pool_drain(void);
+static void	pmc_thread_descriptor_pool_free(struct pmc_thread *pt);
 static void	pmc_unlink_target_process(struct pmc *pmc,
     struct pmc_process *pp);
 static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp);
@@ -312,6 +338,24 @@ SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG
 
 
 /*
+ * kern.hwpmc.threadfreelist_entries -- number of free entries
+ */
+
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_entries, CTLFLAG_RD,
+    &pmc_threadfreelist_entries, 0, "number of available thread entries");
+
+
+/*
+ * kern.hwpmc.threadfreelist_max -- maximum number of free entries
+ */
+
+static int pmc_threadfreelist_max = PMC_THREADLIST_MAX;
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_max, CTLFLAG_RW,
+    &pmc_threadfreelist_max, 0,
+    "maximum number of available thread entries before freeing some");
+
+
+/*
  * security.bsd.unprivileged_syspmcs -- allow non-root processes to
  * allocate system-wide PMCs.
  *
@@ -835,6 +879,9 @@ pmc_link_target_process(struct pmc *pm, struct pmc_pro
 {
 	int ri;
 	struct pmc_target *pt;
+#ifdef INVARIANTS
+	struct pmc_thread *pt_td;
+#endif
 
 	sx_assert(&pmc_sx, SX_XLOCKED);
 
@@ -878,6 +925,18 @@ pmc_link_target_process(struct pmc *pm, struct pmc_pro
 
 	pp->pp_refcnt++;
 
+#ifdef INVARIANTS
+	/* Confirm that the per-thread values at this row index are cleared. */
+	if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
+		mtx_lock_spin(pp->pp_tdslock);
+		LIST_FOREACH(pt_td, &pp->pp_tds, pt_next) {
+			KASSERT(pt_td->pt_pmcs[ri].pt_pmcval == (pmc_value_t) 0,
+			    ("[pmc,%d] pt_pmcval not cleared for pid=%d at "
+			    "ri=%d", __LINE__, pp->pp_proc->p_pid, ri));
+		}
+		mtx_unlock_spin(pp->pp_tdslock);
+	}
+#endif
 }
 
 /*
@@ -890,6 +949,7 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_p
 	int ri;
 	struct proc *p;
 	struct pmc_target *ptgt;
+	struct pmc_thread *pt;
 
 	sx_assert(&pmc_sx, SX_XLOCKED);
 
@@ -912,6 +972,14 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_p
 	pp->pp_pmcs[ri].pp_pmc = NULL;
 	pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0;
 
+	/* Clear the per-thread values at this row index. */
+	if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
+		mtx_lock_spin(pp->pp_tdslock);
+		LIST_FOREACH(pt, &pp->pp_tds, pt_next)
+			pt->pt_pmcs[ri].pt_pmcval = (pmc_value_t) 0;
+		mtx_unlock_spin(pp->pp_tdslock);
+	}
+
 	/* Remove owner-specific flags */
 	if (pm->pm_owner->po_owner == pp->pp_proc) {
 		pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS;
@@ -1005,7 +1073,7 @@ pmc_can_attach(struct pmc *pm, struct proc *t)
 static int
 pmc_attach_one_process(struct proc *p, struct pmc *pm)
 {
-	int ri;
+	int ri, error;
 	char *fullpath, *freepath;
 	struct pmc_process	*pp;
 
@@ -1026,15 +1094,26 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
 	 */
 	ri = PMC_TO_ROWINDEX(pm);
 
-	if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL)
-		return ENOMEM;
+	/* mark process as using HWPMCs */
+	PROC_LOCK(p);
+	p->p_flag |= P_HWPMC;
+	PROC_UNLOCK(p);
 
-	if (pp->pp_pmcs[ri].pp_pmc == pm) /* already present at slot [ri] */
-		return EEXIST;
+	if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) {
+		error = ENOMEM;
+		goto fail;
+	}
 
-	if (pp->pp_pmcs[ri].pp_pmc != NULL)
-		return EBUSY;
+	if (pp->pp_pmcs[ri].pp_pmc == pm) { /* already present at slot [ri] */
+		error = EEXIST;
+		goto fail;
+	}
 
+	if (pp->pp_pmcs[ri].pp_pmc != NULL) {
+		error = EBUSY;
+		goto fail;
+	}
+
 	pmc_link_target_process(pm, pp);
 
 	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) &&
@@ -1056,12 +1135,13 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
 		if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 			pmc_log_process_mappings(pm->pm_owner, p);
 	}
-	/* mark process as using HWPMCs */
+
+	return (0);
+ fail:
 	PROC_LOCK(p);
-	p->p_flag |= P_HWPMC;
+	p->p_flag &= ~P_HWPMC;
 	PROC_UNLOCK(p);
-
-	return 0;
+	return (error);
 }
 
 /*
@@ -1173,7 +1253,7 @@ pmc_detach_one_process(struct proc *p, struct pmc *pm,
 	pmc_remove_process_descriptor(pp);
 
 	if (flags & PMC_FLAG_REMOVE)
-		free(pp, M_PMC);
+		pmc_destroy_process_descriptor(pp);
 
 	PROC_LOCK(p);
 	p->p_flag &= ~P_HWPMC;
@@ -1250,10 +1330,11 @@ pmc_process_csw_in(struct thread *td)
 	struct pmc_hw *phw;
 	pmc_value_t newvalue;
 	struct pmc_process *pp;
+	struct pmc_thread *pt;
 	struct pmc_classdep *pcd;
 
 	p = td->td_proc;
-
+	pt = NULL;
 	if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL)
 		return;
 
@@ -1312,23 +1393,54 @@ pmc_process_csw_in(struct thread *td)
 		/*
 		 * Write out saved value and start the PMC.
 		 *
-		 * Sampling PMCs use a per-process value, while
+		 * Sampling PMCs use a per-thread value, while
 		 * counting mode PMCs use a per-pmc value that is
 		 * inherited across descendants.
 		 */
 		if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
+			if (pt == NULL)
+				pt = pmc_find_thread_descriptor(pp, td,
+				    PMC_FLAG_NONE);
+
+			KASSERT(pt != NULL,
+			    ("[pmc,%d] No thread found for td=%p", __LINE__,
+			    td));
+
 			mtx_pool_lock_spin(pmc_mtxpool, pm);
 
 			/*
-			 * Use the saved value calculated after the most recent
-			 * thread switch out to start this counter.  Reset
-			 * the saved count in case another thread from this
-			 * process switches in before any threads switch out.
+			 * If we have a thread descriptor, use the per-thread
+			 * counter in the descriptor. If not, we will use
+			 * a per-process counter. 
+			 *
+			 * TODO: Remove the per-process "safety net" once
+			 * we have thoroughly tested that we don't hit the
+			 * above assert.
 			 */
-			newvalue = PMC_PCPU_SAVED(cpu,ri) =
-			    pp->pp_pmcs[ri].pp_pmcval;
-			pp->pp_pmcs[ri].pp_pmcval = pm->pm_sc.pm_reloadcount;
+			if (pt != NULL) {
+				if (pt->pt_pmcs[ri].pt_pmcval > 0)
+					newvalue = pt->pt_pmcs[ri].pt_pmcval;
+				else
+					newvalue = pm->pm_sc.pm_reloadcount;
+			} else {
+				/*
+				 * Use the saved value calculated after the most
+				 * recent time a thread using the shared counter
+				 * switched out. Reset the saved count in case
+				 * another thread from this process switches in
+				 * before any threads switch out.
+				 */
+
+				newvalue = pp->pp_pmcs[ri].pp_pmcval;
+				pp->pp_pmcs[ri].pp_pmcval =
+				    pm->pm_sc.pm_reloadcount;
+			}
 			mtx_pool_unlock_spin(pmc_mtxpool, pm);
+			KASSERT(newvalue > 0 && newvalue <=
+			    pm->pm_sc.pm_reloadcount,
+			    ("[pmc,%d] pmcval outside of expected range cpu=%d "
+			    "ri=%d pmcval=%jx pm_reloadcount=%jx", __LINE__,
+			    cpu, ri, newvalue, pm->pm_sc.pm_reloadcount));
 		} else {
 			KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC,
 			    ("[pmc,%d] illegal mode=%d", __LINE__,
@@ -1381,6 +1493,7 @@ pmc_process_csw_out(struct thread *td)
 	pmc_value_t newvalue;
 	unsigned int adjri, ri;
 	struct pmc_process *pp;
+	struct pmc_thread *pt = NULL;
 	struct pmc_classdep *pcd;
 
 
@@ -1476,37 +1589,50 @@ pmc_process_csw_out(struct thread *td)
 			pcd->pcd_read_pmc(cpu, adjri, &newvalue);
 
 			if (mode == PMC_MODE_TS) {
-				PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (samp)",
-				    cpu, ri, PMC_PCPU_SAVED(cpu,ri) - newvalue);
+				PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d val=%jd (samp)",
+				    cpu, ri, newvalue);
 
+				if (pt == NULL)
+					pt = pmc_find_thread_descriptor(pp, td,
+					    PMC_FLAG_NONE);
+
+				KASSERT(pt != NULL,
+				    ("[pmc,%d] No thread found for td=%p",
+				    __LINE__, td));
+
+				mtx_pool_lock_spin(pmc_mtxpool, pm);
+
 				/*
-				 * For sampling process-virtual PMCs,
-				 * newvalue is the number of events to be seen
-				 * until the next sampling interrupt.
-				 * We can just add the events left from this
-				 * invocation to the counter, then adjust
-				 * in case we overflow our range.
+				 * If we have a thread descriptor, save the
+				 * per-thread counter in the descriptor. If not,
+				 * we will update the per-process counter.
 				 *
-				 * (Recall that we reload the counter every
-				 * time we use it.)
+				 * TODO: Remove the per-process "safety net"
+				 * once we have thoroughly tested that we
+				 * don't hit the above assert.
 				 */
-				mtx_pool_lock_spin(pmc_mtxpool, pm);
-
-				pp->pp_pmcs[ri].pp_pmcval += newvalue;
-				if (pp->pp_pmcs[ri].pp_pmcval >
-				    pm->pm_sc.pm_reloadcount)
-					pp->pp_pmcs[ri].pp_pmcval -=
-					    pm->pm_sc.pm_reloadcount;
-				KASSERT(pp->pp_pmcs[ri].pp_pmcval > 0 &&
-				    pp->pp_pmcs[ri].pp_pmcval <=
-				    pm->pm_sc.pm_reloadcount,
-				    ("[pmc,%d] pp_pmcval outside of expected "
-				    "range cpu=%d ri=%d pp_pmcval=%jx "
-				    "pm_reloadcount=%jx", __LINE__, cpu, ri,
-				    pp->pp_pmcs[ri].pp_pmcval,
-				    pm->pm_sc.pm_reloadcount));
+				if (pt != NULL)
+					pt->pt_pmcs[ri].pt_pmcval = newvalue;
+				else {
+					/*
+					 * For sampling process-virtual PMCs,
+					 * newvalue is the number of events to
+					 * be seen until the next sampling
+					 * interrupt. We can just add the events
+					 * left from this invocation to the
+					 * counter, then adjust in case we
+					 * overflow our range.
+					 *
+					 * (Recall that we reload the counter
+					 * every time we use it.)
+					 */
+					pp->pp_pmcs[ri].pp_pmcval += newvalue;
+					if (pp->pp_pmcs[ri].pp_pmcval >
+					    pm->pm_sc.pm_reloadcount)
+						pp->pp_pmcs[ri].pp_pmcval -=
+						    pm->pm_sc.pm_reloadcount;
+				}
 				mtx_pool_unlock_spin(pmc_mtxpool, pm);
-
 			} else {
 				tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);
 
@@ -1550,6 +1676,33 @@ pmc_process_csw_out(struct thread *td)
 }
 
 /*
+ * A new thread for a process.
+ */
+static void
+pmc_process_thread_add(struct thread *td)
+{
+	struct pmc_process *pp;
+
+	pp = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE);
+	if (pp != NULL)
+		pmc_find_thread_descriptor(pp, td, PMC_FLAG_ALLOCATE);
+}
+
+/*
+ * A thread is exiting from a process.
+ */
+static void
+pmc_process_thread_delete(struct thread *td)
+{
+	struct pmc_process *pp;
+
+	pp = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE);
+	if (pp != NULL)
+		pmc_thread_descriptor_pool_free(pmc_find_thread_descriptor(pp,
+		    td, PMC_FLAG_REMOVE));
+}
+
+/*
  * A mapping change for a process.
  */
 
@@ -1873,13 +2026,16 @@ const char *pmc_hooknames[] = {
 	"MUNMAP",
 	"CALLCHAIN-NMI",
 	"CALLCHAIN-SOFT",
-	"SOFTSAMPLING"
+	"SOFTSAMPLING",
+	"THR-CREATE",
+	"THR-EXIT",
 };
 #endif
 
 static int
 pmc_hook_handler(struct thread *td, int function, void *arg)
 {
+	int cpu;
 
 	PMCDBG4(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function,
 	    pmc_hooknames[function], arg);
@@ -1996,7 +2152,7 @@ pmc_hook_handler(struct thread *td, int function, void
 
 		if (pp->pp_refcnt == 0) {
 			pmc_remove_process_descriptor(pp);
-			free(pp, M_PMC);
+			pmc_destroy_process_descriptor(pp);
 			break;
 		}
 
@@ -2034,8 +2190,9 @@ pmc_hook_handler(struct thread *td, int function, void
 		 * lose the interrupt sample.
 		 */
 		DPCPU_SET(pmc_sampled, 0);
-		pmc_process_samples(PCPU_GET(cpuid), PMC_HR);
-		pmc_process_samples(PCPU_GET(cpuid), PMC_SR);
+		cpu = PCPU_GET(cpuid);
+		pmc_process_samples(cpu, PMC_HR);
+		pmc_process_samples(cpu, PMC_SR);
 		break;
 
 	case PMC_FN_MMAP:
@@ -2078,6 +2235,16 @@ pmc_hook_handler(struct thread *td, int function, void
 		pmc_soft_intr((struct pmckern_soft *) arg);
 		break;
 
+	case PMC_FN_THR_CREATE:
+		pmc_process_thread_add(td);
+		break;
+
+	case PMC_FN_THR_EXIT:
+		KASSERT(td == curthread, ("[pmc,%d] td != curthread",
+		    __LINE__));
+		pmc_process_thread_delete(td);
+		break;
+
 	default:
 #ifdef	HWPMC_DEBUG
 		KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
@@ -2129,6 +2296,198 @@ pmc_destroy_owner_descriptor(struct pmc_owner *po)
 }
 
 /*
+ * Allocate a thread descriptor from the free pool.
+ *
+ * NOTE: This *can* return NULL.
+ */
+static struct pmc_thread *
+pmc_thread_descriptor_pool_alloc(void)
+{
+	struct pmc_thread *pt;
+
+	mtx_lock_spin(&pmc_threadfreelist_mtx);
+	if ((pt = LIST_FIRST(&pmc_threadfreelist)) != NULL) {
+		LIST_REMOVE(pt, pt_next);
+		pmc_threadfreelist_entries--;
+	}
+	mtx_unlock_spin(&pmc_threadfreelist_mtx);
+
+	return (pt);
+}
+
+/*
+ * Add a thread descriptor to the free pool. We use this instead of free()
+ * to maintain a cache of free entries. Additionally, we can safely call
+ * this function when we cannot call free(), such as in a critical section.
+ */
+static void
+pmc_thread_descriptor_pool_free(struct pmc_thread *pt)
+{
+
+	if (pt == NULL)
+		return;
+
+	memset(pt, 0, THREADENTRY_SIZE);
+	mtx_lock_spin(&pmc_threadfreelist_mtx);
+	LIST_INSERT_HEAD(&pmc_threadfreelist, pt, pt_next);
+	pmc_threadfreelist_entries++;
+	if (pmc_threadfreelist_entries > pmc_threadfreelist_max)
+		GROUPTASK_ENQUEUE(&free_gtask);
+	mtx_unlock_spin(&pmc_threadfreelist_mtx);
+}
+
+/*
+ * A task to reclaim excess entries from the free list.
+ */
+static void
+pmc_thread_descriptor_pool_free_task(void *arg __unused)
+{
+	struct pmc_thread *pt;
+	LIST_HEAD(, pmc_thread) tmplist;
+	int delta;
+
+	LIST_INIT(&tmplist);
+	/* Determine what changes, if any, we need to make. */
+	mtx_lock_spin(&pmc_threadfreelist_mtx);
+	delta = pmc_threadfreelist_entries - pmc_threadfreelist_max;
+	while (delta > 0) {
+		pt = LIST_FIRST(&pmc_threadfreelist);
+		MPASS(pt);
+		LIST_REMOVE(pt, pt_next);
+		LIST_INSERT_HEAD(&tmplist, pt, pt_next);
+		pmc_threadfreelist_entries--;
+		delta--;
+	}
+	mtx_unlock_spin(&pmc_threadfreelist_mtx);
+
+	/* If there are entries to free, free them. */
+	while (!LIST_EMPTY(&tmplist)) {
+		pt = LIST_FIRST(&tmplist);
+		LIST_REMOVE(pt, pt_next);
+		free(pt, M_PMC);
+	}
+}
+
+/*
+ * Drain the thread free pool, freeing all allocations.
+ */
+static void
+pmc_thread_descriptor_pool_drain(void)
+{
+	struct pmc_thread *pt, *next;
+
+	LIST_FOREACH_SAFE(pt, &pmc_threadfreelist, pt_next, next) {
+		LIST_REMOVE(pt, pt_next);
+		free(pt, M_PMC);
+	}
+}
+
+/*
+ * find the descriptor corresponding to thread 'td', adding or removing it
+ * as specified by 'mode'.
+ *
+ * Note that this supports a mode flag in addition to those supported
+ * by pmc_find_process_descriptor():
+ * PMC_FLAG_NOWAIT: Causes the function to not wait for mallocs.
+ *     This makes it safe to call while holding certain other locks.
+ */
+
+static struct pmc_thread *
+pmc_find_thread_descriptor(struct pmc_process *pp, struct thread *td,
+    uint32_t mode)
+{
+	struct pmc_thread *pt = NULL, *ptnew = NULL;
+	int wait_flag;
+
+	KASSERT(td != NULL, ("[pmc,%d] called to add NULL td", __LINE__));
+
+	/*
+	 * Pre-allocate memory in the PMC_FLAG_ALLOCATE case prior to
+	 * acquiring the lock.
+	 */
+	if (mode & PMC_FLAG_ALLOCATE) {
+		if ((ptnew = pmc_thread_descriptor_pool_alloc()) == NULL) {
+			wait_flag = (mode & PMC_FLAG_NOWAIT) ? M_NOWAIT :
+			    M_WAITOK;
+			ptnew = malloc(THREADENTRY_SIZE, M_PMC,
+			    wait_flag|M_ZERO);
+		}
+	}
+
+	mtx_lock_spin(pp->pp_tdslock);
+
+	LIST_FOREACH(pt, &pp->pp_tds, pt_next)
+		if (pt->pt_td == td)
+			break;
+
+	if ((mode & PMC_FLAG_REMOVE) && pt != NULL)
+		LIST_REMOVE(pt, pt_next);
+
+	if ((mode & PMC_FLAG_ALLOCATE) && pt == NULL && ptnew != NULL) {
+		pt = ptnew;
+		ptnew = NULL;
+		pt->pt_td = td;
+		LIST_INSERT_HEAD(&pp->pp_tds, pt, pt_next);
+	}
+
+	mtx_unlock_spin(pp->pp_tdslock);
+
+	if (ptnew != NULL)
+		free(ptnew, M_PMC);
+
+	return (pt);
+}
+
+/*
+ * Try to add thread descriptors for each thread in a process.
+ */
+
+static void
+pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp)
+{
+	struct thread *curtd;
+	struct pmc_thread **tdlist;
+	int i, tdcnt, tdlistsz;
+
+	KASSERT(!PROC_LOCKED(p), ("[pmc,%d] proc unexpectedly locked",
+	    __LINE__));
+	tdcnt = 32;
+ restart:
+	/* Size the list to hold strictly more entries than the last count. */
+	tdlistsz = roundup2(tdcnt + 1, 32);
+
+	tdcnt = 0;
+	tdlist = malloc(sizeof(struct pmc_thread *) * tdlistsz, M_TEMP,
+	    M_WAITOK);
+
+	PROC_LOCK(p);
+	FOREACH_THREAD_IN_PROC(p, curtd)
+		tdcnt++;
+	if (tdcnt >= tdlistsz) {
+		PROC_UNLOCK(p);
+		free(tdlist, M_TEMP);
+		goto restart;
+	}
+	/*
+	 * Try to add a descriptor for each thread without sleeping,
+	 * remembering each one added. If an allocation fails, release
+	 * everything added so far, drop the process lock, and retry.
+	 */
+	tdcnt = 0;
+	FOREACH_THREAD_IN_PROC(p, curtd) {
+		tdlist[tdcnt] = pmc_find_thread_descriptor(pp, curtd,
+		    PMC_FLAG_ALLOCATE|PMC_FLAG_NOWAIT);
+		if (tdlist[tdcnt] == NULL) {
+			PROC_UNLOCK(p);
+			for (i = 0; i <= tdcnt; i++)
+				pmc_thread_descriptor_pool_free(tdlist[i]);
+			free(tdlist, M_TEMP);
+			goto restart;
+		}
+		tdcnt++;
+	}
+	PROC_UNLOCK(p);
+	free(tdlist, M_TEMP);
+}
+
+/*
  * find the descriptor corresponding to process 'p', adding or removing it
  * as specified by 'mode'.
  */
@@ -2146,7 +2505,7 @@ pmc_find_process_descriptor(struct proc *p, uint32_t m
 	ppnew = NULL;
 
 	/*
-	 * Pre-allocate memory in the FIND_ALLOCATE case since we
+	 * Pre-allocate memory in the PMC_FLAG_ALLOCATE case since we
 	 * cannot call malloc(9) once we hold a spin lock.
 	 */
 	if (mode & PMC_FLAG_ALLOCATE)
@@ -2164,13 +2523,20 @@ pmc_find_process_descriptor(struct proc *p, uint32_t m
 	if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL &&
 	    ppnew != NULL) {
 		ppnew->pp_proc = p;
+		LIST_INIT(&ppnew->pp_tds);
+		ppnew->pp_tdslock = mtx_pool_find(pmc_mtxpool, ppnew);
 		LIST_INSERT_HEAD(pph, ppnew, pp_next);
+		mtx_unlock_spin(&pmc_processhash_mtx);
 		pp = ppnew;
 		ppnew = NULL;
+
+		/* Add thread descriptors for this process' current threads. */
+		pmc_add_thread_descriptors_from_proc(p, pp);
 	}
-	mtx_unlock_spin(&pmc_processhash_mtx);
+	else
+		mtx_unlock_spin(&pmc_processhash_mtx);
 
-	if (pp != NULL && ppnew != NULL)
+	if (ppnew != NULL)
 		free(ppnew, M_PMC);
 
 	return pp;
@@ -2192,7 +2558,23 @@ pmc_remove_process_descriptor(struct pmc_process *pp)
 	mtx_unlock_spin(&pmc_processhash_mtx);
 }
 
+/*
+ * destroy a process descriptor.
+ */
 
+static void
+pmc_destroy_process_descriptor(struct pmc_process *pp)
+{
+	struct pmc_thread *pmc_td;
+
+	while ((pmc_td = LIST_FIRST(&pp->pp_tds)) != NULL) {
+		LIST_REMOVE(pmc_td, pt_next);
+		pmc_thread_descriptor_pool_free(pmc_td);
+	}
+	free(pp, M_PMC);
+}
+
 /*
  * find an owner descriptor corresponding to proc 'p'
  */
@@ -2420,7 +2802,7 @@ pmc_release_pmc_descriptor(struct pmc *pm)
 
 			if (pp->pp_refcnt == 0) {
 				pmc_remove_process_descriptor(pp);
-				free(pp, M_PMC);
+				pmc_destroy_process_descriptor(pp);
 			}
 		}
 
@@ -4582,15 +4964,21 @@ pmc_process_exit(void *arg __unused, struct proc *p)
 				pm->pm_pcpu_state[cpu].pps_cpustate = 0;
 				if (!pm->pm_pcpu_state[cpu].pps_stalled) {
 					(void) pcd->pcd_stop_pmc(cpu, adjri);
-					pcd->pcd_read_pmc(cpu, adjri,
-					    &newvalue);
-					tmp = newvalue -
-					    PMC_PCPU_SAVED(cpu,ri);
 
-					mtx_pool_lock_spin(pmc_mtxpool, pm);
-					pm->pm_gv.pm_savedvalue += tmp;
-					pp->pp_pmcs[ri].pp_pmcval += tmp;
-					mtx_pool_unlock_spin(pmc_mtxpool, pm);
+					if (PMC_TO_MODE(pm) == PMC_MODE_TC) {
+						pcd->pcd_read_pmc(cpu, adjri,
+						    &newvalue);
+						tmp = newvalue -
+						    PMC_PCPU_SAVED(cpu,ri);
+
+						mtx_pool_lock_spin(pmc_mtxpool,
+						    pm);
+						pm->pm_gv.pm_savedvalue += tmp;
+						pp->pp_pmcs[ri].pp_pmcval +=
+						    tmp;
+						mtx_pool_unlock_spin(
+						    pmc_mtxpool, pm);
+					}
 				}
 			}
 
@@ -4700,6 +5088,13 @@ pmc_process_fork(void *arg __unused, struct proc *p1, 
 	if (do_descendants == 0) /* nothing to do */
 		goto done;
 
+	/*
+	 * Now mark the new process as being tracked by this driver.
+	 */
+	PROC_LOCK(newproc);
+	newproc->p_flag |= P_HWPMC;
+	PROC_UNLOCK(newproc);
+
 	/* allocate a descriptor for the new process  */
 	if ((ppnew = pmc_find_process_descriptor(newproc,
 		 PMC_FLAG_ALLOCATE)) == NULL)
@@ -4724,13 +5119,6 @@ pmc_process_fork(void *arg __unused, struct proc *p1, 
 				    newproc->p_pid);
 		}
 
-	/*
-	 * Now mark the new process as being tracked by this driver.
-	 */
-	PROC_LOCK(newproc);
-	newproc->p_flag |= P_HWPMC;
-	PROC_UNLOCK(newproc);
-
  done:
 	sx_xunlock(&pmc_sx);
 }
@@ -5055,6 +5443,16 @@ pmc_initialize(void)
 	    "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask,
 	    pmc_processhash, pmc_processhashmask);
 
+	/* Initialize a spin mutex for the thread free list. */
+	mtx_init(&pmc_threadfreelist_mtx, "pmc-threadfreelist", "pmc-leaf",
+	    MTX_SPIN);
+
+	/*
+	 * Initialize the task that prunes the thread free list when it
+	 * grows past pmc_threadfreelist_max.
+	 */
+	taskqgroup_config_gtask_init(NULL, &free_gtask,
+	    pmc_thread_descriptor_pool_free_task,
+	    "thread descriptor pool free task");
+
 	/* register process {exit,fork,exec} handlers */
 	pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit,
 	    pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY);
@@ -5152,6 +5550,9 @@ pmc_cleanup(void)
 		}
 
 	/* reclaim allocated data structures */
+	mtx_destroy(&pmc_threadfreelist_mtx);
+	pmc_thread_descriptor_pool_drain();
+
 	if (pmc_mtxpool)
 		mtx_pool_destroy(&pmc_mtxpool);
 

Modified: head/sys/kern/kern_thr.c
==============================================================================
--- head/sys/kern/kern_thr.c	Wed May 16 22:25:47 2018	(r333689)
+++ head/sys/kern/kern_thr.c	Wed May 16 22:29:20 2018	(r333690)
@@ -30,6 +30,7 @@
 __FBSDID("$FreeBSD$");
 
 #include "opt_posix.h"
+#include "opt_hwpmc_hooks.h"
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
@@ -55,6 +56,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/rtprio.h>
 #include <sys/umtx.h>
 #include <sys/limits.h>
+#ifdef	HWPMC_HOOKS
+#include <sys/pmckern.h>
+#endif
 
 #include <machine/frame.h>
 
@@ -258,6 +262,10 @@ thread_create(struct thread *td, struct rtprio *rtp,
 		newtd->td_dbgflags |= TDB_BORN;
 
 	PROC_UNLOCK(p);
+#ifdef	HWPMC_HOOKS
+	if (PMC_PROC_IS_USING_PMCS(p))
+		PMC_CALL_HOOK(newtd, PMC_FN_THR_CREATE, NULL);
+#endif
 
 	tidhash_add(newtd);
 

Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c	Wed May 16 22:25:47 2018	(r333689)
+++ head/sys/kern/kern_thread.c	Wed May 16 22:29:20 2018	(r333690)
@@ -586,8 +586,10 @@ thread_exit(void)
 	 * If this thread is part of a process that is being tracked by hwpmc(4),
 	 * inform the module of the thread's impending exit.
 	 */
-	if (PMC_PROC_IS_USING_PMCS(td->td_proc))
+	if (PMC_PROC_IS_USING_PMCS(td->td_proc)) {
 		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
+		PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT, NULL);
+	}
 #endif
 	PROC_UNLOCK(p);
 	PROC_STATLOCK(p);

Modified: head/sys/sys/pmc.h
==============================================================================
--- head/sys/sys/pmc.h	Wed May 16 22:25:47 2018	(r333689)
+++ head/sys/sys/pmc.h	Wed May 16 22:29:20 2018	(r333690)
@@ -647,6 +647,7 @@ struct pmc_op_getdyneventinfo {
 #define	PMC_NLOGBUFFERS_PCPU		8
 #define	PMC_NSAMPLES				64
 #define	PMC_CALLCHAIN_DEPTH			32
+#define	PMC_THREADLIST_MAX			64
 
 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."
 
@@ -786,8 +787,27 @@ struct pmc {
 #define	PMC_TO_ROWINDEX(P)	PMC_ID_TO_ROWINDEX((P)->pm_id)
 #define	PMC_TO_CPU(P)		PMC_ID_TO_CPU((P)->pm_id)
 
+/*
+ * struct pmc_threadpmcstate
+ *
+ * Record per-PMC, per-thread state.
+ */
+struct pmc_threadpmcstate {
+	pmc_value_t	pt_pmcval;	/* per-thread reload count */
+};
 
 /*
+ * struct pmc_thread
+ *
+ * Record a 'target' thread being profiled.
+ */
+struct pmc_thread {
+	LIST_ENTRY(pmc_thread) pt_next;		/* linked list */
+	struct thread	*pt_td;			/* target thread */
+	struct pmc_threadpmcstate pt_pmcs[];	/* per-PMC state */
+};
+
+/*
  * struct pmc_process
  *
  * Record a 'target' process being profiled.
@@ -808,9 +828,11 @@ struct pmc_targetstate {
 
 struct pmc_process {
 	LIST_ENTRY(pmc_process) pp_next;	/* hash chain */
+	LIST_HEAD(, pmc_thread) pp_tds;		/* list of threads */
+	struct mtx	*pp_tdslock;		/* lock on pp_tds thread list */
 	int		pp_refcnt;		/* reference count */
 	uint32_t	pp_flags;		/* flags PMC_PP_* */
-	struct proc	*pp_proc;		/* target thread */
+	struct proc	*pp_proc;		/* target process */
 	struct pmc_targetstate pp_pmcs[];       /* NHWPMCs */
 };
 

Modified: head/sys/sys/pmckern.h
==============================================================================
--- head/sys/sys/pmckern.h	Wed May 16 22:25:47 2018	(r333689)
+++ head/sys/sys/pmckern.h	Wed May 16 22:29:20 2018	(r333690)
@@ -60,6 +60,8 @@
 #define	PMC_FN_USER_CALLCHAIN		9
 #define	PMC_FN_USER_CALLCHAIN_SOFT	10
 #define	PMC_FN_SOFT_SAMPLING		11
+#define	PMC_FN_THR_CREATE		12
+#define	PMC_FN_THR_EXIT			13
 
 #define	PMC_HR	0	/* Hardware ring buffer */
 #define	PMC_SR	1	/* Software ring buffer */

