svn commit: r334595 - in head: sys/dev/hwpmc sys/kern sys/sys usr.sbin/pmcstat

Matt Macy mmacy at FreeBSD.org
Mon Jun 4 01:10:26 UTC 2018


Author: mmacy
Date: Mon Jun  4 01:10:23 2018
New Revision: 334595
URL: https://svnweb.freebsd.org/changeset/base/334595

Log:
  hwpmc: support sampling both kernel and user stacks when interrupted in kernel
  
  This adds the -U option to pmcstat which will attribute in-kernel samples
  back to the user stack that invoked the system call. It is not the default,
  because when looking at kernel profiles it is generally more desirable to
  merge all instances of a given system call together.
  
  Although heavily revised, this change is directly derived from D7350 by
  Jonathan T. Looney.
  
  Obtained from: jtl
  Sponsored by: Juniper Networks, Limelight Networks

Modified:
  head/sys/dev/hwpmc/hwpmc_logging.c
  head/sys/dev/hwpmc/hwpmc_mod.c
  head/sys/kern/subr_trap.c
  head/sys/sys/pmc.h
  head/sys/sys/pmckern.h
  head/sys/sys/proc.h
  head/usr.sbin/pmcstat/pmcpl_callgraph.c
  head/usr.sbin/pmcstat/pmcstat.8
  head/usr.sbin/pmcstat/pmcstat.c

Modified: head/sys/dev/hwpmc/hwpmc_logging.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_logging.c	Sun Jun  3 23:49:02 2018	(r334594)
+++ head/sys/dev/hwpmc/hwpmc_logging.c	Mon Jun  4 01:10:23 2018	(r334595)
@@ -250,11 +250,9 @@ pmc_plb_rele(struct pmclog_buffer *plb)
 	mtx_unlock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx);
 }
 
-
 /*
  * Get a log buffer
  */
-
 static int
 pmclog_get_buffer(struct pmc_owner *po)
 {
@@ -345,7 +343,6 @@ pmclog_proc_ignite(void *handle, struct pmc_owner *po)
  *
  * This function is executed by each pmc owner's helper thread.
  */
-
 static void
 pmclog_loop(void *arg)
 {
@@ -846,12 +843,15 @@ pmclog_schedule_one_cond(void *arg)
 {
 	struct pmc_owner *po = arg;
 	struct pmclog_buffer *plb;
+	int cpu;
 
 	spinlock_enter();
+	cpu = curcpu;
 	/* tell hardclock not to run again */
-	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
+	if (PMC_CPU_HAS_SAMPLES(cpu))
 		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
-	plb = po->po_curbuf[curcpu];
+	pmc_flush_samples(cpu);
+	plb = po->po_curbuf[cpu];
 	if (plb && plb->plb_ptr != plb->plb_base)
 		pmclog_schedule_io(po, 1);
 	spinlock_exit();

Modified: head/sys/dev/hwpmc/hwpmc_mod.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_mod.c	Sun Jun  3 23:49:02 2018	(r334594)
+++ head/sys/dev/hwpmc/hwpmc_mod.c	Mon Jun  4 01:10:23 2018	(r334595)
@@ -207,6 +207,8 @@ static int	pmc_debugflags_parse(char *newstr, char *fe
 #endif
 
 static int	load(struct module *module, int cmd, void *arg);
+static int	pmc_add_sample(int cpu, int ring, struct pmc *pm,
+    struct trapframe *tf, int inuserspace);
 static void	pmc_add_thread_descriptors_from_proc(struct proc *p,
     struct pmc_process *pp);
 static int	pmc_attach_process(struct proc *p, struct pmc *pm);
@@ -248,6 +250,7 @@ static void	pmc_process_samples(int cpu, int soft);
 static void	pmc_release_pmc_descriptor(struct pmc *pmc);
 static void	pmc_process_thread_add(struct thread *td);
 static void	pmc_process_thread_delete(struct thread *td);
+static void	pmc_process_thread_userret(struct thread *td);
 static void	pmc_remove_owner(struct pmc_owner *po);
 static void	pmc_remove_process_descriptor(struct pmc_process *pp);
 static void	pmc_restore_cpu_binding(struct pmc_binding *pb);
@@ -265,7 +268,7 @@ static int generic_switch_in(struct pmc_cpu *pc, struc
 static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp);
 static struct pmc_mdep *pmc_generic_cpu_initialize(void);
 static void pmc_generic_cpu_finalize(struct pmc_mdep *md);
-
+static void pmc_post_callchain_callback(void);
 /*
  * Kernel tunables and sysctl(8) interface.
  */
@@ -291,6 +294,10 @@ SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer
 				   &pmc_stats.pm_buffer_requests_failed, "# of buffer requests which failed");
 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW,
 				   &pmc_stats.pm_log_sweeps, "# of ?");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, merges, CTLFLAG_RW,
+				   &pmc_stats.pm_merges, "# of times kernel stack was found for user trace");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, overwrites, CTLFLAG_RW,
+				   &pmc_stats.pm_overwrites, "# of times a sample was overwritten before being logged");
 
 static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
 SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN,
@@ -1703,6 +1710,19 @@ pmc_process_thread_delete(struct thread *td)
 }
 
 /*
+ * A userret() call for a thread.
+ */
+static void
+pmc_process_thread_userret(struct thread *td)
+{
+
+	thread_lock(td);
+	curthread->td_flags |= TDF_ASTPENDING;
+	thread_unlock(td);
+	pmc_post_callchain_callback();
+}
+
+/*
  * A mapping change for a process.
  */
 
@@ -2030,6 +2050,7 @@ const char *pmc_hooknames[] = {
 	"SOFTSAMPLING",
 	"THR-CREATE",
 	"THR-EXIT",
+	"THR-USERRET",
 };
 #endif
 
@@ -2194,6 +2215,7 @@ pmc_hook_handler(struct thread *td, int function, void
 		cpu = PCPU_GET(cpuid);
 		pmc_process_samples(cpu, PMC_HR);
 		pmc_process_samples(cpu, PMC_SR);
+		pmc_process_samples(cpu, PMC_UR);
 		break;
 
 	case PMC_FN_MMAP:
@@ -2214,6 +2236,11 @@ pmc_hook_handler(struct thread *td, int function, void
 
 		pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR,
 		    (struct trapframe *) arg);
+
+		KASSERT(td->td_pinned == 1,
+			("[pmc,%d] invalid td_pinned value", __LINE__));
+		sched_unpin();  /* Can migrate safely now. */
+
 		td->td_pflags &= ~TDP_CALLCHAIN;
 		break;
 
@@ -2223,8 +2250,18 @@ pmc_hook_handler(struct thread *td, int function, void
 		 */
 		KASSERT(td == curthread, ("[pmc,%d] td != curthread",
 		    __LINE__));
-		pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_SR,
+
+		cpu = PCPU_GET(cpuid);
+		pmc_capture_user_callchain(cpu, PMC_SR,
 		    (struct trapframe *) arg);
+		pmc_capture_user_callchain(cpu, PMC_UR,
+		    (struct trapframe *) arg);
+
+		KASSERT(td->td_pinned == 1,
+		    ("[pmc,%d] invalid td_pinned value", __LINE__));
+
+		sched_unpin();  /* Can migrate safely now. */
+
 		td->td_pflags &= ~TDP_CALLCHAIN;
 		break;
 
@@ -2245,6 +2282,12 @@ pmc_hook_handler(struct thread *td, int function, void
 		pmc_process_thread_delete(td);
 		break;
 
+	case PMC_FN_THR_USERRET:
+		KASSERT(td == curthread, ("[pmc,%d] td != curthread",
+		    __LINE__));
+		pmc_process_thread_userret(td);
+		break;
+
 	default:
 #ifdef	HWPMC_DEBUG
 		KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
@@ -2658,7 +2701,9 @@ pmc_wait_for_pmc_idle(struct pmc *pm)
 	 * Loop (with a forced context switch) till the PMC's runcount
 	 * comes down to zero.
 	 */
+	pmclog_flush(pm->pm_owner);
 	while (counter_u64_fetch(pm->pm_runcount) > 0) {
+		pmclog_flush(pm->pm_owner);
 #ifdef HWPMC_DEBUG
 		maxloop--;
 		KASSERT(maxloop > 0,
@@ -3264,7 +3309,7 @@ pmc_class_to_classdep(enum pmc_class class)
 	return (NULL);
 }
 
-#ifdef	HWPMC_DEBUG
+#if defined(HWPMC_DEBUG) && defined(KTR)
 static const char *pmc_op_to_name[] = {
 #undef	__PMC_OP
 #define	__PMC_OP(N, D)	#N ,
@@ -3806,11 +3851,26 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
 		 */
 
 		if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW |
-		    PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) {
+		    PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN |
+		    PMC_F_USERCALLCHAIN)) != 0) {
 			error = EINVAL;
 			break;
 		}
 
+		/* PMC_F_USERCALLCHAIN is only valid with PMC_F_CALLCHAIN */
+		if ((pa.pm_flags & (PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) ==
+		    PMC_F_USERCALLCHAIN) {
+			error = EINVAL;
+			break;
+		}
+
+		/* PMC_F_USERCALLCHAIN is only valid for sampling mode */
+		if (pa.pm_flags & PMC_F_USERCALLCHAIN &&
+			mode != PMC_MODE_TS && mode != PMC_MODE_SS) {
+			error = EINVAL;
+			break;
+		}
+
 		/* process logging options are not allowed for system PMCs */
 		if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags &
 		    (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) {
@@ -4533,8 +4593,6 @@ pmc_post_callchain_callback(void)
 }
 
 /*
- * Interrupt processing.
- *
  * Find a free slot in the per-cpu array of samples and capture the
  * current callchain there.  If a sample was successfully added, a bit
  * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook
@@ -4544,8 +4602,8 @@ pmc_post_callchain_callback(void)
  * use any of the locking primitives supplied by the OS.
  */
 
-int
-pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
+static int
+pmc_add_sample(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
     int inuserspace)
 {
 	int error, callchaindepth;
@@ -4561,7 +4619,11 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *p
 	psb = pmc_pcpu[cpu]->pc_sb[ring];
 
 	ps = psb->ps_write;
-	if (ps->ps_nsamples) {	/* in use, reader hasn't caught up */
+	if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
+		counter_u64_add(ps->ps_pmc->pm_runcount, -1);
+		counter_u64_add(pmc_stats.pm_overwrites, 1);
+		ps->ps_nsamples = 0;
+	} else if (ps->ps_nsamples) {	/* in use, reader hasn't caught up */
 		pm->pm_pcpu_state[cpu].pps_stalled = 1;
 		counter_u64_add(pmc_stats.pm_intr_bufferfull, 1);
 		PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
@@ -4573,7 +4635,6 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *p
 		goto done;
 	}
 
-
 	/* Fill in entry. */
 	PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
 	    (void *) tf, inuserspace,
@@ -4619,7 +4680,11 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *p
 	}
 
 	ps->ps_nsamples = callchaindepth;	/* mark entry as in use */
-
+	if (ring == PMC_UR) {
+		ps->ps_nsamples_actual = callchaindepth;	/* mark entry as in use */
+		ps->ps_nsamples = PMC_SAMPLE_INUSE;
+	} else
+		ps->ps_nsamples = callchaindepth;	/* mark entry as in use */
 	/* increment write pointer, modulo ring buffer size */
 	ps++;
 	if (ps == psb->ps_fence)
@@ -4636,6 +4701,30 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *p
 }
 
 /*
+ * Interrupt processing.
+ *
+ * This function is meant to be called from an NMI handler.  It cannot
+ * use any of the locking primitives supplied by the OS.
+ */
+
+int
+pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
+    int inuserspace)
+{
+	struct thread *td;
+
+	td = curthread;
+	if ((pm->pm_flags & PMC_F_USERCALLCHAIN) &&
+		td && td->td_proc &&
+		(td->td_proc->p_flag & P_KPROC) == 0 &&
+		!inuserspace) {
+		atomic_add_int(&curthread->td_pmcpend, 1);
+		return (pmc_add_sample(cpu, PMC_UR, pm, tf, 0));
+	}
+	return (pmc_add_sample(cpu, ring, pm, tf, inuserspace));
+}
+
+/*
  * Capture a user call chain.  This function will be called from ast()
  * before control returns to userland and before the process gets
  * rescheduled.
@@ -4648,6 +4737,7 @@ pmc_capture_user_callchain(int cpu, int ring, struct t
 	struct thread *td;
 	struct pmc_sample *ps, *ps_end;
 	struct pmc_samplebuffer *psb;
+	int nsamples, nrecords, pass;
 #ifdef	INVARIANTS
 	int ncallchains;
 	int nfree;
@@ -4664,6 +4754,11 @@ pmc_capture_user_callchain(int cpu, int ring, struct t
 	ncallchains = 0;
 	nfree = 0;
 #endif
+	nrecords = INT_MAX;
+	pass = 0;
+ restart:
+	if (ring == PMC_UR)
+		nrecords = atomic_readandclear_32(&td->td_pmcpend);
 
 	/*
 	 * Iterate through all deferred callchain requests.
@@ -4675,6 +4770,11 @@ pmc_capture_user_callchain(int cpu, int ring, struct t
 	ps_end = psb->ps_write;
 	do {
 #ifdef	INVARIANTS
+		if (ps->ps_nsamples == PMC_SAMPLE_FREE) {
+			nfree++;
+			goto next;
+		}
+
 		if ((ps->ps_pmc == NULL) ||
 		    (ps->ps_pmc->pm_state != PMC_STATE_RUNNING))
 			nfree++;
@@ -4697,37 +4797,91 @@ pmc_capture_user_callchain(int cpu, int ring, struct t
 		KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
 		    ("[pmc,%d] runcount %ld", __LINE__, (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 
+		if (ring == PMC_UR) {
+			nsamples = ps->ps_nsamples_actual;
+			counter_u64_add(pmc_stats.pm_merges, 1);
+		} else
+			nsamples = 0;
+
 		/*
 		 * Retrieve the callchain and mark the sample buffer
 		 * as 'processable' by the timer tick sweep code.
 		 */
-		ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc,
-		    pmc_callchaindepth, tf);
 
-#ifdef	INVARIANTS
+#ifdef INVARIANTS
 		ncallchains++;
 #endif
 
+		if (__predict_true(nsamples < pmc_callchaindepth - 1))
+			nsamples += pmc_save_user_callchain(ps->ps_pc + nsamples,
+		       pmc_callchaindepth - nsamples - 1, tf);
+		wmb();
+		ps->ps_nsamples = nsamples;
+		if (nrecords-- == 1)
+			break;
 next:
 		/* increment the pointer, modulo sample ring size */
 		if (++ps == psb->ps_fence)
 			ps = psb->ps_samples;
 	} while (ps != ps_end);
+	if (__predict_false(ring == PMC_UR && td->td_pmcpend)) {
+		if (pass == 0) {
+			pass = 1;
+			goto restart;
+		}
+		/* only collect samples for this part once */
+		td->td_pmcpend = 0;
+	}
 
-#ifdef	INVARIANTS
-	KASSERT(ncallchains > 0 || nfree > 0,
-	    ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__,
-		cpu));
+#ifdef INVARIANTS
+	if (ring == PMC_HR)
+		KASSERT(ncallchains > 0 || nfree > 0,
+		    ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__,
+			    cpu));
 #endif
 
-	KASSERT(td->td_pinned == 1,
-	    ("[pmc,%d] invalid td_pinned value", __LINE__));
-	sched_unpin();	/* Can migrate safely now. */
-
 	/* mark CPU as needing processing */
 	DPCPU_SET(pmc_sampled, 1);
 }
 
+
+static void
+pmc_flush_ring(int cpu, int ring)
+{
+	struct pmc *pm;
+	struct pmc_sample *ps;
+	struct pmc_samplebuffer *psb;
+	int n;
+
+	psb = pmc_pcpu[cpu]->pc_sb[ring];
+
+	for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
+
+		ps = psb->ps_read;
+		if (ps->ps_nsamples == PMC_SAMPLE_FREE)
+			goto next;
+		pm = ps->ps_pmc;
+		counter_u64_add(pm->pm_runcount, -1);
+		ps->ps_nsamples = PMC_SAMPLE_FREE;
+		/* increment read pointer, modulo sample size */
+	next:
+		if (++ps == psb->ps_fence)
+			psb->ps_read = psb->ps_samples;
+		else
+			psb->ps_read = ps;
+	}
+}
+
+void
+pmc_flush_samples(int cpu)
+{
+	int n;
+
+	for (n = 0; n < PMC_NUM_SR; n++)
+		pmc_flush_ring(cpu, n);
+}
+
+
 /*
  * Process saved PC samples.
  */
@@ -4788,23 +4942,20 @@ pmc_process_samples(int cpu, int ring)
 		 * its owner, and if the PC is in user mode, update
 		 * profiling statistics like timer-based profiling
 		 * would have done.
+		 *
+		 * Otherwise, this is either a sampling-mode PMC that
+		 * is attached to a different process than its owner,
+		 * or a system-wide sampling PMC. Dispatch a log
+		 * entry to the PMC's owner process.
 		 */
 		if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) {
 			if (ps->ps_flags & PMC_CC_F_USERSPACE) {
 				td = FIRST_THREAD_IN_PROC(po->po_owner);
 				addupc_intr(td, ps->ps_pc[0], 1);
 			}
-			goto entrydone;
-		}
+		} else
+			pmclog_process_callchain(pm, ps);
 
-		/*
-		 * Otherwise, this is either a sampling mode PMC that
-		 * is attached to a different process than its owner,
-		 * or a system-wide sampling PMC.  Dispatch a log
-		 * entry to the PMC's owner process.
-		 */
-		pmclog_process_callchain(pm, ps);
-
 	entrydone:
 		ps->ps_nsamples = 0; /* mark entry as free */
 		counter_u64_add(pm->pm_runcount, -1);
@@ -5282,6 +5433,8 @@ pmc_initialize(void)
 	pmc_stats.pm_buffer_requests = counter_u64_alloc(M_WAITOK);
 	pmc_stats.pm_buffer_requests_failed = counter_u64_alloc(M_WAITOK);
 	pmc_stats.pm_log_sweeps = counter_u64_alloc(M_WAITOK);
+	pmc_stats.pm_merges = counter_u64_alloc(M_WAITOK);
+	pmc_stats.pm_overwrites = counter_u64_alloc(M_WAITOK);
 
 #ifdef	HWPMC_DEBUG
 	/* parse debug flags first */
@@ -5427,6 +5580,24 @@ pmc_initialize(void)
 			    (n * pmc_callchaindepth);
 
 		pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb;
+
+		sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
+			pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
+		    M_WAITOK|M_ZERO);
+		sb->ps_read = sb->ps_write = sb->ps_samples;
+		sb->ps_fence = sb->ps_samples + pmc_nsamples;
+
+		KASSERT(pmc_pcpu[cpu] != NULL,
+		    ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
+
+		sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples *
+		    sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO);
+
+		for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
+			ps->ps_pc = sb->ps_callchains +
+			    (n * pmc_callchaindepth);
+
+		pmc_pcpu[cpu]->pc_sb[PMC_UR] = sb;
 	}
 
 	/* allocate space for the row disposition array */
@@ -5637,10 +5808,15 @@ pmc_cleanup(void)
 		KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL,
 		    ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__,
 			cpu));
+		KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_UR] != NULL,
+		    ("[pmc,%d] Null userret cpu sample buffer cpu=%d", __LINE__,
+			cpu));
 		free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC);
 		free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC);
 		free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC);
 		free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC);
+		free_domain(pmc_pcpu[cpu]->pc_sb[PMC_UR]->ps_callchains, M_PMC);
+		free_domain(pmc_pcpu[cpu]->pc_sb[PMC_UR], M_PMC);
 		free_domain(pmc_pcpu[cpu], M_PMC);
 	}
 
@@ -5669,6 +5845,8 @@ pmc_cleanup(void)
 	counter_u64_free(pmc_stats.pm_buffer_requests);
 	counter_u64_free(pmc_stats.pm_buffer_requests_failed);
 	counter_u64_free(pmc_stats.pm_log_sweeps);
+	counter_u64_free(pmc_stats.pm_merges);
+	counter_u64_free(pmc_stats.pm_overwrites);
 	sx_xunlock(&pmc_sx); 	/* we are done */
 }
 

Modified: head/sys/kern/subr_trap.c
==============================================================================
--- head/sys/kern/subr_trap.c	Sun Jun  3 23:49:02 2018	(r334594)
+++ head/sys/kern/subr_trap.c	Mon Jun  4 01:10:23 2018	(r334595)
@@ -145,6 +145,11 @@ userret(struct thread *td, struct trapframe *frame)
 	 */
 	if (p->p_flag & P_PROFIL)
 		addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio);
+
+#ifdef HWPMC_HOOKS
+	if (PMC_THREAD_HAS_SAMPLES(td))
+		PMC_CALL_HOOK(td, PMC_FN_THR_USERRET, NULL);
+#endif
 	/*
 	 * Let the scheduler adjust our priority etc.
 	 */

Modified: head/sys/sys/pmc.h
==============================================================================
--- head/sys/sys/pmc.h	Sun Jun  3 23:49:02 2018	(r334594)
+++ head/sys/sys/pmc.h	Mon Jun  4 01:10:23 2018	(r334595)
@@ -369,13 +369,14 @@ enum pmc_ops {
 #define	PMC_F_KGMON		0x00000040 /*OP ALLOCATE kgmon(8) profiling */
 /* V2 API */
 #define	PMC_F_CALLCHAIN		0x00000080 /*OP ALLOCATE capture callchains */
+#define	PMC_F_USERCALLCHAIN	0x00000100 /*OP ALLOCATE use userspace stack */
 
 /* internal flags */
 #define	PMC_F_ATTACHED_TO_OWNER	0x00010000 /*attached to owner*/
 #define	PMC_F_NEEDS_LOGFILE	0x00020000 /*needs log file */
 #define	PMC_F_ATTACH_DONE	0x00040000 /*attached at least once */
 
-#define	PMC_CALLCHAIN_DEPTH_MAX	128
+#define	PMC_CALLCHAIN_DEPTH_MAX	512
 
 #define	PMC_CC_F_USERSPACE	0x01	   /*userspace callchain*/
 
@@ -568,6 +569,8 @@ struct pmc_driverstats {
 	counter_u64_t	pm_buffer_requests_failed; /* #failed buffer requests */
 	counter_u64_t	pm_log_sweeps;		/* #sample buffer processing
 						   passes */
+	counter_u64_t	pm_merges;		/* merged k+u */
+	counter_u64_t	pm_overwrites;		/* UR overwrites */
 };
 #endif
 
@@ -643,11 +646,11 @@ struct pmc_op_getdyneventinfo {
 
 #define	PMC_HASH_SIZE				1024
 #define	PMC_MTXPOOL_SIZE			2048
-#define	PMC_LOG_BUFFER_SIZE			128
-#define	PMC_NLOGBUFFERS_PCPU		8
-#define	PMC_NSAMPLES				64
-#define	PMC_CALLCHAIN_DEPTH			32
-#define	PMC_THREADLIST_MAX			64
+#define	PMC_LOG_BUFFER_SIZE			256
+#define	PMC_NLOGBUFFERS_PCPU			32
+#define	PMC_NSAMPLES				256
+#define	PMC_CALLCHAIN_DEPTH			128
+#define	PMC_THREADLIST_MAX			128
 
 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."
 
@@ -923,9 +926,9 @@ struct pmc_hw {
 
 struct pmc_sample {
 	uint16_t		ps_nsamples;	/* callchain depth */
+	uint16_t		ps_nsamples_actual;
 	uint16_t		ps_cpu;		/* cpu number */
 	uint16_t		ps_flags;	/* other flags */
-	uint8_t			ps_pad[2];
 	lwpid_t			ps_tid;		/* thread id */
 	pid_t			ps_pid;		/* process PID or -1 */
 	struct thread		*ps_td;		/* which thread */
@@ -954,7 +957,7 @@ struct pmc_samplebuffer {
 
 struct pmc_cpu {
 	uint32_t	pc_state;	/* physical cpu number + flags */
-	struct pmc_samplebuffer *pc_sb[2]; /* space for samples */
+	struct pmc_samplebuffer *pc_sb[3]; /* space for samples */
 	struct pmc_hw	*pc_hwpmcs[];	/* 'npmc' pointers */
 };
 
@@ -1203,7 +1206,7 @@ MALLOC_DECLARE(M_PMC);
 struct pmc_mdep *pmc_md_initialize(void);	/* MD init function */
 void	pmc_md_finalize(struct pmc_mdep *_md);	/* MD fini function */
 int	pmc_getrowdisp(int _ri);
-int	pmc_process_interrupt(int _cpu, int _soft, struct pmc *_pm,
+int	pmc_process_interrupt(int _cpu, int _ring, struct pmc *_pm,
     struct trapframe *_tf, int _inuserspace);
 int	pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples,
     struct trapframe *_tf);
@@ -1211,5 +1214,6 @@ int	pmc_save_user_callchain(uintptr_t *_cc, int _maxsa
     struct trapframe *_tf);
 struct pmc_mdep *pmc_mdep_alloc(int nclasses);
 void pmc_mdep_free(struct pmc_mdep *md);
+void pmc_flush_samples(int cpu);
 #endif /* _KERNEL */
 #endif /* _SYS_PMC_H_ */

Modified: head/sys/sys/pmckern.h
==============================================================================
--- head/sys/sys/pmckern.h	Sun Jun  3 23:49:02 2018	(r334594)
+++ head/sys/sys/pmckern.h	Mon Jun  4 01:10:23 2018	(r334595)
@@ -62,9 +62,12 @@
 #define	PMC_FN_SOFT_SAMPLING		11
 #define	PMC_FN_THR_CREATE		12
 #define	PMC_FN_THR_EXIT			13
+#define	PMC_FN_THR_USERRET		14
 
 #define	PMC_HR	0	/* Hardware ring buffer */
 #define	PMC_SR	1	/* Software ring buffer */
+#define	PMC_UR	2	/* userret ring buffer */
+#define PMC_NUM_SR (PMC_UR+1)
 
 struct pmckern_procexec {
 	int		pm_credentialschanged;
@@ -226,6 +229,9 @@ do {						\
 /* Check if a process is using HWPMCs.*/
 #define PMC_PROC_IS_USING_PMCS(p)				\
 	(__predict_false(p->p_flag & P_HWPMC))
+
+#define PMC_THREAD_HAS_SAMPLES(td)				\
+	(__predict_false((td)->td_pmcpend))
 
 /* Check if a thread have pending user capture. */
 #define PMC_IS_PENDING_CALLCHAIN(p)				\

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h	Sun Jun  3 23:49:02 2018	(r334594)
+++ head/sys/sys/proc.h	Mon Jun  4 01:10:23 2018	(r334595)
@@ -375,6 +375,7 @@ struct thread {
 	void		*td_lkpi_task;	/* LinuxKPI task struct pointer */
 	TAILQ_ENTRY(thread) td_epochq;	/* (t) Epoch queue. */
 	epoch_section_t td_epoch_section; /* (t) epoch section object */
+	int		td_pmcpend;
 };
 
 struct thread0_storage {

Modified: head/usr.sbin/pmcstat/pmcpl_callgraph.c
==============================================================================
--- head/usr.sbin/pmcstat/pmcpl_callgraph.c	Sun Jun  3 23:49:02 2018	(r334594)
+++ head/usr.sbin/pmcstat/pmcpl_callgraph.c	Mon Jun  4 01:10:23 2018	(r334595)
@@ -345,7 +345,7 @@ pmcpl_cg_process(struct pmcstat_process *pp, struct pm
 
 	pc = cc[0];
 	pmcid = pmcr->pr_pmcid;
-	parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode);
+	child = parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode);
 	if (parent == NULL) {
 		pmcstat_stats.ps_callchain_dubious_frames++;
 		pmcr->pr_dubious_frames++;
@@ -384,7 +384,7 @@ pmcpl_cg_process(struct pmcstat_process *pp, struct pm
 			}
 		}
 		if (ppm == NULL)
-			return;
+			continue;
 
 		image = ppm->ppm_image;
 		loadaddress = ppm->ppm_lowpc + image->pi_vaddr -

Modified: head/usr.sbin/pmcstat/pmcstat.8
==============================================================================
--- head/usr.sbin/pmcstat/pmcstat.8	Sun Jun  3 23:49:02 2018	(r334594)
+++ head/usr.sbin/pmcstat/pmcstat.8	Mon Jun  4 01:10:23 2018	(r334595)
@@ -47,7 +47,7 @@
 .Op Fl R Ar logfilename
 .Op Fl S Ar event-spec
 .Op Fl T
-.Op Fl U Ar event-spec
+.Op Fl U
 .Op Fl W
 .Op Fl a Ar pathname
 .Op Fl c Ar cpu-spec
@@ -226,8 +226,10 @@ can be used: 'c+a' switch to accumulative mode, 'c+d' 
 to delta mode, 'm' merge PMCs, 'n' change view, 'p' show next
 PMC, ' ' pause, 'q' quit. calltree only: 'f' cost under threshold
 is seen as a dot.
-.It Fl U Ar event-spec
-Provide long description of event.
+.It Fl U
+Toggle capturing user-space call traces while in kernel mode.
+The default is for sampling PMCs to capture user-space callchain information
+while in user-space mode, and kernel callchain information while in kernel mode.
 .It Fl W
 Toggle logging the incremental counts seen by the threads of a
 tracked process each time they are scheduled on a CPU.

Modified: head/usr.sbin/pmcstat/pmcstat.c
==============================================================================
--- head/usr.sbin/pmcstat/pmcstat.c	Sun Jun  3 23:49:02 2018	(r334594)
+++ head/usr.sbin/pmcstat/pmcstat.c	Mon Jun  4 01:10:23 2018	(r334595)
@@ -374,7 +374,7 @@ pmcstat_show_usage(void)
 	    "\t -R file\t read events from \"file\"\n"
 	    "\t -S spec\t allocate a system-wide sampling PMC\n"
 	    "\t -T\t\t start in top mode\n"
-	    "\t -U spec \t provide long description of counters matching spec\n"
+	    "\t -U \t\n merged user kernel stack capture\n"
 	    "\t -W\t\t (toggle) show counts per context switch\n"
 	    "\t -a file\t print sampled PCs and callgraph to \"file\"\n"
 	    "\t -c cpu-list\t set cpus for subsequent system-wide PMCs\n"
@@ -432,7 +432,8 @@ main(int argc, char **argv)
 	int option, npmc;
 	int c, check_driver_stats; 
 	int do_callchain, do_descendants, do_logproccsw, do_logprocexit;
-	int do_print, do_read, do_listcounters, do_descr, do_long_descr;
+	int do_print, do_read, do_listcounters, do_descr;
+	int do_userspace;
 	size_t len;
 	int graphdepth;
 	int pipefd[2], rfd;
@@ -455,7 +456,7 @@ main(int argc, char **argv)
 	do_callchain		= 1;
 	do_descr                = 0;
 	do_descendants          = 0;
-	do_long_descr           = 0;
+	do_userspace            = 0;
 	do_logproccsw           = 0;
 	do_logprocexit          = 0;
 	do_listcounters         = 0;
@@ -510,7 +511,7 @@ main(int argc, char **argv)
 	CPU_COPY(&rootmask, &cpumask);
 
 	while ((option = getopt(argc, argv,
-	    "CD:EF:G:ILM:NO:P:R:S:TU:WZa:c:def:gi:k:l:m:n:o:p:qr:s:t:u:vw:z:")) != -1)
+	    "CD:EF:G:ILM:NO:P:R:S:TUWZa:c:def:gi:k:l:m:n:o:p:qr:s:t:u:vw:z:")) != -1)
 		switch (option) {
 		case 'a':	/* Annotate + callgraph */
 			args.pa_flags |= FLAG_DO_ANNOTATE;
@@ -677,8 +678,11 @@ main(int argc, char **argv)
 				ev->ev_cpu = PMC_CPU_ANY;
 
 			ev->ev_flags = 0;
-			if (do_callchain)
+			if (do_callchain) {
 				ev->ev_flags |= PMC_F_CALLCHAIN;
+				if (do_userspace)
+					ev->ev_flags |= PMC_F_USERCALLCHAIN;
+			}
 			if (do_descendants)
 				ev->ev_flags |= PMC_F_DESCENDANTS;
 			if (do_logprocexit)
@@ -776,9 +780,9 @@ main(int argc, char **argv)
 			do_descr = 1;
 			event = optarg;
 			break;
-		case 'U':
-			do_long_descr = 1;
-			event = optarg;
+		case 'U':	/* toggle user-space callchain capture */
+			do_userspace = !do_userspace;
+			args.pa_required |= FLAG_HAS_SAMPLING_PMCS;
 			break;
 		case 'v':	/* verbose */
 			args.pa_verbosity++;
@@ -816,17 +820,15 @@ main(int argc, char **argv)
 			break;
 
 		}
-	if ((do_listcounters | do_descr | do_long_descr) &&
+	if ((do_listcounters | do_descr) &&
 		pmc_pmu_enabled() == 0)
 			errx(EX_USAGE, "pmu features not supported on host or hwpmc not loaded");
 	if (do_listcounters) {
 		pmc_pmu_print_counters(NULL);
 	} else if (do_descr) {
 		pmc_pmu_print_counter_desc(event);
-	} else if (do_long_descr) {
-		pmc_pmu_print_counter_desc_long(event);
 	}
-	if (do_listcounters | do_descr | do_long_descr)
+	if (do_listcounters | do_descr)
 		exit(0);
 
 	args.pa_argc = (argc -= optind);


More information about the svn-src-head mailing list