git: 46dd801acb23 - main - Add userland boot profiling to TSLOG

From: Colin Percival <cperciva_at_FreeBSD.org>
Date: Sat, 16 Oct 2021 18:55:36 UTC
The branch main has been updated by cperciva:

URL: https://cgit.FreeBSD.org/src/commit/?id=46dd801acb2318c8464b185893bb111f09b71138

commit 46dd801acb2318c8464b185893bb111f09b71138
Author:     Colin Percival <cperciva@FreeBSD.org>
AuthorDate: 2021-10-16 18:47:34 +0000
Commit:     Colin Percival <cperciva@FreeBSD.org>
CommitDate: 2021-10-16 18:47:34 +0000

    Add userland boot profiling to TSLOG
    
    On kernels compiled with 'options TSLOG', record for each process ID:
    * The timestamp of the fork() which creates it and the parent
    process ID,
    * The first path passed to execve(), if any,
    * The first path resolved by namei, if any, and
    * The timestamp of the exit() which terminates the process.
    
    Expose this information via a new sysctl, debug.tslog_user.
    
    On kernels lacking 'options TSLOG' (the default), no information is
    recorded and the sysctl does not exist.
    
    Note that recording namei is needed in order to obtain the names of
    rc.d scripts being launched, as the rc system sources them in a
    subshell rather than execing the scripts.
    
    With this commit it is now possible to generate flamecharts of the
    entire boot process from the start of the loader to the end of
    /etc/rc.  The code needed to perform this processing is currently
    found in github: https://github.com/cperciva/freebsd-boot-profiling
    
    Reviewed by:    mhorne
    Sponsored by:   https://www.patreon.com/cperciva
    Differential Revision: https://reviews.freebsd.org/D32493
---
 sys/kern/kern_exec.c  |  1 +
 sys/kern/kern_exit.c  |  1 +
 sys/kern/kern_fork.c  |  1 +
 sys/kern/kern_tslog.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++
 sys/kern/vfs_lookup.c |  1 +
 sys/sys/tslog.h       |  7 +++++
 6 files changed, 96 insertions(+)

diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index a9998471dd14..a8f07bf39564 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -381,6 +381,7 @@ kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
     struct vmspace *oldvmspace)
 {
 
+	TSEXEC(td->td_proc->p_pid, args->begin_argv);
 	AUDIT_ARG_ARGV(args->begin_argv, args->argc,
 	    exec_args_get_begin_envv(args) - args->begin_argv);
 	AUDIT_ARG_ENVV(exec_args_get_begin_envv(args), args->envc,
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 1b2cf63c9b82..79f1d5bd63e5 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -228,6 +228,7 @@ exit1(struct thread *td, int rval, int signo)
 
 	mtx_assert(&Giant, MA_NOTOWNED);
 	KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo));
+	TSPROCEXIT(td->td_proc->p_pid);
 
 	p = td->td_proc;
 	/*
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 606801283a1f..e8c34a31e6a1 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -392,6 +392,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
 	p2->p_state = PRS_NEW;		/* protect against others */
 	p2->p_pid = fork_findpid(fr->fr_flags);
 	AUDIT_ARG_PID(p2->p_pid);
+	TSFORK(p2->p_pid, p1->p_pid);
 
 	sx_xlock(&allproc_lock);
 	LIST_INSERT_HEAD(&allproc, p2, p_list);
diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c
index dd3b25158340..38c91911ec08 100644
--- a/sys/kern/kern_tslog.c
+++ b/sys/kern/kern_tslog.c
@@ -30,6 +30,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/linker.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
@@ -131,3 +133,86 @@ sysctl_debug_tslog(SYSCTL_HANDLER_ARGS)
 
 SYSCTL_PROC(_debug, OID_AUTO, tslog, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE,
     0, 0, sysctl_debug_tslog, "", "Dump recorded event timestamps");
+
+MALLOC_DEFINE(M_TSLOGUSER, "tsloguser", "Strings used by userland tslog");
+static struct procdata {
+	pid_t ppid;
+	uint64_t tsc_forked;
+	uint64_t tsc_exited;
+	char * execname;
+	char * namei;
+	int reused;
+} procs[PID_MAX + 1];
+
+void
+tslog_user(pid_t pid, pid_t ppid, const char * execname, const char * namei)
+{
+	uint64_t tsc = get_cyclecount();
+
+	/* If we wrapped, do nothing. */
+	if (procs[pid].reused)
+		return;
+
+	/* If we have a ppid, we're recording a fork. */
+	if (ppid != (pid_t)(-1)) {
+		/* If we have a ppid already, we wrapped. */
+		if (procs[pid].ppid) {
+			procs[pid].reused = 1;
+			return;
+		}
+
+		/* Fill in some fields. */
+		procs[pid].ppid = ppid;
+		procs[pid].tsc_forked = tsc;
+		return;
+	}
+
+	/* If we have an execname, record it. */
+	if (execname != NULL) {
+		if (procs[pid].execname == NULL)
+			procs[pid].execname = strdup(execname, M_TSLOGUSER);
+		return;
+	}
+
+	/* Record the first namei for the process. */
+	if (namei != NULL) {
+		if (procs[pid].namei == NULL)
+			procs[pid].namei = strdup(namei, M_TSLOGUSER);
+		return;
+	}
+
+	/* Otherwise we're recording an exit. */
+	procs[pid].tsc_exited = tsc;
+}
+
+static int
+sysctl_debug_tslog_user(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	struct sbuf *sb;
+	pid_t pid;
+
+	sb = sbuf_new_for_sysctl(NULL, NULL, 1024, req);
+
+	/* Export the data we logged. */
+	for (pid = 0; pid <= PID_MAX; pid++) {
+		sbuf_printf(sb, "%zu", (size_t)pid);
+		sbuf_printf(sb, " %zu", (size_t)procs[pid].ppid);
+		sbuf_printf(sb, " %llu",
+		    (unsigned long long)procs[pid].tsc_forked);
+		sbuf_printf(sb, " %llu",
+		    (unsigned long long)procs[pid].tsc_exited);
+		sbuf_printf(sb, " \"%s\"", procs[pid].execname ?
+		    procs[pid].execname : "");
+		sbuf_printf(sb, " \"%s\"", procs[pid].namei ?
+		    procs[pid].namei : "");
+		sbuf_printf(sb, "\n");
+	}
+	error = sbuf_finish(sb);
+	sbuf_delete(sb);
+	return (error);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, tslog_user,
+    CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_debug_tslog_user,
+    "", "Dump recorded userland event timestamps");
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index c1cefae543da..9e10c3092f5a 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -611,6 +611,7 @@ namei(struct nameidata *ndp)
 		ktrnamei(cnp->cn_pnbuf);
 	}
 #endif
+	TSNAMEI(curthread->td_proc->p_pid, cnp->cn_pnbuf);
 
 	/*
 	 * First try looking up the target without locking any vnodes.
diff --git a/sys/sys/tslog.h b/sys/sys/tslog.h
index 4b2971e4e643..af3c0931cac8 100644
--- a/sys/sys/tslog.h
+++ b/sys/sys/tslog.h
@@ -51,12 +51,19 @@
 #define TSUNWAIT(x) TSEVENT2("UNWAIT", x);
 #define TSHOLD(x) TSEVENT2("HOLD", x);
 #define TSRELEASE(x) TSEVENT2("RELEASE", x);
+#define TSFORK(p, pp) TSRAW_USER(p, pp, NULL, NULL)
+#define TSEXEC(p, name) TSRAW_USER(p, (pid_t)(-1), name, NULL)
+#define TSNAMEI(p, name) TSRAW_USER(p, (pid_t)(-1), NULL, name)
+#define TSPROCEXIT(p) TSRAW_USER(p, (pid_t)(-1), NULL, NULL)
 
 #ifdef TSLOG
 #define TSRAW(a, b, c, d) tslog(a, b, c, d)
 void tslog(void *, int, const char *, const char *);
+#define TSRAW_USER(a, b, c, d) tslog_user(a, b, c, d)
+void tslog_user(pid_t, pid_t, const char *, const char *);
 #else
 #define TSRAW(a, b, c, d)		/* Timestamp logging disabled */
+#define TSRAW_USER(a, b, c, d)		/* Timestamp logging disabled */
 #endif
 
 #endif /* _TSLOG_H_ */