git: eb52de783a1c - stable/13 - tests/sys/audit: Avoid race caused by starting auditd(8) for testing

Wed Mar 17 22:22:57 UTC 2021

The branch stable/13 has been updated by arichardson:

URL: https://cgit.FreeBSD.org/src/commit/?id=eb52de783a1c079b2ef4d674090322886cc22fc0

commit eb52de783a1c079b2ef4d674090322886cc22fc0
Author:     Alex Richardson <arichardson at FreeBSD.org>
AuthorDate: 2021-02-18 10:14:27 +0000
Commit:     Alex Richardson <arichardson at FreeBSD.org>
CommitDate: 2021-03-17 22:22:48 +0000

    tests/sys/audit: Avoid race caused by starting auditd(8) for testing
    
    In the CheriBSD CI we reproducibly see the first test in sys/audit
    (administrative:acct_failure) fail due to a missing startup message.
    It appears this is caused by a race condition when starting auditd:
    `service auditd onestart` returns as soon as the initial auditd() parent
    exits (after the daemon(3) call).
    We can avoid this problem by setting up the auditd infrastructure
    in-process: libauditd contains audit_quick_{start,stop}() functions that
    look like they are ideally suited to this task.
    This patch also avoids forking lots of shell processes for each of the 418
    tests by using `auditon(A_SENDTRIGGER, &trigger, sizeof(trigger))` to check
    for a running auditd(8) instead of using `service auditd onestatus`.
    
    With these two changes (and D28388 to fix the XFAIL'd test) I can now
    boot and run `cd /usr/tests/sys/audit && kyua test` without any failures
    in a single-core QEMU instance. Before there would always be at least one
    failed test.
    
    Besides making the tests more reliable in CI, a nice side-effect of this
    change is that it also significantly speeds up running them by avoiding
    lots of fork()/execve() caused by shell scripts:
    Running kyua test on an AArch64 QEMU took 315s before and now takes 68s,
    so it's roughly 3.5 times faster. This effect is even larger when running
    on a CHERI-RISC-V QEMU since emulating CHERI instructions on an x86 host
    is noticeably slower than emulating AArch64.
    
    Test Plan: aarch64+amd64 QEMU no longer fail.
    
    Reviewed By:    asomers
    Differential Revision: https://reviews.freebsd.org/D28451
    
    (cherry picked from commit df093aa9463b2121d8307fb91c4ba7cf17f4ea64)
---
 tests/sys/audit/Makefile         |  9 +++++
 tests/sys/audit/administrative.c | 10 ++++-
 tests/sys/audit/utils.c          | 86 +++++++++++++++++++++++++++++++++++-----
 3 files changed, 93 insertions(+), 12 deletions(-)

diff --git a/tests/sys/audit/Makefile b/tests/sys/audit/Makefile
index 215740020eb5..4cacd86d009a 100644
--- a/tests/sys/audit/Makefile
+++ b/tests/sys/audit/Makefile
@@ -48,11 +48,20 @@ SRCS.miscellaneous+=		utils.c
 
 TEST_METADATA+= timeout="30"
 TEST_METADATA+= required_user="root"
+# Only one process can be auditing, if we attempt to run these tests in parallel
+# some of them will fail to start auditing.
+# TODO: it would be nice to be able to run them in parallel with other non-audit
+# tests using some internal form of synchronization.
+# TODO: In addititon to test failures, running in parallel can trigger a kernel
+# panic: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=253616
 TEST_METADATA+= is_exclusive="true"
 TEST_METADATA+=	required_files="/etc/rc.d/auditd /dev/auditpipe"
 
 MK_PIE:=	no	# XXX libprivateauditd.a is not PIE
 LDFLAGS+=	-lbsm -lutil
+OPENBSMDIR=${SRCTOP}/contrib/openbsm
+CFLAGS+=	-I${OPENBSMDIR}
+LDADD+=	${LIBAUDITD}
 
 CFLAGS.process-control.c+=	-I${SRCTOP}/tests
 
diff --git a/tests/sys/audit/administrative.c b/tests/sys/audit/administrative.c
index 4ec73f4710e0..d75f6147cdf4 100644
--- a/tests/sys/audit/administrative.c
+++ b/tests/sys/audit/administrative.c
@@ -341,10 +341,16 @@ ATF_TC_CLEANUP(auditctl_success, tc)
 	 * at the configured path. To reset this, we need to stop and start the
 	 * auditd(8) again. Here, we check if auditd(8) was running already
 	 * before the test started. If so, we stop and start it again.
+	 *
+	 * TODO: should we skip this test if auditd(8) is already running to
+	 * avoid restarting it?
 	 */
-	system("service auditd onestop > /dev/null 2>&1");
-	if (!atf_utils_file_exists("started_auditd"))
+	if (!atf_utils_file_exists("started_fake_auditd")) {
+		system("service auditd onestop > /dev/null 2>&1");
 		system("service auditd onestart > /dev/null 2>&1");
+	} else {
+		cleanup();
+	}
 }
 
 
diff --git a/tests/sys/audit/utils.c b/tests/sys/audit/utils.c
index be31f4138412..e94279ff93bf 100644
--- a/tests/sys/audit/utils.c
+++ b/tests/sys/audit/utils.c
@@ -30,6 +30,7 @@
 #include <sys/ioctl.h>
 
 #include <bsm/libbsm.h>
+#include <bsm/auditd_lib.h>
 #include <security/audit/audit_ioctl.h>
 
 #include <atf-c.h>
@@ -222,13 +223,44 @@ skip_if_extattr_not_supported(const char *path)
 	}
 }
 
-FILE
-*setup(struct pollfd fd[], const char *name)
+static bool
+is_auditd_running(void)
+{
+	int trigger;
+	int err;
+
+	/*
+	 * AUDIT_TRIGGER_INITIALIZE is a no-op message on FreeBSD and can
+	 * therefore be used to check whether auditd has already been started.
+	 * This is significantly cheaper than running `service auditd onestatus`
+	 * for each test case. It is also slightly less racy since it will only
+	 * return true once auditd() has opened the trigger file rather than
+	 * just when the pidfile has been created.
+	 */
+	trigger = AUDIT_TRIGGER_INITIALIZE;
+	err = auditon(A_SENDTRIGGER, &trigger, sizeof(trigger));
+	if (err == 0) {
+		fprintf(stderr, "auditd(8) is running.\n");
+		return (true);
+	} else {
+		/*
+		 * A_SENDTRIGGER returns ENODEV if auditd isn't listening,
+		 * all other error codes indicate a fatal error.
+		 */
+		ATF_REQUIRE_MSG(errno == ENODEV,
+		    "Unexpected error from auditon(2): %s", strerror(errno));
+		return (false);
+	}
+
+}
+
+FILE *
+setup(struct pollfd fd[], const char *name)
 {
 	au_mask_t fmask, nomask;
+	FILE *pipestream;
 	fmask = get_audit_mask(name);
 	nomask = get_audit_mask("no");
-	FILE *pipestream;
 
 	ATF_REQUIRE((fd[0].fd = open("/dev/auditpipe", O_RDONLY)) != -1);
 	ATF_REQUIRE((pipestream = fdopen(fd[0].fd, "r")) != NULL);
@@ -244,12 +276,39 @@ FILE
 
 	/* Set local preselection audit_class as "no" for audit startup */
 	set_preselect_mode(fd[0].fd, &nomask);
-	ATF_REQUIRE_EQ(0, system("service auditd onestatus || \
-	{ service auditd onestart && touch started_auditd ; }"));
-
-	/* If 'started_auditd' exists, that means we started auditd(8) */
-	if (atf_utils_file_exists("started_auditd"))
+	if (!is_auditd_running()) {
+		fprintf(stderr, "Running audit_quick_start() for testing... ");
+		/*
+		 * Previously, this test started auditd using
+		 * `service auditd onestart`. However, there is a race condition
+		 * there since service can return before auditd(8) has
+		 * fully started (once the daemon parent process has forked)
+		 * and this can cause check_audit_startup() to fail sometimes.
+		 *
+		 * In the CheriBSD CI this caused the first test executed by
+		 * kyua (administrative:acct_failure) to fail every time, but
+		 * subsequent ones would almost always succeed.
+		 *
+		 * To avoid this problem (and as a nice side-effect this speeds
+		 * up the test quite a bit), we register this process as a
+		 * "fake" auditd(8) using the audit_quick_start() function from
+		 * libauditd.
+		 */
+		atf_utils_create_file("started_fake_auditd", "yes\n");
+		ATF_REQUIRE(atf_utils_file_exists("started_fake_auditd"));
+		ATF_REQUIRE_EQ_MSG(0, audit_quick_start(),
+		    "Failed to start fake auditd: %m");
+		fprintf(stderr, "done.\n");
+		/* audit_quick_start() should log an audit start event. */
 		check_audit_startup(fd, "audit startup", pipestream);
+		/*
+		 * If we exit cleanly shutdown audit_quick_start(), if not
+		 * cleanup() will take care of it.
+		 * This is not required, but makes it easier to run individual
+		 * tests outside of kyua.
+		 */
+		atexit(cleanup);
+	}
 
 	/* Set local preselection parameters specific to "name" audit_class */
 	set_preselect_mode(fd[0].fd, &fmask);
@@ -259,6 +318,13 @@ FILE
 void
 cleanup(void)
 {
-	if (atf_utils_file_exists("started_auditd"))
-		system("service auditd onestop > /dev/null 2>&1");
+	if (atf_utils_file_exists("started_fake_auditd")) {
+		fprintf(stderr, "Running audit_quick_stop()... ");
+		if (audit_quick_stop() != 0) {
+			fprintf(stderr, "Failed to stop fake auditd: %m\n");
+			abort();
+		}
+		fprintf(stderr, "done.\n");
+		unlink("started_fake_auditd");
+	}
 }