git: 87aaf3766643 - releng/15.1 - jaildesc: Make sure to drain selinfo sleepers in jaildesc_close()

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Wed, 20 May 2026 19:38:51 UTC
The branch releng/15.1 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=87aaf37666434b88d66e15a244d43fee5f9eaee3

commit 87aaf37666434b88d66e15a244d43fee5f9eaee3
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2026-05-10 15:15:45 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2026-05-20 13:51:59 +0000

    jaildesc: Make sure to drain selinfo sleepers in jaildesc_close()
    
    Otherwise they may be left on a freed selinfo list after the
    corresponding jaildesc struct is freed.  This can be exploited to
    elevate privileges.
    
    Remove the JDF_SELECTED micro-optimization.  doselwakeup() is a no-op if
    no one ever called selrecord() on the file description, so I see no
    reason to complicate the code to avoid the call.
    
    Add some regression tests.
    
    Approved by:    re
    Approved by:    so
    Security:       FreeBSD-SA-26:19.file
    Security:       CVE-2026-45251
    Fixes:          66d8ffe3046d ("jaildesc: add kevent support")
    Reviewed by:    kib, jamie
    Differential Revision:  https://reviews.freebsd.org/D56945
---
 sys/kern/kern_jaildesc.c  |  10 +--
 sys/sys/jaildesc.h        |   1 -
 tests/sys/kern/Makefile   |   2 +
 tests/sys/kern/jaildesc.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 206 insertions(+), 8 deletions(-)

diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c
index f4e31801201f..d6a7649ec484 100644
--- a/sys/kern/kern_jaildesc.c
+++ b/sys/kern/kern_jaildesc.c
@@ -197,10 +197,7 @@ jaildesc_knote(struct prison *pr, long hint)
 			JAILDESC_LOCK(jd);
 			if (hint == NOTE_JAIL_REMOVE) {
 				jd->jd_flags |= JDF_REMOVED;
-				if (jd->jd_flags & JDF_SELECTED) {
-					jd->jd_flags &= ~JDF_SELECTED;
-					selwakeup(&jd->jd_selinfo);
-				}
+				selwakeup(&jd->jd_selinfo);
 			}
 			KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint);
 			JAILDESC_UNLOCK(jd);
@@ -257,6 +254,7 @@ jaildesc_close(struct file *fp, struct thread *td)
 			}
 			prison_free(pr);
 		}
+		seldrain(&jd->jd_selinfo);
 		knlist_destroy(&jd->jd_selinfo.si_note);
 		JAILDESC_LOCK_DESTROY(jd);
 		free(jd, M_JAILDESC);
@@ -276,10 +274,8 @@ jaildesc_poll(struct file *fp, int events, struct ucred *active_cred,
 	JAILDESC_LOCK(jd);
 	if (jd->jd_flags & JDF_REMOVED)
 		revents |= POLLHUP;
-	if (revents == 0) {
+	else
 		selrecord(td, &jd->jd_selinfo);
-		jd->jd_flags |= JDF_SELECTED;
-	}
 	JAILDESC_UNLOCK(jd);
 	return (revents);
 }
diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h
index fda270d62e70..77c017f7e14d 100644
--- a/sys/sys/jaildesc.h
+++ b/sys/sys/jaildesc.h
@@ -71,7 +71,6 @@ struct jaildesc {
 /*
  * Flags for the jd_flags field
  */
-#define	JDF_SELECTED	0x00000001	/* issue selwakeup() */
 #define	JDF_REMOVED	0x00000002	/* jail was removed */
 #define	JDF_OWNING	0x00000004	/* closing descriptor removes jail */
 
diff --git a/tests/sys/kern/Makefile b/tests/sys/kern/Makefile
index 4e306148d544..a704581ee449 100644
--- a/tests/sys/kern/Makefile
+++ b/tests/sys/kern/Makefile
@@ -22,6 +22,7 @@ ATF_TESTS_C+=	exterr_test
 ATF_TESTS_C+=	fdgrowtable_test
 ATF_TESTS_C+=	getdirentries_test
 ATF_TESTS_C+=	jail_lookup_root
+ATF_TESTS_C+=	jaildesc
 ATF_TESTS_C+=	inotify_test
 ATF_TESTS_C+=	kill_zombie
 .if ${MK_OPENSSL} != "no"
@@ -89,6 +90,7 @@ PROGS+=		sendfile_helper
 
 LIBADD.copy_file_range+=		md
 LIBADD.jail_lookup_root+=		jail util
+LIBADD.jaildesc+=			pthread
 CFLAGS.sys_getrandom+=			-I${SRCTOP}/sys/contrib/zstd/lib
 LIBADD.sys_getrandom+=			zstd
 LIBADD.sys_getrandom+=			c
diff --git a/tests/sys/kern/jaildesc.c b/tests/sys/kern/jaildesc.c
new file mode 100644
index 000000000000..11d751554887
--- /dev/null
+++ b/tests/sys/kern/jaildesc.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2026 Mark Johnston <markj@FreeBSD.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/uio.h>
+
+#include <atf-c.h>
+#include <errno.h>
+#include <poll.h>
+#include <pthread.h>
+#include <pwd.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Create a persistent jail called "name", rooted at "/", with jail_set(2) and
+ * return the owning descriptor written to the "desc" parameter.  Closing that
+ * descriptor removes the jail.  A jail_set(2) failure fails the test case.
+ */
+static int
+create_jail(const char *name)
+{
+	int desc = -1;
+	int jid;
+	/* Name/value parameter pairs; "persist" is passed without a value. */
+	struct iovec iov[] = {
+		{ .iov_base = __DECONST(void *, "name"),
+		  .iov_len = sizeof("name") },
+		{ .iov_base = __DECONST(void *, name),
+		  .iov_len = strlen(name) + 1 },
+		{ .iov_base = __DECONST(void *, "path"),
+		  .iov_len = sizeof("path") },
+		{ .iov_base = __DECONST(void *, "/"),
+		  .iov_len = sizeof("/") },
+		{ .iov_base = __DECONST(void *, "persist"),
+		  .iov_len = sizeof("persist") },
+		{ .iov_base = NULL, .iov_len = 0 },
+		{ .iov_base = __DECONST(void *, "desc"),
+		  .iov_len = sizeof("desc") },
+		{ .iov_base = &desc, .iov_len = sizeof(desc) },
+	};
+
+	jid = jail_set(iov, nitems(iov), JAIL_CREATE | JAIL_OWN_DESC);
+	ATF_REQUIRE_MSG(jid >= 0, "jail_set: %s", strerror(errno));
+	return (desc);
+}
+
+/* Thread entry: poll(2) the descriptor *arg for up to 5s; return revents. */
+static void *
+poll_jaildesc(void *arg)
+{
+	/* Unnamed members start at 0, so revents is defined if poll(2) fails. */
+	struct pollfd pfd = { .fd = *(int *)arg, .events = POLLHUP };
+
+	(void)poll(&pfd, 1, 5000);
+	return ((void *)(uintptr_t)pfd.revents);
+}
+
+/*
+ * Regression test: close an owning jail descriptor while another thread is
+ * blocked in poll(2) on it; the poller is expected to return with POLLNVAL.
+ */
+ATF_TC(poll_close_race);
+ATF_TC_HEAD(poll_close_race, tc)
+{
+	atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(poll_close_race, tc)
+{
+	pthread_t thr;
+	uintptr_t revents;
+	int error, jd;
+
+	jd = create_jail("jaildesc_poll_close_race");
+
+	error = pthread_create(&thr, NULL, poll_jaildesc, &jd);
+	ATF_REQUIRE_MSG(error == 0, "pthread_create: %s", strerror(error));
+
+	/* No handshake: sleep 250ms so the thread can block in poll(2). */
+	usleep(250000);
+
+	ATF_REQUIRE_MSG(close(jd) == 0, "close: %s", strerror(errno));
+
+	error = pthread_join(thr, (void *)&revents);
+	ATF_REQUIRE_MSG(error == 0, "pthread_join: %s", strerror(error));
+	ATF_REQUIRE_EQ(revents, POLLNVAL);
+}
+
+/*
+ * Verify that a thread blocked in poll(2) on a jail descriptor returns with
+ * POLLHUP once the jail is removed with jail_remove_jd().
+ */
+ATF_TC(poll_remove_wakeup);
+ATF_TC_HEAD(poll_remove_wakeup, tc)
+{
+	atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(poll_remove_wakeup, tc)
+{
+	pthread_t thr;
+	uintptr_t revents;
+	int error, jd;
+
+	jd = create_jail("jaildesc_poll_remove_wakeup");
+
+	error = pthread_create(&thr, NULL, poll_jaildesc, &jd);
+	ATF_REQUIRE_MSG(error == 0, "pthread_create: %s", strerror(error));
+
+	/* No handshake: sleep 250ms so the thread can block in poll(2). */
+	usleep(250000);
+
+	ATF_REQUIRE_MSG(jail_remove_jd(jd) == 0,
+	    "jail_remove_jd: %s", strerror(errno));
+
+	error = pthread_join(thr, (void *)&revents);
+	ATF_REQUIRE_MSG(error == 0, "pthread_join: %s", strerror(error));
+	ATF_REQUIRE_EQ(revents, POLLHUP);
+
+	ATF_REQUIRE_MSG(close(jd) == 0, "close: %s", strerror(errno));
+}
+
+/*
+ * Return a JAIL_GET_DESC descriptor from jail_get(2) for the jail "name".
+ */
+static int
+get_jaildesc(const char *name)
+{
+	char namebuf[MAXHOSTNAMELEN];
+	int desc = -1, jid;
+	struct iovec iov[] = {
+		{ .iov_base = __DECONST(void *, "name"),
+		  .iov_len = sizeof("name") },
+		{ .iov_base = namebuf, .iov_len = sizeof(namebuf) },
+		{ .iov_base = __DECONST(void *, "desc"),
+		  .iov_len = sizeof("desc") },
+		{ .iov_base = &desc, .iov_len = sizeof(desc) },
+	};
+
+	strlcpy(namebuf, name, sizeof(namebuf));
+	jid = jail_get(iov, nitems(iov), JAIL_GET_DESC);
+	ATF_REQUIRE_MSG(jid >= 0, "jail_get: %s", strerror(errno));
+	return (desc);
+}
+
+/*
+ * Same close-during-poll(2) regression test as poll_close_race, but polling a
+ * non-owning JAIL_GET_DESC descriptor obtained after dropping to "nobody".
+ */
+ATF_TC(poll_close_race_get_desc);
+ATF_TC_HEAD(poll_close_race_get_desc, tc)
+{
+	atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(poll_close_race_get_desc, tc)
+{
+	struct passwd *pw;
+	pthread_t thr;
+	uintptr_t revents;
+	int error, jd, owning_jd;
+
+	/* Create the jail as root; keep the owning descriptor for cleanup. */
+	owning_jd = create_jail("jaildesc_poll_close_get_desc");
+
+	/*
+	 * Drop root by becoming "nobody".  jail_get(2) with JAIL_GET_DESC does
+	 * not require PRIV_JAIL_REMOVE, so a non-root process in the host
+	 * prison can obtain a read-only descriptor for any visible jail.
+	 */
+	pw = getpwnam("nobody");
+	ATF_REQUIRE_MSG(pw != NULL, "getpwnam: %s", strerror(errno));
+	ATF_REQUIRE_MSG(setuid(pw->pw_uid) == 0, "setuid: %s", strerror(errno));
+
+	jd = get_jaildesc("jaildesc_poll_close_get_desc");
+
+	error = pthread_create(&thr, NULL, poll_jaildesc, &jd);
+	ATF_REQUIRE_MSG(error == 0, "pthread_create: %s", strerror(error));
+
+	/* No handshake: sleep 250ms so the thread can block in poll(2). */
+	usleep(250000);
+
+	ATF_REQUIRE_MSG(close(jd) == 0, "close: %s", strerror(errno));
+
+	error = pthread_join(thr, (void *)&revents);
+	ATF_REQUIRE_MSG(error == 0, "pthread_join: %s", strerror(error));
+	ATF_REQUIRE_EQ(revents, POLLNVAL);
+
+	ATF_REQUIRE_MSG(close(owning_jd) == 0, "close: %s", strerror(errno));
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	/* Register the three jail descriptor poll(2) test cases. */
+	ATF_TP_ADD_TC(tp, poll_close_race);
+	ATF_TP_ADD_TC(tp, poll_remove_wakeup);
+	ATF_TP_ADD_TC(tp, poll_close_race_get_desc);
+	return (atf_no_error());
+}