git: e22f3f55c360 - releng/15.0 - libcasper: switch from select(2) to poll(2)

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Wed, 20 May 2026 19:39:35 UTC
The branch releng/15.0 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=e22f3f55c360085e7b00cc00e7c8ac623802714e

commit e22f3f55c360085e7b00cc00e7c8ac623802714e
Author:     Mariusz Zaborski <oshogbo@FreeBSD.org>
AuthorDate: 2026-05-18 15:32:49 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2026-05-19 23:52:31 +0000

    libcasper: switch from select(2) to poll(2)
    
    The previous implementation used FD_SET() on a stack-allocated fd_set,
    which is an out-of-bounds write whenever the socket fd is >= FD_SETSIZE
    (1024).
    
    poll(2) takes an array indexed by slot rather than by fd value, so it
    has no FD_SETSIZE limit.
    
    Approved by:    so
    Security:       FreeBSD-SA-26:22.libcasper
    Security:       CVE-2026-39461
    Reported by:    Joshua Rogers
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D56695
---
 lib/libcasper/libcasper/libcasper_impl.h    |   6 +-
 lib/libcasper/libcasper/libcasper_service.c |  57 +--------
 lib/libcasper/libcasper/service.c           | 187 ++++++++++++++++------------
 lib/libcasper/tests/Makefile                |  11 +-
 lib/libcasper/tests/cap_main_test.c         | 142 +++++++++++++++++++++
 5 files changed, 262 insertions(+), 141 deletions(-)

diff --git a/lib/libcasper/libcasper/libcasper_impl.h b/lib/libcasper/libcasper/libcasper_impl.h
index 5f0aacf2afa8..43d2495cacf6 100644
--- a/lib/libcasper/libcasper/libcasper_impl.h
+++ b/lib/libcasper/libcasper/libcasper_impl.h
@@ -54,6 +54,8 @@ void		 service_message(struct service *service,
 void		 service_start(struct service *service, int sock, int procfd);
 const char	*service_name(struct service *service);
 int		 service_get_channel_flags(struct service *service);
+bool		 service_have_connections(void);
+bool		 service_poll_dispatch(void);
 
 /* Private service connection functions. */
 struct service_connection	*service_connection_add(struct service *service,
@@ -64,10 +66,6 @@ void				 service_connection_remove(
 int				 service_connection_clone(
 				    struct service *service,
 				    struct service_connection *sconn);
-struct service_connection	*service_connection_first(
-				    struct service *service);
-struct service_connection	*service_connection_next(
-				    struct service_connection *sconn);
 cap_channel_t			*service_connection_get_chan(
 				    const struct service_connection *sconn);
 int				 service_connection_get_sock(
diff --git a/lib/libcasper/libcasper/libcasper_service.c b/lib/libcasper/libcasper/libcasper_service.c
index df58f48d78eb..b951de58d229 100644
--- a/lib/libcasper/libcasper/libcasper_service.c
+++ b/lib/libcasper/libcasper/libcasper_service.c
@@ -222,10 +222,6 @@ service_register_core(int fd)
 void
 casper_main_loop(int fd)
 {
-	fd_set fds;
-	struct casper_service *casserv;
-	struct service_connection *sconn, *sconntmp;
-	int sock, maxfd, ret;
 
 	if (zygote_init() < 0)
 		_exit(1);
@@ -235,55 +231,10 @@ casper_main_loop(int fd)
 	 */
 	service_register_core(fd);
 
-	for (;;) {
-		FD_ZERO(&fds);
-		FD_SET(fd, &fds);
-		maxfd = -1;
-		TAILQ_FOREACH(casserv, &casper_services, cs_next) {
-			/* We handle only core services. */
-			if (!CSERVICE_IS_CORE(casserv))
-				continue;
-			for (sconn = service_connection_first(casserv->cs_service);
-			    sconn != NULL;
-			    sconn = service_connection_next(sconn)) {
-				sock = service_connection_get_sock(sconn);
-				FD_SET(sock, &fds);
-				maxfd = sock > maxfd ? sock : maxfd;
-			}
-		}
-		if (maxfd == -1) {
-			/* Nothing to do. */
-			_exit(0);
-		}
-		maxfd++;
-
-
-		assert(maxfd <= (int)FD_SETSIZE);
-		ret = select(maxfd, &fds, NULL, NULL, NULL);
-		assert(ret == -1 || ret > 0);	/* select() cannot timeout */
-		if (ret == -1) {
-			if (errno == EINTR)
-				continue;
+	while (service_have_connections()) {
+		if (!service_poll_dispatch())
 			_exit(1);
-		}
-
-		TAILQ_FOREACH(casserv, &casper_services, cs_next) {
-			/* We handle only core services. */
-			if (!CSERVICE_IS_CORE(casserv))
-				continue;
-			for (sconn = service_connection_first(casserv->cs_service);
-			    sconn != NULL; sconn = sconntmp) {
-				/*
-				 * Prepare for connection to be removed from
-				 * the list on failure.
-				 */
-				sconntmp = service_connection_next(sconn);
-				sock = service_connection_get_sock(sconn);
-				if (FD_ISSET(sock, &fds)) {
-					service_message(casserv->cs_service,
-					    sconn);
-				}
-			}
-		}
 	}
+
+	_exit(0);
 }
diff --git a/lib/libcasper/libcasper/service.c b/lib/libcasper/libcasper/service.c
index 70418db50085..72de10477480 100644
--- a/lib/libcasper/libcasper/service.c
+++ b/lib/libcasper/libcasper/service.c
@@ -30,7 +30,7 @@
  * SUCH DAMAGE.
  */
 
-#include <sys/types.h>
+#include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/nv.h>
@@ -41,6 +41,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <paths.h>
+#include <poll.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -71,7 +72,8 @@ struct service_connection {
 	int		 sc_magic;
 	cap_channel_t	*sc_chan;
 	nvlist_t	*sc_limits;
-	TAILQ_ENTRY(service_connection) sc_next;
+	struct service	*sc_service;
+	size_t		 sc_pollidx;
 };
 
 #define	SERVICE_MAGIC	0x5e91ce
@@ -81,9 +83,118 @@ struct service {
 	uint64_t		 s_flags;
 	service_limit_func_t	*s_limit;
 	service_command_func_t	*s_command;
-	TAILQ_HEAD(, service_connection) s_connections;
 };
 
+/*
+ * Process-wide poll set.  pollset_pfds[i] and pollset_conns[i] describe the
+ * same slot; a slot whose fd is negative is free and is ignored by poll(2).
+ * Free slots are reused by pollset_add(); the arrays are never shrunk.
+ */
+#define	POLLSET_CHUNK	8
+static struct pollfd		*pollset_pfds;
+static struct service_connection **pollset_conns;
+static size_t			 pollset_cap;
+static size_t			 pollset_size;
+
+/*
+ * Register sconn and its socket in the first free slot, appending a new slot
+ * (and growing both arrays in POLLSET_CHUNK steps) when none is free.
+ * Returns 0 on success, or -1 if reallocarray(3) fails; the existing slots
+ * are left untouched in that case.
+ */
+static int
+pollset_add(struct service_connection *sconn, int sock)
+{
+	size_t i, newcap;
+	void *p;
+
+	for (i = 0; i < pollset_size; i++) {
+		if (pollset_pfds[i].fd < 0)
+			break;
+	}
+	if (i == pollset_size) {
+		newcap = roundup2(pollset_size + 1, POLLSET_CHUNK);
+		if (newcap > pollset_cap) {
+			p = reallocarray(pollset_pfds, newcap,
+			    sizeof(*pollset_pfds));
+			if (p == NULL)
+				return (-1);
+			pollset_pfds = p;
+			p = reallocarray(pollset_conns, newcap,
+			    sizeof(*pollset_conns));
+			if (p == NULL)
+				return (-1);
+			pollset_conns = p;
+			pollset_cap = newcap;
+		}
+		pollset_size++;
+	}
+	pollset_pfds[i].fd = sock;
+	pollset_pfds[i].events = POLLIN;
+	pollset_pfds[i].revents = 0;
+	pollset_conns[i] = sconn;
+	sconn->sc_pollidx = i;
+	return (0);
+}
+
+/*
+ * Mark sconn's slot as free.  The socket itself is not closed here.
+ */
+static void
+pollset_remove(struct service_connection *sconn)
+{
+
+	pollset_pfds[sconn->sc_pollidx].fd = -1;
+	pollset_conns[sconn->sc_pollidx] = NULL;
+}
+
+/*
+ * Return true if at least one slot in the poll set is in use.
+ */
+bool
+service_have_connections(void)
+{
+	size_t i;
+
+	for (i = 0; i < pollset_size; i++) {
+		if (pollset_pfds[i].fd >= 0)
+			return (true);
+	}
+	return (false);
+}
+
+/*
+ * Wait in poll(2) until some connection has an event pending and hand each
+ * such connection to service_message().  EINTR is retried.  Returns false if
+ * poll(2) fails for any other reason, true otherwise.
+ */
+bool
+service_poll_dispatch(void)
+{
+	size_t i;
+	int ret;
+
+	do {
+		ret = poll(pollset_pfds, pollset_size, -1);
+	} while (ret == -1 && errno == EINTR);
+	if (ret == -1)
+		return (false);
+
+	/*
+	 * service_message() may add or remove connections, so the globals
+	 * are re-read on every iteration.  A slot released earlier in this
+	 * pass keeps its stale revents but has no connection; skip it
+	 * rather than dereference NULL.
+	 */
+	for (i = 0; i < pollset_size; i++) {
+		if (pollset_pfds[i].revents == 0 || pollset_conns[i] == NULL)
+			continue;
+		service_message(pollset_conns[i]->sc_service,
+		    pollset_conns[i]);
+	}
+	return (true);
+}
+
 struct service *
 service_alloc(const char *name, service_limit_func_t *limitfunc,
     service_command_func_t *commandfunc, uint64_t flags)
@@ -101,7 +184,6 @@ service_alloc(const char *name, service_limit_func_t *limitfunc,
 	service->s_limit = limitfunc;
 	service->s_command = commandfunc;
 	service->s_flags = flags;
-	TAILQ_INIT(&service->s_connections);
 	service->s_magic = SERVICE_MAGIC;
 
 	return (service);
@@ -110,13 +192,16 @@ service_alloc(const char *name, service_limit_func_t *limitfunc,
 void
 service_free(struct service *service)
 {
-	struct service_connection *sconn;
+	size_t i;
 
 	assert(service->s_magic == SERVICE_MAGIC);
 
 	service->s_magic = 0;
-	while ((sconn = service_connection_first(service)) != NULL)
-		service_connection_remove(service, sconn);
+	for (i = 0; i < pollset_size; i++) {
+		if (pollset_conns[i] != NULL &&
+		    pollset_conns[i]->sc_service == service)
+			service_connection_remove(service, pollset_conns[i]);
+	}
 	free(service->s_name);
 	free(service);
 }
@@ -153,8 +238,16 @@ service_connection_add(struct service *service, int sock,
 			return (NULL);
 		}
 	}
+	sconn->sc_service = service;
+	if (pollset_add(sconn, sock) == -1) {
+		serrno = errno;
+		nvlist_destroy(sconn->sc_limits);
+		(void)cap_unwrap(sconn->sc_chan, NULL);
+		free(sconn);
+		errno = serrno;
+		return (NULL);
+	}
 	sconn->sc_magic = SERVICE_CONNECTION_MAGIC;
-	TAILQ_INSERT_TAIL(&service->s_connections, sconn, sc_next);
 	return (sconn);
 }
 
@@ -166,7 +259,7 @@ service_connection_remove(struct service *service,
 	assert(service->s_magic == SERVICE_MAGIC);
 	assert(sconn->sc_magic == SERVICE_CONNECTION_MAGIC);
 
-	TAILQ_REMOVE(&service->s_connections, sconn, sc_next);
+	pollset_remove(sconn);
 	sconn->sc_magic = 0;
 	nvlist_destroy(sconn->sc_limits);
 	cap_close(sconn->sc_chan);
@@ -196,31 +289,6 @@ service_connection_clone(struct service *service,
 	return (sock[1]);
 }
 
-struct service_connection *
-service_connection_first(struct service *service)
-{
-	struct service_connection *sconn;
-
-	assert(service->s_magic == SERVICE_MAGIC);
-
-	sconn = TAILQ_FIRST(&service->s_connections);
-	assert(sconn == NULL ||
-	    sconn->sc_magic == SERVICE_CONNECTION_MAGIC);
-	return (sconn);
-}
-
-struct service_connection *
-service_connection_next(struct service_connection *sconn)
-{
-
-	assert(sconn->sc_magic == SERVICE_CONNECTION_MAGIC);
-
-	sconn = TAILQ_NEXT(sconn, sc_next);
-	assert(sconn == NULL ||
-	    sconn->sc_magic == SERVICE_CONNECTION_MAGIC);
-	return (sconn);
-}
-
 cap_channel_t *
 service_connection_get_chan(const struct service_connection *sconn)
 {
@@ -329,14 +397,6 @@ service_message(struct service *service, struct service_connection *sconn)
 	nvlist_destroy(nvlout);
 }
 
-static int
-fd_add(fd_set *fdsp, int maxfd, int fd)
-{
-
-	FD_SET(fd, fdsp);
-	return (fd > maxfd ? fd : maxfd);
-}
-
 const char *
 service_name(struct service *service)
 {
@@ -417,9 +477,6 @@ service_clean(int *sockp, int *procfdp, uint64_t flags)
 void
 service_start(struct service *service, int sock, int procfd)
 {
-	struct service_connection *sconn, *sconntmp;
-	fd_set fds;
-	int maxfd, nfds;
 
 	assert(service != NULL);
 	assert(service->s_magic == SERVICE_MAGIC);
@@ -429,43 +486,9 @@ service_start(struct service *service, int sock, int procfd)
 	if (service_connection_add(service, sock, NULL) == NULL)
 		_exit(1);
 
-	for (;;) {
-		FD_ZERO(&fds);
-		maxfd = -1;
-		for (sconn = service_connection_first(service); sconn != NULL;
-		    sconn = service_connection_next(sconn)) {
-			maxfd = fd_add(&fds, maxfd,
-			    service_connection_get_sock(sconn));
-		}
-
-		assert(maxfd >= 0);
-		assert(maxfd + 1 <= (int)FD_SETSIZE);
-		nfds = select(maxfd + 1, &fds, NULL, NULL, NULL);
-		if (nfds < 0) {
-			if (errno != EINTR)
-				_exit(1);
-			continue;
-		} else if (nfds == 0) {
-			/* Timeout. */
-			abort();
-		}
-
-		for (sconn = service_connection_first(service); sconn != NULL;
-		    sconn = sconntmp) {
-			/*
-			 * Prepare for connection to be removed from the list
-			 * on failure.
-			 */
-			sconntmp = service_connection_next(sconn);
-			if (FD_ISSET(service_connection_get_sock(sconn), &fds))
-				service_message(service, sconn);
-		}
-		if (service_connection_first(service) == NULL) {
-			/*
-			 * No connections left, exiting.
-			 */
-			break;
-		}
+	while (service_have_connections()) {
+		if (!service_poll_dispatch())
+			_exit(1);
 	}
 
 	_exit(0);
diff --git a/lib/libcasper/tests/Makefile b/lib/libcasper/tests/Makefile
index 4b6c72fd86e8..1ddb7e72128b 100644
--- a/lib/libcasper/tests/Makefile
+++ b/lib/libcasper/tests/Makefile
@@ -1,6 +1,13 @@
-.PATH:		${SRCTOP}/tests
+.include <src.opts.mk>
 
 PACKAGE=	tests
-KYUAFILE=	yes
+
+ATF_TESTS_C=	cap_main_test
+
+.if ${MK_CASPER} != "no"
+LIBADD+=	casper
+CFLAGS+=	-DWITH_CASPER
+.endif
+LIBADD+=	nv
 
 .include <bsd.test.mk>
diff --git a/lib/libcasper/tests/cap_main_test.c b/lib/libcasper/tests/cap_main_test.c
new file mode 100644
index 000000000000..0551f12de66e
--- /dev/null
+++ b/lib/libcasper/tests/cap_main_test.c
@@ -0,0 +1,159 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2026 Mariusz Zaborski <oshogbo@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/resource.h>
+#include <sys/select.h>
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libcasper.h>
+
+#include <atf-c.h>
+
+/* Number of clones to open; deliberately larger than FD_SETSIZE. */
+#define	NCONNECTIONS	(FD_SETSIZE + 64)
+/* Extra descriptors required on top of NCONNECTIONS by the rlimit check. */
+#define	FD_HEADROOM	64
+
+/*
+ * Test that file descriptors past FD_SETSIZE (1024) work.
+ *
+ * Opens NCONNECTIONS clones of one Casper channel so that the helper ends up
+ * serving more sockets than an fd_set can describe.  The test is skipped if
+ * getrlimit(2) fails or the RLIMIT_NOFILE hard limit is below
+ * NCONNECTIONS + FD_HEADROOM.
+ */
+ATF_TC_WITHOUT_HEAD(many_connections);
+ATF_TC_BODY(many_connections, tc)
+{
+	struct rlimit rl;
+	cap_channel_t *chan;
+	cap_channel_t **clones;
+	size_t i;
+
+	if (getrlimit(RLIMIT_NOFILE, &rl) != 0)
+		atf_tc_skip("getrlimit: %s", strerror(errno));
+	if (rl.rlim_max < NCONNECTIONS + FD_HEADROOM)
+		atf_tc_skip("RLIMIT_NOFILE hard cap %ju below required %d",
+		    (uintmax_t)rl.rlim_max, NCONNECTIONS + FD_HEADROOM);
+	/* Raise the soft limit to the hard limit before cap_init(). */
+	rl.rlim_cur = rl.rlim_max;
+	ATF_REQUIRE_MSG(setrlimit(RLIMIT_NOFILE, &rl) == 0,
+	    "setrlimit: %s", strerror(errno));
+
+	chan = cap_init();
+	ATF_REQUIRE_MSG(chan != NULL, "cap_init failed: %s", strerror(errno));
+
+	clones = calloc(NCONNECTIONS, sizeof(*clones));
+	ATF_REQUIRE(clones != NULL);
+
+	/*
+	 * Every cap_clone(3) adds one more connection to the helper.
+	 * After this loop the helper is watching more fds than an
+	 * fd_set can hold.
+	 */
+	for (i = 0; i < NCONNECTIONS; i++) {
+		clones[i] = cap_clone(chan);
+		ATF_REQUIRE_MSG(clones[i] != NULL,
+		    "cap_clone failed at %zu/%d: %s",
+		    i, NCONNECTIONS, strerror(errno));
+	}
+
+	for (i = 0; i < NCONNECTIONS; i++)
+		cap_close(clones[i]);
+	free(clones);
+	cap_close(chan);
+}
+
+#define	CHURN_CONNECTIONS	50
+#define	CHURN_CLOSE_STEP	5
+
+/*
+ * Test that gaps in the file descriptor list do not break casper.
+ *
+ * Opens CHURN_CONNECTIONS clones, closes every CHURN_CLOSE_STEP-th one, and
+ * then checks that both the original channel and a surviving clone can still
+ * be cloned.
+ */
+ATF_TC_WITHOUT_HEAD(connection_churn);
+ATF_TC_BODY(connection_churn, tc)
+{
+	cap_channel_t *chan, *survivor, *extra;
+	cap_channel_t *clones[CHURN_CONNECTIONS];
+	size_t i, survivor_idx;
+
+	chan = cap_init();
+	ATF_REQUIRE_MSG(chan != NULL, "cap_init failed: %s", strerror(errno));
+
+	for (i = 0; i < CHURN_CONNECTIONS; i++) {
+		clones[i] = cap_clone(chan);
+		ATF_REQUIRE_MSG(clones[i] != NULL,
+		    "cap_clone failed at %zu: %s", i, strerror(errno));
+	}
+
+	/*
+	 * Close every Nth clone.
+	 */
+	for (i = 0; i < CHURN_CONNECTIONS; i += CHURN_CLOSE_STEP) {
+		cap_close(clones[i]);
+		clones[i] = NULL;
+	}
+
+	/*
+	 * Force a poll() cycle: the helper handles POLLIN on chan and
+	 * POLLHUP on the closed clones in the same walk.
+	 */
+	extra = cap_clone(chan);
+	ATF_REQUIRE_MSG(extra != NULL, "cap_clone after churn failed: %s",
+	    strerror(errno));
+
+	/* A surviving clone must still round-trip. */
+	survivor_idx = 1;
+	survivor = cap_clone(clones[survivor_idx]);
+	ATF_REQUIRE_MSG(survivor != NULL,
+	    "cap_clone on survivor failed: %s", strerror(errno));
+
+	cap_close(survivor);
+	cap_close(extra);
+	for (i = 0; i < CHURN_CONNECTIONS; i++) {
+		if (clones[i] != NULL)
+			cap_close(clones[i]);
+	}
+	cap_close(chan);
+}
+
+/* Register the test cases with ATF. */
+ATF_TP_ADD_TCS(tp)
+{
+
+	ATF_TP_ADD_TC(tp, many_connections);
+	ATF_TP_ADD_TC(tp, connection_churn);
+	return (atf_no_error());
+}