git: d617806aac14 - main - libc: report _SC_NPROCESSORS_ONLN more accurately in cpu-limited jails
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 29 Oct 2025 16:32:57 UTC
The branch main has been updated by kevans:
URL: https://cgit.FreeBSD.org/src/commit/?id=d617806aac1469319970e3551656e9deabb98a35
commit d617806aac1469319970e3551656e9deabb98a35
Author: Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2025-10-29 16:32:30 +0000
Commit: Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2025-10-29 16:32:30 +0000
libc: report _SC_NPROCESSORS_ONLN more accurately in cpu-limited jails
We don't support CPU hotplug, but we do support cpuset(8) restrictions
on jails (including prison0, which uses cpuset 1). The process cannot
widen its cpuset beyond its root set, so it makes sense to instead
report the number of cpus enabled there rather than the total number
in the system.
This change is effectively a nop for the majority of systems and jails
in the wild, though it does reduce the performance of this query now
that we can't take advantage of AT_NCPUS being provided in the auxinfo.
The implementation here is notably different from Linux, which would not
take cgroups into account. Linux does, however, take CPU hotplug into
account, so allowing the online count to diverge from (and be lower than)
the configured count, to reflect what the process can actually be
scheduled on, doesn't really diverge in semantics.
Reviewed by: kib
Differential Revision: https://reviews.freebsd.org/D52295
---
lib/libc/gen/sysconf.3 | 6 +-
lib/libc/gen/sysconf.c | 16 ++++-
lib/libc/tests/sys/cpuset_test.c | 140 ++++++++++++++++++++++++++++++++++++++-
3 files changed, 156 insertions(+), 6 deletions(-)
diff --git a/lib/libc/gen/sysconf.3 b/lib/libc/gen/sysconf.3
index e38357b898a7..290ef0dc158c 100644
--- a/lib/libc/gen/sysconf.3
+++ b/lib/libc/gen/sysconf.3
@@ -25,7 +25,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd April 26, 2013
+.Dd August 30, 2025
.Dt SYSCONF 3
.Os
.Sh NAME
@@ -77,7 +77,9 @@ The maximum number of supplemental groups.
.It Li _SC_NPROCESSORS_CONF
The number of processors configured.
.It Li _SC_NPROCESSORS_ONLN
-The number of processors currently online.
+The number of processors currently online, taking into account current jail
+restrictions to report only the number of processors that are usable to the
+process.
.It Li _SC_OPEN_MAX
One more than the maximum value the system may assign to a new file descriptor.
.It Li _SC_PAGESIZE
diff --git a/lib/libc/gen/sysconf.c b/lib/libc/gen/sysconf.c
index b5b732eed05d..87aedc07c110 100644
--- a/lib/libc/gen/sysconf.c
+++ b/lib/libc/gen/sysconf.c
@@ -72,6 +72,7 @@ long
sysconf(int name)
{
struct rlimit rl;
+ cpuset_t cpus;
size_t len;
int mib[2], sverrno, value;
long lvalue, defaultresult;
@@ -581,8 +582,21 @@ yesno:
return (_POSIX_IPV6);
#endif
- case _SC_NPROCESSORS_CONF:
case _SC_NPROCESSORS_ONLN:
+ /*
+ * Consult our root set first, because our CPU availability
+ * may not match the total number of CPUs available on the
+ * system and we may have a non-uniform layout even within
+ * userland. In particular, each jail has a root set that can
+ * be constrained by its parent and processes within the jail
+ * cannot widen beyond those constraints, so to those processes
+ * it makes sense to claim the more limited count.
+ */
+ if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
+ sizeof(cpus), &cpus) == 0)
+ return (CPU_COUNT(&cpus));
+ /* FALLTHROUGH */
+ case _SC_NPROCESSORS_CONF:
if (_elf_aux_info(AT_NCPUS, &value, sizeof(value)) == 0)
return ((long)value);
mib[0] = CTL_HW;
diff --git a/lib/libc/tests/sys/cpuset_test.c b/lib/libc/tests/sys/cpuset_test.c
index 53d6a8215bbc..c8ad225fadfc 100644
--- a/lib/libc/tests/sys/cpuset_test.c
+++ b/lib/libc/tests/sys/cpuset_test.c
@@ -34,8 +34,10 @@
#include <sys/uio.h>
#include <sys/wait.h>
+#include <assert.h>
#include <errno.h>
#include <stdio.h>
+#include <stdlib.h>
#include <unistd.h>
#include <atf-c.h>
@@ -107,6 +109,19 @@ skip_ltncpu(int ncpu, cpuset_t *mask)
atf_tc_skip("Test requires %d or more cores.", ncpu);
}
+static void
+skip_ltncpu_root(int ncpu, cpuset_t *mask)
+{
+ /* Skip unless our root cpuset has >= ncpu CPUs; *mask gets that set. */
+ CPU_ZERO(mask);
+ ATF_REQUIRE_EQ(0, cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID,
+ -1, sizeof(*mask), mask));
+ if (CPU_COUNT(mask) < ncpu) {
+ atf_tc_skip("Test requires cpuset root with %d or more cores.",
+ ncpu);
+ }
+}
+
ATF_TC(newset);
ATF_TC_HEAD(newset, tc)
{
@@ -234,9 +249,8 @@ ATF_TC_BODY(deadlk, tc)
}
static int
-do_jail(int sock)
+create_jail(void)
{
- struct jail_test_info info;
struct iovec iov[2];
char *name;
int error;
@@ -250,8 +264,22 @@ do_jail(int sock)
iov[1].iov_base = name;
iov[1].iov_len = strlen(name) + 1;
- if (jail_set(iov, 2, JAIL_CREATE | JAIL_ATTACH) < 0)
+ error = jail_set(iov, 2, JAIL_CREATE | JAIL_ATTACH);
+ free(name);
+ if (error < 0)
return (FAILURE_JAIL);
+ return (0);
+}
+
+static int
+do_jail(int sock)
+{
+ struct jail_test_info info;
+ int error;
+
+ error = create_jail();
+ if (error != 0)
+ return (error);
/* Record parameters, kick them over, then make a swift exit. */
CPU_ZERO(&info.jail_tidmask);
@@ -641,6 +669,111 @@ ATF_TC_BODY(jail_attach_disjoint, tc)
try_attach(jid, &smask);
}
+struct nproc_info {
+ long nproc_init; /* _SC_NPROCESSORS_ONLN right after jail creation */
+ long nproc_final; /* _SC_NPROCESSORS_ONLN after parent's adjustment */
+ long nproc_global; /* _SC_NPROCESSORS_CONF as seen inside the jail */
+};
+
+ATF_TC(jail_nproc);
+ATF_TC_HEAD(jail_nproc, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "Test that _SC_NPROCESSORS_ONLN reflects jail cpuset constraints");
+}
+ATF_TC_BODY(jail_nproc, tc)
+{
+ cpuset_t jmask;
+ struct nproc_info ninfo = { };
+ int sockpair[2];
+ cpusetid_t setid;
+ ssize_t readsz;
+ pid_t pid;
+ int fcpu, error, pfd, sock;
+ char okb = 0x7f, rcvb;
+
+ skip_ltncpu_root(2, &jmask);
+ fcpu = CPU_FFS(&jmask) - 1;
+
+ /*
+ * Just adjusting our affinity should not affect the number of
+ * processors considered online; we want to be sure that it's only
+ * adjusted if our jail's root set is.
+ */
+ CPU_CLR(fcpu, &jmask);
+ error = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
+ sizeof(jmask), &jmask);
+ ATF_REQUIRE_EQ(0, error);
+ ATF_REQUIRE(sysconf(_SC_NPROCESSORS_ONLN) > CPU_COUNT(&jmask));
+
+ ATF_REQUIRE_EQ(0, socketpair(PF_UNIX, SOCK_STREAM, 0, sockpair));
+
+ /* Fork with a procdesc; NOTE(review): pfd is not waited on below. */
+ ATF_REQUIRE((pid = pdfork(&pfd, 0)) != -1);
+
+ if (pid == 0) {
+ /* First child sets up the jail. */
+ sock = sockpair[SP_CHILD];
+ close(sockpair[SP_PARENT]);
+
+ error = create_jail();
+ if (error != 0)
+ _exit(error);
+
+ ninfo.nproc_init = sysconf(_SC_NPROCESSORS_ONLN);
+
+ /* Signal the parent that we're jailed. */
+ readsz = write(sock, &okb, sizeof(okb));
+ assert(readsz == sizeof(okb));
+
+ /* Wait for parent to adjust our mask and signal OK. */
+ readsz = read(sock, &rcvb, sizeof(rcvb));
+ assert(readsz == sizeof(rcvb));
+ assert(rcvb == okb);
+
+ ninfo.nproc_final = sysconf(_SC_NPROCESSORS_ONLN);
+ ninfo.nproc_global = sysconf(_SC_NPROCESSORS_CONF);
+ readsz = write(sock, &ninfo, sizeof(ninfo));
+ assert(readsz == sizeof(ninfo));
+
+ _exit(0);
+ }
+
+ close(sockpair[SP_CHILD]);
+ sock = sockpair[SP_PARENT];
+
+ /* Wait for the jailed signal; fail the test (not abort) if it's bad. */
+ readsz = read(sock, &rcvb, sizeof(rcvb));
+ ATF_REQUIRE_EQ(sizeof(rcvb), readsz);
+ ATF_REQUIRE_EQ(okb, rcvb);
+
+ /* Grab the cpuset id and adjust it. */
+ error = cpuset_getid(CPU_LEVEL_ROOT, CPU_WHICH_PID, pid, &setid);
+ ATF_REQUIRE_EQ(0, error);
+ error = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_CPUSET,
+ setid, sizeof(jmask), &jmask);
+ ATF_REQUIRE_EQ(0, error);
+
+ /* Signal OK to proceed. */
+ readsz = write(sock, &okb, sizeof(okb));
+ ATF_REQUIRE_EQ(sizeof(okb), readsz);
+
+ /* Grab our final nproc info. */
+ readsz = read(sock, &ninfo, sizeof(ninfo));
+ ATF_REQUIRE_EQ(sizeof(ninfo), readsz);
+
+ /*
+ * We set our own affinity to jmask, which is derived from *our* root
+ * set, at the beginning of the test. The jail would inherit from this
+ * set, so we just re-use that mask here to confirm that
+ * _SC_NPROCESSORS_ONLN did actually drop in response to us limiting the
+ * jail, and that its _SC_NPROCESSORS_CONF did not.
+ */
+ ATF_REQUIRE_EQ(CPU_COUNT(&jmask) + 1, ninfo.nproc_init);
+ ATF_REQUIRE_EQ(CPU_COUNT(&jmask) + 1, ninfo.nproc_global);
+ ATF_REQUIRE_EQ(CPU_COUNT(&jmask), ninfo.nproc_final);
+}
+
ATF_TC(badparent);
ATF_TC_HEAD(badparent, tc)
{
@@ -686,6 +819,7 @@ ATF_TP_ADD_TCS(tp)
ATF_TP_ADD_TC(tp, jail_attach_prevbase);
ATF_TP_ADD_TC(tp, jail_attach_plain);
ATF_TP_ADD_TC(tp, jail_attach_disjoint);
+ ATF_TP_ADD_TC(tp, jail_nproc);
ATF_TP_ADD_TC(tp, badparent);
return (atf_no_error());
}