git: 9a8d333368ba - main - libc: fix memfd_create's HUGETLB handling

From: Kyle Evans <kevans_at_FreeBSD.org>
Date: Thu, 09 Apr 2026 02:37:35 UTC
The branch main has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=9a8d333368baef356f0a611b47ec592568dd14f9

commit 9a8d333368baef356f0a611b47ec592568dd14f9
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2026-04-09 02:37:00 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2026-04-09 02:37:11 +0000

    libc: fix memfd_create's HUGETLB handling
    
    The 'simplification' commit referenced below actually broke one aspect
    of MFD_HUGETLB: the caller isn't supposed to be required to specify a
    size.  MFD_HUGETLB by itself without a shift mask just requests a large
    page, so we revert that part of memfd_create() back.
    
    While we're here, fix up the related parts of the manpages a little bit,
    since MFD_HUGETLB is actually supported.  The manpage claims that we
    would return ENOSYS if forced mappings weren't supported, but this was
    actually not true.  However, that seems like a very important
    distinction to make between ENOSYS and EOPNOTSUPP, so fix the
    implementation to match the docs.
    
    Fixes:  8b8cf4ece660f ("memfd_create: simplify HUGETLB support [...]")
    Reviewed by:    kib, markj
    Differential Revision:  https://reviews.freebsd.org/D56114
---
 lib/libc/gen/memfd_create.c        | 23 +++++++++++++------
 lib/libsys/shm_open.2              | 31 ++++++++++++++++++++++----
 tests/sys/posixshm/memfd_test.c    | 35 +++++++++++++++++++++++++++++
 tests/sys/posixshm/posixshm.h      | 45 ++++++++++++++++++++++++++++++++++++++
 tests/sys/posixshm/posixshm_test.c | 38 +++++++++++---------------------
 5 files changed, 136 insertions(+), 36 deletions(-)

diff --git a/lib/libc/gen/memfd_create.c b/lib/libc/gen/memfd_create.c
index 78131f46d7b1..8e6c93be4337 100644
--- a/lib/libc/gen/memfd_create.c
+++ b/lib/libc/gen/memfd_create.c
@@ -95,16 +95,25 @@ memfd_create(const char *name, unsigned int flags)
 	npgs = getpagesizes(pgs, nitems(pgs));
 	if (npgs == -1)
 		goto clean;
-	pgsize = (size_t)1 << ((flags & MFD_HUGE_MASK) >> MFD_HUGE_SHIFT);
-	for (pgidx = 0; pgidx < npgs; pgidx++) {
-		if (pgsize == pgs[pgidx])
-			break;
-	}
-	if (pgidx == npgs) {
-		errno = EOPNOTSUPP;
+	else if (npgs == 1) {
+		errno = ENOSYS;
 		goto clean;
 	}
 
+	if ((flags & MFD_HUGE_MASK) == 0) {
+		pgidx = 1;
+	} else {
+		pgsize = 1UL << ((flags & MFD_HUGE_MASK) >> MFD_HUGE_SHIFT);
+		for (pgidx = 1; pgidx < npgs; pgidx++) {
+			if (pgsize == pgs[pgidx])
+				break;
+		}
+		if (pgidx == npgs) {
+			errno = EOPNOTSUPP;
+			goto clean;
+		}
+	}
+
 	memset(&slc, 0, sizeof(slc));
 	slc.psind = pgidx;
 	slc.alloc_policy = SHM_LARGEPAGE_ALLOC_DEFAULT;
diff --git a/lib/libsys/shm_open.2 b/lib/libsys/shm_open.2
index c3196d966e6b..58597a341b9e 100644
--- a/lib/libsys/shm_open.2
+++ b/lib/libsys/shm_open.2
@@ -26,7 +26,7 @@
 .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd August 4, 2025
+.Dd March 26, 2026
 .Dt SHM_OPEN 2
 .Os
 .Sh NAME
@@ -343,7 +343,25 @@ Allow adding seals to the resulting file descriptor using the
 .Xr fcntl 2
 command.
 .It Dv MFD_HUGETLB
-This flag is currently unsupported.
+Create a memfd backed by a
+.Dq largepage
+object.
+One of the
+.Dv MFD_HUGE_*
+flags defined in
+.In sys/mman.h
+may be included to specify a fixed size.
+If a specific size is not requested, the smallest supported large page size is
+selected.
+.Pp
+The behavior documented above for the
+.Fn shm_create_largepage
+.Fa psind
+argument also applies to largepage objects created by
+.Fn memfd_create ,
+and the
+.Dv SHM_LARGEPAGE_ALLOC_DEFAULT
+policy will always be used.
 .El
 .Sh RETURN VALUES
 If successful,
@@ -458,17 +476,22 @@ argument was too long.
 .Pp
 An invalid or unsupported flag was included in
 .Fa flags .
+.It Bq Er EINVAL
+A hugetlb mapping was requested, but
+.Dv MFD_HUGETLB
+was not specified in
+.Fa flags .
 .It Bq Er EMFILE
 The process has already reached its limit for open file descriptors.
 .It Bq Er ENFILE
 The system file table is full.
 .It Bq Er ENOSYS
-In
-.Fa memfd_create ,
 .Dv MFD_HUGETLB
 was specified in
 .Fa flags ,
 and this system does not support forced hugetlb mappings.
+.It Bq Er EOPNOTSUPP
+This system does not support the requested hugetlb page size.
 .El
 .Pp
 .Fn shm_open
diff --git a/tests/sys/posixshm/memfd_test.c b/tests/sys/posixshm/memfd_test.c
index 5cae184206b1..387eca4cafe9 100644
--- a/tests/sys/posixshm/memfd_test.c
+++ b/tests/sys/posixshm/memfd_test.c
@@ -34,6 +34,8 @@
 #include <errno.h>
 #include <unistd.h>
 
+#include "posixshm.h"
+
 ATF_TC_WITHOUT_HEAD(basic);
 ATF_TC_BODY(basic, tc)
 {
@@ -277,6 +279,38 @@ ATF_TC_BODY(immutable_seals, tc)
 	close(fd);
 }
 
+ATF_TC_WITHOUT_HEAD(hugetlb);
+ATF_TC_BODY(hugetlb, tc)	/* MFD_HUGETLB with and without a fixed size */
+{
+	size_t ps[MAXPAGESIZES], pgsize;
+	int fd, pscnt;
+
+	pscnt = pagesizes(ps, false);
+#define	MFD_HUGE_SUPPORTED(sz)	((sz) <= (1 << 24))
+#define	MFD_HUGE_FLAGS(sz) (((ffsl(sz) - 1U) << MFD_HUGE_SHIFT) & MFD_HUGE_MASK)
+	for (int psidx = 1; psidx < pscnt; psidx++) {
+		pgsize = ps[psidx];
+		if (!MFD_HUGE_SUPPORTED(pgsize))
+			continue;
+
+		fd = memfd_create("...", MFD_HUGETLB | MFD_HUGE_FLAGS(pgsize));
+		ATF_REQUIRE_MSG(fd != -1,
+		    "Creating a %zu-size hugetlb memfd", pgsize);
+		close(fd);	/* Don't leak one fd per page size. */
+	}
+	/* A bare MFD_HUGETLB must succeed iff large pages are supported. */
+	fd = memfd_create("...", MFD_HUGETLB);
+	if (pscnt == 1) {
+		ATF_REQUIRE_MSG(fd == -1,
+		    "Creating an unspecified hugetlb memfd without large page support");
+		ATF_REQUIRE(errno == ENOSYS);
+	} else {
+		ATF_REQUIRE_MSG(fd != -1,
+		    "Creating an unspecified hugetlb memfd with large page support");
+		close(fd);
+	}
+}
+
 ATF_TP_ADD_TCS(tp)
 {
 
@@ -289,5 +323,6 @@ ATF_TP_ADD_TCS(tp)
 	ATF_TP_ADD_TC(tp, get_seals);
 	ATF_TP_ADD_TC(tp, dup_seals);
 	ATF_TP_ADD_TC(tp, immutable_seals);
+	ATF_TP_ADD_TC(tp, hugetlb);
 	return (atf_no_error());
 }
diff --git a/tests/sys/posixshm/posixshm.h b/tests/sys/posixshm/posixshm.h
new file mode 100644
index 000000000000..84c73e0d10df
--- /dev/null
+++ b/tests/sys/posixshm/posixshm.h
@@ -0,0 +1,45 @@
+/*-
+ *
+ * Copyright (c) 2020 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/mman.h>
+
+#include <stdbool.h>
+
+static int
+pagesizes(size_t ps[MAXPAGESIZES], bool required)
+{
+	int pscnt;	/* number of page sizes reported by getpagesizes() */
+
+	pscnt = getpagesizes(ps, MAXPAGESIZES);
+	ATF_REQUIRE_MSG(pscnt != -1, "getpagesizes failed; errno=%d", errno);
+	ATF_REQUIRE_MSG(ps[0] != 0, "psind 0 is %zu", ps[0]);
+	ATF_REQUIRE_MSG(pscnt <= MAXPAGESIZES, "invalid pscnt %d", pscnt);
+	if (pscnt == 1 && required)	/* ps[0] is the only size */
+		atf_tc_skip("no large page support");
+	return (pscnt);
+}
+
diff --git a/tests/sys/posixshm/posixshm_test.c b/tests/sys/posixshm/posixshm_test.c
index 55514a5f4bde..680a443b6eac 100644
--- a/tests/sys/posixshm/posixshm_test.c
+++ b/tests/sys/posixshm/posixshm_test.c
@@ -49,6 +49,8 @@
 
 #include <atf-c.h>
 
+#include "posixshm.h"
+
 #define	TEST_PATH_LEN	256
 static char test_path[TEST_PATH_LEN];
 static char test_path2[TEST_PATH_LEN];
@@ -1239,20 +1241,6 @@ shm_open_large(int psind, int policy, size_t sz)
 	return (fd);
 }
 
-static int
-pagesizes(size_t ps[MAXPAGESIZES])
-{
-	int pscnt;
-
-	pscnt = getpagesizes(ps, MAXPAGESIZES);
-	ATF_REQUIRE_MSG(pscnt != -1, "getpagesizes failed; errno=%d", errno);
-	ATF_REQUIRE_MSG(ps[0] != 0, "psind 0 is %zu", ps[0]);
-	ATF_REQUIRE_MSG(pscnt <= MAXPAGESIZES, "invalid pscnt %d", pscnt);
-	if (pscnt == 1)
-		atf_tc_skip("no large page support");
-	return (pscnt);
-}
-
 ATF_TC_WITHOUT_HEAD(largepage_basic);
 ATF_TC_BODY(largepage_basic, tc)
 {
@@ -1261,7 +1249,7 @@ ATF_TC_BODY(largepage_basic, tc)
 	size_t ps[MAXPAGESIZES];
 	int error, fd, pscnt;
 
-	pscnt = pagesizes(ps);
+	pscnt = pagesizes(ps, true);
 	zeroes = calloc(1, ps[0]);
 	ATF_REQUIRE(zeroes != NULL);
 	for (int i = 1; i < pscnt; i++) {
@@ -1317,7 +1305,7 @@ ATF_TC_BODY(largepage_config, tc)
 	size_t ps[MAXPAGESIZES + 1]; /* silence warnings if MAXPAGESIZES == 1 */
 	int error, fd;
 
-	(void)pagesizes(ps);
+	(void)pagesizes(ps, true);
 
 	fd = shm_open(SHM_ANON, O_CREAT | O_RDWR, 0);
 	ATF_REQUIRE_MSG(fd >= 0, "shm_open failed; error=%d", errno);
@@ -1379,7 +1367,7 @@ ATF_TC_BODY(largepage_mmap, tc)
 	size_t ps[MAXPAGESIZES];
 	int fd, pscnt;
 
-	pscnt = pagesizes(ps);
+	pscnt = pagesizes(ps, true);
 	for (int i = 1; i < pscnt; i++) {
 		fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
 
@@ -1475,7 +1463,7 @@ ATF_TC_BODY(largepage_munmap, tc)
 	size_t ps[MAXPAGESIZES], ps1;
 	int fd, pscnt;
 
-	pscnt = pagesizes(ps);
+	pscnt = pagesizes(ps, true);
 	for (int i = 1; i < pscnt; i++) {
 		fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
 		ps1 = ps[i - 1];
@@ -1526,7 +1514,7 @@ ATF_TC_BODY(largepage_madvise, tc)
 	size_t ps[MAXPAGESIZES];
 	int fd, pscnt;
 
-	pscnt = pagesizes(ps);
+	pscnt = pagesizes(ps, true);
 	for (int i = 1; i < pscnt; i++) {
 		fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
 		addr = mmap(NULL, ps[i], PROT_READ | PROT_WRITE, MAP_SHARED, fd,
@@ -1595,7 +1583,7 @@ ATF_TC_BODY(largepage_mlock, tc)
 	    "sysctlbyname(vm.stats.vm.v_user_wire_count) failed; error=%d",
 	    errno);
 
-	pscnt = pagesizes(ps);
+	pscnt = pagesizes(ps, true);
 	for (int i = 1; i < pscnt; i++) {
 		if (ps[i] / ps[0] > max_wired - wired) {
 			/* Cannot wire past the limit. */
@@ -1638,7 +1626,7 @@ ATF_TC_BODY(largepage_msync, tc)
 	size_t ps[MAXPAGESIZES];
 	int fd, pscnt;
 
-	pscnt = pagesizes(ps);
+	pscnt = pagesizes(ps, true);
 	for (int i = 1; i < pscnt; i++) {
 		fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
 		addr = mmap(NULL, ps[i], PROT_READ | PROT_WRITE, MAP_SHARED, fd,
@@ -1697,7 +1685,7 @@ ATF_TC_BODY(largepage_mprotect, tc)
 	size_t ps[MAXPAGESIZES];
 	int fd, pscnt;
 
-	pscnt = pagesizes(ps);
+	pscnt = pagesizes(ps, true);
 	for (int i = 1; i < pscnt; i++) {
 		/*
 		 * Reserve a contiguous region in the address space to avoid
@@ -1767,7 +1755,7 @@ ATF_TC_BODY(largepage_minherit, tc)
 	pid_t child;
 	int fd, pscnt, status;
 
-	pscnt = pagesizes(ps);
+	pscnt = pagesizes(ps, true);
 	for (int i = 1; i < pscnt; i++) {
 		fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
 		addr = mmap(NULL, ps[i], PROT_READ | PROT_WRITE, MAP_SHARED, fd,
@@ -1855,7 +1843,7 @@ ATF_TC_BODY(largepage_pipe, tc)
 	int fd, pfd[2], pscnt, status;
 	pid_t child;
 
-	pscnt = pagesizes(ps);
+	pscnt = pagesizes(ps, true);
 
 	for (int i = 1; i < pscnt; i++) {
 		fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
@@ -1908,7 +1896,7 @@ ATF_TC_BODY(largepage_reopen, tc)
 	size_t ps[MAXPAGESIZES];
 	int fd, psind;
 
-	(void)pagesizes(ps);
+	(void)pagesizes(ps, true);
 	psind = 1;
 
 	gen_test_path();