git: 9a8d333368ba - main - libc: fix memfd_create's HUGETLB handling
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 09 Apr 2026 02:37:35 UTC
The branch main has been updated by kevans:
URL: https://cgit.FreeBSD.org/src/commit/?id=9a8d333368baef356f0a611b47ec592568dd14f9
commit 9a8d333368baef356f0a611b47ec592568dd14f9
Author: Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2026-04-09 02:37:00 +0000
Commit: Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2026-04-09 02:37:11 +0000
libc: fix memfd_create's HUGETLB handling
The 'simplification' commit referenced below actually broke one aspect
of MFD_HUGETLB: the caller is not required to specify a page size.
MFD_HUGETLB by itself, without any of the MFD_HUGE_* size bits, just
requests the smallest supported large page size, so restore that part
of memfd_create().
While we're here, fix up the related parts of the manpage a little bit,
since MFD_HUGETLB is actually supported. The manpage claimed that we
would return ENOSYS if forced mappings weren't supported, but this was
actually not true. The distinction between ENOSYS (the system has no
large page support at all) and EOPNOTSUPP (the requested large page
size is not supported) seems like a very important one to make, so fix
the implementation to match the docs.
Fixes: 8b8cf4ece660f ("memfd_create: simplify HUGETLB support [...]")
Reviewed by: kib, markj
Differential Revision: https://reviews.freebsd.org/D56114
---
lib/libc/gen/memfd_create.c | 23 +++++++++++++------
lib/libsys/shm_open.2 | 31 ++++++++++++++++++++++----
tests/sys/posixshm/memfd_test.c | 35 +++++++++++++++++++++++++++++
tests/sys/posixshm/posixshm.h | 45 ++++++++++++++++++++++++++++++++++++++
tests/sys/posixshm/posixshm_test.c | 38 +++++++++++---------------------
5 files changed, 136 insertions(+), 36 deletions(-)
diff --git a/lib/libc/gen/memfd_create.c b/lib/libc/gen/memfd_create.c
index 78131f46d7b1..8e6c93be4337 100644
--- a/lib/libc/gen/memfd_create.c
+++ b/lib/libc/gen/memfd_create.c
@@ -95,16 +95,25 @@ memfd_create(const char *name, unsigned int flags)
npgs = getpagesizes(pgs, nitems(pgs));
if (npgs == -1)
goto clean;
- pgsize = (size_t)1 << ((flags & MFD_HUGE_MASK) >> MFD_HUGE_SHIFT);
- for (pgidx = 0; pgidx < npgs; pgidx++) {
- if (pgsize == pgs[pgidx])
- break;
- }
- if (pgidx == npgs) {
- errno = EOPNOTSUPP;
+ else if (npgs == 1) {
+ errno = ENOSYS;
goto clean;
}
+ if ((flags & MFD_HUGE_MASK) == 0) {
+ pgidx = 1;
+ } else {
+ pgsize = 1UL << ((flags & MFD_HUGE_MASK) >> MFD_HUGE_SHIFT);
+ for (pgidx = 1; pgidx < npgs; pgidx++) {
+ if (pgsize == pgs[pgidx])
+ break;
+ }
+ if (pgidx == npgs) {
+ errno = EOPNOTSUPP;
+ goto clean;
+ }
+ }
+
memset(&slc, 0, sizeof(slc));
slc.psind = pgidx;
slc.alloc_policy = SHM_LARGEPAGE_ALLOC_DEFAULT;
diff --git a/lib/libsys/shm_open.2 b/lib/libsys/shm_open.2
index c3196d966e6b..58597a341b9e 100644
--- a/lib/libsys/shm_open.2
+++ b/lib/libsys/shm_open.2
@@ -26,7 +26,7 @@
.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd August 4, 2025
+.Dd March 26, 2026
.Dt SHM_OPEN 2
.Os
.Sh NAME
@@ -343,7 +343,25 @@ Allow adding seals to the resulting file descriptor using the
.Xr fcntl 2
command.
.It Dv MFD_HUGETLB
-This flag is currently unsupported.
+Create a memfd backed by a
+.Dq largepage
+object.
+One of the
+.Dv MFD_HUGE_*
+flags defined in
+.In sys/mman.h
+may be included to specify a fixed size.
+If a specific size is not requested, the smallest supported large page size is
+selected.
+.Pp
+The behavior documented above for the
+.Fn shm_create_largepage
+.Fa psind
+argument also applies to largepage objects created by
+.Fn memfd_create ,
+and the
+.Dv SHM_LARGEPAGE_ALLOC_DEFAULT
+policy will always be used.
.El
.Sh RETURN VALUES
If successful,
@@ -458,17 +476,22 @@ argument was too long.
.Pp
An invalid or unsupported flag was included in
.Fa flags .
+.It Bq Er EINVAL
+A hugetlb mapping was requested, but
+.Dv MFD_HUGETLB
+was not specified in
+.Fa flags .
.It Bq Er EMFILE
The process has already reached its limit for open file descriptors.
.It Bq Er ENFILE
The system file table is full.
.It Bq Er ENOSYS
-In
-.Fa memfd_create ,
.Dv MFD_HUGETLB
was specified in
.Fa flags ,
and this system does not support forced hugetlb mappings.
+.It Bq Er EOPNOTSUPP
+This system does not support the requested hugetlb page size.
.El
.Pp
.Fn shm_open
diff --git a/tests/sys/posixshm/memfd_test.c b/tests/sys/posixshm/memfd_test.c
index 5cae184206b1..387eca4cafe9 100644
--- a/tests/sys/posixshm/memfd_test.c
+++ b/tests/sys/posixshm/memfd_test.c
@@ -34,6 +34,8 @@
#include <errno.h>
#include <unistd.h>
+#include "posixshm.h"
+
ATF_TC_WITHOUT_HEAD(basic);
ATF_TC_BODY(basic, tc)
{
@@ -277,6 +279,38 @@ ATF_TC_BODY(immutable_seals, tc)
close(fd);
}
+ATF_TC_WITHOUT_HEAD(hugetlb);
+ATF_TC_BODY(hugetlb, tc)
+{
+ size_t ps[MAXPAGESIZES], pgsize;
+ int fd, pscnt;
+
+ pscnt = pagesizes(ps, false);
+#define MFD_HUGE_SUPPORTED(sz) (sz <= (1 << 24))
+#define MFD_HUGE_FLAGS(sz) (((ffsl(sz) - 1U) << MFD_HUGE_SHIFT) & MFD_HUGE_MASK)
+ for (int psidx = 1; psidx < pscnt; psidx++) {
+ pgsize = ps[psidx];
+
+ if (!MFD_HUGE_SUPPORTED(pgsize))
+ continue;
+
+ ATF_REQUIRE_MSG((fd = memfd_create("...",
+ MFD_HUGETLB | MFD_HUGE_FLAGS(pgsize))) != -1,
+ "Creating a %zu-size hugetlb memfd", pgsize);
+ }
+
+ fd = memfd_create("...", MFD_HUGETLB);
+ if (pscnt == 1) {
+ ATF_REQUIRE_MSG(fd == -1,
+ "Creating an unspecified hugetlb memfd without large page support");
+ ATF_REQUIRE(errno == ENOSYS);
+ } else {
+ ATF_REQUIRE_MSG(fd != -1,
+ "Creating an unspecified hugetlb memfd with large page support");
+ close(fd);
+ }
+}
+
ATF_TP_ADD_TCS(tp)
{
@@ -289,5 +323,6 @@ ATF_TP_ADD_TCS(tp)
ATF_TP_ADD_TC(tp, get_seals);
ATF_TP_ADD_TC(tp, dup_seals);
ATF_TP_ADD_TC(tp, immutable_seals);
+ ATF_TP_ADD_TC(tp, hugetlb);
return (atf_no_error());
}
diff --git a/tests/sys/posixshm/posixshm.h b/tests/sys/posixshm/posixshm.h
new file mode 100644
index 000000000000..84c73e0d10df
--- /dev/null
+++ b/tests/sys/posixshm/posixshm.h
@@ -0,0 +1,45 @@
+/*-
+ *
+ * Copyright (c) 2020 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/mman.h>
+
+#include <stdbool.h>
+
+static int
+pagesizes(size_t ps[MAXPAGESIZES], bool required)
+{
+ int pscnt;
+
+ pscnt = getpagesizes(ps, MAXPAGESIZES);
+ ATF_REQUIRE_MSG(pscnt != -1, "getpagesizes failed; errno=%d", errno);
+ ATF_REQUIRE_MSG(ps[0] != 0, "psind 0 is %zu", ps[0]);
+ ATF_REQUIRE_MSG(pscnt <= MAXPAGESIZES, "invalid pscnt %d", pscnt);
+ if (pscnt == 1 && required)
+ atf_tc_skip("no large page support");
+ return (pscnt);
+}
+
diff --git a/tests/sys/posixshm/posixshm_test.c b/tests/sys/posixshm/posixshm_test.c
index 55514a5f4bde..680a443b6eac 100644
--- a/tests/sys/posixshm/posixshm_test.c
+++ b/tests/sys/posixshm/posixshm_test.c
@@ -49,6 +49,8 @@
#include <atf-c.h>
+#include "posixshm.h"
+
#define TEST_PATH_LEN 256
static char test_path[TEST_PATH_LEN];
static char test_path2[TEST_PATH_LEN];
@@ -1239,20 +1241,6 @@ shm_open_large(int psind, int policy, size_t sz)
return (fd);
}
-static int
-pagesizes(size_t ps[MAXPAGESIZES])
-{
- int pscnt;
-
- pscnt = getpagesizes(ps, MAXPAGESIZES);
- ATF_REQUIRE_MSG(pscnt != -1, "getpagesizes failed; errno=%d", errno);
- ATF_REQUIRE_MSG(ps[0] != 0, "psind 0 is %zu", ps[0]);
- ATF_REQUIRE_MSG(pscnt <= MAXPAGESIZES, "invalid pscnt %d", pscnt);
- if (pscnt == 1)
- atf_tc_skip("no large page support");
- return (pscnt);
-}
-
ATF_TC_WITHOUT_HEAD(largepage_basic);
ATF_TC_BODY(largepage_basic, tc)
{
@@ -1261,7 +1249,7 @@ ATF_TC_BODY(largepage_basic, tc)
size_t ps[MAXPAGESIZES];
int error, fd, pscnt;
- pscnt = pagesizes(ps);
+ pscnt = pagesizes(ps, true);
zeroes = calloc(1, ps[0]);
ATF_REQUIRE(zeroes != NULL);
for (int i = 1; i < pscnt; i++) {
@@ -1317,7 +1305,7 @@ ATF_TC_BODY(largepage_config, tc)
size_t ps[MAXPAGESIZES + 1]; /* silence warnings if MAXPAGESIZES == 1 */
int error, fd;
- (void)pagesizes(ps);
+ (void)pagesizes(ps, true);
fd = shm_open(SHM_ANON, O_CREAT | O_RDWR, 0);
ATF_REQUIRE_MSG(fd >= 0, "shm_open failed; error=%d", errno);
@@ -1379,7 +1367,7 @@ ATF_TC_BODY(largepage_mmap, tc)
size_t ps[MAXPAGESIZES];
int fd, pscnt;
- pscnt = pagesizes(ps);
+ pscnt = pagesizes(ps, true);
for (int i = 1; i < pscnt; i++) {
fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
@@ -1475,7 +1463,7 @@ ATF_TC_BODY(largepage_munmap, tc)
size_t ps[MAXPAGESIZES], ps1;
int fd, pscnt;
- pscnt = pagesizes(ps);
+ pscnt = pagesizes(ps, true);
for (int i = 1; i < pscnt; i++) {
fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
ps1 = ps[i - 1];
@@ -1526,7 +1514,7 @@ ATF_TC_BODY(largepage_madvise, tc)
size_t ps[MAXPAGESIZES];
int fd, pscnt;
- pscnt = pagesizes(ps);
+ pscnt = pagesizes(ps, true);
for (int i = 1; i < pscnt; i++) {
fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
addr = mmap(NULL, ps[i], PROT_READ | PROT_WRITE, MAP_SHARED, fd,
@@ -1595,7 +1583,7 @@ ATF_TC_BODY(largepage_mlock, tc)
"sysctlbyname(vm.stats.vm.v_user_wire_count) failed; error=%d",
errno);
- pscnt = pagesizes(ps);
+ pscnt = pagesizes(ps, true);
for (int i = 1; i < pscnt; i++) {
if (ps[i] / ps[0] > max_wired - wired) {
/* Cannot wire past the limit. */
@@ -1638,7 +1626,7 @@ ATF_TC_BODY(largepage_msync, tc)
size_t ps[MAXPAGESIZES];
int fd, pscnt;
- pscnt = pagesizes(ps);
+ pscnt = pagesizes(ps, true);
for (int i = 1; i < pscnt; i++) {
fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
addr = mmap(NULL, ps[i], PROT_READ | PROT_WRITE, MAP_SHARED, fd,
@@ -1697,7 +1685,7 @@ ATF_TC_BODY(largepage_mprotect, tc)
size_t ps[MAXPAGESIZES];
int fd, pscnt;
- pscnt = pagesizes(ps);
+ pscnt = pagesizes(ps, true);
for (int i = 1; i < pscnt; i++) {
/*
* Reserve a contiguous region in the address space to avoid
@@ -1767,7 +1755,7 @@ ATF_TC_BODY(largepage_minherit, tc)
pid_t child;
int fd, pscnt, status;
- pscnt = pagesizes(ps);
+ pscnt = pagesizes(ps, true);
for (int i = 1; i < pscnt; i++) {
fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
addr = mmap(NULL, ps[i], PROT_READ | PROT_WRITE, MAP_SHARED, fd,
@@ -1855,7 +1843,7 @@ ATF_TC_BODY(largepage_pipe, tc)
int fd, pfd[2], pscnt, status;
pid_t child;
- pscnt = pagesizes(ps);
+ pscnt = pagesizes(ps, true);
for (int i = 1; i < pscnt; i++) {
fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
@@ -1908,7 +1896,7 @@ ATF_TC_BODY(largepage_reopen, tc)
size_t ps[MAXPAGESIZES];
int fd, psind;
- (void)pagesizes(ps);
+ (void)pagesizes(ps, true);
psind = 1;
gen_test_path();