git: eca4dd133883 - main - execve: Add guard pages around execve KVA buffers
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 04 May 2026 17:28:21 UTC
The branch main has been updated by markj:
URL: https://cgit.FreeBSD.org/src/commit/?id=eca4dd133883c9e9aaeca68e0119a638ba0aaca7
commit eca4dd133883c9e9aaeca68e0119a638ba0aaca7
Author: Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2026-05-04 15:38:54 +0000
Commit: Mark Johnston <markj@FreeBSD.org>
CommitDate: 2026-05-04 17:28:02 +0000
execve: Add guard pages around execve KVA buffers
This helps ensure that overflows will trigger a panic instead of
silently corrupting adjacent buffers, as happened in SA-26:13.exec.
Extend kmap_alloc_wait() to support allocation of guard pages on both
sides of a KVA allocation. Modify the exec_map setup accordingly. Add
the "vm.exec_map_guard_pages" tunable to provide control over the guard
page allocations.
Reviewed by: kib
MFC after: 2 weeks
Differential Revision: https://reviews.freebsd.org/D56711
---
sys/kern/kern_exec.c | 3 ++-
sys/vm/vm_extern.h | 2 +-
sys/vm/vm_init.c | 8 ++++++--
sys/vm/vm_kern.c | 39 ++++++++++++++++++++++++++++++++-------
sys/vm/vm_kern.h | 1 +
5 files changed, 42 insertions(+), 11 deletions(-)
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 8e3b41170cab..958ec559fd8d 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1435,7 +1435,8 @@ exec_prealloc_args_kva(void *arg __unused)
mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF);
for (i = 0; i < exec_map_entries; i++) {
argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK);
- argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size);
+ argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size,
+ ptoa(exec_map_guard_pages));
argkva->gen = exec_args_gen;
SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
}
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index 207eb721d129..799b89ce2fc8 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -51,7 +51,7 @@ void *kva_alloc_aligned(vm_size_t, vm_size_t);
void kva_free(void *, vm_size_t);
/* These operate on pageable virtual addresses. */
-void *kmap_alloc_wait(vm_map_t, vm_size_t);
+void *kmap_alloc_wait(vm_map_t, vm_size_t, vm_size_t);
void kmap_free_wakeup(vm_map_t, void *, vm_size_t);
/* These operate on virtual addresses backed by memory. */
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index 40a001afb300..316b43c1c240 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -274,8 +274,12 @@ again:
exec_map_entries = 2 * mp_ncpus + 4;
#endif
exec_map_entry_size = round_page(PATH_MAX + ARG_MAX);
- kmem_subinit(exec_map, kernel_map, &minaddr, &maxaddr,
- exec_map_entries * exec_map_entry_size + 64 * PAGE_SIZE, false);
+ exec_map_guard_pages = 1;
+ TUNABLE_INT_FETCH("vm.exec_map_guard_pages", &exec_map_guard_pages);
+ size = exec_map_entries *
+ (exec_map_entry_size + 2 * ptoa(exec_map_guard_pages)) +
+ 64 * PAGE_SIZE;
+ kmem_subinit(exec_map, kernel_map, &minaddr, &maxaddr, size, false);
kmem_subinit(pipe_map, kernel_map, &minaddr, &maxaddr, maxpipekva,
false);
TSEXIT();
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 3a6fed6cb3e1..fc5d0de424bd 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -107,6 +107,7 @@ CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0);
const u_long vm_maxuser_address = VM_MAXUSER_ADDRESS;
u_int exec_map_entry_size;
+u_int exec_map_guard_pages;
u_int exec_map_entries;
SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD,
@@ -706,34 +707,52 @@ kmem_free(void *addr, vm_size_t size)
vmem_free(arena, (uintptr_t)addr, size);
}
+static void
+kmap_alloc_map(vm_map_t map, vm_offset_t addr, vm_size_t size,
+ vm_prot_t prot, int flags)
+{
+ int error __diagused;
+
+ error = vm_map_insert(map, NULL, 0,
+ addr, addr + size, prot, prot, flags);
+ KASSERT(error == KERN_SUCCESS,
+ ("%s: unexpected error %d", __func__, error));
+}
+
/*
* kmap_alloc_wait:
*
* Allocates pageable memory from a sub-map of the kernel. If the submap
* has no room, the caller sleeps waiting for more memory in the submap.
+ * If "guard_size" is non-zero, a guard region of that size is placed directly
+ * before and after the returned range, which starts past the leading guard.
*
* This routine may block.
*/
void *
-kmap_alloc_wait(vm_map_t map, vm_size_t size)
+kmap_alloc_wait(vm_map_t map, vm_size_t size, vm_size_t guard_size)
{
vm_offset_t addr;
+ vm_size_t total_size;
+
+ KASSERT(size % PAGE_SIZE == 0 && guard_size % PAGE_SIZE == 0,
+ ("%s: size %zu guard_size %zu", __func__, size, guard_size));
- size = round_page(size);
if (!swap_reserve(size))
return (NULL);
+ total_size = size + 2 * guard_size;
for (;;) {
/*
* To make this work for more than one map, use the map's lock
* to lock out sleepers/wakers.
*/
vm_map_lock(map);
- addr = vm_map_findspace(map, vm_map_min(map), size);
- if (addr + size <= vm_map_max(map))
+ addr = vm_map_findspace(map, vm_map_min(map), total_size);
+ if (addr + total_size <= vm_map_max(map))
break;
/* no space now; see if we can ever get space */
- if (vm_map_max(map) - vm_map_min(map) < size) {
+ if (vm_map_max(map) - vm_map_min(map) < total_size) {
vm_map_unlock(map);
swap_release(size);
return (0);
@@ -741,10 +760,16 @@ kmap_alloc_wait(vm_map_t map, vm_size_t size)
vm_map_modflags(map, MAP_NEEDS_WAKEUP, 0);
vm_map_unlock_and_wait(map, 0);
}
- vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_RW, VM_PROT_RW,
+ if (guard_size != 0) {
+ kmap_alloc_map(map, addr, guard_size,
+ VM_PROT_NONE, MAP_CREATE_GUARD);
+ kmap_alloc_map(map, addr + guard_size + size, guard_size,
+ VM_PROT_NONE, MAP_CREATE_GUARD);
+ }
+ kmap_alloc_map(map, addr + guard_size, size, VM_PROT_RW,
MAP_ACC_CHARGED);
vm_map_unlock(map);
- return ((void *)addr);
+ return ((void *)(addr + guard_size));
}
/*
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
index 942c03480364..296a50ae0058 100644
--- a/sys/vm/vm_kern.h
+++ b/sys/vm/vm_kern.h
@@ -75,4 +75,5 @@ extern struct vmem *memguard_arena;
extern u_long vm_kmem_size;
extern u_int exec_map_entries;
extern u_int exec_map_entry_size;
+extern u_int exec_map_guard_pages;
#endif /* _VM_VM_KERN_H_ */