git: 5fa005e91560 - stable/13 - exec: Reimplement stack address randomization
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 16 Feb 2022 17:59:15 UTC
The branch stable/13 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=5fa005e91560785dad5183db080209447afde3c2

commit 5fa005e91560785dad5183db080209447afde3c2
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2022-01-17 16:42:56 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-02-16 16:55:03 +0000

    exec: Reimplement stack address randomization

    The approach taken by the stack gap implementation was to insert a
    random gap between the top of the fixed stack mapping and the true top
    of the main process stack.  This approach was chosen so as to avoid
    randomizing the previously fixed address of certain process metadata
    stored at the top of the stack, but had some shortcomings.  In
    particular, mlockall(2) calls would wire the gap, bloating the process'
    memory usage, and RLIMIT_STACK included the size of the gap so small
    (< several MB) limits could not be used.

    There is little value in storing each process' ps_strings at a fixed
    location, as only very old programs hard-code this address; consumers
    were converted decades ago to use a sysctl-based interface for this
    purpose.

    Thus, this change re-implements stack address randomization by simply
    breaking the convention of storing ps_strings at a fixed location, and
    randomizing the location of the entire stack mapping.  This
    implementation is simpler and avoids the problems mentioned above,
    while being unlikely to break compatibility anywhere the default ASLR
    settings are used.

    The kern.elfN.aslr.stack_gap sysctl is renamed to kern.elfN.aslr.stack,
    and is re-enabled by default.

PR: 260303 Reviewed by: kib Discussed with: emaste, mw Sponsored by: The FreeBSD Foundation (cherry picked from commit 1811c1e957ee1250b08b3246fc0db37ddf64b736) --- share/man/man7/security.7 | 16 ++++----- sys/i386/linux/imgact_linux.c | 4 +++ sys/kern/imgact_aout.c | 4 +++ sys/kern/imgact_elf.c | 27 ++++++++------ sys/kern/kern_exec.c | 84 ++++++++++++++++++++++++++++++------------- sys/sys/exec.h | 3 +- sys/sys/imgact.h | 1 + sys/vm/vm_map.c | 4 +-- sys/vm/vm_map.h | 9 ++--- 9 files changed, 102 insertions(+), 50 deletions(-) diff --git a/share/man/man7/security.7 b/share/man/man7/security.7 index bb7e120a1d46..1bb5338e54e6 100644 --- a/share/man/man7/security.7 +++ b/share/man/man7/security.7 @@ -28,7 +28,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 28, 2021 +.Dd January 14, 2022 .Dt SECURITY 7 .Os .Sh NAME @@ -1062,19 +1062,19 @@ position-independent (PIE) 32bit binaries. .It Dv kern.elf32.aslr.honor_sbrk Makes ASLR less aggressive and more compatible with old binaries relying on the sbrk area. -.It Dv kern.elf32.aslr.stack_gap -If ASLR is enabled for a binary, a non-zero value creates a randomized -stack gap between strings and the end of the aux vector. -The value is the maximum percentage of main stack to waste on the gap. -Cannot be greater than 50, i.e., at most half of the stack. +.It Dv kern.elf32.aslr.stack +If ASLR is enabled for a binary, a non-zero value enables randomization +of the stack. +Otherwise, the stack is mapped at a fixed location determined by the +process ABI. .It Dv kern.elf64.aslr.enable 64bit binaries ASLR control. .It Dv kern.elf64.aslr.pie_enable 64bit PIE binaries ASLR control. .It Dv kern.elf64.aslr.honor_sbrk 64bit binaries ASLR sbrk compatibility control. -.It Dv kern.elf64.aslr.stack_gap -Controls stack gap for 64bit binaries. +.It Dv kern.elf64.aslr.stack +Controls stack address randomization for 64bit binaries. .It Dv kern.elf32.nxstack Enables non-executable stack for 32bit processes. 
Enabled by default if supported by hardware and corresponding binary. diff --git a/sys/i386/linux/imgact_linux.c b/sys/i386/linux/imgact_linux.c index 661620b6ceaf..85357f41a705 100644 --- a/sys/i386/linux/imgact_linux.c +++ b/sys/i386/linux/imgact_linux.c @@ -213,6 +213,10 @@ exec_linux_imgact(struct image_params *imgp) vmspace->vm_daddr = (caddr_t)(void *)(uintptr_t)(virtual_offset + a_out->a_text); + error = exec_map_stack(imgp); + if (error != 0) + goto fail; + /* Fill in image_params */ imgp->interpreted = 0; imgp->entry_addr = a_out->a_entry; diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index 6eb6e85cc9d7..b7ff48dd8cdc 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -348,6 +348,10 @@ exec_aout_imgact(struct image_params *imgp) vmspace->vm_daddr = (caddr_t) (uintptr_t) (virtual_offset + a_out->a_text); + error = exec_map_stack(imgp); + if (error != 0) + return (error); + /* Fill in image_params */ imgp->interpreted = 0; imgp->entry_addr = a_out->a_entry; diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 4857c848ee67..a8f3c6959b3b 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -188,11 +188,11 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW, &__elfN(aslr_honor_sbrk), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used"); -static int __elfN(aslr_stack_gap) = 3; -SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack_gap, CTLFLAG_RW, - &__elfN(aslr_stack_gap), 0, +static int __elfN(aslr_stack) = 1; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN, + &__elfN(aslr_stack), 0, __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) - ": maximum percentage of main stack to waste on a random gap"); + ": enable stack address randomization"); static int __elfN(sigfastblock) = 1; SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock, @@ -1290,6 +1290,8 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) if (!__elfN(aslr_honor_sbrk) || (imgp->proc->p_flag2 & 
P2_ASLR_IGNSTART) != 0) imgp->map_flags |= MAP_ASLR_IGNSTART; + if (__elfN(aslr_stack)) + imgp->map_flags |= MAP_ASLR_STACK; } if ((!__elfN(allow_wx) && (fctl0 & NT_FREEBSD_FCTL_WXNEEDED) == 0 && @@ -1298,13 +1300,15 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) imgp->map_flags |= MAP_WXORX; error = exec_new_vmspace(imgp, sv); - vmspace = imgp->proc->p_vmspace; - map = &vmspace->vm_map; imgp->proc->p_sysent = sv; - maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK); - if (mapsz >= maxv - vm_map_min(map)) { + vmspace = imgp->proc->p_vmspace; + map = &vmspace->vm_map; + maxv = sv->sv_usrstack; + if ((imgp->map_flags & MAP_ASLR_STACK) == 0) + maxv -= lim_max(td, RLIMIT_STACK); + if (error == 0 && mapsz >= maxv - vm_map_min(map)) { uprintf("Excessive mapping size\n"); error = ENOEXEC; } @@ -1330,8 +1334,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) if (error != 0) goto ret; - entry = (u_long)hdr->e_entry + et_dyn_addr; - /* * We load the dynamic linker where a userland call * to mmap(0, ...) would put it. 
The rationale behind this @@ -1352,6 +1354,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) map->anon_loc = addr; } + entry = (u_long)hdr->e_entry + et_dyn_addr; imgp->entry_addr = entry; if (interp != NULL) { @@ -1372,6 +1375,10 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) } else addr = et_dyn_addr; + error = exec_map_stack(imgp); + if (error != 0) + goto ret; + /* * Construct auxargs table (used by the copyout_auxargs routine) */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 9148daf6182e..49ab7d7b8d27 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -178,19 +178,19 @@ static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; - int error; + vm_offset_t val; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { - unsigned int val; - val = (unsigned int)p->p_sysent->sv_usrstack; - error = SYSCTL_OUT(req, &val, sizeof(val)); - } else + unsigned int val32; + + val32 = round_page((unsigned int)p->p_vmspace->vm_stacktop); + return (SYSCTL_OUT(req, &val32, sizeof(val32))); + } #endif - error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, - sizeof(p->p_sysent->sv_usrstack)); - return error; + val = round_page(p->p_vmspace->vm_stacktop); + return (SYSCTL_OUT(req, &val, sizeof(val))); } static int @@ -1119,9 +1119,8 @@ exec_free_abi_mappings(struct proc *p) } /* - * Destroy old address space, and allocate a new stack. - * The new stack is only sgrowsiz large because it is grown - * automatically on a page fault. + * Run down the current address space and install a new one. Map the shared + * page. 
*/ int exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) @@ -1131,11 +1130,8 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) struct vmspace *vmspace = p->p_vmspace; struct thread *td = curthread; vm_object_t obj; - struct rlimit rlim_stack; - vm_offset_t sv_minuser, stack_addr; + vm_offset_t sv_minuser; vm_map_t map; - vm_prot_t stack_prot; - u_long ssiz; imgp->vmspace_destroyed = true; imgp->sysent = sv; @@ -1172,7 +1168,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) */ vm_map_lock(map); vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR | - MAP_ASLR_IGNSTART | MAP_WXORX); + MAP_ASLR_IGNSTART | MAP_ASLR_STACK | MAP_WXORX); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); @@ -1198,7 +1194,28 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) } } - /* Allocate a new stack */ + return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0); +} + +/* + * Compute the stack size limit and map the main process stack. + */ +int +exec_map_stack(struct image_params *imgp) +{ + struct rlimit rlim_stack; + struct sysentvec *sv; + struct proc *p; + vm_map_t map; + struct vmspace *vmspace; + vm_offset_t stack_addr, stack_top; + u_long ssiz; + int error, find_space, stack_off; + vm_prot_t stack_prot; + + p = imgp->proc; + sv = p->p_sysent; + if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); @@ -1215,25 +1232,44 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) } else { ssiz = maxssiz; } - stack_addr = sv->sv_usrstack - ssiz; - stack_prot = obj != NULL && imgp->stack_prot != 0 ? + + vmspace = p->p_vmspace; + map = &vmspace->vm_map; + + stack_prot = sv->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ? 
imgp->stack_prot : sv->sv_stackprot; - error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, stack_prot, - VM_PROT_ALL, MAP_STACK_GROWS_DOWN); + if ((map->flags & MAP_ASLR_STACK) != 0) { + stack_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + + lim_max(curthread, RLIMIT_DATA)); + find_space = VMFS_ANY_SPACE; + } else { + stack_addr = sv->sv_usrstack - ssiz; + find_space = VMFS_NO_SPACE; + } + error = vm_map_find(map, NULL, 0, &stack_addr, (vm_size_t)ssiz, + sv->sv_usrstack, find_space, stack_prot, VM_PROT_ALL, + MAP_STACK_GROWS_DOWN); if (error != KERN_SUCCESS) { uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x " - "failed mach error %d errno %d\n", (uintmax_t)ssiz, + "failed, mach error %d errno %d\n", (uintmax_t)ssiz, stack_prot, error, vm_mmap_to_errno(error)); return (vm_mmap_to_errno(error)); } - vmspace->vm_stkgap = 0; + + stack_top = stack_addr + ssiz; + if ((map->flags & MAP_ASLR_STACK) != 0) { + /* Randomize within the first page of the stack. */ + arc4rand(&stack_off, sizeof(stack_off), 0); + stack_top -= rounddown2(stack_off & PAGE_MASK, sizeof(void *)); + } /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ - vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)stack_addr; + vmspace->vm_stacktop = stack_top; + vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; return (0); } diff --git a/sys/sys/exec.h b/sys/sys/exec.h index ccd7eb0ecd7d..94d2f698bd63 100644 --- a/sys/sys/exec.h +++ b/sys/sys/exec.h @@ -77,7 +77,8 @@ struct execsw { * Prefer the kern.ps_strings or kern.proc.ps_strings sysctls to this constant. 
*/ #define PS_STRINGS (USRSTACK - sizeof(struct ps_strings)) -#define PROC_PS_STRINGS(p) ((p)->p_sysent->sv_psstrings) +#define PROC_PS_STRINGS(p) \ + ((p)->p_vmspace->vm_stacktop - (p)->p_sysent->sv_psstringssz) int exec_map_first_page(struct image_params *); void exec_unmap_first_page(struct image_params *); diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h index 4c333b74b7b8..a56c15331746 100644 --- a/sys/sys/imgact.h +++ b/sys/sys/imgact.h @@ -114,6 +114,7 @@ int exec_check_permissions(struct image_params *); void exec_cleanup(struct thread *td, struct vmspace *); int exec_copyout_strings(struct image_params *, uintptr_t *); void exec_free_args(struct image_args *); +int exec_map_stack(struct image_params *); int exec_new_vmspace(struct image_params *, struct sysentvec *); void exec_setregs(struct thread *, struct image_params *, uintptr_t); int exec_shell_imgact(struct image_params *); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 87a290b998b9..cf2c96f9ab73 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -343,7 +343,6 @@ vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit) vm->vm_taddr = 0; vm->vm_daddr = 0; vm->vm_maxsaddr = 0; - vm->vm_stkgap = 0; return (vm); } @@ -4266,7 +4265,6 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) vm2->vm_taddr = vm1->vm_taddr; vm2->vm_daddr = vm1->vm_daddr; vm2->vm_maxsaddr = vm1->vm_maxsaddr; - vm2->vm_stkgap = vm1->vm_stkgap; vm_map_lock(old_map); if (old_map->busy) vm_map_wait_busy(old_map); @@ -4285,7 +4283,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) new_map->anon_loc = old_map->anon_loc; new_map->flags |= old_map->flags & (MAP_ASLR | MAP_ASLR_IGNSTART | - MAP_WXORX); + MAP_ASLR_STACK | MAP_WXORX); VM_MAP_ENTRY_FOREACH(old_entry, old_map) { if ((old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 873ff62eec4a..8f318b34e601 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -223,12 +223,13 @@ struct vm_map { 
* vm_flags_t values */ #define MAP_WIREFUTURE 0x01 /* wire all future pages */ -#define MAP_BUSY_WAKEUP 0x02 +#define MAP_BUSY_WAKEUP 0x02 /* thread(s) waiting on busy state */ #define MAP_IS_SUB_MAP 0x04 /* has parent */ #define MAP_ASLR 0x08 /* enabled ASLR */ -#define MAP_ASLR_IGNSTART 0x10 -#define MAP_REPLENISH 0x20 +#define MAP_ASLR_IGNSTART 0x10 /* ASLR ignores data segment */ +#define MAP_REPLENISH 0x20 /* kmapent zone needs to be refilled */ #define MAP_WXORX 0x40 /* enforce W^X */ +#define MAP_ASLR_STACK 0x80 /* stack location is randomized */ #ifdef _KERNEL #if defined(KLD_MODULE) && !defined(KLD_TIED) @@ -293,7 +294,7 @@ struct vmspace { caddr_t vm_taddr; /* (c) user virtual address of text */ caddr_t vm_daddr; /* (c) user virtual address of data */ caddr_t vm_maxsaddr; /* user VA at max stack growth */ - vm_size_t vm_stkgap; /* stack gap size in bytes */ + vm_offset_t vm_stacktop; /* top of the stack, may not be page-aligned */ u_int vm_refcnt; /* number of references */ /* * Keep the PMAP last, so that CPU-specific variations of that