svn commit: r343964 - in head: sys/amd64/amd64 sys/arm/arm sys/compat/freebsd32 sys/compat/ia32 sys/i386/i386 sys/kern sys/sys sys/vm usr.bin/proccontrol

Konstantin Belousov kib at FreeBSD.org
Sun Feb 10 17:19:49 UTC 2019


Author: kib
Date: Sun Feb 10 17:19:45 2019
New Revision: 343964
URL: https://svnweb.freebsd.org/changeset/base/343964

Log:
  Implement Address Space Layout Randomization (ASLR)
  
  With this change, randomization can be enabled for all non-fixed
  mappings.  It means that the base address for the mapping is selected
  with a guaranteed amount of entropy (bits). If the mapping was
  requested to be superpage aligned, the randomization honours the
  superpage attributes.
  
  Although the value of ASLR is diminshing over time as exploit authors
  work out simple ASLR bypass techniques, it elimintates the trivial
  exploitation of certain vulnerabilities, at least in theory.  This
  implementation is relatively small and happens at the correct
  architectural level.  Also, it is not expected to introduce
  regressions in existing cases when turned off (default for now), or
  cause any significant maintaince burden.
  
  The randomization is done on a best-effort basis - that is, the
  allocator falls back to a first fit strategy if fragmentation prevents
  entropy injection.  It is trivial to implement a strong mode where
  failure to guarantee the requested amount of entropy results in
  mapping request failure, but I do not consider that to be usable.
  
  I have not fine-tuned the amount of entropy injected right now. It is
  only a quantitive change that will not change the implementation.  The
  current amount is controlled by aslr_pages_rnd.
  
  To not spoil coalescing optimizations, to reduce the page table
  fragmentation inherent to ASLR, and to keep the transient superpage
  promotion for the malloced memory, locality clustering is implemented
  for anonymous private mappings, which are automatically grouped until
  fragmentation kicks in.  The initial location for the anon group range
  is, of course, randomized.  This is controlled by vm.cluster_anon,
  enabled by default.
  
  The default mode keeps the sbrk area unpopulated by other mappings,
  but this can be turned off, which gives much more breathing bits on
  architectures with small address space, such as i386.  This is tied
  with the question of following an application's hint about the mmap(2)
  base address. Testing shows that ignoring the hint does not affect the
  function of common applications, but I would expect more demanding
  code could break. By default sbrk is preserved and mmap hints are
  satisfied, which can be changed by using the
  kern.elf{32,64}.aslr.honor_sbrk sysctl.
  
  ASLR is enabled on per-ABI basis, and currently it is only allowed on
  FreeBSD native i386 and amd64 (including compat 32bit) ABIs.  Support
  for additional architectures will be added after further testing.
  
  Both per-process and per-image controls are implemented:
  - procctl(2) adds PROC_ASLR_CTL/PROC_ASLR_STATUS;
  - NT_FREEBSD_FCTL_ASLR_DISABLE feature control note bit makes it possible
    to force ASLR off for the given binary.  (A tool to edit the feature
    control note is in development.)
  Global controls are:
  - kern.elf{32,64}.aslr.enable - for non-fixed mappings done by mmap(2);
  - kern.elf{32,64}.aslr.pie_enable - for PIE image activation mappings;
  - kern.elf{32,64}.aslr.honor_sbrk - allow to use sbrk area for mmap(2);
  - vm.cluster_anon - enables anon mapping clustering.
  
  PR:	208580 (exp runs)
  Exp-runs done by:	antoine
  Reviewed by:	markj (previous version)
  Discussed with:	emaste
  Tested by:	pho
  MFC after:	1 month
  Sponsored by:	The FreeBSD Foundation
  Differential revision:	https://reviews.freebsd.org/D5603

Modified:
  head/sys/amd64/amd64/elf_machdep.c
  head/sys/arm/arm/elf_machdep.c
  head/sys/compat/freebsd32/freebsd32_misc.c
  head/sys/compat/ia32/ia32_sysvec.c
  head/sys/i386/i386/elf_machdep.c
  head/sys/kern/imgact_elf.c
  head/sys/kern/kern_exec.c
  head/sys/kern/kern_fork.c
  head/sys/kern/kern_procctl.c
  head/sys/sys/imgact.h
  head/sys/sys/proc.h
  head/sys/sys/procctl.h
  head/sys/sys/sysent.h
  head/sys/vm/vm_map.c
  head/sys/vm/vm_map.h
  head/usr.bin/proccontrol/proccontrol.c

Modified: head/sys/amd64/amd64/elf_machdep.c
==============================================================================
--- head/sys/amd64/amd64/elf_machdep.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/amd64/amd64/elf_machdep.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -73,7 +73,8 @@ struct sysentvec elf64_freebsd_sysvec = {
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
-	.sv_flags	= SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP,
+	.sv_flags	= SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP |
+			    SV_TIMEKEEP,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,

Modified: head/sys/arm/arm/elf_machdep.c
==============================================================================
--- head/sys/arm/arm/elf_machdep.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/arm/arm/elf_machdep.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -82,7 +82,7 @@ struct sysentvec elf32_freebsd_sysvec = {
 	.sv_maxssiz	= NULL,
 	.sv_flags	=
 #if __ARM_ARCH >= 6
-			  SV_SHP | SV_TIMEKEEP |
+			  SV_ASLR | SV_SHP | SV_TIMEKEEP |
 #endif
 			  SV_ABI_FREEBSD | SV_ILP32,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,

Modified: head/sys/compat/freebsd32/freebsd32_misc.c
==============================================================================
--- head/sys/compat/freebsd32/freebsd32_misc.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/compat/freebsd32/freebsd32_misc.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -3328,6 +3328,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_
 	int error, error1, flags, signum;
 
 	switch (uap->com) {
+	case PROC_ASLR_CTL:
 	case PROC_SPROTECT:
 	case PROC_TRACE_CTL:
 	case PROC_TRAPCAP_CTL:
@@ -3359,6 +3360,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_
 			return (error);
 		data = &x.rk;
 		break;
+	case PROC_ASLR_STATUS:
 	case PROC_TRACE_STATUS:
 	case PROC_TRAPCAP_STATUS:
 		data = &flags;
@@ -3387,6 +3389,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_
 		if (error == 0)
 			error = error1;
 		break;
+	case PROC_ASLR_STATUS:
 	case PROC_TRACE_STATUS:
 	case PROC_TRAPCAP_STATUS:
 		if (error == 0)

Modified: head/sys/compat/ia32/ia32_sysvec.c
==============================================================================
--- head/sys/compat/ia32/ia32_sysvec.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/compat/ia32/ia32_sysvec.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -119,7 +119,7 @@ struct sysentvec ia32_freebsd_sysvec = {
 	.sv_setregs	= ia32_setregs,
 	.sv_fixlimit	= ia32_fixlimit,
 	.sv_maxssiz	= &ia32_maxssiz,
-	.sv_flags	= SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 |
+	.sv_flags	= SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 |
 			    SV_SHP | SV_TIMEKEEP,
 	.sv_set_syscall_retval = ia32_set_syscall_retval,
 	.sv_fetch_syscall_args = ia32_fetch_syscall_args,

Modified: head/sys/i386/i386/elf_machdep.c
==============================================================================
--- head/sys/i386/i386/elf_machdep.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/i386/i386/elf_machdep.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -75,8 +75,8 @@ struct sysentvec elf32_freebsd_sysvec = {
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
-	.sv_flags	= SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP |
-			    SV_TIMEKEEP,
+	.sv_flags	= SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 |
+			    SV_SHP | SV_TIMEKEEP,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,

Modified: head/sys/kern/imgact_elf.c
==============================================================================
--- head/sys/kern/imgact_elf.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/kern/imgact_elf.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -136,6 +136,27 @@ SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_R
     "enable execution from readable segments");
 #endif
 
+SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, CTLFLAG_RW, 0,
+    "");
+#define	ASLR_NODE_OID	__CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr)
+
+static int __elfN(aslr_enabled) = 0;
+SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN,
+    &__elfN(aslr_enabled), 0,
+    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
+    ": enable address map randomization");
+
+static int __elfN(pie_aslr_enabled) = 0;
+SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN,
+    &__elfN(pie_aslr_enabled), 0,
+    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
+    ": enable address map randomization for PIE binaries");
+
+static int __elfN(aslr_honor_sbrk) = 1;
+SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW,
+    &__elfN(aslr_honor_sbrk), 0,
+    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used");
+
 static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
 
 #define	trunc_page_ps(va, ps)	rounddown2(va, ps)
@@ -773,6 +794,36 @@ fail:
 	return (error);
 }
 
+static u_long
+__CONCAT(rnd_, __elfN(base))(vm_map_t map __unused, u_long minv, u_long maxv,
+    u_int align)
+{
+	u_long rbase, res;
+
+	MPASS(vm_map_min(map) <= minv);
+	MPASS(maxv <= vm_map_max(map));
+	MPASS(minv < maxv);
+	MPASS(minv + align < maxv);
+	arc4rand(&rbase, sizeof(rbase), 0);
+	res = roundup(minv, (u_long)align) + rbase % (maxv - minv);
+	res &= ~((u_long)align - 1);
+	if (res >= maxv)
+		res -= align;
+	KASSERT(res >= minv,
+	    ("res %#lx < minv %#lx, maxv %#lx rbase %#lx",
+	    res, minv, maxv, rbase));
+	KASSERT(res < maxv,
+	    ("res %#lx > maxv %#lx, minv %#lx rbase %#lx",
+	    res, maxv, minv, rbase));
+	return (res);
+}
+
+/*
+ * Impossible et_dyn_addr initial value indicating that the real base
+ * must be calculated later with some randomization applied.
+ */
+#define	ET_DYN_ADDR_RAND	1
+
 static int
 __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
 {
@@ -781,6 +832,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
 	const Elf_Phdr *phdr;
 	Elf_Auxargs *elf_auxargs;
 	struct vmspace *vmspace;
+	vm_map_t map;
 	const char *err_str, *newinterp;
 	char *interp, *interp_buf, *path;
 	Elf_Brandinfo *brand_info;
@@ -788,6 +840,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
 	vm_prot_t prot;
 	u_long text_size, data_size, total_size, text_addr, data_addr;
 	u_long seg_size, seg_addr, addr, baddr, et_dyn_addr, entry, proghdr;
+	u_long maxalign, mapsz, maxv, maxv1;
 	uint32_t fctl0;
 	int32_t osrel;
 	int error, i, n, interp_name_len, have_interp;
@@ -831,12 +884,17 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
 	err_str = newinterp = NULL;
 	interp = interp_buf = NULL;
 	td = curthread;
+	maxalign = PAGE_SIZE;
+	mapsz = 0;
 
 	for (i = 0; i < hdr->e_phnum; i++) {
 		switch (phdr[i].p_type) {
 		case PT_LOAD:
 			if (n == 0)
 				baddr = phdr[i].p_vaddr;
+			if (phdr[i].p_align > maxalign)
+				maxalign = phdr[i].p_align;
+			mapsz += phdr[i].p_memsz;
 			n++;
 			break;
 		case PT_INTERP:
@@ -897,6 +955,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
 		error = ENOEXEC;
 		goto ret;
 	}
+	sv = brand_info->sysvec;
 	et_dyn_addr = 0;
 	if (hdr->e_type == ET_DYN) {
 		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) {
@@ -908,10 +967,18 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
 		 * Honour the base load address from the dso if it is
 		 * non-zero for some reason.
 		 */
-		if (baddr == 0)
-			et_dyn_addr = ET_DYN_LOAD_ADDR;
+		if (baddr == 0) {
+			if ((sv->sv_flags & SV_ASLR) == 0 ||
+			    (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0)
+				et_dyn_addr = ET_DYN_LOAD_ADDR;
+			else if ((__elfN(pie_aslr_enabled) &&
+			    (imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) ||
+			    (imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0)
+				et_dyn_addr = ET_DYN_ADDR_RAND;
+			else
+				et_dyn_addr = ET_DYN_LOAD_ADDR;
+		}
 	}
-	sv = brand_info->sysvec;
 	if (interp != NULL && brand_info->interp_newpath != NULL)
 		newinterp = brand_info->interp_newpath;
 
@@ -928,9 +995,54 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
 	 */
 	VOP_UNLOCK(imgp->vp, 0);
 
+	/*
+	 * Decide whether to enable randomization of user mappings.
+	 * First, reset user preferences for the setid binaries.
+	 * Then, account for the support of the randomization by the
+	 * ABI, by user preferences, and make special treatment for
+	 * PIE binaries.
+	 */
+	if (imgp->credential_setid) {
+		PROC_LOCK(imgp->proc);
+		imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
+		PROC_UNLOCK(imgp->proc);
+	}
+	if ((sv->sv_flags & SV_ASLR) == 0 ||
+	    (imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 ||
+	    (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) {
+		KASSERT(et_dyn_addr != ET_DYN_ADDR_RAND,
+		    ("et_dyn_addr == RAND and !ASLR"));
+	} else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 ||
+	    (__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) ||
+	    et_dyn_addr == ET_DYN_ADDR_RAND) {
+		imgp->map_flags |= MAP_ASLR;
+		/*
+		 * If user does not care about sbrk, utilize the bss
+		 * grow region for mappings as well.  We can select
+		 * the base for the image anywere and still not suffer
+		 * from the fragmentation.
+		 */
+		if (!__elfN(aslr_honor_sbrk) ||
+		    (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0)
+			imgp->map_flags |= MAP_ASLR_IGNSTART;
+	}
+
 	error = exec_new_vmspace(imgp, sv);
+	vmspace = imgp->proc->p_vmspace;
+	map = &vmspace->vm_map;
+
 	imgp->proc->p_sysent = sv;
 
+	maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK);
+	if (et_dyn_addr == ET_DYN_ADDR_RAND) {
+		KASSERT((map->flags & MAP_ASLR) != 0,
+		    ("ET_DYN_ADDR_RAND but !MAP_ASLR"));
+		et_dyn_addr = __CONCAT(rnd_, __elfN(base))(map,
+		    vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA),
+		    /* reserve half of the address space to interpreter */
+		    maxv / 2, 1UL << flsl(maxalign));
+	}
+
 	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
 	if (error != 0)
 		goto ret;
@@ -1022,7 +1134,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
 		goto ret;
 	}
 
-	vmspace = imgp->proc->p_vmspace;
 	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
 	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
 	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
@@ -1036,6 +1147,14 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
 	 */
 	addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td,
 	    RLIMIT_DATA));
+	if ((map->flags & MAP_ASLR) != 0) {
+		maxv1 = maxv / 2 + addr / 2;
+		MPASS(maxv1 >= addr);	/* No overflow */
+		map->anon_loc = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1,
+		    MAXPAGESIZES > 1 ? pagesizes[1] : pagesizes[0]);
+	} else {
+		map->anon_loc = addr;
+	}
 	PROC_UNLOCK(imgp->proc);
 
 	imgp->entry_addr = entry;
@@ -1043,6 +1162,13 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
 	if (interp != NULL) {
 		have_interp = FALSE;
 		VOP_UNLOCK(imgp->vp, 0);
+		if ((map->flags & MAP_ASLR) != 0) {
+			/* Assume that interpeter fits into 1/4 of AS */
+			maxv1 = maxv / 2 + addr / 2;
+			MPASS(maxv1 >= addr);	/* No overflow */
+			addr = __CONCAT(rnd_, __elfN(base))(map, addr,
+			    maxv1, PAGE_SIZE);
+		}
 		if (brand_info->emul_path != NULL &&
 		    brand_info->emul_path[0] != '\0') {
 			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/kern/kern_exec.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -1104,9 +1104,13 @@ exec_new_vmspace(struct image_params *imgp, struct sys
 		shmexit(vmspace);
 		pmap_remove_pages(vmspace_pmap(vmspace));
 		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
-		/* An exec terminates mlockall(MCL_FUTURE). */
+		/*
+		 * An exec terminates mlockall(MCL_FUTURE), ASLR state
+		 * must be re-evaluated.
+		 */
 		vm_map_lock(map);
-		vm_map_modflags(map, 0, MAP_WIREFUTURE);
+		vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
+		    MAP_ASLR_IGNSTART);
 		vm_map_unlock(map);
 	} else {
 		error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
@@ -1115,6 +1119,7 @@ exec_new_vmspace(struct image_params *imgp, struct sys
 		vmspace = p->p_vmspace;
 		map = &vmspace->vm_map;
 	}
+	map->flags |= imgp->map_flags;
 
 	/* Map a shared page */
 	obj = sv->sv_shared_page_obj;

Modified: head/sys/kern/kern_fork.c
==============================================================================
--- head/sys/kern/kern_fork.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/kern/kern_fork.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -466,7 +466,8 @@ do_fork(struct thread *td, struct fork_req *fr, struct
 	 * Increase reference counts on shared objects.
 	 */
 	p2->p_flag = P_INMEM;
-	p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
+	p2->p_flag2 = p1->p_flag2 & (P2_ASLR_DISABLE | P2_ASLR_ENABLE |
+	    P2_ASLR_IGNSTART | P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
 	p2->p_swtick = ticks;
 	if (p1->p_flag & P_PROFIL)
 		startprofclock(p2);

Modified: head/sys/kern/kern_procctl.c
==============================================================================
--- head/sys/kern/kern_procctl.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/kern/kern_procctl.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -43,6 +43,11 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysproto.h>
 #include <sys/wait.h>
 
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+
 static int
 protect_setchild(struct thread *td, struct proc *p, int flags)
 {
@@ -413,6 +418,62 @@ trapcap_status(struct thread *td, struct proc *p, int 
 	return (0);
 }
 
+static int
+aslr_ctl(struct thread *td, struct proc *p, int state)
+{
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	switch (state) {
+	case PROC_ASLR_FORCE_ENABLE:
+		p->p_flag2 &= ~P2_ASLR_DISABLE;
+		p->p_flag2 |= P2_ASLR_ENABLE;
+		break;
+	case PROC_ASLR_FORCE_DISABLE:
+		p->p_flag2 |= P2_ASLR_DISABLE;
+		p->p_flag2 &= ~P2_ASLR_ENABLE;
+		break;
+	case PROC_ASLR_NOFORCE:
+		p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
+		break;
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+
+static int
+aslr_status(struct thread *td, struct proc *p, int *data)
+{
+	struct vmspace *vm;
+	int d;
+
+	switch (p->p_flag2 & (P2_ASLR_ENABLE | P2_ASLR_DISABLE)) {
+	case 0:
+		d = PROC_ASLR_NOFORCE;
+		break;
+	case P2_ASLR_ENABLE:
+		d = PROC_ASLR_FORCE_ENABLE;
+		break;
+	case P2_ASLR_DISABLE:
+		d = PROC_ASLR_FORCE_DISABLE;
+		break;
+	}
+	if ((p->p_flag & P_WEXIT) == 0) {
+		_PHOLD(p);
+		PROC_UNLOCK(p);
+		vm = vmspace_acquire_ref(p);
+		if (vm != NULL && (vm->vm_map.flags & MAP_ASLR) != 0) {
+			d |= PROC_ASLR_ACTIVE;
+			vmspace_free(vm);
+		}
+		PROC_LOCK(p);
+		_PRELE(p);
+	}
+	*data = d;
+	return (0);
+}
+
 #ifndef _SYS_SYSPROTO_H_
 struct procctl_args {
 	idtype_t idtype;
@@ -434,6 +495,7 @@ sys_procctl(struct thread *td, struct procctl_args *ua
 	int error, error1, flags, signum;
 
 	switch (uap->com) {
+	case PROC_ASLR_CTL:
 	case PROC_SPROTECT:
 	case PROC_TRACE_CTL:
 	case PROC_TRAPCAP_CTL:
@@ -463,6 +525,7 @@ sys_procctl(struct thread *td, struct procctl_args *ua
 			return (error);
 		data = &x.rk;
 		break;
+	case PROC_ASLR_STATUS:
 	case PROC_TRACE_STATUS:
 	case PROC_TRAPCAP_STATUS:
 		data = &flags;
@@ -490,6 +553,7 @@ sys_procctl(struct thread *td, struct procctl_args *ua
 		if (error == 0)
 			error = error1;
 		break;
+	case PROC_ASLR_STATUS:
 	case PROC_TRACE_STATUS:
 	case PROC_TRAPCAP_STATUS:
 		if (error == 0)
@@ -509,6 +573,10 @@ kern_procctl_single(struct thread *td, struct proc *p,
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	switch (com) {
+	case PROC_ASLR_CTL:
+		return (aslr_ctl(td, p, *(int *)data));
+	case PROC_ASLR_STATUS:
+		return (aslr_status(td, p, data));
 	case PROC_SPROTECT:
 		return (protect_set(td, p, *(int *)data));
 	case PROC_REAP_ACQUIRE:
@@ -544,6 +612,8 @@ kern_procctl(struct thread *td, idtype_t idtype, id_t 
 	bool tree_locked;
 
 	switch (com) {
+	case PROC_ASLR_CTL:
+	case PROC_ASLR_STATUS:
 	case PROC_REAP_ACQUIRE:
 	case PROC_REAP_RELEASE:
 	case PROC_REAP_STATUS:
@@ -593,6 +663,8 @@ kern_procctl(struct thread *td, idtype_t idtype, id_t 
 		sx_xlock(&proctree_lock);
 		tree_locked = true;
 		break;
+	case PROC_ASLR_CTL:
+	case PROC_ASLR_STATUS:
 	case PROC_TRACE_STATUS:
 	case PROC_TRAPCAP_STATUS:
 		tree_locked = false;

Modified: head/sys/sys/imgact.h
==============================================================================
--- head/sys/sys/imgact.h	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/sys/imgact.h	Sun Feb 10 17:19:45 2019	(r343964)
@@ -89,6 +89,7 @@ struct image_params {
 	u_long stack_sz;
 	struct ucred *newcred;		/* new credentials if changing */
 	bool credential_setid;		/* true if becoming setid */
+	u_int map_flags;
 };
 
 #ifdef _KERNEL

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/sys/proc.h	Sun Feb 10 17:19:45 2019	(r343964)
@@ -756,6 +756,9 @@ struct proc {
 #define	P2_AST_SU	0x00000008	/* Handles SU ast for kthreads. */
 #define	P2_PTRACE_FSTP	0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */
 #define	P2_TRAPCAP	0x00000020	/* SIGTRAP on ENOTCAPABLE */
+#define	P2_ASLR_ENABLE	0x00000040	/* Force enable ASLR. */
+#define	P2_ASLR_DISABLE	0x00000080	/* Force disable ASLR. */
+#define	P2_ASLR_IGNSTART 0x00000100	/* Enable ASLR to consume sbrk area. */
 
 /* Flags protected by proctree_lock, kept in p_treeflags. */
 #define	P_TREE_ORPHANED		0x00000001	/* Reparented, on orphan list */

Modified: head/sys/sys/procctl.h
==============================================================================
--- head/sys/sys/procctl.h	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/sys/procctl.h	Sun Feb 10 17:19:45 2019	(r343964)
@@ -53,6 +53,8 @@
 #define	PROC_TRAPCAP_STATUS	10	/* query trap capability status */
 #define	PROC_PDEATHSIG_CTL	11	/* set parent death signal */
 #define	PROC_PDEATHSIG_STATUS	12	/* get parent death signal */
+#define	PROC_ASLR_CTL		13	/* en/dis ASLR */
+#define	PROC_ASLR_STATUS	14	/* query ASLR status */
 
 /* Operations for PROC_SPROTECT (passed in integer arg). */
 #define	PPROT_OP(x)	((x) & 0xf)
@@ -115,6 +117,11 @@ struct procctl_reaper_kill {
 
 #define	PROC_TRAPCAP_CTL_ENABLE		1
 #define	PROC_TRAPCAP_CTL_DISABLE	2
+
+#define	PROC_ASLR_FORCE_ENABLE		1
+#define	PROC_ASLR_FORCE_DISABLE		2
+#define	PROC_ASLR_NOFORCE		3
+#define	PROC_ASLR_ACTIVE		0x80000000
 
 #ifndef _KERNEL
 __BEGIN_DECLS

Modified: head/sys/sys/sysent.h
==============================================================================
--- head/sys/sys/sysent.h	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/sys/sysent.h	Sun Feb 10 17:19:45 2019	(r343964)
@@ -144,6 +144,7 @@ struct sysentvec {
 #define	SV_SHP		0x010000	/* Shared page. */
 #define	SV_CAPSICUM	0x020000	/* Force cap_enter() on startup. */
 #define	SV_TIMEKEEP	0x040000	/* Shared page timehands. */
+#define	SV_ASLR		0x080000	/* ASLR allowed. */
 
 #define	SV_ABI_MASK	0xff
 #define	SV_ABI_ERRNO(p, e)	((p)->p_sysent->sv_errsize <= 0 ? e :	\

Modified: head/sys/vm/vm_map.c
==============================================================================
--- head/sys/vm/vm_map.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/vm/vm_map.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -801,6 +801,7 @@ _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t mi
 	map->root = NULL;
 	map->timestamp = 0;
 	map->busy = 0;
+	map->anon_loc = 0;
 }
 
 void
@@ -1480,6 +1481,21 @@ vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooff
 	return (result);
 }
 
+static const int aslr_pages_rnd_64[2] = {0x1000, 0x10};
+static const int aslr_pages_rnd_32[2] = {0x100, 0x4};
+
+static int cluster_anon = 1;
+SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW,
+    &cluster_anon, 0,
+    "Cluster anonymous mappings");
+
+static long aslr_restarts;
+SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD,
+    &aslr_restarts, 0,
+    "Number of aslr failures");
+
+#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
+
 /*
  * Searches for the specified amount of free space in the given map with the
  * specified alignment.  Performs an address-ordered, first-fit search from
@@ -1559,8 +1575,9 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffs
 	    vm_size_t length, vm_offset_t max_addr, int find_space,
 	    vm_prot_t prot, vm_prot_t max, int cow)
 {
-	vm_offset_t alignment, min_addr;
-	int rv;
+	vm_offset_t alignment, curr_min_addr, min_addr;
+	int gap, pidx, rv, try;
+	bool cluster, en_aslr, update_anon;
 
 	KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
 	    object == NULL,
@@ -1575,24 +1592,96 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffs
 		alignment = (vm_offset_t)1 << (find_space >> 8);
 	} else
 		alignment = 0;
+	en_aslr = (map->flags & MAP_ASLR) != 0;
+	update_anon = cluster = cluster_anon != 0 &&
+	    (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 &&
+	    find_space != VMFS_NO_SPACE && object == NULL &&
+	    (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP |
+	    MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE;
+	curr_min_addr = min_addr = *addr;
+	if (en_aslr && min_addr == 0 && !cluster &&
+	    find_space != VMFS_NO_SPACE &&
+	    (map->flags & MAP_ASLR_IGNSTART) != 0)
+		curr_min_addr = min_addr = vm_map_min(map);
+	try = 0;
 	vm_map_lock(map);
+	if (cluster) {
+		curr_min_addr = map->anon_loc;
+		if (curr_min_addr == 0)
+			cluster = false;
+	}
 	if (find_space != VMFS_NO_SPACE) {
 		KASSERT(find_space == VMFS_ANY_SPACE ||
 		    find_space == VMFS_OPTIMAL_SPACE ||
 		    find_space == VMFS_SUPER_SPACE ||
 		    alignment != 0, ("unexpected VMFS flag"));
-		min_addr = *addr;
 again:
-		if (vm_map_findspace(map, min_addr, length, addr) ||
+		/*
+		 * When creating an anonymous mapping, try clustering
+		 * with an existing anonymous mapping first.
+		 *
+		 * We make up to two attempts to find address space
+		 * for a given find_space value. The first attempt may
+		 * apply randomization or may cluster with an existing
+		 * anonymous mapping. If this first attempt fails,
+		 * perform a first-fit search of the available address
+		 * space.
+		 *
+		 * If all tries failed, and find_space is
+		 * VMFS_OPTIMAL_SPACE, fallback to VMFS_ANY_SPACE.
+		 * Again enable clustering and randomization.
+		 */
+		try++;
+		MPASS(try <= 2);
+
+		if (try == 2) {
+			/*
+			 * Second try: we failed either to find a
+			 * suitable region for randomizing the
+			 * allocation, or to cluster with an existing
+			 * mapping.  Retry with free run.
+			 */
+			curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ?
+			    vm_map_min(map) : min_addr;
+			atomic_add_long(&aslr_restarts, 1);
+		}
+
+		if (try == 1 && en_aslr && !cluster) {
+			/*
+			 * Find space for allocation, including
+			 * gap needed for later randomization.
+			 */
+			pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 &&
+			    (find_space == VMFS_SUPER_SPACE || find_space ==
+			    VMFS_OPTIMAL_SPACE) ? 1 : 0;
+			gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR &&
+			    (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ?
+			    aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx];
+			if (vm_map_findspace(map, curr_min_addr, length +
+			    gap * pagesizes[pidx], addr) ||
+			    (max_addr != 0 && *addr + length > max_addr))
+				goto again;
+			/* And randomize the start address. */
+			*addr += (arc4random() % gap) * pagesizes[pidx];
+		} else if (vm_map_findspace(map, curr_min_addr, length, addr) ||
 		    (max_addr != 0 && *addr + length > max_addr)) {
+			if (cluster) {
+				cluster = false;
+				MPASS(try == 1);
+				goto again;
+			}
 			rv = KERN_NO_SPACE;
 			goto done;
 		}
+
 		if (find_space != VMFS_ANY_SPACE &&
 		    (rv = vm_map_alignspace(map, object, offset, addr, length,
 		    max_addr, alignment)) != KERN_SUCCESS) {
 			if (find_space == VMFS_OPTIMAL_SPACE) {
 				find_space = VMFS_ANY_SPACE;
+				curr_min_addr = min_addr;
+				cluster = update_anon;
+				try = 0;
 				goto again;
 			}
 			goto done;
@@ -1613,6 +1702,8 @@ again:
 		rv = vm_map_insert(map, object, offset, *addr, *addr + length,
 		    prot, max, cow);
 	}
+	if (rv == KERN_SUCCESS && update_anon)
+		map->anon_loc = *addr + length;
 done:
 	vm_map_unlock(map);
 	return (rv);
@@ -1922,8 +2013,14 @@ vm_map_submap(
 	vm_map_t submap)
 {
 	vm_map_entry_t entry;
-	int result = KERN_INVALID_ARGUMENT;
+	int result;
 
+	result = KERN_INVALID_ARGUMENT;
+
+	vm_map_lock(submap);
+	submap->flags |= MAP_IS_SUB_MAP;
+	vm_map_unlock(submap);
+
 	vm_map_lock(map);
 
 	VM_MAP_RANGE_CHECK(map, start, end);
@@ -1944,6 +2041,11 @@ vm_map_submap(
 	}
 	vm_map_unlock(map);
 
+	if (result != KERN_SUCCESS) {
+		vm_map_lock(submap);
+		submap->flags &= ~MAP_IS_SUB_MAP;
+		vm_map_unlock(submap);
+	}
 	return (result);
 }
 
@@ -3170,6 +3272,9 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs
 		    entry->object.vm_object != NULL)
 			pmap_remove(map->pmap, entry->start, entry->end);
 
+		if (entry->end == map->anon_loc)
+			map->anon_loc = entry->start;
+
 		/*
 		 * Delete the entry only after removing all pmap
 		 * entries pointing to its pages.  (Otherwise, its
@@ -3443,6 +3548,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
 	locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
 	KASSERT(locked, ("vmspace_fork: lock failed"));
 
+	new_map->anon_loc = old_map->anon_loc;
 	old_entry = old_map->header.next;
 
 	while (old_entry != &old_map->header) {

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/sys/vm/vm_map.h	Sun Feb 10 17:19:45 2019	(r343964)
@@ -202,6 +202,7 @@ struct vm_map {
 	vm_flags_t flags;		/* flags for this vm_map */
 	vm_map_entry_t root;		/* Root of a binary search tree */
 	pmap_t pmap;			/* (c) Physical map */
+	vm_offset_t anon_loc;
 	int busy;
 };
 
@@ -210,6 +211,9 @@ struct vm_map {
  */
 #define MAP_WIREFUTURE		0x01	/* wire all future pages */
 #define	MAP_BUSY_WAKEUP		0x02
+#define	MAP_IS_SUB_MAP		0x04	/* has parent */
+#define	MAP_ASLR		0x08	/* enabled ASLR */
+#define	MAP_ASLR_IGNSTART	0x10
 
 #ifdef	_KERNEL
 #if defined(KLD_MODULE) && !defined(KLD_TIED)

Modified: head/usr.bin/proccontrol/proccontrol.c
==============================================================================
--- head/usr.bin/proccontrol/proccontrol.c	Sun Feb 10 14:30:15 2019	(r343963)
+++ head/usr.bin/proccontrol/proccontrol.c	Sun Feb 10 17:19:45 2019	(r343964)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <unistd.h>
 
 enum {
+	MODE_ASLR,
 	MODE_INVALID,
 	MODE_TRACE,
 	MODE_TRAPCAP,
@@ -62,7 +63,7 @@ static void __dead2
 usage(void)
 {
 
-	fprintf(stderr, "Usage: proccontrol -m (trace|trapcap) [-q] "
+	fprintf(stderr, "Usage: proccontrol -m (aslr|trace|trapcap) [-q] "
 	    "[-s (enable|disable)] [-p pid | command]\n");
 	exit(1);
 }
@@ -81,7 +82,9 @@ main(int argc, char *argv[])
 	while ((ch = getopt(argc, argv, "m:qs:p:")) != -1) {
 		switch (ch) {
 		case 'm':
-			if (strcmp(optarg, "trace") == 0)
+			if (strcmp(optarg, "aslr") == 0)
+				mode = MODE_ASLR;
+			else if (strcmp(optarg, "trace") == 0)
 				mode = MODE_TRACE;
 			else if (strcmp(optarg, "trapcap") == 0)
 				mode = MODE_TRAPCAP;
@@ -121,6 +124,9 @@ main(int argc, char *argv[])
 
 	if (query) {
 		switch (mode) {
+		case MODE_ASLR:
+			error = procctl(P_PID, pid, PROC_ASLR_STATUS, &arg);
+			break;
 		case MODE_TRACE:
 			error = procctl(P_PID, pid, PROC_TRACE_STATUS, &arg);
 			break;
@@ -134,6 +140,23 @@ main(int argc, char *argv[])
 		if (error != 0)
 			err(1, "procctl status");
 		switch (mode) {
+		case MODE_ASLR:
+			switch (arg & ~PROC_ASLR_ACTIVE) {
+			case PROC_ASLR_FORCE_ENABLE:
+				printf("force enabled");
+				break;
+			case PROC_ASLR_FORCE_DISABLE:
+				printf("force disabled");
+				break;
+			case PROC_ASLR_NOFORCE:
+				printf("not forced");
+				break;
+			}
+			if ((arg & PROC_ASLR_ACTIVE) != 0)
+				printf(", active\n");
+			else
+				printf(", not active\n");
+			break;
 		case MODE_TRACE:
 			if (arg == -1)
 				printf("disabled\n");
@@ -155,6 +178,11 @@ main(int argc, char *argv[])
 		}
 	} else {
 		switch (mode) {
+		case MODE_ASLR:
+			arg = enable ? PROC_ASLR_FORCE_ENABLE :
+			    PROC_ASLR_FORCE_DISABLE;
+			error = procctl(P_PID, pid, PROC_ASLR_CTL, &arg);
+			break;
 		case MODE_TRACE:
 			arg = enable ? PROC_TRACE_CTL_ENABLE :
 			    PROC_TRACE_CTL_DISABLE;


More information about the svn-src-head mailing list