svn commit: r320666 - in stable/11: lib/libc/sys sys/sys sys/vm

Konstantin Belousov <kib@FreeBSD.org>
Wed Jul 5 06:18:02 UTC 2017


Author: kib
Date: Wed Jul  5 06:18:00 2017
New Revision: 320666
URL: https://svnweb.freebsd.org/changeset/base/320666

Log:
  Add MAP_GUARD and use it for stack grow area protection.
  Bump __FreeBSD_version.
  
  MFC r320317:
  Implement address space guards.
  
  MFC r320338:
  Remove stale part of the comment.
  
  MFC r320339:
  Correctly handle small MAP_STACK requests.
  
  MFC r320344:
  For now, allow mprotect(2) over the guards to succeed regardless of
  the requested protection.
  
  MFC r320430:
  Treat the addr argument for mmap(2) request without MAP_FIXED flag as
  a hint.
  
  MFC r320560 (by alc):
  Modify vm_map_growstack() to protect itself from the possibility of the
  gap entry in the vm map being smaller than the sysctl-derived stack guard
  size.
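
A minimal sketch of the new interface, using only what the updated
mmap.2 text below documents (the 1 MiB size is illustrative):

#include <sys/mman.h>
#include <err.h>

int
main(void)
{
	void *g;

	/*
	 * Reserve 1 MiB of address space.  MAP_GUARD requires
	 * fd == -1, offset == 0 and prot == PROT_NONE; anything
	 * else fails with EINVAL.
	 */
	g = mmap(NULL, 1 << 20, PROT_NONE, MAP_GUARD, -1, 0);
	if (g == MAP_FAILED)
		err(1, "mmap(MAP_GUARD)");

	/*
	 * Any access to the range now delivers SIGSEGV to the
	 * accessing thread; the guard is destroyed with munmap(2).
	 */
	if (munmap(g, 1 << 20) != 0)
		err(1, "munmap");
	return (0);
}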

Modified:
  stable/11/lib/libc/sys/mmap.2
  stable/11/lib/libc/sys/munmap.2
  stable/11/sys/sys/mman.h
  stable/11/sys/sys/param.h
  stable/11/sys/vm/vm.h
  stable/11/sys/vm/vm_fault.c
  stable/11/sys/vm/vm_map.c
  stable/11/sys/vm/vm_map.h
  stable/11/sys/vm/vm_mmap.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/lib/libc/sys/mmap.2
==============================================================================
--- stable/11/lib/libc/sys/mmap.2	Wed Jul  5 06:12:21 2017	(r320665)
+++ stable/11/lib/libc/sys/mmap.2	Wed Jul  5 06:18:00 2017	(r320666)
@@ -199,6 +199,21 @@ In contrast, if
 .Dv MAP_EXCL
 is specified, the request will fail if a mapping
 already exists within the range.
+.It Dv MAP_GUARD
+Instead of a mapping, create a guard of the specified size.
+Guards allow a process to create reservations in its address space,
+which can later be replaced by actual mappings.
+.Pp
+.Fn mmap
+will not create mappings in the address range of a guard unless
+the request specifies
+.Dv MAP_FIXED .
+Guards can be destroyed with
+.Xr munmap 2 .
+Any memory access by a thread to the guarded range results
+in the delivery of a
+.Dv SIGSEGV
+signal to that thread.
 .It Dv MAP_NOCORE
 Region is not included in a core file.
 .It Dv MAP_NOSYNC
@@ -303,6 +318,7 @@ must include at least
 .Dv PROT_READ
 and
 .Dv PROT_WRITE .
+.Pp
 This option creates
 a memory region that grows to at most
 .Fa len
@@ -313,6 +329,10 @@ stack top is the starting address returned by the call
 bytes.
 The bottom of the stack at maximum growth is the starting
 address returned by the call.
+The system uses guards to prevent the inadvertent use of
+regions into which stacks created with
+.Dv MAP_STACK
+will automatically grow, without mapping the whole stack in advance.
 .El
 .Pp
 The
@@ -406,6 +426,7 @@ were specified.
 .It Bq Er EINVAL
 None of
 .Dv MAP_ANON ,
+.Dv MAP_GUARD ,
 .Dv MAP_PRIVATE ,
 .Dv MAP_SHARED ,
 or
@@ -455,6 +476,25 @@ were specified, but the requested region is already us
 was specified, but
 .Dv MAP_FIXED
 was not.
+.It Bq Er EINVAL
+.Dv MAP_GUARD
+was specified, but the
+.Fa offset
+argument was not zero, the
+.Fa fd
+argument was not -1, or the
+.Fa prot
+argument was not
+.Dv PROT_NONE .
+.It Bq Er EINVAL
+.Dv MAP_GUARD
+was specified together with one of the flags
+.Dv MAP_ANON ,
+.Dv MAP_PREFAULT ,
+.Dv MAP_PREFAULT_READ ,
+.Dv MAP_PRIVATE ,
+.Dv MAP_SHARED ,
+.Dv MAP_STACK .
 .It Bq Er ENODEV
 .Dv MAP_ANON
 has not been specified and
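
The reservation pattern described under MAP_GUARD above, sketched under
the same assumptions (window and commit sizes are illustrative):

#include <sys/mman.h>
#include <err.h>

/*
 * Reserve a 16 MiB window, then commit the first 4 MiB of it.
 * Per the text above, only a MAP_FIXED request may replace part
 * of a guard with a real mapping.
 */
static void *
reserve_and_commit(void)
{
	void *res, *p;

	res = mmap(NULL, 16 << 20, PROT_NONE, MAP_GUARD, -1, 0);
	if (res == MAP_FAILED)
		err(1, "mmap(MAP_GUARD)");
	p = mmap(res, 4 << 20, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap(MAP_FIXED)");
	/* The remaining 12 MiB stay guarded until replaced or unmapped. */
	return (p);
}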

Modified: stable/11/lib/libc/sys/munmap.2
==============================================================================
--- stable/11/lib/libc/sys/munmap.2	Wed Jul  5 06:12:21 2017	(r320665)
+++ stable/11/lib/libc/sys/munmap.2	Wed Jul  5 06:18:00 2017	(r320666)
@@ -28,7 +28,7 @@
 .\"	@(#)munmap.2	8.3 (Berkeley) 5/27/94
 .\" $FreeBSD$
 .\"
-.Dd May 27, 1994
+.Dd June 22, 2017
 .Dt MUNMAP 2
 .Os
 .Sh NAME
@@ -44,7 +44,7 @@
 The
 .Fn munmap
 system call
-deletes the mappings for the specified address range,
+deletes the mappings and guards for the specified address range,
 and causes further references to addresses within the range
 to generate invalid memory references.
 .Sh RETURN VALUES

Modified: stable/11/sys/sys/mman.h
==============================================================================
--- stable/11/sys/sys/mman.h	Wed Jul  5 06:12:21 2017	(r320665)
+++ stable/11/sys/sys/mman.h	Wed Jul  5 06:18:00 2017	(r320666)
@@ -90,6 +90,7 @@
 /*
  * Extended flags
  */
+#define	MAP_GUARD	 0x00002000 /* reserve but don't map address range */
 #define	MAP_EXCL	 0x00004000 /* for MAP_FIXED, fail if address is used */
 #define	MAP_NOCORE	 0x00020000 /* dont include these pages in a coredump */
 #define	MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */

Modified: stable/11/sys/sys/param.h
==============================================================================
--- stable/11/sys/sys/param.h	Wed Jul  5 06:12:21 2017	(r320665)
+++ stable/11/sys/sys/param.h	Wed Jul  5 06:18:00 2017	(r320666)
@@ -58,7 +58,7 @@
  *		in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1101500	/* Master, propagated to newvers */
+#define __FreeBSD_version 1101501	/* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
@@ -76,12 +76,14 @@
 #undef __FreeBSD_kernel__
 #define __FreeBSD_kernel__
 
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(IN_RTLD)
 #define	P_OSREL_SIGWAIT			700000
 #define	P_OSREL_SIGSEGV			700004
 #define	P_OSREL_MAP_ANON		800104
 #define	P_OSREL_MAP_FSTRICT		1100036
 #define	P_OSREL_SHUTDOWN_ENOTCONN	1100077
+#define	P_OSREL_MAP_GUARD		1200035
+#define	P_OSREL_MAP_GUARD_11		1101501
 
 #define	P_OSREL_MAJOR(x)		((x) / 100000)
 #endif
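
Because stable/11 version numbers (110xxxx) are numerically below
head's, a portable compile-time check for MAP_GUARD needs both ranges;
a sketch mirroring the two P_OSREL values above:

#include <sys/param.h>

#if (__FreeBSD_version >= 1101501 && __FreeBSD_version < 1200000) || \
    __FreeBSD_version >= 1200035
#define	HAVE_MAP_GUARD	1	/* MAP_GUARD is available */
#else
#define	HAVE_MAP_GUARD	0
#endif

The new IN_RTLD case above exists so that the runtime linker can make
the analogous decision at run time against P_OSREL_MAP_GUARD (or, on
this branch, P_OSREL_MAP_GUARD_11).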

Modified: stable/11/sys/vm/vm.h
==============================================================================
--- stable/11/sys/vm/vm.h	Wed Jul  5 06:12:21 2017	(r320665)
+++ stable/11/sys/vm/vm.h	Wed Jul  5 06:18:00 2017	(r320666)
@@ -78,6 +78,7 @@ typedef u_char vm_prot_t;	/* protection codes */
 #define	VM_PROT_WRITE		((vm_prot_t) 0x02)
 #define	VM_PROT_EXECUTE		((vm_prot_t) 0x04)
 #define	VM_PROT_COPY		((vm_prot_t) 0x08)	/* copy-on-read */
+#define	VM_PROT_FAULT_LOOKUP	((vm_prot_t) 0x010)
 
 #define	VM_PROT_ALL		(VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
 #define VM_PROT_RW		(VM_PROT_READ|VM_PROT_WRITE)

Modified: stable/11/sys/vm/vm_fault.c
==============================================================================
--- stable/11/sys/vm/vm_fault.c	Wed Jul  5 06:12:21 2017	(r320665)
+++ stable/11/sys/vm/vm_fault.c	Wed Jul  5 06:18:00 2017	(r320666)
@@ -495,13 +495,12 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot
 	int locked, nera, result, rv;
 	u_char behavior;
 	boolean_t wired;	/* Passed by reference. */
-	bool dead, growstack, hardfault, is_first_object_locked;
+	bool dead, hardfault, is_first_object_locked;
 
 	PCPU_INC(cnt.v_vm_faults);
 	fs.vp = NULL;
 	faultcount = 0;
 	nera = -1;
-	growstack = true;
 	hardfault = false;
 
 RetryFault:;
@@ -511,17 +510,10 @@ RetryFault:;
 	 * search.
 	 */
 	fs.map = map;
-	result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
-	    &fs.first_object, &fs.first_pindex, &prot, &wired);
+	result = vm_map_lookup(&fs.map, vaddr, fault_type |
+	    VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object,
+	    &fs.first_pindex, &prot, &wired);
 	if (result != KERN_SUCCESS) {
-		if (growstack && result == KERN_INVALID_ADDRESS &&
-		    map != kernel_map) {
-			result = vm_map_growstack(curproc, vaddr);
-			if (result != KERN_SUCCESS)
-				return (KERN_FAILURE);
-			growstack = false;
-			goto RetryFault;
-		}
 		unlock_vp(&fs);
 		return (result);
 	}
@@ -546,6 +538,8 @@ RetryFault:;
 			vm_map_unlock(fs.map);
 		goto RetryFault;
 	}
+
+	MPASS((fs.entry->eflags & MAP_ENTRY_GUARD) == 0);
 
 	if (wired)
 		fault_type = prot | (fault_type & VM_PROT_COPY);

Modified: stable/11/sys/vm/vm_map.c
==============================================================================
--- stable/11/sys/vm/vm_map.c	Wed Jul  5 06:12:21 2017	(r320665)
+++ stable/11/sys/vm/vm_map.c	Wed Jul  5 06:18:00 2017	(r320666)
@@ -133,6 +133,8 @@ static void _vm_map_init(vm_map_t map, pmap_t pmap, vm
 static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
 static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
 static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
+static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
+    vm_map_entry_t gap_entry);
 static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags);
 #ifdef INVARIANTS
@@ -1214,6 +1216,10 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
 	if (prev_entry->next != &map->header && prev_entry->next->start < end)
 		return (KERN_NO_SPACE);
 
+	if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL ||
+	    max != VM_PROT_NONE))
+		return (KERN_INVALID_ARGUMENT);
+
 	protoeflags = 0;
 	if (cow & MAP_COPY_ON_WRITE)
 		protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY;
@@ -1229,13 +1235,19 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
 		protoeflags |= MAP_ENTRY_GROWS_UP;
 	if (cow & MAP_VN_WRITECOUNT)
 		protoeflags |= MAP_ENTRY_VN_WRITECNT;
+	if ((cow & MAP_CREATE_GUARD) != 0)
+		protoeflags |= MAP_ENTRY_GUARD;
+	if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)
+		protoeflags |= MAP_ENTRY_STACK_GAP_DN;
+	if ((cow & MAP_CREATE_STACK_GAP_UP) != 0)
+		protoeflags |= MAP_ENTRY_STACK_GAP_UP;
 	if (cow & MAP_INHERIT_SHARE)
 		inheritance = VM_INHERIT_SHARE;
 	else
 		inheritance = VM_INHERIT_DEFAULT;
 
 	cred = NULL;
-	if (cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT))
+	if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
 		goto charged;
 	if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
 	    ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
@@ -1284,7 +1296,8 @@ charged:
 		if (prev_entry->inheritance == inheritance &&
 		    prev_entry->protection == prot &&
 		    prev_entry->max_protection == max) {
-			map->size += end - prev_entry->end;
+			if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0)
+				map->size += end - prev_entry->end;
 			prev_entry->end = end;
 			vm_map_entry_resize_free(map, prev_entry);
 			vm_map_simplify_entry(map, prev_entry);
@@ -1321,7 +1334,6 @@ charged:
 	new_entry->eflags = protoeflags;
 	new_entry->object.vm_object = object;
 	new_entry->offset = offset;
-	new_entry->avail_ssize = 0;
 
 	new_entry->inheritance = inheritance;
 	new_entry->protection = prot;
@@ -1339,7 +1351,8 @@ charged:
 	 * Insert the new entry into the list
 	 */
 	vm_map_entry_link(map, prev_entry, new_entry);
-	map->size += new_entry->end - new_entry->start;
+	if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0)
+		map->size += new_entry->end - new_entry->start;
 
 	/*
 	 * Try to coalesce the new entry with both the previous and next
@@ -1543,6 +1556,25 @@ again:
 	return (result);
 }
 
+int
+vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
+    vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr,
+    vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max,
+    int cow)
+{
+	vm_offset_t hint;
+	int rv;
+
+	hint = *addr;
+	for (;;) {
+		rv = vm_map_find(map, object, offset, addr, length, max_addr,
+		    find_space, prot, max, cow);
+		if (rv == KERN_SUCCESS || min_addr >= hint)
+			return (rv);
+		*addr = min_addr;
+	}
+}
+
 /*
  *	vm_map_simplify_entry:
  *
@@ -1674,7 +1706,8 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry,
 	 * map.  This is a bit of a hack, but is also about the best place to
 	 * put this improvement.
 	 */
-	if (entry->object.vm_object == NULL && !map->system_map) {
+	if (entry->object.vm_object == NULL && !map->system_map &&
+	    (entry->eflags & MAP_ENTRY_GUARD) == 0) {
 		vm_object_t object;
 		object = vm_object_allocate(OBJT_DEFAULT,
 				atop(entry->end - entry->start));
@@ -1753,7 +1786,8 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, v
 	 * map.  This is a bit of a hack, but is also about the best place to
 	 * put this improvement.
 	 */
-	if (entry->object.vm_object == NULL && !map->system_map) {
+	if (entry->object.vm_object == NULL && !map->system_map &&
+	    (entry->eflags & MAP_ENTRY_GUARD) == 0) {
 		vm_object_t object;
 		object = vm_object_allocate(OBJT_DEFAULT,
 				atop(entry->end - entry->start));
@@ -1988,6 +2022,8 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_off
 	 */
 	for (current = entry; current != &map->header && current->start < end;
 	    current = current->next) {
+		if ((current->eflags & MAP_ENTRY_GUARD) != 0)
+			continue;
 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 			vm_map_unlock(map);
 			return (KERN_INVALID_ARGUMENT);
@@ -2010,7 +2046,8 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_off
 
 		if (set_max ||
 		    ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 ||
-		    ENTRY_CHARGED(current)) {
+		    ENTRY_CHARGED(current) ||
+		    (current->eflags & MAP_ENTRY_GUARD) != 0) {
 			continue;
 		}
 
@@ -2059,6 +2096,9 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_off
 	 */
 	for (current = entry; current != &map->header && current->start < end;
 	    current = current->next) {
+		if ((current->eflags & MAP_ENTRY_GUARD) != 0)
+			continue;
+
 		old_prot = current->protection;
 
 		if (set_max)
@@ -2312,7 +2352,9 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_off
 		entry = temp_entry->next;
 	while ((entry != &map->header) && (entry->start < end)) {
 		vm_map_clip_end(map, entry, end);
-		entry->inheritance = new_inheritance;
+		if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
+		    new_inheritance != VM_INHERIT_ZERO)
+			entry->inheritance = new_inheritance;
 		vm_map_simplify_entry(map, entry);
 		entry = entry->next;
 	}
@@ -2918,6 +2960,15 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry
 
 	vm_map_entry_unlink(map, entry);
 	object = entry->object.vm_object;
+
+	if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
+		MPASS(entry->cred == NULL);
+		MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
+		MPASS(object == NULL);
+		vm_map_entry_deallocate(entry, map->system_map);
+		return;
+	}
+
 	size = entry->end - entry->start;
 	map->size -= size;
 
@@ -3276,6 +3327,8 @@ vmspace_map_entry_forked(const struct vmspace *vm1, st
 	vm_size_t entrysize;
 	vm_offset_t newend;
 
+	if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
+		return;
 	entrysize = entry->end - entry->start;
 	vm2->vm_map.size += entrysize;
 	if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
@@ -3312,6 +3365,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
 	vm_map_entry_t new_entry, old_entry;
 	vm_object_t object;
 	int locked;
+	vm_inherit_t inh;
 
 	old_map = &vm1->vm_map;
 	/* Copy immutable fields of vm1 to vm2. */
@@ -3334,7 +3388,12 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			panic("vm_map_fork: encountered a submap");
 
-		switch (old_entry->inheritance) {
+		inh = old_entry->inheritance;
+		if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
+		    inh != VM_INHERIT_NONE)
+			inh = VM_INHERIT_COPY;
+
+		switch (inh) {
 		case VM_INHERIT_NONE:
 			break;
 
@@ -3467,7 +3526,6 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
 
 			new_entry->start = old_entry->start;
 			new_entry->end = old_entry->end;
-			new_entry->avail_ssize = old_entry->avail_ssize;
 			new_entry->eflags = old_entry->eflags &
 			    ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
 			    MAP_ENTRY_VN_WRITECNT);
@@ -3530,30 +3588,40 @@ out:
 	return (rv);
 }
 
+static int stack_guard_page = 1;
+SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
+    &stack_guard_page, 0,
+    "Specifies the number of guard pages for a stack that grows");
+
 static int
 vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
     vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
 {
 	vm_map_entry_t new_entry, prev_entry;
-	vm_offset_t bot, top;
-	vm_size_t init_ssize;
+	vm_offset_t bot, gap_bot, gap_top, top;
+	vm_size_t init_ssize, sgp;
 	int orient, rv;
 
 	/*
 	 * The stack orientation is piggybacked with the cow argument.
 	 * Extract it into orient and mask the cow argument so that we
 	 * don't pass it around further.
-	 * NOTE: We explicitly allow bi-directional stacks.
 	 */
-	orient = cow & (MAP_STACK_GROWS_DOWN|MAP_STACK_GROWS_UP);
+	orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
 	KASSERT(orient != 0, ("No stack grow direction"));
+	KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
+	    ("bi-dir stack"));
 
+	sgp = (vm_size_t)stack_guard_page * PAGE_SIZE;
 	if (addrbos < vm_map_min(map) ||
 	    addrbos > vm_map_max(map) ||
-	    addrbos + max_ssize < addrbos)
+	    addrbos + max_ssize < addrbos ||
+	    sgp >= max_ssize)
 		return (KERN_NO_SPACE);
 
-	init_ssize = (max_ssize < growsize) ? max_ssize : growsize;
+	init_ssize = growsize;
+	if (max_ssize < init_ssize + sgp)
+		init_ssize = max_ssize - sgp;
 
 	/* If addr is already mapped, no go */
 	if (vm_map_lookup_entry(map, addrbos, &prev_entry))
@@ -3561,12 +3629,6 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
 
 	/*
 	 * If we can't accommodate max_ssize in the current mapping, no go.
-	 * However, we need to be aware that subsequent user mappings might
-	 * map into the space we have reserved for stack, and currently this
-	 * space is not protected.
-	 *
-	 * Hopefully we will at least detect this condition when we try to
-	 * grow the stack.
 	 */
 	if ((prev_entry->next != &map->header) &&
 	    (prev_entry->next->start < addrbos + max_ssize))
@@ -3582,57 +3644,53 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
 	 * and cow to be 0.  Possibly we should eliminate these as input
 	 * parameters, and just pass these values here in the insert call.
 	 */
-	if (orient == MAP_STACK_GROWS_DOWN)
+	if (orient == MAP_STACK_GROWS_DOWN) {
 		bot = addrbos + max_ssize - init_ssize;
-	else if (orient == MAP_STACK_GROWS_UP)
+		top = bot + init_ssize;
+		gap_bot = addrbos;
+		gap_top = bot;
+	} else /* if (orient == MAP_STACK_GROWS_UP) */ {
 		bot = addrbos;
-	else
-		bot = round_page(addrbos + max_ssize/2 - init_ssize/2);
-	top = bot + init_ssize;
-	rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
-
-	/* Now set the avail_ssize amount. */
-	if (rv == KERN_SUCCESS) {
-		new_entry = prev_entry->next;
-		if (new_entry->end != top || new_entry->start != bot)
-			panic("Bad entry start/end for new stack entry");
-
-		new_entry->avail_ssize = max_ssize - init_ssize;
-		KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
-		    (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
-		    ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
-		KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
-		    (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
-		    ("new entry lacks MAP_ENTRY_GROWS_UP"));
+		top = bot + init_ssize;
+		gap_bot = top;
+		gap_top = addrbos + max_ssize;
 	}
-
+	rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
+	if (rv != KERN_SUCCESS)
+		return (rv);
+	new_entry = prev_entry->next;
+	KASSERT(new_entry->end == top || new_entry->start == bot,
+	    ("Bad entry start/end for new stack entry"));
+	KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
+	    (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
+	    ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
+	KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
+	    (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
+	    ("new entry lacks MAP_ENTRY_GROWS_UP"));
+	rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
+	    VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
+	    MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
+	if (rv != KERN_SUCCESS)
+		(void)vm_map_delete(map, bot, top);
 	return (rv);
 }
 
-static int stack_guard_page = 0;
-SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
-    &stack_guard_page, 0,
-    "Insert stack guard page ahead of the growable segments.");
-
-/* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
- * desired address is already mapped, or if we successfully grow
- * the stack.  Also returns KERN_SUCCESS if addr is outside the
- * stack range (this is strange, but preserves compatibility with
- * the grow function in vm_machdep.c).
+/*
+ * Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if we
+ * successfully grow the stack.
  */
-int
-vm_map_growstack(struct proc *p, vm_offset_t addr)
+static int
+vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
 {
-	vm_map_entry_t next_entry, prev_entry;
-	vm_map_entry_t new_entry, stack_entry;
-	struct vmspace *vm = p->p_vmspace;
-	vm_map_t map = &vm->vm_map;
-	vm_offset_t end;
-	vm_size_t growsize;
-	size_t grow_amount, max_grow;
-	rlim_t lmemlim, stacklim, vmemlim;
-	int is_procstack, rv;
+	vm_map_entry_t stack_entry;
+	struct proc *p;
+	struct vmspace *vm;
 	struct ucred *cred;
+	vm_offset_t gap_end, gap_start, grow_start;
+	size_t grow_amount, guard, max_grow;
+	rlim_t lmemlim, stacklim, vmemlim;
+	int rv, rv1;
+	bool gap_deleted, grow_down, is_procstack;
 #ifdef notyet
 	uint64_t limit;
 #endif
@@ -3640,125 +3698,74 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
 	int error;
 #endif
 
+	p = curproc;
+	vm = p->p_vmspace;
+	MPASS(map == &p->p_vmspace->vm_map);
+	MPASS(!map->system_map);
+
+	guard = stack_guard_page * PAGE_SIZE;
 	lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK);
 	stacklim = lim_cur(curthread, RLIMIT_STACK);
 	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
-Retry:
-
-	vm_map_lock_read(map);
-
-	/* If addr is already in the entry range, no need to grow.*/
-	if (vm_map_lookup_entry(map, addr, &prev_entry)) {
-		vm_map_unlock_read(map);
+retry:
+	/* If addr is not in a hole for a stack grow area, no need to grow. */
+	if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
+		return (KERN_FAILURE);
+	if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
 		return (KERN_SUCCESS);
-	}
-
-	next_entry = prev_entry->next;
-	if (!(prev_entry->eflags & MAP_ENTRY_GROWS_UP)) {
-		/*
-		 * This entry does not grow upwards. Since the address lies
-		 * beyond this entry, the next entry (if one exists) has to
-		 * be a downward growable entry. The entry list header is
-		 * never a growable entry, so it suffices to check the flags.
-		 */
-		if (!(next_entry->eflags & MAP_ENTRY_GROWS_DOWN)) {
-			vm_map_unlock_read(map);
-			return (KERN_SUCCESS);
-		}
-		stack_entry = next_entry;
+	if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
+		stack_entry = gap_entry->next;
+		if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
+		    stack_entry->start != gap_entry->end)
+			return (KERN_FAILURE);
+		grow_amount = round_page(stack_entry->start - addr);
+		grow_down = true;
+	} else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
+		stack_entry = gap_entry->prev;
+		if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
+		    stack_entry->end != gap_entry->start)
+			return (KERN_FAILURE);
+		grow_amount = round_page(addr + 1 - stack_entry->end);
+		grow_down = false;
 	} else {
-		/*
-		 * This entry grows upward. If the next entry does not at
-		 * least grow downwards, this is the entry we need to grow.
-		 * otherwise we have two possible choices and we have to
-		 * select one.
-		 */
-		if (next_entry->eflags & MAP_ENTRY_GROWS_DOWN) {
-			/*
-			 * We have two choices; grow the entry closest to
-			 * the address to minimize the amount of growth.
-			 */
-			if (addr - prev_entry->end <= next_entry->start - addr)
-				stack_entry = prev_entry;
-			else
-				stack_entry = next_entry;
-		} else
-			stack_entry = prev_entry;
+		return (KERN_FAILURE);
 	}
-
-	if (stack_entry == next_entry) {
-		KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_DOWN, ("foo"));
-		KASSERT(addr < stack_entry->start, ("foo"));
-		end = (prev_entry != &map->header) ? prev_entry->end :
-		    stack_entry->start - stack_entry->avail_ssize;
-		grow_amount = roundup(stack_entry->start - addr, PAGE_SIZE);
-		max_grow = stack_entry->start - end;
-	} else {
-		KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_UP, ("foo"));
-		KASSERT(addr >= stack_entry->end, ("foo"));
-		end = (next_entry != &map->header) ? next_entry->start :
-		    stack_entry->end + stack_entry->avail_ssize;
-		grow_amount = roundup(addr + 1 - stack_entry->end, PAGE_SIZE);
-		max_grow = end - stack_entry->end;
-	}
-
-	if (grow_amount > stack_entry->avail_ssize) {
-		vm_map_unlock_read(map);
+	max_grow = gap_entry->end - gap_entry->start;
+	if (guard > max_grow)
 		return (KERN_NO_SPACE);
-	}
-
-	/*
-	 * If there is no longer enough space between the entries nogo, and
-	 * adjust the available space.  Note: this  should only happen if the
-	 * user has mapped into the stack area after the stack was created,
-	 * and is probably an error.
-	 *
-	 * This also effectively destroys any guard page the user might have
-	 * intended by limiting the stack size.
-	 */
-	if (grow_amount + (stack_guard_page ? PAGE_SIZE : 0) > max_grow) {
-		if (vm_map_lock_upgrade(map))
-			goto Retry;
-
-		stack_entry->avail_ssize = max_grow;
-
-		vm_map_unlock(map);
+	max_grow -= guard;
+	if (grow_amount > max_grow)
 		return (KERN_NO_SPACE);
-	}
 
-	is_procstack = (addr >= (vm_offset_t)vm->vm_maxsaddr &&
-	    addr < (vm_offset_t)p->p_sysent->sv_usrstack) ? 1 : 0;
-
 	/*
 	 * If this is the main process stack, see if we're over the stack
 	 * limit.
 	 */
-	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
-		vm_map_unlock_read(map);
+	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
+	    addr < (vm_offset_t)p->p_sysent->sv_usrstack;
+	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
 		return (KERN_NO_SPACE);
-	}
+
 #ifdef RACCT
 	if (racct_enable) {
 		PROC_LOCK(p);
 		if (is_procstack && racct_set(p, RACCT_STACK,
 		    ctob(vm->vm_ssize) + grow_amount)) {
 			PROC_UNLOCK(p);
-			vm_map_unlock_read(map);
 			return (KERN_NO_SPACE);
 		}
 		PROC_UNLOCK(p);
 	}
 #endif
 
-	/* Round up the grow amount modulo sgrowsiz */
-	growsize = sgrowsiz;
-	grow_amount = roundup(grow_amount, growsize);
-	if (grow_amount > stack_entry->avail_ssize)
-		grow_amount = stack_entry->avail_ssize;
+	grow_amount = roundup(grow_amount, sgrowsiz);
+	if (grow_amount > max_grow)
+		grow_amount = max_grow;
 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
 		grow_amount = trunc_page((vm_size_t)stacklim) -
 		    ctob(vm->vm_ssize);
 	}
+
 #ifdef notyet
 	PROC_LOCK(p);
 	limit = racct_get_available(p, RACCT_STACK);
@@ -3766,9 +3773,9 @@ Retry:
 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
 		grow_amount = limit - ctob(vm->vm_ssize);
 #endif
-	if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+
+	if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
 		if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
-			vm_map_unlock_read(map);
 			rv = KERN_NO_SPACE;
 			goto out;
 		}
@@ -3778,7 +3785,6 @@ Retry:
 			if (racct_set(p, RACCT_MEMLOCK,
 			    ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
 				PROC_UNLOCK(p);
-				vm_map_unlock_read(map);
 				rv = KERN_NO_SPACE;
 				goto out;
 			}
@@ -3786,9 +3792,9 @@ Retry:
 		}
 #endif
 	}
+
 	/* If we would blow our VMEM resource limit, no go */
 	if (map->size + grow_amount > vmemlim) {
-		vm_map_unlock_read(map);
 		rv = KERN_NO_SPACE;
 		goto out;
 	}
@@ -3797,7 +3803,6 @@ Retry:
 		PROC_LOCK(p);
 		if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
 			PROC_UNLOCK(p);
-			vm_map_unlock_read(map);
 			rv = KERN_NO_SPACE;
 			goto out;
 		}
@@ -3805,62 +3810,42 @@ Retry:
 	}
 #endif
 
-	if (vm_map_lock_upgrade(map))
-		goto Retry;
+	if (vm_map_lock_upgrade(map)) {
+		gap_entry = NULL;
+		vm_map_lock_read(map);
+		goto retry;
+	}
 
-	if (stack_entry == next_entry) {
-		/*
-		 * Growing downward.
-		 */
-		/* Get the preliminary new entry start value */
-		addr = stack_entry->start - grow_amount;
-
-		/*
-		 * If this puts us into the previous entry, cut back our
-		 * growth to the available space. Also, see the note above.
-		 */
-		if (addr < end) {
-			stack_entry->avail_ssize = max_grow;
-			addr = end;
-			if (stack_guard_page)
-				addr += PAGE_SIZE;
+	if (grow_down) {
+		grow_start = gap_entry->end - grow_amount;
+		if (gap_entry->start + grow_amount == gap_entry->end) {
+			gap_start = gap_entry->start;
+			gap_end = gap_entry->end;
+			vm_map_entry_delete(map, gap_entry);
+			gap_deleted = true;
+		} else {
+			MPASS(gap_entry->start < gap_entry->end - grow_amount);
+			gap_entry->end -= grow_amount;
+			vm_map_entry_resize_free(map, gap_entry);
+			gap_deleted = false;
 		}
-
-		rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
-		    next_entry->protection, next_entry->max_protection,
+		rv = vm_map_insert(map, NULL, 0, grow_start,
+		    grow_start + grow_amount,
+		    stack_entry->protection, stack_entry->max_protection,
 		    MAP_STACK_GROWS_DOWN);
-
-		/* Adjust the available stack space by the amount we grew. */
-		if (rv == KERN_SUCCESS) {
-			new_entry = prev_entry->next;
-			KASSERT(new_entry == stack_entry->prev, ("foo"));
-			KASSERT(new_entry->end == stack_entry->start, ("foo"));
-			KASSERT(new_entry->start == addr, ("foo"));
-			KASSERT((new_entry->eflags & MAP_ENTRY_GROWS_DOWN) !=
-			    0, ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
-			grow_amount = new_entry->end - new_entry->start;
-			new_entry->avail_ssize = stack_entry->avail_ssize -
-			    grow_amount;
-			stack_entry->eflags &= ~MAP_ENTRY_GROWS_DOWN;
+		if (rv != KERN_SUCCESS) {
+			if (gap_deleted) {
+				rv1 = vm_map_insert(map, NULL, 0, gap_start,
+				    gap_end, VM_PROT_NONE, VM_PROT_NONE,
+				    MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
+				MPASS(rv1 == KERN_SUCCESS);
+			} else {
+				gap_entry->end += grow_amount;
+				vm_map_entry_resize_free(map, gap_entry);
+			}
 		}
 	} else {
-		/*
-		 * Growing upward.
-		 */
-		addr = stack_entry->end + grow_amount;
-
-		/*
-		 * If this puts us into the next entry, cut back our growth
-		 * to the available space. Also, see the note above.
-		 */
-		if (addr > end) {
-			stack_entry->avail_ssize = end - stack_entry->end;
-			addr = end;
-			if (stack_guard_page)
-				addr -= PAGE_SIZE;
-		}
-
-		grow_amount = addr - stack_entry->end;
+		grow_start = stack_entry->end;
 		cred = stack_entry->cred;
 		if (cred == NULL && stack_entry->object.vm_object != NULL)
 			cred = stack_entry->object.vm_object->cred;
@@ -3872,30 +3857,30 @@ Retry:
 		    stack_entry->offset,
 		    (vm_size_t)(stack_entry->end - stack_entry->start),
 		    (vm_size_t)grow_amount, cred != NULL)) {
-			map->size += (addr - stack_entry->end);
-			/* Update the current entry. */
-			stack_entry->end = addr;
-			stack_entry->avail_ssize -= grow_amount;
+			if (gap_entry->start + grow_amount == gap_entry->end)
+				vm_map_entry_delete(map, gap_entry);
+			else
+				gap_entry->start += grow_amount;
+			stack_entry->end += grow_amount;
+			map->size += grow_amount;
 			vm_map_entry_resize_free(map, stack_entry);
 			rv = KERN_SUCCESS;
 		} else
 			rv = KERN_FAILURE;
 	}
-
 	if (rv == KERN_SUCCESS && is_procstack)
 		vm->vm_ssize += btoc(grow_amount);
 
-	vm_map_unlock(map);
-
 	/*
 	 * Heed the MAP_WIREFUTURE flag if it was set for this process.
 	 */
-	if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE)) {
-		vm_map_wire(map,
-		    (stack_entry == next_entry) ? addr : addr - grow_amount,
-		    (stack_entry == next_entry) ? stack_entry->start : addr,
+	if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
+		vm_map_unlock(map);
+		vm_map_wire(map, grow_start, grow_start + grow_amount,
 		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
-	}
+		vm_map_lock_read(map);
+	} else
+		vm_map_lock_downgrade(map);
 
 out:
 #ifdef RACCT
@@ -4019,10 +4004,11 @@ vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
 	vm_size_t size;
 	struct ucred *cred;
 
-RetryLookup:;
+RetryLookup:
 
 	vm_map_lock_read(map);
 
+RetryLookupLocked:
 	/*
 	 * Lookup the faulting address.
 	 */
@@ -4048,7 +4034,16 @@ RetryLookup:;
 	 * Check whether this task is allowed to have this page.
 	 */
 	prot = entry->protection;
-	fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
+	if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
+		fault_typea &= ~VM_PROT_FAULT_LOOKUP;
+		if (prot == VM_PROT_NONE && map != kernel_map &&
+		    (entry->eflags & MAP_ENTRY_GUARD) != 0 &&
+		    (entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
+		    MAP_ENTRY_STACK_GAP_UP)) != 0 &&
+		    vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
+			goto RetryLookupLocked;
+	}
+	fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
 	if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
 		vm_map_unlock_read(map);
 		return (KERN_PROTECTION_FAILURE);
@@ -4282,8 +4277,9 @@ vm_map_print(vm_map_t map)
 	db_indent += 2;
 	for (entry = map->header.next; entry != &map->header;
 	    entry = entry->next) {
-		db_iprintf("map entry %p: start=%p, end=%p\n",
-		    (void *)entry, (void *)entry->start, (void *)entry->end);
+		db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n",
+		    (void *)entry, (void *)entry->start, (void *)entry->end,
+		    entry->eflags);
 		{
 			static char *inheritance_name[4] =
 			{"share", "copy", "none", "donate_copy"};

Modified: stable/11/sys/vm/vm_map.h
==============================================================================
--- stable/11/sys/vm/vm_map.h	Wed Jul  5 06:12:21 2017	(r320665)
+++ stable/11/sys/vm/vm_map.h	Wed Jul  5 06:18:00 2017	(r320666)
@@ -103,7 +103,6 @@ struct vm_map_entry {
 	struct vm_map_entry *right;	/* right child in binary search tree */
 	vm_offset_t start;		/* start address */
 	vm_offset_t end;		/* end address */
-	vm_offset_t avail_ssize;	/* amt can grow if this is a stack */
 	vm_offset_t next_read;		/* vaddr of the next sequential read */
 	vm_size_t adj_free;		/* amount of adjacent free space */
 	vm_size_t max_free;		/* max free space in subtree */
@@ -142,6 +141,9 @@ struct vm_map_entry {
 
 #define	MAP_ENTRY_WIRE_SKIPPED		0x4000
 #define	MAP_ENTRY_VN_WRITECNT		0x8000	/* writeable vnode mapping */
+#define	MAP_ENTRY_GUARD			0x10000
+#define	MAP_ENTRY_STACK_GAP_DN		0x20000
+#define	MAP_ENTRY_STACK_GAP_UP		0x40000
 
 #ifdef	_KERNEL
 static __inline u_char
@@ -315,6 +317,7 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #define MAP_PREFAULT_PARTIAL	0x0010
 #define MAP_DISABLE_SYNCER	0x0020
 #define	MAP_CHECK_EXCL		0x0040
+#define	MAP_CREATE_GUARD	0x0080
 #define MAP_DISABLE_COREDUMP	0x0100
 #define MAP_PREFAULT_MADVISE	0x0200	/* from (user) madvise request */
 #define	MAP_VN_WRITECOUNT	0x0400
@@ -322,6 +325,8 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #define	MAP_STACK_GROWS_UP	0x2000
 #define	MAP_ACC_CHARGED		0x4000
 #define	MAP_ACC_NO_CHARGE	0x8000
+#define	MAP_CREATE_STACK_GAP_UP	0x10000
+#define	MAP_CREATE_STACK_GAP_DN	0x20000
 
 /*
  * vm_fault option flags
@@ -367,6 +372,8 @@ vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_
 int vm_map_delete(vm_map_t, vm_offset_t, vm_offset_t);
 int vm_map_find(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t,
     vm_offset_t, int, vm_prot_t, vm_prot_t, int);
+int vm_map_find_min(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *,
+    vm_size_t, vm_offset_t, vm_offset_t, int, vm_prot_t, vm_prot_t, int);
 int vm_map_fixed(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_size_t,
     vm_prot_t, vm_prot_t, int);
 int vm_map_findspace (vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *);
@@ -387,7 +394,6 @@ int vm_map_submap (vm_map_t, vm_offset_t, vm_offset_t,
 int vm_map_sync(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t);
 int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int);
 int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int);
-int vm_map_growstack (struct proc *p, vm_offset_t addr);
 int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
     int flags);
 int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,

Modified: stable/11/sys/vm/vm_mmap.c
==============================================================================
--- stable/11/sys/vm/vm_mmap.c	Wed Jul  5 06:12:21 2017	(r320665)
+++ stable/11/sys/vm/vm_mmap.c	Wed Jul  5 06:18:00 2017	(r320666)
@@ -226,7 +226,7 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t s
 	}

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
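
The truncated vm_mmap.c changes implement MFC r320430: without
MAP_FIXED the addr argument is only a hint, retried from the map
minimum via the new vm_map_find_min() when nothing fits above it.  In
user-space terms (the hint value is illustrative):

#include <sys/mman.h>
#include <err.h>

static void *
map_near(void *hint)
{
	void *p;

	/*
	 * Without MAP_FIXED the kernel may place the mapping at a
	 * different address; only the value returned by mmap()
	 * identifies the mapping.
	 */
	p = mmap(hint, 4096, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	return (p);
}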

