mlockall() for 5.1-RELEASE
    Bruce M Simpson 
    bms at spc.org
       
    Tue Jul  8 01:53:21 PDT 2003
    
    
  
Hi all,
Here is a patch to add mlockall()/munlockall() to FreeBSD 5.1-RELEASE.
I believe it is ready for public consumption (here, anyway), with
some caveats/points:-
 - Uses fine grained locking, but acquires Giant right now.
 - Tested with WITNESS, MUTEX_PROFILING, INVARIANTS. Caught a lock not
   being backed out in vm_unix.c, fixed now.
 - Inspected at the vmstat, top and vm pager sysctl stats level.
 - The RES size for a process may appear to be greater than its SIZE.
   This is believed to be due to pre-faulted stack pages being wired
   when they may not count against the per-process SIZE.
Please share your feedback and reviews with me.
Regards
BMS
-------------- next part --------------
Generated by diffcoll on Thu  3 Jul 2003 22:57:53 BST
diff -uN src/lib/libc/alpha/sys/Makefile.inc.orig src/lib/libc/alpha/sys/Makefile.inc
--- /usr/src/lib/libc/alpha/sys/Makefile.inc.orig	Fri May 30 11:25:46 2003
+++ /usr/src/lib/libc/alpha/sys/Makefile.inc	Fri Jun 27 07:33:35 2003
@@ -5,7 +5,7 @@
 
 # Don't generate default code for these syscalls:
 NOASM=	break.o exit.o ftruncate.o getdomainname.o getlogin.o \
-	lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \
+	lseek.o mmap.o openbsd_poll.o pread.o \
 	pwrite.o setdomainname.o sstk.o truncate.o uname.o vfork.o yield.o
 
 PSEUDO=	_getlogin.o _exit.o
diff -uN src/lib/libc/amd64/sys/Makefile.inc.orig src/lib/libc/amd64/sys/Makefile.inc
--- /usr/src/lib/libc/amd64/sys/Makefile.inc.orig	Fri May 30 11:26:03 2003
+++ /usr/src/lib/libc/amd64/sys/Makefile.inc	Fri Jun 27 07:33:35 2003
@@ -6,7 +6,7 @@
 
 # Don't generate default code for these syscalls:
 NOASM=	break.o exit.o ftruncate.o getdomainname.o getlogin.o \
-	lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \
+	lseek.o mmap.o openbsd_poll.o pread.o \
 	pwrite.o setdomainname.o sstk.o truncate.o uname.o vfork.o yield.o
 
 PSEUDO=	_getlogin.o _exit.o
diff -uN src/lib/libc/i386/sys/Makefile.inc.orig src/lib/libc/i386/sys/Makefile.inc
--- /usr/src/lib/libc/i386/sys/Makefile.inc.orig	Fri May 30 04:44:28 2003
+++ /usr/src/lib/libc/i386/sys/Makefile.inc	Fri Jun 27 07:33:35 2003
@@ -9,7 +9,7 @@
 
 # Don't generate default code for these syscalls:
 NOASM=	break.o exit.o ftruncate.o getdomainname.o getlogin.o \
-	lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \
+	lseek.o mmap.o openbsd_poll.o pread.o \
 	pwrite.o setdomainname.o sstk.o truncate.o uname.o vfork.o yield.o
 
 PSEUDO=	_getlogin.o _exit.o
diff -uN src/lib/libc/ia64/sys/Makefile.inc.orig src/lib/libc/ia64/sys/Makefile.inc
--- /usr/src/lib/libc/ia64/sys/Makefile.inc.orig	Fri May 30 11:26:25 2003
+++ /usr/src/lib/libc/ia64/sys/Makefile.inc	Fri Jun 27 07:33:35 2003
@@ -5,7 +5,7 @@
 
 # Don't generate default code for these syscalls:
 NOASM=	break.o exit.o ftruncate.o getdomainname.o getlogin.o \
-	lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \
+	lseek.o mmap.o openbsd_poll.o pread.o \
 	pwrite.o setdomainname.o sstk.o truncate.o uname.o vfork.o yield.o
 
 PSEUDO=	_getlogin.o _exit.o
diff -uN src/lib/libc/powerpc/sys/Makefile.inc.orig src/lib/libc/powerpc/sys/Makefile.inc
--- /usr/src/lib/libc/powerpc/sys/Makefile.inc.orig	Fri May 30 11:26:40 2003
+++ /usr/src/lib/libc/powerpc/sys/Makefile.inc	Fri Jun 27 07:33:35 2003
@@ -4,7 +4,7 @@
 
 # Don't generate default code for these syscalls:
 NOASM=	break.o exit.o ftruncate.o getdomainname.o getlogin.o \
-	lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \
+	lseek.o mmap.o openbsd_poll.o pread.o \
 	pwrite.o setdomainname.o sstk.o truncate.o uname.o yield.o
 
 PSEUDO=	_getlogin.o _exit.o
diff -uN src/lib/libc/sparc64/sys/Makefile.inc.orig src/lib/libc/sparc64/sys/Makefile.inc
--- /usr/src/lib/libc/sparc64/sys/Makefile.inc.orig	Fri May 30 11:27:02 2003
+++ /usr/src/lib/libc/sparc64/sys/Makefile.inc	Fri Jun 27 07:33:35 2003
@@ -16,7 +16,7 @@
 
 # Don't generate default code for these syscalls:
 NOASM=	break.o exit.o ftruncate.o getdomainname.o getlogin.o \
-	lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \
+	lseek.o mmap.o openbsd_poll.o pread.o \
 	pwrite.o setdomainname.o sstk.o truncate.o uname.o yield.o
 
 PSEUDO=	_getlogin.o _exit.o
diff -uN src/lib/libc/sys/Makefile.inc.orig src/lib/libc/sys/Makefile.inc
--- /usr/src/lib/libc/sys/Makefile.inc.orig	Sat Jun 21 10:03:26 2003
+++ /usr/src/lib/libc/sys/Makefile.inc	Fri Jun 27 08:51:45 2003
@@ -69,8 +69,8 @@
 	kldfind.2 kldfirstmod.2 kldload.2 kldnext.2 kldstat.2 kldsym.2 \
 	kldunload.2 kqueue.2 kse.2 ktrace.2 link.2 lio_listio.2 listen.2 \
 	lseek.2 \
-	madvise.2 mincore.2 minherit.2 mkdir.2 mkfifo.2 mknod.2 mlock.2 mmap.2 \
-	modfind.2 modnext.2 modstat.2 mount.2 \
+	madvise.2 mincore.2 minherit.2 mkdir.2 mkfifo.2 mknod.2 mlock.2 \
+	mlockall.2 mmap.2 modfind.2 modnext.2 modstat.2 mount.2 \
 	mprotect.2 msync.2 munmap.2 nanosleep.2 ntp_adjtime.2 ntp_gettime.2 \
 	nfssvc.2 open.2 pathconf.2 pipe.2 poll.2 profil.2 ptrace.2 quotactl.2 \
 	read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \
diff -uN src/lib/libc/sys/mlockall.2.orig src/lib/libc/sys/mlockall.2
--- /usr/src/lib/libc/sys/mlockall.2.orig	Fri May 30 07:01:26 2003
+++ /usr/src/lib/libc/sys/mlockall.2	Fri Jun 27 07:33:35 2003
@@ -0,0 +1,140 @@
+.\"	$NetBSD: mlockall.2,v 1.11 2003/04/16 13:34:54 wiz Exp $
+.\"
+.\" Copyright (c) 1999 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
+.\" NASA Ames Research Center.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"        This product includes software developed by the NetBSD
+.\"        Foundation, Inc. and its contributors.
+.\" 4. Neither the name of The NetBSD Foundation nor the names of its
+.\"    contributors may be used to endorse or promote products derived
+.\"    from this software without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd June 12, 1999
+.Dt MLOCKALL 2
+.Os
+.Sh NAME
+.Nm mlockall ,
+.Nm munlockall
+.Nd lock (unlock) the address space of a process
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/mman.h
+.Ft int
+.Fn mlockall "int flags"
+.Ft int
+.Fn munlockall "void"
+.Sh DESCRIPTION
+The
+.Nm mlockall
+system call locks into memory the physical pages associated with the
+address space of a process until the address space is unlocked, the
+process exits, or execs another program image.
+.Pp
+The following flags affect the behavior of
+.Nm mlockall :
+.Bl -tag -width MCL_CURRENT
+.It Dv MCL_CURRENT
+Lock all pages currently mapped into the process's address space.
+.It Dv MCL_FUTURE
+Lock all pages mapped into the process's address space in the future,
+at the time the mapping is established.
+Note that this may cause future mappings to fail if those mappings
+cause resource limits to be exceeded.
+.El
+.Pp
+Since physical memory is a potentially scarce resource, processes are
+limited in how much they can lock down.
+A single process can lock the minimum of a system-wide
+.Dq wired pages
+limit and the per-process
+.Li RLIMIT_MEMLOCK
+resource limit.
+.Pp
+The
+.Nm munlockall
+call unlocks any locked memory regions in the process address space.
+Any regions mapped after an
+.Nm munlockall
+call will not be locked.
+.Sh RETURN VALUES
+A return value of 0 indicates that the call
+succeeded and all pages in the address space have either been locked or unlocked.
+A return value of -1 indicates an error occurred and the locked
+status of all pages in the address space remains unchanged.
+In this case, the global location
+.Va errno
+is set to indicate the error.
+.Sh ERRORS
+.Fn mlockall
+will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The
+.Ar flags
+argument is zero, or includes unimplemented flags.
+.It Bq Er ENOMEM
+Locking the address space would exceed either the system or per-process
+limit for locked memory.
+.It Bq Er EAGAIN
+Some or all of the memory mapped into the process's address space
+could not be locked when the call was made.
+.It Bq Er EPERM
+The calling process does not have the appropriate privilege to perform
+the requested operation.
+.El
+.Sh SEE ALSO
+.Xr mincore 2 ,
+.Xr mlock 2 ,
+.Xr mmap 2 ,
+.Xr munmap 2 ,
+.Xr setrlimit 2
+.Sh STANDARDS
+The
+.Fn mlockall
+and
+.Fn munlockall
+functions are believed to conform to
+.St -p1003.1-2001 .
+.Sh HISTORY
+The
+.Fn mlockall
+and
+.Fn munlockall
+functions first appeared in
+.Fx 5.1 .
+.Sh BUGS
+The per-process resource limit is a limit on the amount of virtual
+memory locked, while the system-wide limit is for the number of locked
+physical pages.
+Hence a process with two distinct locked mappings of the same physical page
+counts as 2 pages against the per-process limit and as only a single page
+in the system limit.
diff -uN src/sys/kern/link_elf.c.orig src/sys/kern/link_elf.c
--- /usr/src/sys/kern/link_elf.c.orig	Fri Jun 27 07:47:39 2003
+++ /usr/src/sys/kern/link_elf.c	Fri Jun 27 07:48:13 2003
@@ -744,7 +744,7 @@
 	vm_map_wire(kernel_map,
 		    (vm_offset_t) segbase,
 		    (vm_offset_t) segbase + segs[i]->p_memsz,
-		    FALSE);
+		    VM_MAP_WIRE_SYSTEM);
 #endif
     }
 
diff -uN src/sys/vm/vm_contig.c.orig src/sys/vm/vm_contig.c
--- /usr/src/sys/vm/vm_contig.c.orig	Fri Jun 27 07:48:49 2003
+++ /usr/src/sys/vm/vm_contig.c	Fri Jun 27 07:49:14 2003
@@ -254,7 +254,7 @@
 			tmp_addr += PAGE_SIZE;
 		}
 		VM_OBJECT_UNLOCK(kernel_object);
-		vm_map_wire(map, addr, addr + size, FALSE);
+		vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM);
 
 		splx(s);
 		return ((void *)addr);
diff -uN src/sys/vm/vm_glue.c.orig src/sys/vm/vm_glue.c
--- /usr/src/sys/vm/vm_glue.c.orig	Fri Jun 27 07:49:23 2003
+++ /usr/src/sys/vm/vm_glue.c	Fri Jun 27 08:00:33 2003
@@ -186,7 +186,7 @@
 {
 
 	vm_map_wire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
-	    round_page((vm_offset_t)addr + len), FALSE);
+	    round_page((vm_offset_t)addr + len), VM_MAP_WIRE_SYSTEM);
 }
 
 /*
@@ -200,7 +200,7 @@
 
 	vm_map_unwire(&curproc->p_vmspace->vm_map,
 	    trunc_page((vm_offset_t)addr),
-	    round_page((vm_offset_t)addr + len), FALSE);
+	    round_page((vm_offset_t)addr + len), VM_MAP_WIRE_SYSTEM);
 }
 
 /*
diff -uN src/sys/vm/vm_kern.c.orig src/sys/vm/vm_kern.c
--- /usr/src/sys/vm/vm_kern.c.orig	Fri Jun 27 07:50:53 2003
+++ /usr/src/sys/vm/vm_kern.c	Fri Jun 27 07:51:14 2003
@@ -210,7 +210,7 @@
 	/*
 	 * And finally, mark the data as non-pageable.
 	 */
-	(void) vm_map_wire(map, addr, addr + size, FALSE);
+	(void) vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM);
 
 	return (addr);
 }
diff -uN src/sys/vm/vm_map.c.orig src/sys/vm/vm_map.c
--- /usr/src/sys/vm/vm_map.c.orig	Wed Jun  4 11:01:59 2003
+++ /usr/src/sys/vm/vm_map.c	Thu Jul  3 22:57:47 2003
@@ -1559,19 +1559,32 @@
  */
 int
 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
-	boolean_t user_unwire)
+	int flags)
 {
 	vm_map_entry_t entry, first_entry, tmp_entry;
 	vm_offset_t saved_start;
 	unsigned int last_timestamp;
 	int rv;
 	boolean_t need_wakeup, result;
+	boolean_t user_unwire;
+
+	user_unwire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
 
 	vm_map_lock(map);
 	VM_MAP_RANGE_CHECK(map, start, end);
-	if (!vm_map_lookup_entry(map, start, &first_entry)) {
-		vm_map_unlock(map);
-		return (KERN_INVALID_ADDRESS);
+	if (start != (vm_offset_t) 0) {
+		/* operating on arbitrary range */
+		if (!vm_map_lookup_entry(map, start, &first_entry)) {
+			vm_map_unlock(map);
+			return (KERN_INVALID_ADDRESS);
+		}
+	} else {
+		/* operating on entire process space */
+		if (map->header.next == NULL) {
+			vm_map_unlock(map);
+			return (KERN_INVALID_ADDRESS);
+		}
+		first_entry = map->header.next;
 	}
 	last_timestamp = map->timestamp;
 	entry = first_entry;
@@ -1627,9 +1640,11 @@
 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
 		/*
 		 * Check the map for holes in the specified region.
+		 * If VM_MAP_WIRE_NONCONTIG was specified, skip this check.
 		 */
-		if (entry->end < end && (entry->next == &map->header ||
-		    entry->next->start > entry->end)) {
+		if (((flags & VM_MAP_WIRE_NONCONTIG) == 0) &&
+		    (entry->end < end && (entry->next == &map->header ||
+		    entry->next->start > entry->end))) {
 			end = entry->end;
 			rv = KERN_INVALID_ADDRESS;
 			goto done;
@@ -1688,19 +1703,32 @@
  */
 int
 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
-	boolean_t user_wire)
+	int flags)
 {
 	vm_map_entry_t entry, first_entry, tmp_entry;
 	vm_offset_t saved_end, saved_start;
 	unsigned int last_timestamp;
 	int rv;
 	boolean_t need_wakeup, result;
+	boolean_t user_wire;
+
+	user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
 
 	vm_map_lock(map);
 	VM_MAP_RANGE_CHECK(map, start, end);
-	if (!vm_map_lookup_entry(map, start, &first_entry)) {
-		vm_map_unlock(map);
-		return (KERN_INVALID_ADDRESS);
+	if (start != (vm_offset_t) 0) {
+		/* operating on arbitrary range */
+		if (!vm_map_lookup_entry(map, start, &first_entry)) {
+			vm_map_unlock(map);
+			return (KERN_INVALID_ADDRESS);
+		}
+	} else {
+		/* operating on entire process space */
+		if (map->header.next == NULL) {
+			vm_map_unlock(map);
+			return (KERN_INVALID_ADDRESS);
+		}
+		first_entry = map->header.next;
 	}
 	last_timestamp = map->timestamp;
 	entry = first_entry;
@@ -1811,9 +1839,11 @@
 		}
 		/*
 		 * Check the map for holes in the specified region.
+		 * If VM_MAP_WIRE_NONCONTIG was specified, skip this check.
 		 */
-		if (entry->end < end && (entry->next == &map->header ||
-		    entry->next->start > entry->end)) {
+		if (((flags & VM_MAP_WIRE_NONCONTIG) == 0) &&
+		    (entry->end < end && (entry->next == &map->header ||
+		    entry->next->start > entry->end))) {
 			end = entry->end;
 			rv = KERN_INVALID_ADDRESS;
 			goto done;
@@ -1866,6 +1896,52 @@
 }
 
 /*
+ * vm_map_unwire_all
+ *
+ * Unwire all pages in a map, by calling vm_map_unwire() with the
+ * VM_MAP_WIRE_NONCONTIG flag set. An underlying region's vm_map_entry will
+ * only be unwired if it has previously been wired.
+ *
+ * Acquires a temporary lock on map.
+ * Return values are identical to vm_map_unwire().
+ */
+int
+vm_map_unwire_all(vm_map_t map, int flags)
+{
+	vm_offset_t start, end;
+
+	vm_map_lock_read(map);
+	start = vm_map_min(map);
+	end = vm_map_max(map);
+	vm_map_unlock_read(map);
+	return (vm_map_unwire(map, start, end, flags | VM_MAP_WIRE_NONCONTIG));
+}
+
+/*
+ * vm_map_wire_all
+ *
+ * Wire all pages in a map, by calling vm_map_wire() with the
+ * VM_MAP_WIRE_NONCONTIG flag set. The wire count of the pages in the
+ * underlying vm_map_entry of each region will be unconditionally
+ * incremented.
+ *
+ * This function is likely to cause a burst of page faults when called.
+ * Acquires a temporary lock on map.
+ * Return values are identical to vm_map_wire().
+ */
+int
+vm_map_wire_all(vm_map_t map, int flags)
+{
+	vm_offset_t start, end;
+
+	vm_map_lock_read(map);
+	start = vm_map_min(map);
+	end = vm_map_max(map);
+	vm_map_unlock_read(map);
+	return (vm_map_wire(map, start, end, flags | VM_MAP_WIRE_NONCONTIG));
+}
+
+/*
  * vm_map_clean
  *
  * Push any dirty cached pages in the address range to their pager.
@@ -2351,6 +2427,10 @@
 	new_map = &vm2->vm_map;	/* XXX */
 	new_map->timestamp = 1;
 
+	/* Do not inherit the MAP_WIREFUTURE property. */
+	if ((new_map->flags & MAP_WIREFUTURE) == MAP_WIREFUTURE)
+		new_map->flags &= ~MAP_WIREFUTURE;
+
 	old_entry = old_map->header.next;
 
 	while (old_entry != &old_map->header) {
@@ -2660,6 +2740,14 @@
 	}
 
 	vm_map_unlock(map);
+	/*
+	 * Heed the MAP_WIREFUTURE flag if it was set for this process.
+	 */
+	if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE))
+		vm_map_wire(map, addr, stack_entry->start,
+			    (p->p_flag & P_SYSTEM ? VM_MAP_WIRE_SYSTEM :
+			    VM_MAP_WIRE_USER));
+
 	return (rv);
 }
 
diff -uN src/sys/vm/vm_map.h.orig src/sys/vm/vm_map.h
--- /usr/src/sys/vm/vm_map.h.orig	Wed Jun  4 13:45:35 2003
+++ /usr/src/sys/vm/vm_map.h	Fri Jun 27 08:27:51 2003
@@ -81,6 +81,7 @@
  *	vm_map_entry_t		an entry in an address map.
  */
 
+typedef u_int vm_flags_t;
 typedef u_int vm_eflags_t;
 
 /*
@@ -177,8 +178,11 @@
 	pmap_t pmap;			/* (c) Physical map */
 #define	min_offset	header.start	/* (c) */
 #define	max_offset	header.end	/* (c) */
+	vm_flags_t flags;		/* flags for this vm_map */
 };
 
+#define MAP_WIREFUTURE		0x0001	/* wire all future pages */
+
 #ifdef	_KERNEL
 static __inline vm_offset_t
 vm_map_max(vm_map_t map)
@@ -197,6 +201,12 @@
 {
 	return (map->pmap);
 }
+
+static __inline void
+vm_map_modflags(vm_map_t map, vm_flags_t set, vm_flags_t clear)
+{
+	map->flags = (map->flags | set) & ~clear;
+}
 #endif	/* _KERNEL */
 
 /* 
@@ -296,6 +306,13 @@
 #define VM_FAULT_WIRE_MASK (VM_FAULT_CHANGE_WIRING|VM_FAULT_USER_WIRE)
 #define VM_FAULT_DIRTY 8		/* Dirty the page */
 
+/*
+ * vm_map_[un]wire option flags
+ */
+#define VM_MAP_WIRE_SYSTEM	0	/* wiring in a kernel map */
+#define VM_MAP_WIRE_USER	1	/* wiring in a user map */
+#define VM_MAP_WIRE_NONCONTIG	2	/* requested region has holes */
+
 #ifdef _KERNEL
 boolean_t vm_map_check_protection (vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t);
 struct pmap;
@@ -321,9 +338,11 @@
 int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int);
 int vm_map_growstack (struct proc *p, vm_offset_t addr);
 int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
-    boolean_t user_unwire);
+    int flags);
+int vm_map_unwire_all(vm_map_t map, int flags);
 int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
-    boolean_t user_wire);
+    int flags);
+int vm_map_wire_all(vm_map_t map, int flags);
 int vmspace_swap_count (struct vmspace *vmspace);
 #endif				/* _KERNEL */
 #endif				/* _VM_MAP_ */
diff -uN src/sys/vm/vm_mmap.c.orig src/sys/vm/vm_mmap.c
--- /usr/src/sys/vm/vm_mmap.c.orig	Fri Jun 27 08:32:54 2003
+++ /usr/src/sys/vm/vm_mmap.c	Fri Jun 27 08:31:27 2003
@@ -1039,7 +1039,7 @@
 #endif
 
 	error = vm_map_wire(&td->td_proc->p_vmspace->vm_map, addr,
-		     addr + size, TRUE);
+		     addr + size, VM_MAP_WIRE_USER);
 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
 }
 
@@ -1057,14 +1057,56 @@
 	struct thread *td;
 	struct mlockall_args *uap;
 {
-	/* mtx_lock(&Giant); */
-	/* mtx_unlock(&Giant); */
-	return 0;
+	int error;
+	vm_map_t map;
+
+	error = 0;
+	map = &td->td_proc->p_vmspace->vm_map;
+
+	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
+		return (EINVAL);
+
+#ifdef pmap_wired_count
+	/*
+	 * If wiring the not-yet-wired remainder of the map would exceed
+	 * the RLIMIT_MEMLOCK soft limit (rlim_cur), return ENOMEM.
+	 */
+	if (map->size - ptoa(pmap_wired_count(vm_map_pmap(map))) >
+	    td->td_proc->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
+		return (ENOMEM);
+#else
+	error = suser(td);
+	if (error)
+		return (error);
+#endif
+	mtx_lock(&Giant);
+
+	if (uap->how & MCL_FUTURE) {
+		vm_map_lock(map);
+		vm_map_modflags(map, MAP_WIREFUTURE, 0);
+		vm_map_unlock(map);
+		error = 0;
+	}
+
+	if (uap->how & MCL_CURRENT) {
+		/*
+		 * P1003.1-2001 actually mandates that all currently
+		 * mapped pages will be memory resident and locked (wired).
+		 * This implies that we need to prefault. However, the act
+		 * of wiring the page will incur a call to vm_fault_wire(),
+		 * which will fault in the newly wired page.
+		 */
+		error = vm_map_wire_all(map, VM_MAP_WIRE_USER);
+		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
+	}
+
+	mtx_unlock(&Giant);
+	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct munlockall_args {
-	int	how;
+	register_t dummy;
 };
 #endif
 
@@ -1076,9 +1118,30 @@
 	struct thread *td;
 	struct munlockall_args *uap;
 {
-	/* mtx_lock(&Giant); */
-	/* mtx_unlock(&Giant); */
-	return 0;
+	int error;
+	vm_map_t map;
+
+#ifndef pmap_wired_count
+	error = suser(td);
+	if (error)
+		return (error);
+#endif
+
+	error = 0;
+	map = &td->td_proc->p_vmspace->vm_map;
+
+	mtx_lock(&Giant);
+
+	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
+	vm_map_lock(map);
+	vm_map_modflags(map, 0, MAP_WIREFUTURE);
+	vm_map_unlock(map);
+
+	/* Forcibly unwire all pages. */
+	error = vm_map_unwire_all(map, VM_MAP_WIRE_USER);
+
+	mtx_unlock(&Giant);
+	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
@@ -1118,7 +1181,7 @@
 #endif
 
 	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, addr,
-		     addr + size, TRUE);
+		     addr + size, VM_MAP_WIRE_USER);
 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
 }
 
@@ -1275,6 +1338,14 @@
 		if (rv != KERN_SUCCESS)
 			(void) vm_map_remove(map, *addr, *addr + size);
 	}
+
+	/*
+	 * If the process has requested that all future mappings
+	 * be wired, then heed this.
+	 */
+	if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE))
+		vm_map_wire(map, *addr, *addr + size, VM_MAP_WIRE_USER);
+
 	switch (rv) {
 	case KERN_SUCCESS:
 		return (0);
diff -uN src/sys/vm/vm_unix.c.orig src/sys/vm/vm_unix.c
--- /usr/src/sys/vm/vm_unix.c.orig	Wed Jun  4 22:40:38 2003
+++ /usr/src/sys/vm/vm_unix.c	Thu Jul  3 22:50:21 2003
@@ -77,6 +77,9 @@
 	vm_offset_t new, old, base;
 	int rv;
 	int error = 0;
+	boolean_t do_map_wirefuture;
+
+	do_map_wirefuture = FALSE;
 
 	new = round_page((vm_offset_t)uap->nsize);
 	vm_map_lock(&vm->vm_map);
@@ -119,6 +122,21 @@
 			goto done;
 		}
 		vm->vm_dsize += btoc(new - old);
+		/*
+		 * Handle the MAP_WIREFUTURE case for legacy applications,
+		 * by marking the newly mapped range of pages as wired.
+		 *
+		 * We are not required to perform a corresponding
+		 * vm_map_unwire() before vm_map_delete() below, as
+		 * it will forcibly unwire the pages in the range.
+		 *
+		 * XXX If the pages cannot be wired, no error is returned.
+		 */
+		if ((vm->vm_map.flags & MAP_WIREFUTURE) == MAP_WIREFUTURE) {
+			if (bootverbose)
+				printf("obreak: MAP_WIREFUTURE set\n");
+			do_map_wirefuture = TRUE;
+		}
 	} else if (new < old) {
 		rv = vm_map_delete(&vm->vm_map, new, old);
 		if (rv != KERN_SUCCESS) {
@@ -129,6 +147,10 @@
 	}
 done:
 	vm_map_unlock(&vm->vm_map);
+
+	if (do_map_wirefuture)
+		(void) vm_map_wire(&vm->vm_map, old, new, VM_MAP_WIRE_USER);
+
 	return (error);
 }
 
-------------- next part --------------
/*	$Id$ */
/*
 * Copyright (c) 2003 Bruce M. Simpson <bms at spc.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by Bruce M. Simpson.
 * 4. Neither the name of Bruce M. Simpson nor the names of co-
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY Bruce M. Simpson AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL Bruce M. Simpson OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Test harness for my implementation of mlockall() for FreeBSD. -bms
 * Use this when hooking up whatever tools you need (DDB/GDB, top, vmstat).
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define SYSCTL_WIRECOUNT "vm.stats.vm.v_wire_count"

/*
 * Query the SYSCTL_WIRECOUNT sysctl and print its value to stderr.
 *
 * The length argument of sysctlbyname() is a size_t *, so the length is
 * kept in a size_t rather than an int.  If the query fails, the error is
 * reported with perror() and no count is printed, instead of printing an
 * uninitialized value.
 */
static void
print_wire_count(void)
{
	unsigned int wire_count;
	size_t wire_count_len;

	wire_count = 0;
	wire_count_len = sizeof(wire_count);
	if (sysctlbyname(SYSCTL_WIRECOUNT, &wire_count, &wire_count_len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return;
	}
	fprintf(stderr, "%s: %u\n", SYSCTL_WIRECOUNT, wire_count);
}

/*
 * Test harness entry point: print the wire count, call mlockall(),
 * print it again, call munlockall(), and print it a final time.
 *
 * The 5 second sleeps before each call leave time to attach external
 * tools (DDB/GDB, top, vmstat).  Failures of mlockall()/munlockall() are
 * reported with perror() but do not abort the run, so the wire count
 * after a failed call can still be observed; the exit status is always
 * EXIT_SUCCESS.  Command line arguments are ignored.
 */
int
main(int argc, char *argv[])
{
	(void)argc;
	(void)argv;

	print_wire_count();

	fprintf(stderr, "Sleeping 5 seconds.\n");
	sleep(5);
	fprintf(stderr, "About to invoke mlockall().\n");
	if (mlockall(MCL_CURRENT|MCL_FUTURE) == -1) {
		perror("mlockall");
	}
	print_wire_count();

	fprintf(stderr, "Sleeping 5 seconds.\n");
	sleep(5);
	fprintf(stderr, "About to invoke munlockall().\n");
	if (munlockall() == -1) {
		perror("munlockall");
	}
	print_wire_count();

	exit(EXIT_SUCCESS);
}
    
    
More information about the freebsd-hackers
mailing list