svn commit: r302089 - in head/sys: sys vm

Konstantin Belousov kib at FreeBSD.org
Wed Jun 22 20:15:39 UTC 2016


Author: kib
Date: Wed Jun 22 20:15:37 2016
New Revision: 302089
URL: https://svnweb.freebsd.org/changeset/base/302089

Log:
  Fix a LOR between vnode locks and allproc_lock.
  
  There is an order between covered vnode lock and allproc_lock, which
  is established by calling mountcheckdirs() while owning the covered
  vnode lock. mountcheckdirs() iterates over the processes, protected by
  allproc_lock.  This order is needed and seems to be not avoidable.
  
  On the other hand, various VM daemons also need to iterate over all
  processes, and they lock and unlock user maps.  Since unlock of the
  user map may trigger processing of the deferred map entries, it causes
  vnode locking to occur.  Or, when vmspace is freed, dropping references
  on the vnode-backed object also lock vnodes.  We get reverted order
  comparing with the mount/unmount order.
  
  For VM daemons, there is no need to own allproc_lock while we operate
  on vmspaces. If the process is held, it serves as the marker for
  allproc list, which allows to continue the iteration.
  
  Add _PHOLD_LITE() macro, similar to _PHOLD(), but not causing swap-in
  of the kernel stacks.  It is used instead of _PHOLD() in vm code,
  since e.g. calling faultin() in OOM conditions only exaggerates the
  problem.
  
  Modernize comment describing PHOLD.
  
  Reported by:	lists at yamagi.org
  Tested by:	pho (previous version)
  Reviewed by:	jhb
  Sponsored by:	The FreeBSD Foundation
  MFC after:	3 week
  Approved by:	re (gjb)
  Differential revision:	https://reviews.freebsd.org/D6679

Modified:
  head/sys/sys/proc.h
  head/sys/vm/vm_glue.c
  head/sys/vm/vm_pageout.c

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h	Wed Jun 22 19:07:41 2016	(r302088)
+++ head/sys/sys/proc.h	Wed Jun 22 20:15:37 2016	(r302089)
@@ -827,7 +827,20 @@ extern pid_t pid_max;
 #define	SESS_LOCKED(s)	mtx_owned(&(s)->s_mtx)
 #define	SESS_LOCK_ASSERT(s, type)	mtx_assert(&(s)->s_mtx, (type))
 
-/* Hold process U-area in memory, normally for ptrace/procfs work. */
+/*
+ * Non-zero p_lock ensures that:
+ * - exit1() is not performed until p_lock reaches zero;
+ * - the process' threads stack are not swapped out if they are currently
+ *   not (P_INMEM).
+ *
+ * PHOLD() asserts that the process (except the current process) is
+ * not exiting, increments p_lock and swaps threads stacks into memory,
+ * if needed.
+ * _PHOLD() is same as PHOLD(), it takes the process locked.
+ * _PHOLD_LITE() also takes the process locked, but comparing with
+ * _PHOLD(), it only guarantees that exit1() is not executed,
+ * faultin() is not called.
+ */
 #define	PHOLD(p) do {							\
 	PROC_LOCK(p);							\
 	_PHOLD(p);							\
@@ -841,6 +854,12 @@ extern pid_t pid_max;
 	if (((p)->p_flag & P_INMEM) == 0)				\
 		faultin((p));						\
 } while (0)
+#define	_PHOLD_LITE(p) do {						\
+	PROC_LOCK_ASSERT((p), MA_OWNED);				\
+	KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc,		\
+	    ("PHOLD of exiting process %p", p));			\
+	(p)->p_lock++;							\
+} while (0)
 #define	PROC_ASSERT_HELD(p) do {					\
 	KASSERT((p)->p_lock > 0, ("process %p not held", p));		\
 } while (0)

Modified: head/sys/vm/vm_glue.c
==============================================================================
--- head/sys/vm/vm_glue.c	Wed Jun 22 19:07:41 2016	(r302088)
+++ head/sys/vm/vm_glue.c	Wed Jun 22 20:15:37 2016	(r302089)
@@ -863,22 +863,32 @@ retry:
 		struct vmspace *vm;
 		int minslptime = 100000;
 		int slptime;
-		
+
+		PROC_LOCK(p);
 		/*
 		 * Watch out for a process in
 		 * creation.  It may have no
 		 * address space or lock yet.
 		 */
-		if (p->p_state == PRS_NEW)
+		if (p->p_state == PRS_NEW) {
+			PROC_UNLOCK(p);
 			continue;
+		}
 		/*
 		 * An aio daemon switches its
 		 * address space while running.
 		 * Perform a quick check whether
 		 * a process has P_SYSTEM.
+		 * Filter out exiting processes.
 		 */
-		if ((p->p_flag & P_SYSTEM) != 0)
+		if ((p->p_flag & (P_SYSTEM | P_WEXIT)) != 0) {
+			PROC_UNLOCK(p);
 			continue;
+		}
+		_PHOLD_LITE(p);
+		PROC_UNLOCK(p);
+		sx_sunlock(&allproc_lock);
+
 		/*
 		 * Do not swapout a process that
 		 * is waiting for VM data
@@ -893,16 +903,15 @@ retry:
 		 */
 		vm = vmspace_acquire_ref(p);
 		if (vm == NULL)
-			continue;
+			goto nextproc2;
 		if (!vm_map_trylock(&vm->vm_map))
 			goto nextproc1;
 
 		PROC_LOCK(p);
-		if (p->p_lock != 0 ||
-		    (p->p_flag & (P_STOPPED_SINGLE|P_TRACED|P_SYSTEM|P_WEXIT)
-		    ) != 0) {
+		if (p->p_lock != 1 || (p->p_flag & (P_STOPPED_SINGLE |
+		    P_TRACED | P_SYSTEM)) != 0)
 			goto nextproc;
-		}
+
 		/*
 		 * only aiod changes vmspace, however it will be
 		 * skipped because of the if statement above checking 
@@ -977,12 +986,12 @@ retry:
 			if ((action & VM_SWAP_NORMAL) ||
 				((action & VM_SWAP_IDLE) &&
 				 (minslptime > swap_idle_threshold2))) {
+				_PRELE(p);
 				if (swapout(p) == 0)
 					didswap++;
 				PROC_UNLOCK(p);
 				vm_map_unlock(&vm->vm_map);
 				vmspace_free(vm);
-				sx_sunlock(&allproc_lock);
 				goto retry;
 			}
 		}
@@ -991,7 +1000,9 @@ nextproc:
 		vm_map_unlock(&vm->vm_map);
 nextproc1:
 		vmspace_free(vm);
-		continue;
+nextproc2:
+		sx_slock(&allproc_lock);
+		PRELE(p);
 	}
 	sx_sunlock(&allproc_lock);
 	/*

Modified: head/sys/vm/vm_pageout.c
==============================================================================
--- head/sys/vm/vm_pageout.c	Wed Jun 22 19:07:41 2016	(r302088)
+++ head/sys/vm/vm_pageout.c	Wed Jun 22 20:15:37 2016	(r302089)
@@ -1485,19 +1485,21 @@ vm_pageout_oom(int shortage)
 			PROC_UNLOCK(p);
 			continue;
 		}
-		_PHOLD(p);
+		_PHOLD_LITE(p);
+		PROC_UNLOCK(p);
+		sx_sunlock(&allproc_lock);
 		if (!vm_map_trylock_read(&vm->vm_map)) {
-			_PRELE(p);
-			PROC_UNLOCK(p);
 			vmspace_free(vm);
+			sx_slock(&allproc_lock);
+			PRELE(p);
 			continue;
 		}
-		PROC_UNLOCK(p);
 		size = vmspace_swap_count(vm);
 		if (shortage == VM_OOM_MEM)
 			size += vm_pageout_oom_pagecount(vm);
 		vm_map_unlock_read(&vm->vm_map);
 		vmspace_free(vm);
+		sx_slock(&allproc_lock);
 
 		/*
 		 * If this process is bigger than the biggest one,
@@ -1812,9 +1814,13 @@ again:
 			if ((p->p_flag & P_INMEM) == 0)
 				limit = 0;	/* XXX */
 			vm = vmspace_acquire_ref(p);
+			_PHOLD_LITE(p);
 			PROC_UNLOCK(p);
-			if (vm == NULL)
+			if (vm == NULL) {
+				PRELE(p);
 				continue;
+			}
+			sx_sunlock(&allproc_lock);
 
 			size = vmspace_resident_count(vm);
 			if (size >= limit) {
@@ -1859,6 +1865,8 @@ again:
 			}
 #endif
 			vmspace_free(vm);
+			sx_slock(&allproc_lock);
+			PRELE(p);
 		}
 		sx_sunlock(&allproc_lock);
 		if (tryagain != 0 && attempts <= 10)


More information about the svn-src-all mailing list