svn commit: r352174 - in head/sys: cddl/contrib/opensolaris/uts/common/fs/zfs dev/drm2/ttm kern vm

Jeff Roberson jeff at FreeBSD.org
Tue Sep 10 18:27:48 UTC 2019


Author: jeff
Date: Tue Sep 10 18:27:45 2019
New Revision: 352174
URL: https://svnweb.freebsd.org/changeset/base/352174

Log:
  Use the sleepq lock rather than the page lock to protect against wakeup
  races with page busy state.  The object lock is still used as an interlock
  to ensure that the identity stays valid.  Most callers should use
  vm_page_sleep_if_busy() to handle the locking particulars.
  
  Reviewed by:	alc, kib, markj
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D21255

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  head/sys/dev/drm2/ttm/ttm_bo_vm.c
  head/sys/kern/vfs_bio.c
  head/sys/vm/phys_pager.c
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_object.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Tue Sep 10 17:55:07 2019	(r352173)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Tue Sep 10 18:27:45 2019	(r352174)
@@ -422,10 +422,7 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int
 				 * likely to reclaim it.
 				 */
 				vm_page_reference(pp);
-				vm_page_lock(pp);
-				zfs_vmobject_wunlock(obj);
-				vm_page_busy_sleep(pp, "zfsmwb", true);
-				zfs_vmobject_wlock(obj);
+				vm_page_sleep_if_xbusy(pp, "zfsmwb");
 				continue;
 			}
 			vm_page_sbusy(pp);
@@ -473,10 +470,7 @@ page_wire(vnode_t *vp, int64_t start)
 				 * likely to reclaim it.
 				 */
 				vm_page_reference(pp);
-				vm_page_lock(pp);
-				zfs_vmobject_wunlock(obj);
-				vm_page_busy_sleep(pp, "zfsmwb", true);
-				zfs_vmobject_wlock(obj);
+				vm_page_sleep_if_xbusy(pp, "zfsmwb");
 				continue;
 			}
 

Modified: head/sys/dev/drm2/ttm/ttm_bo_vm.c
==============================================================================
--- head/sys/dev/drm2/ttm/ttm_bo_vm.c	Tue Sep 10 17:55:07 2019	(r352173)
+++ head/sys/dev/drm2/ttm/ttm_bo_vm.c	Tue Sep 10 18:27:45 2019	(r352174)
@@ -232,10 +232,7 @@ reserve:
 
 	VM_OBJECT_WLOCK(vm_obj);
 	if (vm_page_busied(m)) {
-		vm_page_lock(m);
-		VM_OBJECT_WUNLOCK(vm_obj);
-		vm_page_busy_sleep(m, "ttmpbs", false);
-		VM_OBJECT_WLOCK(vm_obj);
+		vm_page_sleep_if_busy(m, "ttmpbs");
 		ttm_mem_io_unlock(man);
 		ttm_bo_unreserve(bo);
 		goto retry;

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c	Tue Sep 10 17:55:07 2019	(r352173)
+++ head/sys/kern/vfs_bio.c	Tue Sep 10 18:27:45 2019	(r352174)
@@ -2931,12 +2931,8 @@ vfs_vmio_invalidate(struct buf *bp)
 		presid = resid > (PAGE_SIZE - poffset) ?
 		    (PAGE_SIZE - poffset) : resid;
 		KASSERT(presid >= 0, ("brelse: extra page"));
-		while (vm_page_xbusied(m)) {
-			vm_page_lock(m);
-			VM_OBJECT_WUNLOCK(obj);
-			vm_page_busy_sleep(m, "mbncsh", true);
-			VM_OBJECT_WLOCK(obj);
-		}
+		while (vm_page_xbusied(m))
+			vm_page_sleep_if_xbusy(m, "mbncsh");
 		if (pmap_page_wired_mappings(m) == 0)
 			vm_page_set_invalid(m, poffset, presid);
 		vm_page_release_locked(m, flags);
@@ -4565,10 +4561,7 @@ vfs_drain_busy_pages(struct buf *bp)
 			for (; last_busied < i; last_busied++)
 				vm_page_sbusy(bp->b_pages[last_busied]);
 			while (vm_page_xbusied(m)) {
-				vm_page_lock(m);
-				VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
-				vm_page_busy_sleep(m, "vbpage", true);
-				VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
+				vm_page_sleep_if_xbusy(m, "vbpage");
 			}
 		}
 	}

Modified: head/sys/vm/phys_pager.c
==============================================================================
--- head/sys/vm/phys_pager.c	Tue Sep 10 17:55:07 2019	(r352173)
+++ head/sys/vm/phys_pager.c	Tue Sep 10 18:27:45 2019	(r352174)
@@ -219,10 +219,7 @@ retry:
 				pmap_zero_page(m);
 			m->valid = VM_PAGE_BITS_ALL;
 		} else if (vm_page_xbusied(m)) {
-			vm_page_lock(m);
-			VM_OBJECT_WUNLOCK(object);
-			vm_page_busy_sleep(m, "physb", true);
-			VM_OBJECT_WLOCK(object);
+			vm_page_sleep_if_xbusy(m, "physb");
 			goto retry;
 		} else {
 			vm_page_xbusy(m);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Tue Sep 10 17:55:07 2019	(r352173)
+++ head/sys/vm/vm_fault.c	Tue Sep 10 18:27:45 2019	(r352174)
@@ -510,7 +510,7 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t pro
 				*m_hold = &m[i];
 				vm_page_wire(&m[i]);
 			}
-			vm_page_xunbusy_maybelocked(&m[i]);
+			vm_page_xunbusy(&m[i]);
 		}
 		if (m_mtx != NULL)
 			mtx_unlock(m_mtx);

Modified: head/sys/vm/vm_object.c
==============================================================================
--- head/sys/vm/vm_object.c	Tue Sep 10 17:55:07 2019	(r352173)
+++ head/sys/vm/vm_object.c	Tue Sep 10 18:27:45 2019	(r352174)
@@ -1160,9 +1160,7 @@ next_page:
 		    ("vm_object_madvise: page %p is not managed", tm));
 		if (vm_page_busied(tm)) {
 			if (object != tobject)
-				VM_OBJECT_WUNLOCK(tobject);
-			vm_page_lock(tm);
-			VM_OBJECT_WUNLOCK(object);
+				VM_OBJECT_WUNLOCK(object);
 			if (advice == MADV_WILLNEED) {
 				/*
 				 * Reference the page before unlocking and
@@ -1345,10 +1343,7 @@ retry:
 		 */
 		if (vm_page_busied(m)) {
 			VM_OBJECT_WUNLOCK(new_object);
-			vm_page_lock(m);
-			VM_OBJECT_WUNLOCK(orig_object);
-			vm_page_busy_sleep(m, "spltwt", false);
-			VM_OBJECT_WLOCK(orig_object);
+			vm_page_sleep_if_busy(m, "spltwt");
 			VM_OBJECT_WLOCK(new_object);
 			goto retry;
 		}
@@ -1415,15 +1410,16 @@ vm_object_collapse_scan_wait(vm_object_t object, vm_pa
 	    ("invalid ownership %p %p %p", p, object, backing_object));
 	if ((op & OBSC_COLLAPSE_NOWAIT) != 0)
 		return (next);
-	if (p != NULL)
-		vm_page_lock(p);
-	VM_OBJECT_WUNLOCK(object);
-	VM_OBJECT_WUNLOCK(backing_object);
 	/* The page is only NULL when rename fails. */
-	if (p == NULL)
+	if (p == NULL) {
 		vm_radix_wait();
-	else
+	} else {
+		if (p->object == object)
+			VM_OBJECT_WUNLOCK(backing_object);
+		else
+			VM_OBJECT_WUNLOCK(object);
 		vm_page_busy_sleep(p, "vmocol", false);
+	}
 	VM_OBJECT_WLOCK(object);
 	VM_OBJECT_WLOCK(backing_object);
 	return (TAILQ_FIRST(&backing_object->memq));
@@ -1837,7 +1833,6 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t 
     int options)
 {
 	vm_page_t p, next;
-	struct mtx *mtx;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
@@ -1848,7 +1843,6 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t 
 	vm_object_pip_add(object, 1);
 again:
 	p = vm_page_find_least(object, start);
-	mtx = NULL;
 
 	/*
 	 * Here, the variable "p" is either (1) the page with the least pindex
@@ -1865,17 +1859,8 @@ again:
 		 * however, be invalidated if the option OBJPR_CLEANONLY is
 		 * not specified.
 		 */
-		vm_page_change_lock(p, &mtx);
-		if (vm_page_xbusied(p)) {
-			VM_OBJECT_WUNLOCK(object);
-			vm_page_busy_sleep(p, "vmopax", true);
-			VM_OBJECT_WLOCK(object);
-			goto again;
-		}
 		if (vm_page_busied(p)) {
-			VM_OBJECT_WUNLOCK(object);
-			vm_page_busy_sleep(p, "vmopar", false);
-			VM_OBJECT_WLOCK(object);
+			vm_page_sleep_if_busy(p, "vmopar");
 			goto again;
 		}
 		if (vm_page_wired(p)) {
@@ -1904,8 +1889,6 @@ wired:
 			goto wired;
 		vm_page_free(p);
 	}
-	if (mtx != NULL)
-		mtx_unlock(mtx);
 	vm_object_pip_wakeup(object);
 }
 
@@ -2190,8 +2173,7 @@ again:
 			m = TAILQ_NEXT(m, listq);
 		}
 		if (vm_page_xbusied(tm)) {
-			vm_page_lock(tm);
-			for (tobject = object; locked_depth >= 1;
+			for (tobject = object; locked_depth > 1;
 			    locked_depth--) {
 				t1object = tobject->backing_object;
 				VM_OBJECT_RUNLOCK(tobject);

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Tue Sep 10 17:55:07 2019	(r352173)
+++ head/sys/vm/vm_page.c	Tue Sep 10 18:27:45 2019	(r352174)
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
+#include <sys/sleepqueue.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
@@ -873,27 +874,17 @@ void
 vm_page_busy_downgrade(vm_page_t m)
 {
 	u_int x;
-	bool locked;
 
 	vm_page_assert_xbusied(m);
-	locked = mtx_owned(vm_page_lockptr(m));
 
+	x = m->busy_lock;
 	for (;;) {
-		x = m->busy_lock;
-		x &= VPB_BIT_WAITERS;
-		if (x != 0 && !locked)
-			vm_page_lock(m);
-		if (atomic_cmpset_rel_int(&m->busy_lock,
-		    VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1)))
+		if (atomic_fcmpset_rel_int(&m->busy_lock,
+		    &x, VPB_SHARERS_WORD(1)))
 			break;
-		if (x != 0 && !locked)
-			vm_page_unlock(m);
 	}
-	if (x != 0) {
+	if ((x & VPB_BIT_WAITERS) != 0)
 		wakeup(m);
-		if (!locked)
-			vm_page_unlock(m);
-	}
 }
 
 /*
@@ -920,35 +911,23 @@ vm_page_sunbusy(vm_page_t m)
 {
 	u_int x;
 
-	vm_page_lock_assert(m, MA_NOTOWNED);
 	vm_page_assert_sbusied(m);
 
+	x = m->busy_lock;
 	for (;;) {
-		x = m->busy_lock;
 		if (VPB_SHARERS(x) > 1) {
-			if (atomic_cmpset_int(&m->busy_lock, x,
+			if (atomic_fcmpset_int(&m->busy_lock, &x,
 			    x - VPB_ONE_SHARER))
 				break;
 			continue;
 		}
-		if ((x & VPB_BIT_WAITERS) == 0) {
-			KASSERT(x == VPB_SHARERS_WORD(1),
-			    ("vm_page_sunbusy: invalid lock state"));
-			if (atomic_cmpset_int(&m->busy_lock,
-			    VPB_SHARERS_WORD(1), VPB_UNBUSIED))
-				break;
+		KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1),
+		    ("vm_page_sunbusy: invalid lock state"));
+		if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED))
 			continue;
-		}
-		KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS),
-		    ("vm_page_sunbusy: invalid lock state for waiters"));
-
-		vm_page_lock(m);
-		if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) {
-			vm_page_unlock(m);
-			continue;
-		}
+		if ((x & VPB_BIT_WAITERS) == 0)
+			break;
 		wakeup(m);
-		vm_page_unlock(m);
 		break;
 	}
 }
@@ -956,28 +935,35 @@ vm_page_sunbusy(vm_page_t m)
 /*
  *	vm_page_busy_sleep:
  *
- *	Sleep and release the page lock, using the page pointer as wchan.
+ *	Sleep if the page is busy, using the page pointer as wchan.
  *	This is used to implement the hard-path of busying mechanism.
  *
- *	The given page must be locked.
- *
  *	If nonshared is true, sleep only if the page is xbusy.
+ *
+ *	The object lock must be held on entry and will be released on exit.
  */
 void
 vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared)
 {
+	vm_object_t obj;
 	u_int x;
 
-	vm_page_assert_locked(m);
+	obj = m->object;
+	vm_page_lock_assert(m, MA_NOTOWNED);
+	VM_OBJECT_ASSERT_LOCKED(obj);
 
+	sleepq_lock(m);
 	x = m->busy_lock;
 	if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) ||
 	    ((x & VPB_BIT_WAITERS) == 0 &&
 	    !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) {
-		vm_page_unlock(m);
+		VM_OBJECT_DROP(obj);
+		sleepq_release(m);
 		return;
 	}
-	msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0);
+	VM_OBJECT_DROP(obj);
+	sleepq_add(m, NULL, wmesg, 0, 0);
+	sleepq_wait(m, PVM);
 }
 
 /*
@@ -992,55 +978,20 @@ vm_page_trysbusy(vm_page_t m)
 {
 	u_int x;
 
+	x = m->busy_lock;
 	for (;;) {
-		x = m->busy_lock;
 		if ((x & VPB_BIT_SHARED) == 0)
 			return (0);
-		if (atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER))
+		if (atomic_fcmpset_acq_int(&m->busy_lock, &x,
+		    x + VPB_ONE_SHARER))
 			return (1);
 	}
 }
 
-static void
-vm_page_xunbusy_locked(vm_page_t m)
-{
-
-	vm_page_assert_xbusied(m);
-	vm_page_assert_locked(m);
-
-	atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
-	/* There is a waiter, do wakeup() instead of vm_page_flash(). */
-	wakeup(m);
-}
-
-void
-vm_page_xunbusy_maybelocked(vm_page_t m)
-{
-	bool lockacq;
-
-	vm_page_assert_xbusied(m);
-
-	/*
-	 * Fast path for unbusy.  If it succeeds, we know that there
-	 * are no waiters, so we do not need a wakeup.
-	 */
-	if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER,
-	    VPB_UNBUSIED))
-		return;
-
-	lockacq = !mtx_owned(vm_page_lockptr(m));
-	if (lockacq)
-		vm_page_lock(m);
-	vm_page_xunbusy_locked(m);
-	if (lockacq)
-		vm_page_unlock(m);
-}
-
 /*
  *	vm_page_xunbusy_hard:
  *
- *	Called after the first try the exclusive unbusy of a page failed.
- *	It is assumed that the waiters bit is on.
+ *	Called when unbusy has failed because there is a waiter.
  */
 void
 vm_page_xunbusy_hard(vm_page_t m)
@@ -1048,34 +999,10 @@ vm_page_xunbusy_hard(vm_page_t m)
 
 	vm_page_assert_xbusied(m);
 
-	vm_page_lock(m);
-	vm_page_xunbusy_locked(m);
-	vm_page_unlock(m);
-}
-
-/*
- *	vm_page_flash:
- *
- *	Wakeup anyone waiting for the page.
- *	The ownership bits do not change.
- *
- *	The given page must be locked.
- */
-void
-vm_page_flash(vm_page_t m)
-{
-	u_int x;
-
-	vm_page_lock_assert(m, MA_OWNED);
-
-	for (;;) {
-		x = m->busy_lock;
-		if ((x & VPB_BIT_WAITERS) == 0)
-			return;
-		if (atomic_cmpset_int(&m->busy_lock, x,
-		    x & (~VPB_BIT_WAITERS)))
-			break;
-	}
+	/*
+	 * Wake the waiter.
+	 */
+	atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
 	wakeup(m);
 }
 
@@ -1264,7 +1191,7 @@ vm_page_readahead_finish(vm_page_t m)
 /*
  *	vm_page_sleep_if_busy:
  *
- *	Sleep and release the page queues lock if the page is busied.
+ *	Sleep and release the object lock if the page is busied.
  *	Returns TRUE if the thread slept.
  *
  *	The given page must be unlocked and object containing it must
@@ -1287,8 +1214,6 @@ vm_page_sleep_if_busy(vm_page_t m, const char *msg)
 		 * held by the callers.
 		 */
 		obj = m->object;
-		vm_page_lock(m);
-		VM_OBJECT_WUNLOCK(obj);
 		vm_page_busy_sleep(m, msg, false);
 		VM_OBJECT_WLOCK(obj);
 		return (TRUE);
@@ -1297,6 +1222,39 @@ vm_page_sleep_if_busy(vm_page_t m, const char *msg)
 }
 
 /*
+ *	vm_page_sleep_if_xbusy:
+ *
+ *	Sleep and release the object lock if the page is xbusied.
+ *	Returns TRUE if the thread slept.
+ *
+ *	The given page must be unlocked and object containing it must
+ *	be locked.
+ */
+int
+vm_page_sleep_if_xbusy(vm_page_t m, const char *msg)
+{
+	vm_object_t obj;
+
+	vm_page_lock_assert(m, MA_NOTOWNED);
+	VM_OBJECT_ASSERT_WLOCKED(m->object);
+
+	if (vm_page_xbusied(m)) {
+		/*
+		 * The page-specific object must be cached because page
+		 * identity can change during the sleep, causing the
+		 * re-lock of a different object.
+		 * It is assumed that a reference to the object is already
+		 * held by the callers.
+		 */
+		obj = m->object;
+		vm_page_busy_sleep(m, msg, true);
+		VM_OBJECT_WLOCK(obj);
+		return (TRUE);
+	}
+	return (FALSE);
+}
+
+/*
  *	vm_page_dirty_KBI:		[ internal use only ]
  *
  *	Set all bits in the page's dirty field.
@@ -1452,7 +1410,7 @@ vm_page_object_remove(vm_page_t m)
 	KASSERT((m->ref_count & VPRC_OBJREF) != 0,
 	    ("page %p is missing its object ref", m));
 	if (vm_page_xbusied(m))
-		vm_page_xunbusy_maybelocked(m);
+		vm_page_xunbusy(m);
 	mrem = vm_radix_remove(&object->rtree, m->pindex);
 	KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
 
@@ -1598,7 +1556,7 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm
 
 	mold->object = NULL;
 	atomic_clear_int(&mold->ref_count, VPRC_OBJREF);
-	vm_page_xunbusy_maybelocked(mold);
+	vm_page_xunbusy(mold);
 
 	/*
 	 * The object's resident_page_count does not change because we have
@@ -4089,8 +4047,6 @@ retrylookup:
 			 * likely to reclaim it.
 			 */
 			vm_page_aflag_set(m, PGA_REFERENCED);
-			vm_page_lock(m);
-			VM_OBJECT_WUNLOCK(object);
 			vm_page_busy_sleep(m, "pgrbwt", (allocflags &
 			    VM_ALLOC_IGN_SBUSY) != 0);
 			VM_OBJECT_WLOCK(object);
@@ -4188,8 +4144,6 @@ retrylookup:
 				 * likely to reclaim it.
 				 */
 				vm_page_aflag_set(m, PGA_REFERENCED);
-				vm_page_lock(m);
-				VM_OBJECT_WUNLOCK(object);
 				vm_page_busy_sleep(m, "grbmaw", (allocflags &
 				    VM_ALLOC_IGN_SBUSY) != 0);
 				VM_OBJECT_WLOCK(object);

Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h	Tue Sep 10 17:55:07 2019	(r352173)
+++ head/sys/vm/vm_page.h	Tue Sep 10 18:27:45 2019	(r352174)
@@ -541,7 +541,6 @@ malloc2vm_flags(int malloc_flags)
 
 void vm_page_busy_downgrade(vm_page_t m);
 void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared);
-void vm_page_flash(vm_page_t m);
 void vm_page_free(vm_page_t m);
 void vm_page_free_zero(vm_page_t m);
 
@@ -604,6 +603,7 @@ vm_page_t vm_page_scan_contig(u_long npages, vm_page_t
     vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options);
 void vm_page_set_valid_range(vm_page_t m, int base, int size);
 int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
+int vm_page_sleep_if_xbusy(vm_page_t m, const char *msg);
 vm_offset_t vm_page_startup(vm_offset_t vaddr);
 void vm_page_sunbusy(vm_page_t m);
 void vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq);
@@ -618,7 +618,6 @@ void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr,
 void vm_page_wire(vm_page_t);
 bool vm_page_wire_mapped(vm_page_t m);
 void vm_page_xunbusy_hard(vm_page_t m);
-void vm_page_xunbusy_maybelocked(vm_page_t m);
 void vm_page_set_validclean (vm_page_t, int, int);
 void vm_page_clear_dirty (vm_page_t, int, int);
 void vm_page_set_invalid (vm_page_t, int, int);


More information about the svn-src-all mailing list