svn commit: r356157 - in head/sys: cddl/contrib/opensolaris/uts/common/fs/zfs dev/md fs/tmpfs kern vm

Mark Johnston <markj@FreeBSD.org>
Sat Dec 28 19:04:03 UTC 2019


Author: markj
Date: Sat Dec 28 19:04:00 2019
New Revision: 356157
URL: https://svnweb.freebsd.org/changeset/base/356157

Log:
  Remove page locking for queue operations.
  
  With the previous reviews, the page lock is no longer required to
  perform queue operations on a page.  It is also no longer needed in
  the page queue scans.  This change effectively eliminates the remaining
  uses of the page lock, as well as the false sharing caused by multiple
  pages sharing a page lock.
  
  Reviewed by:	jeff
  Tested by:	pho
  Sponsored by:	Netflix, Intel
  Differential Revision:	https://reviews.freebsd.org/D22885

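For readers unfamiliar with the old convention, the sketch below (not part of
the committed diff) illustrates the calling pattern this change removes: queue
operations such as vm_page_activate() and vm_page_deactivate() no longer need
to be bracketed by vm_page_lock()/vm_page_unlock(); holding the page busy (or
a wiring) is now sufficient.  The helper name read_done_page() is hypothetical
and exists only for illustration; the vm_page_*() calls and VPB_BIT_WAITERS
are taken from the diff below.

/*
 * Illustrative sketch only: the locking pattern removed by r356157.
 * The function name is hypothetical; the page-handling calls mirror
 * dmu_read_pages() and vm_page_readahead_finish() in this commit.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_page.h>

static void
read_done_page(vm_page_t m)
{
	vm_page_valid(m);
#ifdef BEFORE_R356157
	/* Old pattern: queue operations required the page lock. */
	vm_page_lock(m);
	if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
		vm_page_activate(m);
	else
		vm_page_deactivate(m);
	vm_page_unlock(m);
#else
	/* New pattern: the shared-busy state alone serializes the update. */
	if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
		vm_page_activate(m);
	else
		vm_page_deactivate(m);
#endif
	vm_page_sunbusy(m);
}
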
Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  head/sys/dev/md/md.c
  head/sys/fs/tmpfs/tmpfs_subr.c
  head/sys/kern/uipc_shm.c
  head/sys/vm/swap_pager.c
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_object.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h
  head/sys/vm/vm_pageout.c
  head/sys/vm/vm_swapout.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -1761,12 +1761,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_
 		bcopy((char *)db->db_data + bufoff, va, PAGESIZE);
 		zfs_unmap_page(sf);
 		vm_page_valid(m);
-		vm_page_lock(m);
 		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
 			vm_page_activate(m);
 		else
 			vm_page_deactivate(m);
-		vm_page_unlock(m);
 		vm_page_sunbusy(m);
 	}
 	*rbehind = i;
@@ -1884,12 +1882,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_
 		}
 		zfs_unmap_page(sf);
 		vm_page_valid(m);
-		vm_page_lock(m);
 		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
 			vm_page_activate(m);
 		else
 			vm_page_deactivate(m);
-		vm_page_unlock(m);
 		vm_page_sunbusy(m);
 	}
 	*rahead = i;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -545,9 +545,7 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
 			zfs_vmobject_wlock(obj);
 			if (error == 0) {
 				vm_page_valid(pp);
-				vm_page_lock(pp);
 				vm_page_activate(pp);
-				vm_page_unlock(pp);
 			}
 			vm_page_sunbusy(pp);
 			if (error != 0 && !vm_page_wired(pp) &&

Modified: head/sys/dev/md/md.c
==============================================================================
--- head/sys/dev/md/md.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/dev/md/md.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -1145,12 +1145,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 		}
 		if (m != NULL) {
 			vm_page_xunbusy(m);
-			vm_page_lock(m);
-			if (vm_page_active(m))
-				vm_page_reference(m);
-			else
-				vm_page_activate(m);
-			vm_page_unlock(m);
+			vm_page_reference(m);
 		}
 
 		/* Actions on further pages start at offset 0 */

Modified: head/sys/fs/tmpfs/tmpfs_subr.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_subr.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/fs/tmpfs/tmpfs_subr.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -1490,9 +1490,7 @@ retry:
 					 * current operation is not regarded
 					 * as an access.
 					 */
-					vm_page_lock(m);
 					vm_page_launder(m);
-					vm_page_unlock(m);
 				} else {
 					vm_page_free(m);
 					if (ignerr)

Modified: head/sys/kern/uipc_shm.c
==============================================================================
--- head/sys/kern/uipc_shm.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/kern/uipc_shm.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -209,9 +209,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struc
 	error = uiomove_fromphys(&m, offset, tlen, uio);
 	if (uio->uio_rw == UIO_WRITE && error == 0)
 		vm_page_set_dirty(m);
-	vm_page_lock(m);
 	vm_page_activate(m);
-	vm_page_unlock(m);
 	vm_page_sunbusy(m);
 
 	return (error);

Modified: head/sys/vm/swap_pager.c
==============================================================================
--- head/sys/vm/swap_pager.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/vm/swap_pager.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -1630,10 +1630,9 @@ swp_pager_async_iodone(struct buf *bp)
 				 * then finish the I/O.
 				 */
 				MPASS(m->dirty == VM_PAGE_BITS_ALL);
+
 				/* PQ_UNSWAPPABLE? */
-				vm_page_lock(m);
 				vm_page_activate(m);
-				vm_page_unlock(m);
 				vm_page_sunbusy(m);
 			}
 		} else if (bp->b_iocmd == BIO_READ) {
@@ -1668,9 +1667,7 @@ swp_pager_async_iodone(struct buf *bp)
 			    ("swp_pager_async_iodone: page %p is not write"
 			    " protected", m));
 			vm_page_undirty(m);
-			vm_page_lock(m);
 			vm_page_deactivate_noreuse(m);
-			vm_page_unlock(m);
 			vm_page_sunbusy(m);
 		}
 	}
@@ -1718,10 +1715,8 @@ swp_pager_force_dirty(vm_page_t m)
 
 	vm_page_dirty(m);
 #ifdef INVARIANTS
-	vm_page_lock(m);
 	if (!vm_page_wired(m) && m->a.queue == PQ_NONE)
 		panic("page %p is neither wired nor queued", m);
-	vm_page_unlock(m);
 #endif
 	vm_page_xunbusy(m);
 	swap_pager_unswapped(m);
@@ -1732,9 +1727,7 @@ swp_pager_force_launder(vm_page_t m)
 {
 
 	vm_page_dirty(m);
-	vm_page_lock(m);
 	vm_page_launder(m);
-	vm_page_unlock(m);
 	vm_page_xunbusy(m);
 	swap_pager_unswapped(m);
 }

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/vm/vm_fault.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -162,9 +162,7 @@ fault_page_release(vm_page_t *mp)
 		 * this page.  Deactivating it leaves it available for
 		 * pageout while optimizing fault restarts.
 		 */
-		vm_page_lock(m);
 		vm_page_deactivate(m);
-		vm_page_unlock(m);
 		vm_page_xunbusy(m);
 		*mp = NULL;
 	}
@@ -395,9 +393,7 @@ vm_fault_populate_cleanup(vm_object_t object, vm_pinde
 	for (pidx = first, m = vm_page_lookup(object, pidx);
 	    pidx <= last; pidx++, m = vm_page_next(m)) {
 		vm_fault_populate_check_page(m);
-		vm_page_lock(m);
 		vm_page_deactivate(m);
-		vm_page_unlock(m);
 		vm_page_xunbusy(m);
 	}
 }
@@ -406,7 +402,6 @@ static int
 vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type,
     int fault_flags, boolean_t wired, vm_page_t *m_hold)
 {
-	struct mtx *m_mtx;
 	vm_offset_t vaddr;
 	vm_page_t m;
 	vm_pindex_t map_first, map_last, pager_first, pager_last, pidx;
@@ -518,22 +513,17 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t pro
 		MPASS(rv == KERN_SUCCESS);
 #endif
 		VM_OBJECT_WLOCK(fs->first_object);
-		m_mtx = NULL;
 		for (i = 0; i < npages; i++) {
-			if ((fault_flags & VM_FAULT_WIRE) != 0) {
+			if ((fault_flags & VM_FAULT_WIRE) != 0)
 				vm_page_wire(&m[i]);
-			} else {
-				vm_page_change_lock(&m[i], &m_mtx);
+			else
 				vm_page_activate(&m[i]);
-			}
 			if (m_hold != NULL && m[i].pindex == fs->first_pindex) {
 				*m_hold = &m[i];
 				vm_page_wire(&m[i]);
 			}
 			vm_page_xunbusy(&m[i]);
 		}
-		if (m_mtx != NULL)
-			mtx_unlock(m_mtx);
 	}
 	curthread->td_ru.ru_majflt++;
 	return (KERN_SUCCESS);
@@ -1393,13 +1383,10 @@ readrest:
 	 * If the page is not wired down, then put it where the pageout daemon
 	 * can find it.
 	 */
-	if ((fault_flags & VM_FAULT_WIRE) != 0) {
+	if ((fault_flags & VM_FAULT_WIRE) != 0)
 		vm_page_wire(fs.m);
-	} else {
-		vm_page_lock(fs.m);
+	else
 		vm_page_activate(fs.m);
-		vm_page_unlock(fs.m);
-	}
 	if (m_hold != NULL) {
 		*m_hold = fs.m;
 		vm_page_wire(fs.m);
@@ -1499,12 +1486,13 @@ vm_fault_dontneed(const struct faultstate *fs, vm_offs
 				 * Typically, at this point, prefetched pages
 				 * are still in the inactive queue.  Only
 				 * pages that triggered page faults are in the
-				 * active queue.
+				 * active queue.  The test for whether the page
+				 * is in the inactive queue is racy; in the
+				 * worst case we will requeue the page
+				 * unnecessarily.
 				 */
-				vm_page_lock(m);
 				if (!vm_page_inactive(m))
 					vm_page_deactivate(m);
-				vm_page_unlock(m);
 			}
 		}
 	}
@@ -1879,9 +1867,7 @@ again:
 				    ("dst_m %p is not wired", dst_m));
 			}
 		} else {
-			vm_page_lock(dst_m);
 			vm_page_activate(dst_m);
-			vm_page_unlock(dst_m);
 		}
 		vm_page_xunbusy(dst_m);
 	}

Modified: head/sys/vm/vm_object.c
==============================================================================
--- head/sys/vm/vm_object.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/vm/vm_object.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -1293,9 +1293,7 @@ next_page:
 			vm_page_busy_sleep(tm, "madvpo", false);
   			goto relookup;
 		}
-		vm_page_lock(tm);
 		vm_page_advise(tm, advice);
-		vm_page_unlock(tm);
 		vm_page_xunbusy(tm);
 		vm_object_madvise_freespace(tobject, advice, tm->pindex, 1);
 next_pindex:
@@ -2059,7 +2057,6 @@ wired:
 void
 vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
 {
-	struct mtx *mtx;
 	vm_page_t p, next;
 
 	VM_OBJECT_ASSERT_LOCKED(object);
@@ -2073,14 +2070,10 @@ vm_object_page_noreuse(vm_object_t object, vm_pindex_t
 	 * Here, the variable "p" is either (1) the page with the least pindex
 	 * greater than or equal to the parameter "start" or (2) NULL. 
 	 */
-	mtx = NULL;
 	for (; p != NULL && (p->pindex < end || end == 0); p = next) {
 		next = TAILQ_NEXT(p, listq);
-		vm_page_change_lock(p, &mtx);
 		vm_page_deactivate_noreuse(p);
 	}
-	if (mtx != NULL)
-		mtx_unlock(mtx);
 }
 
 /*

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/vm/vm_page.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -1371,12 +1371,10 @@ vm_page_readahead_finish(vm_page_t m)
 	 * have shown that deactivating the page is usually the best choice,
 	 * unless the page is wanted by another thread.
 	 */
-	vm_page_lock(m);
 	if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
 		vm_page_activate(m);
 	else
 		vm_page_deactivate(m);
-	vm_page_unlock(m);
 	vm_page_xunbusy_unchecked(m);
 }
 
@@ -3651,7 +3649,6 @@ static void
 vm_page_enqueue(vm_page_t m, uint8_t queue)
 {
 
-	vm_page_assert_locked(m);
 	KASSERT(m->a.queue == PQ_NONE &&
 	    (m->a.flags & PGA_QUEUE_STATE_MASK) == 0,
 	    ("%s: page %p is already enqueued", __func__, m));
@@ -4124,7 +4121,6 @@ void
 vm_page_unswappable(vm_page_t m)
 {
 
-	vm_page_assert_locked(m);
 	KASSERT(!vm_page_wired(m) && (m->oflags & VPO_UNMANAGED) == 0,
 	    ("page %p already unswappable", m));
 
@@ -4222,9 +4218,7 @@ vm_page_release_locked(vm_page_t m, int flags)
 		    m->dirty == 0 && vm_page_tryxbusy(m)) {
 			vm_page_free(m);
 		} else {
-			vm_page_lock(m);
 			vm_page_release_toq(m, PQ_INACTIVE, flags != 0);
-			vm_page_unlock(m);
 		}
 	}
 }
@@ -4293,7 +4287,6 @@ void
 vm_page_advise(vm_page_t m, int advice)
 {
 
-	vm_page_assert_locked(m);
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (advice == MADV_FREE)
 		/*
@@ -4309,14 +4302,14 @@ vm_page_advise(vm_page_t m, int advice)
 		return;
 	}
 
+	if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m))
+		vm_page_dirty(m);
+
 	/*
 	 * Clear any references to the page.  Otherwise, the page daemon will
 	 * immediately reactivate the page.
 	 */
 	vm_page_aflag_clear(m, PGA_REFERENCED);
-
-	if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m))
-		vm_page_dirty(m);
 
 	/*
 	 * Place clean pages near the head of the inactive queue rather than

Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/vm/vm_page.h	Sat Dec 28 19:04:00 2019	(r356157)
@@ -808,12 +808,6 @@ vm_page_aflag_clear(vm_page_t m, uint16_t bits)
 	uint32_t *addr, val;
 
 	/*
-	 * The PGA_REFERENCED flag can only be cleared if the page is locked.
-	 */
-	if ((bits & PGA_REFERENCED) != 0)
-		vm_page_assert_locked(m);
-
-	/*
 	 * Access the whole 32-bit word containing the aflags field with an
 	 * atomic update.  Parallel non-atomic updates to the other fields
 	 * within this word are handled properly by the atomic update.
@@ -920,8 +914,6 @@ _vm_page_queue(vm_page_astate_t as)
 static inline uint8_t
 vm_page_queue(vm_page_t m)
 {
-
-	vm_page_assert_locked(m);
 
 	return (_vm_page_queue(vm_page_astate_load(m)));
 }

Modified: head/sys/vm/vm_pageout.c
==============================================================================
--- head/sys/vm/vm_pageout.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/vm/vm_pageout.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -396,14 +396,11 @@ more:
 			vm_page_xunbusy(p);
 			break;
 		}
-		vm_page_lock(p);
 		if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
-			vm_page_unlock(p);
 			vm_page_xunbusy(p);
 			ib = 0;
 			break;
 		}
-		vm_page_unlock(p);
 		mc[--page_base] = pb = p;
 		++pageout_count;
 		++ib;
@@ -429,13 +426,10 @@ more:
 			vm_page_xunbusy(p);
 			break;
 		}
-		vm_page_lock(p);
 		if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
-			vm_page_unlock(p);
 			vm_page_xunbusy(p);
 			break;
 		}
-		vm_page_unlock(p);
 		mc[page_base + pageout_count] = ps = p;
 		++pageout_count;
 		++is;
@@ -511,10 +505,12 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, 
 		    ("vm_pageout_flush: page %p is not write protected", mt));
 		switch (pageout_status[i]) {
 		case VM_PAGER_OK:
-			vm_page_lock(mt);
+			/*
+			 * The page may have moved since laundering started, in
+			 * which case it should be left alone.
+			 */
 			if (vm_page_in_laundry(mt))
 				vm_page_deactivate_noreuse(mt);
-			vm_page_unlock(mt);
 			/* FALLTHROUGH */
 		case VM_PAGER_PEND:
 			numpagedout++;
@@ -527,10 +523,8 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, 
 			 * the page daemon.
 			 */
 			vm_page_undirty(mt);
-			vm_page_lock(mt);
 			if (vm_page_in_laundry(mt))
 				vm_page_deactivate_noreuse(mt);
-			vm_page_unlock(mt);
 			break;
 		case VM_PAGER_ERROR:
 		case VM_PAGER_FAIL:
@@ -546,14 +540,12 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, 
 			 * clog the laundry and inactive queues.  (We will try
 			 * paging it out again later.)
 			 */
-			vm_page_lock(mt);
 			if (object->type == OBJT_SWAP &&
 			    pageout_status[i] == VM_PAGER_FAIL) {
 				vm_page_unswappable(mt);
 				numpagedout++;
 			} else
 				vm_page_activate(mt);
-			vm_page_unlock(mt);
 			if (eio != NULL && i >= mreq && i - mreq < runlen)
 				*eio = TRUE;
 			break;
@@ -611,7 +603,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
 	vm_pindex_t pindex;
 	int error, lockmode;
 
-	vm_page_assert_locked(m);
 	object = m->object;
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	error = 0;
@@ -631,7 +622,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
 	 * of time.
 	 */
 	if (object->type == OBJT_VNODE) {
-		vm_page_unlock(m);
 		vm_page_xunbusy(m);
 		vp = object->handle;
 		if (vp->v_type == VREG &&
@@ -662,11 +652,9 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
 			error = ENOENT;
 			goto unlock_all;
 		}
-		vm_page_lock(m);
 
 		/*
-		 * While the object and page were unlocked, the page
-		 * may have been:
+		 * While the object was unlocked, the page may have been:
 		 * (1) moved to a different queue,
 		 * (2) reallocated to a different object,
 		 * (3) reallocated to a different offset, or
@@ -674,17 +662,15 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
 		 */
 		if (!vm_page_in_laundry(m) || m->object != object ||
 		    m->pindex != pindex || m->dirty == 0) {
-			vm_page_unlock(m);
 			error = ENXIO;
 			goto unlock_all;
 		}
 
 		/*
-		 * The page may have been busied while the object and page
-		 * locks were released.
+		 * The page may have been busied while the object lock was
+		 * released.
 		 */
 		if (vm_page_tryxbusy(m) == 0) {
-			vm_page_unlock(m);
 			error = EBUSY;
 			goto unlock_all;
 		}
@@ -695,11 +681,9 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
 	 */
 	if (!vm_page_try_remove_write(m)) {
 		vm_page_xunbusy(m);
-		vm_page_unlock(m);
 		error = EBUSY;
 		goto unlock_all;
 	}
-	vm_page_unlock(m);
 
 	/*
 	 * If a page is dirty, then it is either being washed
@@ -714,7 +698,6 @@ unlock_all:
 	VM_OBJECT_WUNLOCK(object);
 
 unlock_mp:
-	vm_page_lock_assert(m, MA_NOTOWNED);
 	if (mp != NULL) {
 		if (vp != NULL)
 			vput(vp);
@@ -735,7 +718,6 @@ vm_pageout_launder(struct vm_domain *vmd, int launder,
 {
 	struct scan_state ss;
 	struct vm_pagequeue *pq;
-	struct mtx *mtx;
 	vm_object_t object;
 	vm_page_t m, marker;
 	vm_page_astate_t new, old;
@@ -743,7 +725,6 @@ vm_pageout_launder(struct vm_domain *vmd, int launder,
 	int vnodes_skipped;
 	bool pageout_ok;
 
-	mtx = NULL;
 	object = NULL;
 	starting_target = launder;
 	vnodes_skipped = 0;
@@ -771,9 +752,6 @@ scan:
 		if (__predict_false((m->flags & PG_MARKER) != 0))
 			continue;
 
-		vm_page_change_lock(m, &mtx);
-
-recheck:
 		/*
 		 * Don't touch a page that was removed from the queue after the
 		 * page queue lock was released.  Otherwise, ensure that any
@@ -783,46 +761,39 @@ recheck:
 		if (vm_pageout_defer(m, queue, true))
 			continue;
 
-		if (object != m->object) {
+		/*
+		 * Lock the page's object.
+		 */
+		if (object == NULL || object != m->object) {
 			if (object != NULL)
 				VM_OBJECT_WUNLOCK(object);
-
-			/*
-			 * A page's object pointer may be set to NULL before
-			 * the object lock is acquired.
-			 */
 			object = (vm_object_t)atomic_load_ptr(&m->object);
-			if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) {
-				mtx_unlock(mtx);
-				/* Depends on type-stability. */
-				VM_OBJECT_WLOCK(object);
-				mtx_lock(mtx);
-				goto recheck;
+			if (__predict_false(object == NULL))
+				/* The page is being freed by another thread. */
+				continue;
+
+			/* Depends on type-stability. */
+			VM_OBJECT_WLOCK(object);
+			if (__predict_false(m->object != object)) {
+				VM_OBJECT_WUNLOCK(object);
+				object = NULL;
+				continue;
 			}
 		}
-		if (__predict_false(m->object == NULL))
-			/*
-			 * The page has been removed from its object.
-			 */
-			continue;
-		KASSERT(m->object == object, ("page %p does not belong to %p",
-		    m, object));
 
 		if (vm_page_tryxbusy(m) == 0)
 			continue;
 
 		/*
 		 * Check for wirings now that we hold the object lock and have
-		 * verified that the page is unbusied.  If the page is mapped,
-		 * it may still be wired by pmap lookups.  The call to
+		 * exclusively busied the page.  If the page is mapped, it may
+		 * still be wired by pmap lookups.  The call to
 		 * vm_page_try_remove_all() below atomically checks for such
 		 * wirings and removes mappings.  If the page is unmapped, the
-		 * wire count is guaranteed not to increase.
+		 * wire count is guaranteed not to increase after this check.
 		 */
-		if (__predict_false(vm_page_wired(m))) {
-			vm_page_dequeue_deferred(m);
+		if (__predict_false(vm_page_wired(m)))
 			goto skip_page;
-		}
 
 		/*
 		 * Invalid pages can be easily freed.  They cannot be
@@ -898,10 +869,8 @@ recheck:
 		 */
 		if (object->ref_count != 0) {
 			vm_page_test_dirty(m);
-			if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
-				vm_page_dequeue_deferred(m);
+			if (m->dirty == 0 && !vm_page_try_remove_all(m))
 				goto skip_page;
-			}
 		}
 
 		/*
@@ -912,6 +881,13 @@ recheck:
 		 */
 		if (m->dirty == 0) {
 free_page:
+			/*
+			 * Now we are guaranteed that no other threads are
+			 * manipulating the page, check for a last-second
+			 * reference.
+			 */
+			if (vm_pageout_defer(m, queue, true))
+				goto skip_page;
 			vm_page_free(m);
 			VM_CNT_INC(v_dfree);
 		} else if ((object->flags & OBJ_DEAD) == 0) {
@@ -948,17 +924,12 @@ free_page:
 				pageout_lock_miss++;
 				vnodes_skipped++;
 			}
-			mtx = NULL;
 			object = NULL;
 		} else {
 skip_page:
 			vm_page_xunbusy(m);
 		}
 	}
-	if (mtx != NULL) {
-		mtx_unlock(mtx);
-		mtx = NULL;
-	}
 	if (object != NULL) {
 		VM_OBJECT_WUNLOCK(object);
 		object = NULL;
@@ -1195,7 +1166,6 @@ static void
 vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage)
 {
 	struct scan_state ss;
-	struct mtx *mtx;
 	vm_object_t object;
 	vm_page_t m, marker;
 	struct vm_pagequeue *pq;
@@ -1236,7 +1206,6 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page
 	 * and scanning resumes.
 	 */
 	max_scan = page_shortage > 0 ? pq->pq_cnt : min_scan;
-	mtx = NULL;
 act_scan:
 	vm_pageout_init_scan(&ss, pq, marker, &vmd->vmd_clock[0], max_scan);
 	while ((m = vm_pageout_next(&ss, false)) != NULL) {
@@ -1255,8 +1224,6 @@ act_scan:
 		if (__predict_false((m->flags & PG_MARKER) != 0))
 			continue;
 
-		vm_page_change_lock(m, &mtx);
-
 		/*
 		 * Don't touch a page that was removed from the queue after the
 		 * page queue lock was released.  Otherwise, ensure that any
@@ -1390,10 +1357,6 @@ act_scan:
 
 		page_shortage -= ps_delta;
 	}
-	if (mtx != NULL) {
-		mtx_unlock(mtx);
-		mtx = NULL;
-	}
 	vm_pagequeue_lock(pq);
 	TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q);
 	TAILQ_INSERT_AFTER(&pq->pq_pl, marker, &vmd->vmd_clock[0], plinks.q);
@@ -1459,7 +1422,6 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int sh
 {
 	struct scan_state ss;
 	struct vm_batchqueue rq;
-	struct mtx *mtx;
 	vm_page_t m, marker;
 	struct vm_pagequeue *pq;
 	vm_object_t object;
@@ -1484,7 +1446,6 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int sh
 	deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
 	starting_page_shortage = page_shortage = shortage + deficit;
 
-	mtx = NULL;
 	object = NULL;
 	vm_batchqueue_init(&rq);
 
@@ -1502,9 +1463,6 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int sh
 		KASSERT((m->flags & PG_MARKER) == 0,
 		    ("marker page %p was dequeued", m));
 
-		vm_page_change_lock(m, &mtx);
-
-recheck:
 		/*
 		 * Don't touch a page that was removed from the queue after the
 		 * page queue lock was released.  Otherwise, ensure that any
@@ -1514,30 +1472,25 @@ recheck:
 		if (vm_pageout_defer(m, PQ_INACTIVE, false))
 			continue;
 
-		if (object != m->object) {
+		/*
+		 * Lock the page's object.
+		 */
+		if (object == NULL || object != m->object) {
 			if (object != NULL)
 				VM_OBJECT_WUNLOCK(object);
-
-			/*
-			 * A page's object pointer may be set to NULL before
-			 * the object lock is acquired.
-			 */
 			object = (vm_object_t)atomic_load_ptr(&m->object);
-			if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) {
-				mtx_unlock(mtx);
-				/* Depends on type-stability. */
-				VM_OBJECT_WLOCK(object);
-				mtx_lock(mtx);
-				goto recheck;
+			if (__predict_false(object == NULL))
+				/* The page is being freed by another thread. */
+				continue;
+
+			/* Depends on type-stability. */
+			VM_OBJECT_WLOCK(object);
+			if (__predict_false(m->object != object)) {
+				VM_OBJECT_WUNLOCK(object);
+				object = NULL;
+				goto reinsert;
 			}
 		}
-		if (__predict_false(m->object == NULL))
-			/*
-			 * The page has been removed from its object.
-			 */
-			continue;
-		KASSERT(m->object == object, ("page %p does not belong to %p",
-		    m, object));
 
 		if (vm_page_tryxbusy(m) == 0) {
 			/*
@@ -1557,17 +1510,15 @@ recheck:
 			vm_pager_page_unswapped(m);
 
 		/*
-		 * Re-check for wirings now that we hold the object lock and
-		 * have verified that the page is unbusied.  If the page is
-		 * mapped, it may still be wired by pmap lookups.  The call to
+		 * Check for wirings now that we hold the object lock and have
+		 * exclusively busied the page.  If the page is mapped, it may
+		 * still be wired by pmap lookups.  The call to
 		 * vm_page_try_remove_all() below atomically checks for such
 		 * wirings and removes mappings.  If the page is unmapped, the
-		 * wire count is guaranteed not to increase.
+		 * wire count is guaranteed not to increase after this check.
 		 */
-		if (__predict_false(vm_page_wired(m))) {
-			vm_page_dequeue_deferred(m);
+		if (__predict_false(vm_page_wired(m)))
 			goto skip_page;
-		}
 
 		/*
 		 * Invalid pages can be easily freed. They cannot be
@@ -1635,10 +1586,8 @@ recheck:
 		 */
 		if (object->ref_count != 0) {
 			vm_page_test_dirty(m);
-			if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
-				vm_page_dequeue_deferred(m);
+			if (m->dirty == 0 && !vm_page_try_remove_all(m))
 				goto skip_page;
-			}
 		}
 
 		/*
@@ -1651,13 +1600,19 @@ recheck:
 		if (m->dirty == 0) {
 free_page:
 			/*
-			 * Because we dequeued the page and have already
-			 * checked for concurrent dequeue and enqueue
-			 * requests, we can safely disassociate the page
-			 * from the inactive queue.
+			 * Now we are guaranteed that no other threads are
+			 * manipulating the page, check for a last-second
+			 * reference that would save it from doom.
 			 */
-			KASSERT((m->a.flags & PGA_QUEUE_STATE_MASK) == 0,
-			    ("page %p has queue state", m));
+			if (vm_pageout_defer(m, PQ_INACTIVE, false))
+				goto skip_page;
+
+			/*
+			 * Because we dequeued the page and have already checked
+			 * for pending dequeue and enqueue requests, we can
+			 * safely disassociate the page from the inactive queue
+			 * without holding the queue lock.
+			 */
 			m->a.queue = PQ_NONE;
 			vm_page_free(m);
 			page_shortage--;
@@ -1671,8 +1626,6 @@ skip_page:
 reinsert:
 		vm_pageout_reinsert_inactive(&ss, &rq, m);
 	}
-	if (mtx != NULL)
-		mtx_unlock(mtx);
 	if (object != NULL)
 		VM_OBJECT_WUNLOCK(object);
 	vm_pageout_reinsert_inactive(&ss, &rq, NULL);

Modified: head/sys/vm/vm_swapout.c
==============================================================================
--- head/sys/vm/vm_swapout.c	Sat Dec 28 19:03:46 2019	(r356156)
+++ head/sys/vm/vm_swapout.c	Sat Dec 28 19:04:00 2019	(r356157)
@@ -186,12 +186,10 @@ vm_swapout_object_deactivate_page(pmap_t pmap, vm_page
 		return;
 	}
 	if (!pmap_is_referenced(m)) {
-		vm_page_lock(m);
 		if (!vm_page_active(m))
 			(void)vm_page_try_remove_all(m);
 		else if (unmap && vm_page_try_remove_all(m))
 			vm_page_deactivate(m);
-		vm_page_unlock(m);
 	}
 	vm_page_xunbusy(m);
 }

