svn commit: r311147 - in head/sys: sys vm

Mark Johnston markj at FreeBSD.org
Tue Jan 3 00:05:46 UTC 2017


Author: markj
Date: Tue Jan  3 00:05:44 2017
New Revision: 311147
URL: https://svnweb.freebsd.org/changeset/base/311147

Log:
  Add a page queue for holding dirty anonymous unswappable pages.
  
  On systems without a configured swap device, an attempt to launder pages
  from a swap object will always fail and result in the page being
  reactivated. This means that the page daemon will continuously scan pages
  that can never be evicted. With this change, anonymous pages are instead
  moved to PQ_UNSWAPPABLE after a failed laundering attempt when no swap
  devices are configured. PQ_UNSWAPPABLE is not scanned unless a swap device
  is configured, so unreferenced unswappable pages are excluded from the page
  daemon's workload.
  
  Reviewed by:	alc

Modified:
  head/sys/sys/eventhandler.h
  head/sys/vm/swap_pager.c
  head/sys/vm/swap_pager.h
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h
  head/sys/vm/vm_pageout.c

Modified: head/sys/sys/eventhandler.h
==============================================================================
--- head/sys/sys/eventhandler.h	Mon Jan  2 22:05:05 2017	(r311146)
+++ head/sys/sys/eventhandler.h	Tue Jan  3 00:05:44 2017	(r311147)
@@ -277,4 +277,11 @@ typedef void (*ada_probe_veto_fn)(void *
     struct ata_params *, int *);
 EVENTHANDLER_DECLARE(ada_probe_veto, ada_probe_veto_fn);
 
+/* Swap device events */
+struct swdevt;
+typedef void (*swapon_fn)(void *, struct swdevt *);
+typedef void (*swapoff_fn)(void *, struct swdevt *);
+EVENTHANDLER_DECLARE(swapon, swapon_fn);
+EVENTHANDLER_DECLARE(swapoff, swapoff_fn);
+
 #endif /* _SYS_EVENTHANDLER_H_ */

Modified: head/sys/vm/swap_pager.c
==============================================================================
--- head/sys/vm/swap_pager.c	Mon Jan  2 22:05:05 2017	(r311146)
+++ head/sys/vm/swap_pager.c	Tue Jan  3 00:05:44 2017	(r311147)
@@ -1632,6 +1632,13 @@ swap_pager_isswapped(vm_object_t object,
 	return (0);
 }
 
+int
+swap_pager_nswapdev(void)
+{
+
+	return (nswapdev);
+}
+
 /*
  * SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in
  *
@@ -1750,6 +1757,7 @@ restart:
 		pause("swpoff", hz / 20);
 		goto full_rescan;
 	}
+	EVENTHANDLER_INVOKE(swapoff, sp);
 }
 
 /************************************************************************
@@ -2209,6 +2217,7 @@ swaponsomething(struct vnode *vp, void *
 	swapon_check_swzone(swap_total / PAGE_SIZE);
 	swp_sizecheck();
 	mtx_unlock(&sw_dev_mtx);
+	EVENTHANDLER_INVOKE(swapon, sp);
 }
 
 /*

Modified: head/sys/vm/swap_pager.h
==============================================================================
--- head/sys/vm/swap_pager.h	Mon Jan  2 22:05:05 2017	(r311146)
+++ head/sys/vm/swap_pager.h	Tue Jan  3 00:05:44 2017	(r311147)
@@ -83,6 +83,7 @@ vm_pindex_t swap_pager_find_least(vm_obj
 void swap_pager_freespace(vm_object_t, vm_pindex_t, vm_size_t);
 void swap_pager_swap_init(void);
 int swap_pager_isswapped(vm_object_t, struct swdevt *);
+int swap_pager_nswapdev(void);
 int swap_pager_reserve(vm_object_t, vm_pindex_t, vm_size_t);
 void swap_pager_status(int *total, int *used);
 void swapoff_all(void);

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Mon Jan  2 22:05:05 2017	(r311146)
+++ head/sys/vm/vm_page.c	Tue Jan  3 00:05:44 2017	(r311147)
@@ -393,6 +393,11 @@ vm_page_domain_init(struct vm_domain *vm
 	    "vm laundry pagequeue";
 	*__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) =
 	    &vm_cnt.v_laundry_count;
+	*__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) =
+	    "vm unswappable pagequeue";
+	/* Unswappable dirty pages are counted as being in the laundry. */
+	*__DECONST(int **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_vcnt) =
+	    &vm_cnt.v_laundry_count;
 	vmd->vmd_page_count = 0;
 	vmd->vmd_free_count = 0;
 	vmd->vmd_segs = 0;
@@ -2578,7 +2583,7 @@ vm_page_enqueue(uint8_t queue, vm_page_t
 	KASSERT(queue < PQ_COUNT,
 	    ("vm_page_enqueue: invalid queue %u request for page %p",
 	    queue, m));
-	if (queue == PQ_LAUNDRY)
+	if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE)
 		pq = &vm_dom[0].vmd_pagequeues[queue];
 	else
 		pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
@@ -2947,6 +2952,23 @@ vm_page_launder(vm_page_t m)
 }
 
 /*
+ * vm_page_unswappable
+ *
+ *	Put a page in the PQ_UNSWAPPABLE holding queue.
+ */
+void
+vm_page_unswappable(vm_page_t m)
+{
+
+	vm_page_assert_locked(m);
+	KASSERT(m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0,
+	    ("page %p already unswappable", m));
+	if (m->queue != PQ_NONE)
+		vm_page_dequeue(m);
+	vm_page_enqueue(PQ_UNSWAPPABLE, m);
+}
+
+/*
  * vm_page_try_to_free()
  *
  *	Attempt to free the page.  If we cannot free it, we do nothing.
@@ -3534,13 +3556,14 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pag
 	db_printf("pq_free %d\n", vm_cnt.v_free_count);
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		db_printf(
-	    "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d\n",
+    "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n",
 		    dom,
 		    vm_dom[dom].vmd_page_count,
 		    vm_dom[dom].vmd_free_count,
 		    vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt,
 		    vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt,
-		    vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt);
+		    vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt,
+		    vm_dom[dom].vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt);
 	}
 }
 

Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h	Mon Jan  2 22:05:05 2017	(r311146)
+++ head/sys/vm/vm_page.h	Tue Jan  3 00:05:44 2017	(r311147)
@@ -207,7 +207,8 @@ struct vm_page {
 #define	PQ_INACTIVE	0
 #define	PQ_ACTIVE	1
 #define	PQ_LAUNDRY	2
-#define	PQ_COUNT	3
+#define	PQ_UNSWAPPABLE	3
+#define	PQ_COUNT	4
 
 TAILQ_HEAD(pglist, vm_page);
 SLIST_HEAD(spglist, vm_page);
@@ -347,7 +348,7 @@ extern struct mtx_padalign pa_lock[];
 #include <machine/atomic.h>
 
 /*
- * Each pageable resident page falls into one of four lists:
+ * Each pageable resident page falls into one of five lists:
  *
  *	free
  *		Available for allocation now.
@@ -360,6 +361,10 @@ extern struct mtx_padalign pa_lock[];
  *		This is the list of pages that should be
  *		paged out next.
  *
+ *	unswappable
+ *		Dirty anonymous pages that cannot be paged
+ *		out because no swap device is configured.
+ *
  *	active
  *		Pages that are "active", i.e., they have been
  *		recently referenced.
@@ -483,6 +488,7 @@ vm_offset_t vm_page_startup(vm_offset_t 
 void vm_page_sunbusy(vm_page_t m);
 int vm_page_trysbusy(vm_page_t m);
 void vm_page_unhold_pages(vm_page_t *ma, int count);
+void vm_page_unswappable(vm_page_t m);
 boolean_t vm_page_unwire(vm_page_t m, uint8_t queue);
 void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
 void vm_page_wire (vm_page_t);
@@ -707,7 +713,7 @@ static inline bool
 vm_page_in_laundry(vm_page_t m)
 {
 
-	return (m->queue == PQ_LAUNDRY);
+	return (m->queue == PQ_LAUNDRY || m->queue == PQ_UNSWAPPABLE);
 }
 
 #endif				/* _KERNEL */

Modified: head/sys/vm/vm_pageout.c
==============================================================================
--- head/sys/vm/vm_pageout.c	Mon Jan  2 22:05:05 2017	(r311146)
+++ head/sys/vm/vm_pageout.c	Tue Jan  3 00:05:44 2017	(r311147)
@@ -182,6 +182,7 @@ static int vm_pageout_update_period;
 static int disable_swap_pageouts;
 static int lowmem_period = 10;
 static time_t lowmem_uptime;
+static int swapdev_enabled;
 
 #if defined(NO_SWAPPING)
 static int vm_swap_enabled = 0;
@@ -568,12 +569,24 @@ vm_pageout_flush(vm_page_t *mc, int coun
 		case VM_PAGER_ERROR:
 		case VM_PAGER_FAIL:
 			/*
-			 * If the page couldn't be paged out, then reactivate
-			 * it so that it doesn't clog the laundry and inactive
-			 * queues.  (We will try paging it out again later).
+			 * If the page couldn't be paged out to swap because the
+			 * pager wasn't able to find space, place the page in
+			 * the PQ_UNSWAPPABLE holding queue.  This is an
+			 * optimization that prevents the page daemon from
+			 * wasting CPU cycles on pages that cannot be reclaimed
+			 * because no swap device is configured.
+			 *
+			 * Otherwise, reactivate the page so that it doesn't
+			 * clog the laundry and inactive queues.  (We will try
+			 * paging it out again later.)
 			 */
 			vm_page_lock(mt);
-			vm_page_activate(mt);
+			if (object->type == OBJT_SWAP &&
+			    pageout_status[i] == VM_PAGER_FAIL) {
+				vm_page_unswappable(mt);
+				numpagedout++;
+			} else
+				vm_page_activate(mt);
 			vm_page_unlock(mt);
 			if (eio != NULL && i >= mreq && i - mreq < runlen)
 				*eio = TRUE;
@@ -600,6 +613,21 @@ vm_pageout_flush(vm_page_t *mc, int coun
 	return (numpagedout);
 }
 
+static void
+vm_pageout_swapon(void *arg __unused, struct swdevt *sp __unused)
+{
+
+	atomic_store_rel_int(&swapdev_enabled, 1);
+}
+
+static void
+vm_pageout_swapoff(void *arg __unused, struct swdevt *sp __unused)
+{
+
+	if (swap_pager_nswapdev() == 1)
+		atomic_store_rel_int(&swapdev_enabled, 0);
+}
+
 #if !defined(NO_SWAPPING)
 /*
  *	vm_pageout_object_deactivate_pages
@@ -893,7 +921,7 @@ vm_pageout_launder(struct vm_domain *vmd
 	vnodes_skipped = 0;
 
 	/*
-	 * Scan the laundry queue for pages eligible to be laundered.  We stop
+	 * Scan the laundry queues for pages eligible to be laundered.  We stop
 	 * once the target number of dirty pages have been laundered, or once
 	 * we've reached the end of the queue.  A single iteration of this loop
 	 * may cause more than one page to be laundered because of clustering.
@@ -901,11 +929,18 @@ vm_pageout_launder(struct vm_domain *vmd
 	 * maxscan ensures that we don't re-examine requeued pages.  Any
 	 * additional pages written as part of a cluster are subtracted from
 	 * maxscan since they must be taken from the laundry queue.
+	 *
+	 * As an optimization, we avoid laundering from PQ_UNSWAPPABLE when no
+	 * swap devices are configured.
 	 */
-	pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
-	maxscan = pq->pq_cnt;
+	if (atomic_load_acq_int(&swapdev_enabled))
+		pq = &vmd->vmd_pagequeues[PQ_UNSWAPPABLE];
+	else
+		pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
 
+scan:
 	vm_pagequeue_lock(pq);
+	maxscan = pq->pq_cnt;
 	queue_locked = true;
 	for (m = TAILQ_FIRST(&pq->pq_pl);
 	    m != NULL && maxscan-- > 0 && launder > 0;
@@ -1070,6 +1105,11 @@ relock_queue:
 	}
 	vm_pagequeue_unlock(pq);
 
+	if (launder > 0 && pq == &vmd->vmd_pagequeues[PQ_UNSWAPPABLE]) {
+		pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+		goto scan;
+	}
+
 	/*
 	 * Wakeup the sync daemon if we skipped a vnode in a writeable object
 	 * and we didn't launder enough pages.
@@ -1132,6 +1172,14 @@ vm_pageout_laundry_worker(void *arg)
 	last_launder = 0;
 
 	/*
+	 * Calls to these handlers are serialized by the swap syscall lock.
+	 */
+	(void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, domain,
+	    EVENTHANDLER_PRI_ANY);
+	(void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, domain,
+	    EVENTHANDLER_PRI_ANY);
+
+	/*
 	 * The pageout laundry worker is never done, so loop forever.
 	 */
 	for (;;) {
@@ -1492,18 +1540,22 @@ drop_page:
 	/*
 	 * Wake up the laundry thread so that it can perform any needed
 	 * laundering.  If we didn't meet our target, we're in shortfall and
-	 * need to launder more aggressively.
+	 * need to launder more aggressively.  If PQ_LAUNDRY is empty and no
+	 * swap devices are configured, the laundry thread has no work to do, so
+	 * don't bother waking it up.
 	 */
 	if (vm_laundry_request == VM_LAUNDRY_IDLE &&
 	    starting_page_shortage > 0) {
 		pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY];
 		vm_pagequeue_lock(pq);
-		if (page_shortage > 0) {
-			vm_laundry_request = VM_LAUNDRY_SHORTFALL;
-			PCPU_INC(cnt.v_pdshortfalls);
-		} else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL)
-			vm_laundry_request = VM_LAUNDRY_BACKGROUND;
-		wakeup(&vm_laundry_request);
+		if (pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled)) {
+			if (page_shortage > 0) {
+				vm_laundry_request = VM_LAUNDRY_SHORTFALL;
+				PCPU_INC(cnt.v_pdshortfalls);
+			} else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL)
+				vm_laundry_request = VM_LAUNDRY_BACKGROUND;
+			wakeup(&vm_laundry_request);
+		}
 		vm_pagequeue_unlock(pq);
 	}
 


More information about the svn-src-head mailing list