git: 1fee99800a79 - main - vm_page: Retire its listq field
Date: Sun, 08 Jun 2025 18:36:48 UTC
The branch main has been updated by alc:
URL: https://cgit.FreeBSD.org/src/commit/?id=1fee99800a79887b9037749a34d09f2acab082c0
commit 1fee99800a79887b9037749a34d09f2acab082c0
Author: Alan Cox <alc@FreeBSD.org>
AuthorDate: 2025-05-27 08:27:16 +0000
Commit: Alan Cox <alc@FreeBSD.org>
CommitDate: 2025-06-08 18:35:56 +0000
vm_page: Retire its listq field
Over the life cycle of a vm_page, its listq field has been used for two
distinct purposes. First, it linked together all of the pages allocated
to a vm_object. Recently, c8d56817b80f ("vm_object: drop memq field")
completed the elimination of this use case, replacing iteration over
the listq with pctrie iterators. Second, it linked together
power-of-two-sized chunks of free pages within vm_phys. This change
eliminates that use case. In essence, this change reverts vm_phys to
using the plinks.q field, as it did before 5cd29d0f3cda ("Improve VM
page queue scalability."), but with a twist to maintain scalability:
just before vm_phys uses the plinks.q field, it ensures that any lazy
dequeue from a paging queue, e.g., PQ_ACTIVE, has completed. Typically,
the dequeue has already completed, so vm_page_dequeue() is rarely
called by vm_freelist_add(). This is because vm_phys only needs to use
the plinks.q field within the first page of any power-of-two-sized
chunk, so the rest of the pages can still have pending dequeues until
the chunk is split.
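For context, a simplified sketch of the linkage fields involved, with
field names taken from sys/vm/vm_page.h and the diff below; unrelated
members and union arms are elided, so this is illustrative rather than
the complete structure definition:

    /*
     * Sketch only; in the kernel, TAILQ_ENTRY comes from <sys/queue.h>
     * and the vm types from the vm headers.
     */
    struct vm_page {
            union {
                    TAILQ_ENTRY(vm_page) q; /* paging queue or free list (Q) */
                    /* ... other arms: private slists, memguard, uma ... */
            } plinks;
            /* TAILQ_ENTRY(vm_page) listq;  <- retired by this commit */
            vm_object_t object;     /* which object am I in (O) */
            vm_pindex_t pindex;     /* offset into object (O,P) */
            /* ... */
    };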
This change saves a non-trivial amount of memory, since we have an
instance of struct vm_page for every dynamically allocatable physical
page.
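To put a rough number on that: a TAILQ_ENTRY is two pointers, i.e., 16
bytes on a 64-bit architecture. Assuming the field's removal shrinks
struct vm_page by the full 16 bytes (alignment padding could absorb
part of it), a machine with 64 GiB of RAM and 4 KiB pages has
64 GiB / 4 KiB = 16,777,216 vm_page instances, so the change saves on
the order of 16,777,216 x 16 bytes = 256 MiB of kernel memory.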
Bump __FreeBSD_version, since third-party modules that use the inline
accessors in vm_page.h may need to be recompiled.
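For module maintainers, the conventional way to cope with such a bump
is a compile-time guard on __FreeBSD_version; a minimal sketch (the
module code here is hypothetical, not part of this commit):

    #include <sys/param.h>

    #if __FreeBSD_version >= 1500046
    /* Built against the new struct vm_page layout (no listq field). */
    #else
    /* Older layout; the listq field is still present. */
    #endif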
Reviewed by: dougm, kib, markj
Differential Revision: https://reviews.freebsd.org/D50515
---
sys/sys/param.h | 2 +-
sys/vm/vm_page.c | 22 +++++++++++++++-------
sys/vm/vm_page.h | 1 -
sys/vm/vm_phys.c | 20 +++++++++++++++-----
4 files changed, 31 insertions(+), 14 deletions(-)
diff --git a/sys/sys/param.h b/sys/sys/param.h
index da2089918323..e167c96cf9f8 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -73,7 +73,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1500045
+#define __FreeBSD_version 1500046
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 79eaf8563208..128a1ef7ca54 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -341,7 +341,7 @@ vm_page_blacklist_add(vm_paddr_t pa, bool verbose)
vm_domain_free_unlock(vmd);
if (found) {
vm_domain_freecnt_inc(vmd, -1);
- TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
+ TAILQ_INSERT_TAIL(&blacklist_head, m, plinks.q);
if (verbose)
printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa);
}
@@ -411,7 +411,7 @@ sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
if (error != 0)
return (error);
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
- TAILQ_FOREACH(m, &blacklist_head, listq) {
+ TAILQ_FOREACH(m, &blacklist_head, plinks.q) {
sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",",
(uintmax_t)m->phys_addr);
first = 0;
@@ -2470,6 +2470,13 @@ again:
}
found:
+ /*
+ * If the page comes from the free page cache, then it might still
+ * have a pending deferred dequeue. Specifically, when the page is
+ * imported from a different pool by vm_phys_alloc_npages(), the
+ * second, third, etc. pages in a non-zero order set could have
+ * pending deferred dequeues.
+ */
vm_page_dequeue(m);
vm_page_alloc_check(m);
@@ -2536,17 +2543,18 @@ vm_page_alloc_nofree_domain(int domain, int req)
return (NULL);
}
m->ref_count = count - 1;
- TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+ TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, plinks.q);
VM_CNT_ADD(v_nofree_count, count);
}
m = TAILQ_FIRST(&vmd->vmd_nofreeq);
- TAILQ_REMOVE(&vmd->vmd_nofreeq, m, listq);
+ TAILQ_REMOVE(&vmd->vmd_nofreeq, m, plinks.q);
if (m->ref_count > 0) {
vm_page_t m_next;
m_next = &m[1];
+ vm_page_dequeue(m_next);
m_next->ref_count = m->ref_count - 1;
- TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, listq);
+ TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, plinks.q);
m->ref_count = 0;
}
vm_domain_free_unlock(vmd);
@@ -2566,7 +2574,7 @@ vm_page_free_nofree(struct vm_domain *vmd, vm_page_t m)
{
vm_domain_free_lock(vmd);
MPASS(m->ref_count == 0);
- TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+ TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, plinks.q);
vm_domain_free_unlock(vmd);
VM_CNT_ADD(v_nofree_count, 1);
}
@@ -3971,7 +3979,7 @@ vm_page_dequeue(vm_page_t m)
old = vm_page_astate_load(m);
do {
- if (old.queue == PQ_NONE) {
+ if (__predict_true(old.queue == PQ_NONE)) {
KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0,
("%s: page %p has unexpected queue state",
__func__, m));
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 4bcd8d9f8236..4f82a69ebe25 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -229,7 +229,6 @@ struct vm_page {
void *zone;
} uma;
} plinks;
- TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
vm_object_t object; /* which object am I in (O) */
vm_pindex_t pindex; /* offset into object (O,P) */
vm_paddr_t phys_addr; /* physical address of page (C) */
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 95bf6b61fe19..ba16ae551093 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -393,13 +393,23 @@ static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int pool,
int tail)
{
+ /*
+ * The paging queues and the free page lists utilize the same field,
+ * plinks.q, within the vm_page structure. When a physical page is
+ * freed, it is lazily removed from the paging queues to reduce the
+ * cost of removal through batching. Here, we must ensure that any
+ * deferred dequeue on the physical page has completed before using
+ * its plinks.q field.
+ */
+ if (__predict_false(vm_page_astate_load(m).queue != PQ_NONE))
+ vm_page_dequeue(m);
m->order = order;
m->pool = pool;
if (tail)
- TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
+ TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
else
- TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
+ TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
fl[order].lcnt++;
}
@@ -407,7 +417,7 @@ static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{
- TAILQ_REMOVE(&fl[order].pl, m, listq);
+ TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
fl[order].lcnt--;
m->order = VM_NFREEORDER;
}
@@ -1582,7 +1592,7 @@ vm_phys_find_freelist_contig(struct vm_freelist *fl, u_long npages,
* check if there are enough free blocks starting at a properly aligned
* block. Thus, no block is checked for free-ness more than twice.
*/
- TAILQ_FOREACH(m, &fl[max_order].pl, listq) {
+ TAILQ_FOREACH(m, &fl[max_order].pl, plinks.q) {
/*
* Skip m unless it is first in a sequence of free max page
* blocks >= low in its segment.
@@ -1655,7 +1665,7 @@ vm_phys_find_queues_contig(
for (oind = order; oind < VM_NFREEORDER; oind++) {
for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
fl = (*queues)[pind];
- TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
+ TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
/*
* Determine if the address range starting at pa
* is within the given range, satisfies the