git: 1fee99800a79 - main - vm_page: Retire its listq field
Date: Sun, 08 Jun 2025 18:36:48 UTC
The branch main has been updated by alc:
URL: https://cgit.FreeBSD.org/src/commit/?id=1fee99800a79887b9037749a34d09f2acab082c0
commit 1fee99800a79887b9037749a34d09f2acab082c0
Author: Alan Cox <alc@FreeBSD.org>
AuthorDate: 2025-05-27 08:27:16 +0000
Commit: Alan Cox <alc@FreeBSD.org>
CommitDate: 2025-06-08 18:35:56 +0000
vm_page: Retire its listq field
Over the life cycle of a vm_page, its listq field has been used for two
distinct purposes. First, it linked together all of the pages allocated
to a vm_object. Recently, c8d56817b80f ("vm_object: drop memq field")
completed the elimination of this use case, replacing iteration over
the listq with pctrie iterators. Second, it linked together
power-of-two-sized chunks of free pages within vm_phys. This change
eliminates that use case. In essence, this change reverts vm_phys to
using the plinks.q field, as it did before 5cd29d0f3cda ("Improve VM
page queue scalability."), but with a twist to maintain scalability:
just before vm_phys uses the plinks.q field, it ensures that any lazy
dequeue from a paging queue, e.g., PQ_ACTIVE, has completed. Typically,
the dequeue has already completed, so vm_page_dequeue() is rarely
called by vm_freelist_add(). This is because vm_phys only needs to use
the plinks.q field within the first page of any power-of-two-sized
chunk, so the rest of the pages can still have pending dequeues until
the chunk is split.
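For context, a simplified sketch of the linkage fields involved, with
field names taken from sys/vm/vm_page.h and the diff below; unrelated
members and union arms are elided, so this is illustrative rather than
the complete structure definition:

    /*
     * Sketch only; in the kernel, TAILQ_ENTRY comes from <sys/queue.h>
     * and the vm types from the vm headers.
     */
    struct vm_page {
            union {
                    TAILQ_ENTRY(vm_page) q; /* paging queue or free list (Q) */
                    /* ... other arms: private slists, memguard, uma ... */
            } plinks;
            /* TAILQ_ENTRY(vm_page) listq;  <- retired by this commit */
            vm_object_t object;     /* which object am I in (O) */
            vm_pindex_t pindex;     /* offset into object (O,P) */
            /* ... */
    };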
This change saves a non-trivial amount of memory, since we have an
instance of struct vm_page for every dynamically allocatable physical
page.
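To put a rough number on that: a TAILQ_ENTRY is two pointers, i.e., 16
bytes on a 64-bit architecture. Assuming the field's removal shrinks
struct vm_page by the full 16 bytes (alignment padding could absorb
part of it), a machine with 64 GiB of RAM and 4 KiB pages has
64 GiB / 4 KiB = 16,777,216 vm_page instances, so the change saves on
the order of 16,777,216 x 16 bytes = 256 MiB of kernel memory.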
Bump __FreeBSD_version, since third-party modules that use the inline
accessors in vm_page.h may need to be recompiled.
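For module maintainers, the conventional way to cope with such a bump
is a compile-time guard on __FreeBSD_version; a minimal sketch (the
module code here is hypothetical, not part of this commit):

    #include <sys/param.h>

    #if __FreeBSD_version >= 1500046
    /* Built against the new struct vm_page layout (no listq field). */
    #else
    /* Older layout; the listq field is still present. */
    #endif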
Reviewed by: dougm, kib, markj
Differential Revision: https://reviews.freebsd.org/D50515
---
sys/sys/param.h | 2 +-
sys/vm/vm_page.c | 22 +++++++++++++++-------
sys/vm/vm_page.h | 1 -
sys/vm/vm_phys.c | 20 +++++++++++++++-----
4 files changed, 31 insertions(+), 14 deletions(-)
diff --git a/sys/sys/param.h b/sys/sys/param.h
index da2089918323..e167c96cf9f8 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -73,7 +73,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1500045
+#define __FreeBSD_version 1500046
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 79eaf8563208..128a1ef7ca54 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -341,7 +341,7 @@ vm_page_blacklist_add(vm_paddr_t pa, bool verbose)
vm_domain_free_unlock(vmd);
if (found) {
vm_domain_freecnt_inc(vmd, -1);
- TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
+ TAILQ_INSERT_TAIL(&blacklist_head, m, plinks.q);
if (verbose)
printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa);
}
@@ -411,7 +411,7 @@ sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
if (error != 0)
return (error);
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
- TAILQ_FOREACH(m, &blacklist_head, listq) {
+ TAILQ_FOREACH(m, &blacklist_head, plinks.q) {
sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",",
(uintmax_t)m->phys_addr);
first = 0;
@@ -2470,6 +2470,13 @@ again:
}
found:
+ /*
+ * If the page comes from the free page cache, then it might still
+ * have a pending deferred dequeue. Specifically, when the page is
+ * imported from a different pool by vm_phys_alloc_npages(), the
+ * second, third, etc. pages in a non-zero order set could have
+ * pending deferred dequeues.
+ */
vm_page_dequeue(m);
vm_page_alloc_check(m);
@@ -2536,17 +2543,18 @@ vm_page_alloc_nofree_domain(int domain, int req)
return (NULL);
}
m->ref_count = count - 1;
- TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+ TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, plinks.q);
VM_CNT_ADD(v_nofree_count, count);
}
m = TAILQ_FIRST(&vmd->vmd_nofreeq);
- TAILQ_REMOVE(&vmd->vmd_nofreeq, m, listq);
+ TAILQ_REMOVE(&vmd->vmd_nofreeq, m, plinks.q);
if (m->ref_count > 0) {
vm_page_t m_next;
m_next = &m[1];
+ vm_page_dequeue(m_next);
m_next->ref_count = m->ref_count - 1;
- TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, listq);
+ TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, plinks.q);
m->ref_count = 0;
}
vm_domain_free_unlock(vmd);
@@ -2566,7 +2574,7 @@ vm_page_free_nofree(struct vm_domain *vmd, vm_page_t m)
{
vm_domain_free_lock(vmd);
MPASS(m->ref_count == 0);
- TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+ TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, plinks.q);
vm_domain_free_unlock(vmd);
VM_CNT_ADD(v_nofree_count, 1);
}
@@ -3971,7 +3979,7 @@ vm_page_dequeue(vm_page_t m)
old = vm_page_astate_load(m);
do {
- if (old.queue == PQ_NONE) {
+ if (__predict_true(old.queue == PQ_NONE)) {
KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0,
("%s: page %p has unexpected queue state",
__func__, m));
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 4bcd8d9f8236..4f82a69ebe25 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -229,7 +229,6 @@ struct vm_page {
void *zone;
} uma;
} plinks;
- TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
vm_object_t object; /* which object am I in (O) */
vm_pindex_t pindex; /* offset into object (O,P) */
vm_paddr_t phys_addr; /* physical address of page (C) */
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 95bf6b61fe19..ba16ae551093 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -393,13 +393,23 @@ static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int pool,
int tail)
{
+ /*
+ * The paging queues and the free page lists utilize the same field,
+ * plinks.q, within the vm_page structure. When a physical page is
+ * freed, it is lazily removed from the paging queues to reduce the
+ * cost of removal through batching. Here, we must ensure that any
+ * deferred dequeue on the physical page has completed before using
+ * its plinks.q field.
+ */
+ if (__predict_false(vm_page_astate_load(m).queue != PQ_NONE))
+ vm_page_dequeue(m);
m->order = order;
m->pool = pool;
if (tail)
- TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
+ TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
else
- TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
+ TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
fl[order].lcnt++;
}
@@ -407,7 +417,7 @@ static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{
- TAILQ_REMOVE(&fl[order].pl, m, listq);
+ TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
fl[order].lcnt--;
m->order = VM_NFREEORDER;
}
@@ -1582,7 +1592,7 @@ vm_phys_find_freelist_contig(struct vm_freelist *fl, u_long npages,
* check if there are enough free blocks starting at a properly aligned
* block. Thus, no block is checked for free-ness more than twice.
*/
- TAILQ_FOREACH(m, &fl[max_order].pl, listq) {
+ TAILQ_FOREACH(m, &fl[max_order].pl, plinks.q) {
/*
* Skip m unless it is first in a sequence of free max page
* blocks >= low in its segment.
@@ -1655,7 +1665,7 @@ vm_phys_find_queues_contig(
for (oind = order; oind < VM_NFREEORDER; oind++) {
for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
fl = (*queues)[pind];
- TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
+ TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
/*
* Determine if the address range starting at pa
* is within the given range, satisfies the