git: ae10431c9833 - main - vm_page: Allow PG_NOFREE pages to be freed
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 10 Apr 2025 12:47:20 UTC
The branch main has been updated by markj:
URL: https://cgit.FreeBSD.org/src/commit/?id=ae10431c9833bd6b176afe4d8021d233fd985107
commit ae10431c9833bd6b176afe4d8021d233fd985107
Author: Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2025-04-10 12:43:12 +0000
Commit: Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-04-10 12:47:05 +0000
vm_page: Allow PG_NOFREE pages to be freed
There is at least one case where we need to support it: kmem_malloc()
might need to allocate multiple pages to satisfy a NOFREE allocation,
which it implements by calling vm_page_alloc() in a loop. If it fails
part-way through, it needs to free already-allocated pages, but this was
illegal.
Convert the bump allocator to a linked list; (ab)use the pindex field of
each page in the list to store the number of contiguous pages in the
block. (Originally I added a new plinks member for this purpose, but
it's not safe to use that until after vm_page_dequeue() is called due to
lazy page queue removal.) Then, modify vm_page_free() to support freeing
pages to this list.
While here, add a __noinline qualifier to vm_page_alloc_nofree_domain()
to ensure that it doesn't get inlined into a hot path.
Reported by: syzbot+93bc9edd2d0f22ae426a@syzkaller.appspotmail.com
Reviewed by: bnovkov, kib
Fixes: a8693e89e3e4 ("vm: Introduce vm_page_alloc_nofree_domain")
Differential Revision: https://reviews.freebsd.org/D49480
---
sys/vm/vm_page.c | 56 ++++++++++++++++++++++++++++++++++++++-------------
sys/vm/vm_pagequeue.h | 5 +----
2 files changed, 43 insertions(+), 18 deletions(-)
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 741c45490d96..5214b3c956ba 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2648,40 +2648,66 @@ found:
* the routine will try to fetch a new one from the freelists
* and discard the old one.
*/
-static vm_page_t
+static vm_page_t __noinline
vm_page_alloc_nofree_domain(int domain, int req)
{
vm_page_t m;
struct vm_domain *vmd;
- struct vm_nofreeq *nqp;
KASSERT((req & VM_ALLOC_NOFREE) != 0, ("invalid request %#x", req));
vmd = VM_DOMAIN(domain);
- nqp = &vmd->vmd_nofreeq;
vm_domain_free_lock(vmd);
- if (nqp->offs >= (1 << VM_NOFREE_IMPORT_ORDER) || nqp->ma == NULL) {
- if (!vm_domain_allocate(vmd, req,
- 1 << VM_NOFREE_IMPORT_ORDER)) {
+ if (TAILQ_EMPTY(&vmd->vmd_nofreeq)) {
+ int count;
+
+ count = 1 << VM_NOFREE_IMPORT_ORDER;
+ if (!vm_domain_allocate(vmd, req, count)) {
vm_domain_free_unlock(vmd);
return (NULL);
}
- nqp->ma = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
+ m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
VM_NOFREE_IMPORT_ORDER);
- if (nqp->ma == NULL) {
- vm_domain_freecnt_inc(vmd, 1 << VM_NOFREE_IMPORT_ORDER);
+ if (m == NULL) {
+ vm_domain_freecnt_inc(vmd, count);
vm_domain_free_unlock(vmd);
return (NULL);
}
- nqp->offs = 0;
+ m->pindex = count;
+ TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+ VM_CNT_ADD(v_nofree_count, count);
+ }
+ m = TAILQ_FIRST(&vmd->vmd_nofreeq);
+ TAILQ_REMOVE(&vmd->vmd_nofreeq, m, listq);
+ if (m->pindex > 1) {
+ vm_page_t m_next;
+
+ m_next = &m[1];
+ m_next->pindex = m->pindex - 1;
+ TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, listq);
}
- m = &nqp->ma[nqp->offs++];
vm_domain_free_unlock(vmd);
- VM_CNT_ADD(v_nofree_count, 1);
+ VM_CNT_ADD(v_nofree_count, -1);
return (m);
}
+/*
+ * Though a NOFREE page by definition should not be freed, we support putting
+ * them aside for future NOFREE allocations. This enables code which allocates
+ * NOFREE pages for some purpose but then encounters an error and releases
+ * resources.
+ */
+static void __noinline
+vm_page_free_nofree(struct vm_domain *vmd, vm_page_t m)
+{
+ vm_domain_free_lock(vmd);
+ m->pindex = 1;
+ TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+ vm_domain_free_unlock(vmd);
+ VM_CNT_ADD(v_nofree_count, 1);
+}
+
vm_page_t
vm_page_alloc_noobj(int req)
{
@@ -4145,8 +4171,6 @@ vm_page_free_prep(vm_page_t m)
m, i, (uintmax_t)*p));
}
#endif
- KASSERT((m->flags & PG_NOFREE) == 0,
- ("%s: attempting to free a PG_NOFREE page", __func__));
if ((m->oflags & VPO_UNMANAGED) == 0) {
KASSERT(!pmap_page_is_mapped(m),
("vm_page_free_prep: freeing mapped page %p", m));
@@ -4230,6 +4254,10 @@ vm_page_free_toq(vm_page_t m)
return;
vmd = vm_pagequeue_domain(m);
+ if (__predict_false((m->flags & PG_NOFREE) != 0)) {
+ vm_page_free_nofree(vmd, m);
+ return;
+ }
zone = vmd->vmd_pgcache[m->pool].zone;
if ((m->flags & PG_PCPU_CACHE) != 0 && zone != NULL) {
uma_zfree(zone, m);
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index 72fd1bb47318..cbbd27389662 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -247,10 +247,7 @@ struct vm_domain {
u_int vmd_domain; /* (c) Domain number. */
u_int vmd_page_count; /* (c) Total page count. */
long vmd_segs; /* (c) bitmask of the segments */
- struct vm_nofreeq {
- vm_page_t ma;
- int offs;
- } vmd_nofreeq; /* (f) NOFREE page bump allocator. */
+ struct pglist vmd_nofreeq; /* (f) NOFREE page bump allocator. */
u_int __aligned(CACHE_LINE_SIZE) vmd_free_count; /* (a,f) free page count */
u_int vmd_pageout_deficit; /* (a) Estimated number of pages deficit */
uint8_t vmd_pad[CACHE_LINE_SIZE - (sizeof(u_int) * 2)];