git: 987fca954dc8 - stable/13 - md: Get rid of the pbuf zone
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 23 Jun 2023 13:53:38 UTC
The branch stable/13 has been updated by markj:
URL: https://cgit.FreeBSD.org/src/commit/?id=987fca954dc805e5135c13b6efa28a8174d35377
commit 987fca954dc805e5135c13b6efa28a8174d35377
Author: Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2023-05-23 14:14:06 +0000
Commit: Mark Johnston <markj@FreeBSD.org>
CommitDate: 2023-06-23 13:33:49 +0000
md: Get rid of the pbuf zone
The zone is used solely to provide KVA for mapping BIOs so that we can
pass mapped buffers to VOP_READ and VOP_WRITE. Currently we preallocate
nswbuf/10 bufs for this purpose during boot.
The intent was to limit KVA usage on 32-bit systems, but the
preallocation means that we in fact consume more KVA than needed unless
one has more than nswbuf/10 (typically 25) vnode-backed MD devices
in existence, which I would argue is the uncommon case.
Meanwhile, all I/O to an MD is handled by a dedicated thread, so we can
instead simply preallocate the KVA region at MD device creation time.
Event: BSDCan 2023
Reviewed by: kib
MFC after: 1 month
Differential Revision: https://reviews.freebsd.org/D40215
(cherry picked from commit 30038a8b4efc6d0b9f8f295e28bc205fe9728310)
---
sys/dev/md/md.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index 2e941c991ddb..52d64d6721cd 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -97,6 +97,7 @@
#include <geom/geom_int.h>
#include <vm/vm.h>
+#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
@@ -231,8 +232,6 @@ static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list);
#define NMASK (NINDIR-1)
static int nshift;
-static uma_zone_t md_pbuf_zone;
-
struct indir {
uintptr_t *array;
u_int total;
@@ -274,6 +273,7 @@ struct md_s {
char file[PATH_MAX];
char label[PATH_MAX];
struct ucred *cred;
+ vm_offset_t kva;
/* MD_SWAP related fields */
vm_object_t object;
@@ -875,11 +875,11 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
struct iovec *piov;
struct mount *mp;
struct vnode *vp;
- struct buf *pb;
bus_dma_segment_t *vlist;
struct thread *td;
off_t iolen, iostart, len, zerosize;
int ma_offs, npages;
+ bool mapped;
switch (bp->bio_cmd) {
case BIO_READ:
@@ -897,10 +897,10 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
td = curthread;
vp = sc->vnode;
- pb = NULL;
piov = NULL;
ma_offs = bp->bio_ma_offset;
len = bp->bio_length;
+ mapped = false;
/*
* VNODE I/O
@@ -962,22 +962,21 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
auio.uio_iovcnt = piov - auio.uio_iov;
piov = auio.uio_iov;
} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
- pb = uma_zalloc(md_pbuf_zone, M_WAITOK);
- MPASS((pb->b_flags & B_MAXPHYS) != 0);
bp->bio_resid = len;
unmapped_step:
npages = atop(min(maxphys, round_page(len + (ma_offs &
PAGE_MASK))));
iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len);
KASSERT(iolen > 0, ("zero iolen"));
- pmap_qenter((vm_offset_t)pb->b_data,
- &bp->bio_ma[atop(ma_offs)], npages);
- aiov.iov_base = (void *)((vm_offset_t)pb->b_data +
- (ma_offs & PAGE_MASK));
+ KASSERT(npages <= atop(MAXPHYS + PAGE_SIZE),
+ ("npages %d too large", npages));
+ pmap_qenter(sc->kva, &bp->bio_ma[atop(ma_offs)], npages);
+ aiov.iov_base = (void *)(sc->kva + (ma_offs & PAGE_MASK));
aiov.iov_len = iolen;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_resid = iolen;
+ mapped = true;
} else {
aiov.iov_base = bp->bio_data;
aiov.iov_len = bp->bio_length;
@@ -1005,8 +1004,8 @@ unmapped_step:
VOP_ADVISE(vp, iostart, auio.uio_offset - 1,
POSIX_FADV_DONTNEED);
- if (pb != NULL) {
- pmap_qremove((vm_offset_t)pb->b_data, npages);
+ if (mapped) {
+ pmap_qremove(sc->kva, npages);
if (error == 0) {
len -= iolen;
bp->bio_resid -= iolen;
@@ -1014,7 +1013,6 @@ unmapped_step:
if (len > 0)
goto unmapped_step;
}
- uma_zfree(md_pbuf_zone, pb);
} else {
bp->bio_resid = auio.uio_resid;
}
@@ -1283,7 +1281,7 @@ mdnew(int unit, int *errp, enum md_types type)
return (NULL);
}
- sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO);
+ sc = malloc(sizeof(*sc), M_MD, M_WAITOK | M_ZERO);
sc->type = type;
bioq_init(&sc->bio_queue);
mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF);
@@ -1483,6 +1481,8 @@ mdcreate_vnode(struct md_s *sc, struct md_req *mdr, struct thread *td)
nd.ni_vp->v_vflag &= ~VV_MD;
goto bad;
}
+
+ sc->kva = kva_alloc(MAXPHYS + PAGE_SIZE);
return (0);
bad:
VOP_UNLOCK(nd.ni_vp);
@@ -1541,6 +1541,8 @@ mddestroy(struct md_s *sc, struct thread *td)
destroy_indir(sc, sc->indir);
if (sc->uma)
uma_zdestroy(sc->uma);
+ if (sc->kva)
+ kva_free(sc->kva, MAXPHYS + PAGE_SIZE);
LIST_REMOVE(sc, list);
free_unr(md_uh, sc->unit);
@@ -2074,7 +2076,6 @@ g_md_init(struct g_class *mp __unused)
sx_xunlock(&md_sx);
}
}
- md_pbuf_zone = pbuf_zsecond_create("mdpbuf", nswbuf / 10);
status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
0600, MDCTL_NAME);
g_topology_lock();
@@ -2170,6 +2171,5 @@ g_md_fini(struct g_class *mp __unused)
sx_destroy(&md_sx);
if (status_dev != NULL)
destroy_dev(status_dev);
- uma_zdestroy(md_pbuf_zone);
delete_unrhdr(md_uh);
}