Terrible NFS performance under 9.2-RELEASE?
wollman at freebsd.org
wollman at freebsd.org
Tue Jan 28 04:27:33 UTC 2014
In article <1415339672.17282775.1390872779067.JavaMail.root at uoguelph.ca>,
Rick Macklem writes:
>Btw, Garrett Wollman's patch uses m_getm2() to get the mbuf list.
I do two things in my version that should provide an improvement. The
first is, as you say, using m_getm2() to allocate a list of mbufs.
The second is to use a fixed-size iovec array and a special-purpose
UMA zone to allocate the iovec and a preinitialized uio as a single
allocation.
I haven't tested this approach at all (not even compilation testing),
so I don't know whether it will work or not, and I don't know if it
actually provides the sort of performance improvement I expect.
The real big improvement, which I have not tried to implement, would
be to use physical pages (via sfbufs) by sharing the inner loop of
sendfile(2). Since I use ZFS as my backing filesystem, I'm not sure
this would have any benefit for me, but it should be a measurable
improvement for UFS-backed NFS servers.
My patch follows. Note that I haven't even compile-tested it yet, and
there is likely to be some fuzz if you apply it to stock kernel
sources.
-GAWollman
--- nfs_nfsdport.c.orig 2014-01-26 23:38:58.296234939 -0500
+++ nfs_nfsdport.c 2014-01-26 23:46:17.901236792 -0500
@@ -50,6 +50,14 @@
FEATURE(nfsd, "NFSv4 server");
+#define NFS_NIOVEC (NFS_SRVMAXDATA / MCLBYTES + 2) /* number of entries in nfsiov_iov[] */
+struct nfsd_iovec { /* one zone item: a uio together with the iovec array it points into */
+ struct uio nfsiov_uio; /* uma_zfree() is handed a pointer to this member, so it must stay first */
+ struct iovec nfsiov_iov[NFS_NIOVEC]; /* fixed-size backing store for nfsiov_uio.uio_iov */
+};
+static struct uma_zone *nfsd_iovec_zone; /* set up by uma_zcreate() and torn down by uma_zdestroy() later in this patch */
+static void nfsd_iovec_construct(struct uio **, struct mbuf **, struct mbuf **,
+ int); /* hands back a uio, the mbuf chain head and tail, for a read of the given byte count */
extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
extern int nfsrv_useacl;
extern int newnfs_numnfsd;
@@ -626,7 +634,7 @@
struct iovec *iv2;
int error = 0, len, left, siz, tlen, ioflag = 0;
struct mbuf *m2 = NULL, *m3;
- struct uio io, *uiop = &io;
+ struct uio *uiop;
struct nfsheur *nh;
len = left = NFSM_RNDUP(cnt);
@@ -634,49 +642,11 @@
/*
* Generate the mbuf list with the uio_iov ref. to it.
*/
- i = 0;
- while (left > 0) {
- NFSMGET(m);
- MCLGET(m, M_WAIT);
- m->m_len = 0;
- siz = min(M_TRAILINGSPACE(m), left);
- left -= siz;
- i++;
- if (m3)
- m2->m_next = m;
- else
- m3 = m;
- m2 = m;
- }
- MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
- M_TEMP, M_WAITOK);
- uiop->uio_iov = iv2 = iv;
- m = m3;
- left = len;
- i = 0;
- while (left > 0) {
- if (m == NULL)
- panic("nfsvno_read iov");
- siz = min(M_TRAILINGSPACE(m), left);
- if (siz > 0) {
- iv->iov_base = mtod(m, caddr_t) + m->m_len;
- iv->iov_len = siz;
- m->m_len += siz;
- left -= siz;
- iv++;
- i++;
- }
- m = m->m_next;
- }
- uiop->uio_iovcnt = i;
+ nfsd_iovec_construct(&uiop, &m3, &m2, len);
uiop->uio_offset = off;
- uiop->uio_resid = len;
- uiop->uio_rw = UIO_READ;
- uiop->uio_segflg = UIO_SYSSPACE;
nh = nfsrv_sequential_heuristic(uiop, vp);
ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
- FREE((caddr_t)iv2, M_TEMP);
if (error) {
m_freem(m3);
*mpp = NULL;
@@ -695,6 +665,7 @@
*mpendp = m2;
out:
+ uma_zfree(nfsd_iovec_zone, uiop); /* now safe to free */
NFSEXITCODE(error);
return (error);
}
@@ -3284,6 +3255,74 @@
}
}
+/*
+ * UMA initializer for nfsd_iovec objects.
+ *
+ * Points the embedded uio at the start of the embedded iovec array and
+ * marks it as an empty kernel-space read.  nfsd_iovec_dtor() calls this
+ * again when an item is freed.  The flags argument is unused.  Always
+ * returns 0.
+ */
+static int
+nfsd_iovec_init(void *mem, int size, int flags)
+{
+ struct nfsd_iovec *nfsiov = mem;
+ struct uio *uio = &nfsiov->nfsiov_uio;
+
+ /* KASSERT takes the condition plus a parenthesized panic message. */
+ KASSERT(size == sizeof(struct nfsd_iovec),
+ ("nfsd_iovec_init: unexpected item size %d", size));
+ /* Rewind to the first slot of the item's own iovec array. */
+ uio->uio_iov = nfsiov->nfsiov_iov;
+ uio->uio_iovcnt = 0;
+ /* don't care about state of uio_offset */
+ uio->uio_resid = 0;
+ uio->uio_segflg = UIO_SYSSPACE;
+ uio->uio_rw = UIO_READ;
+ uio->uio_td = NULL;
+ return (0);
+}
+
+/*
+ * UMA destructor for nfsd_iovec objects.
+ *
+ * The destructor doesn't need to do anything different from the
+ * initializer: it puts the uio back into its empty state so the next
+ * allocation starts with uio_iov at the head of the array and a zero
+ * uio_iovcnt.  UMA destructor callbacks return void, so the initializer's
+ * return value (always 0) is discarded.  The arg parameter is unused.
+ */
+static void
+nfsd_iovec_dtor(void *mem, int size, void *arg)
+{
+ (void)nfsd_iovec_init(mem, size, 0);
+}
+
+/*
+ * Build the uio and mbuf chain for a read of `left' bytes.
+ *
+ * On return *uiop points at a uio taken from nfsd_iovec_zone whose iovecs
+ * describe the data areas of a newly allocated mbuf chain, and *mp is the
+ * head of that chain.  If tailp is not NULL, *tailp is set to the last
+ * mbuf when the walk reaches the end of the chain.  Each mbuf's m_len is
+ * set to the number of bytes its iovec covers.  uio_offset is left for
+ * the caller to fill in, and the caller releases the uio with uma_zfree().
+ * Panics if the chain needs more than NFS_NIOVEC iovecs.
+ */
+static void
+nfsd_iovec_construct(struct uio **uiop, struct mbuf **mp, struct mbuf **tailp,
+ int left)
+{
+ struct nfsd_iovec *nfsiov;
+ struct iovec *iov;
+ struct mbuf *m, *m2;
+ struct uio *uio;
+ int siz;
+
+ /* uma_zalloc is guaranteed to succeed or deadlock with M_WAITOK */
+ nfsiov = uma_zalloc(nfsd_iovec_zone, M_WAITOK);
+ *uiop = uio = &nfsiov->nfsiov_uio;
+ /*
+ * NOTE(review): with left == 0 m_getm2() has nothing to allocate and
+ * presumably returns NULL, which would keep this loop napping forever;
+ * confirm that callers never request a zero-length read.
+ */
+ for (;;) {
+ m = m_getm2(NULL, left, M_WAITOK, MT_DATA, 0);
+ if (m != NULL) /* should always be taken with M_WAITOK */
+ break;
+ nfs_catnap(PZERO, 0, "nfsiovec");
+ }
+ *mp = m;
+ uio->uio_resid = left;
+ iov = uio->uio_iov;
+
+ /* One iovec per mbuf, until the requested length is covered. */
+ while (m != NULL && left > 0) {
+ /* Bound is the declared size of nfsiov_iov[]. */
+ if (++uio->uio_iovcnt > NFS_NIOVEC)
+ panic("nfsd_iovec_construct: mbuf chain exceeded size");
+ iov->iov_base = mtod(m, char *);
+ m->m_len = iov->iov_len = siz = min(M_TRAILINGSPACE(m), left);
+ left -= siz;
+ iov++;
+ m2 = m->m_next;
+ if (m2 == NULL && tailp != NULL) /* last one? */
+ *tailp = m;
+ m = m2;
+ }
+}
+
extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
/*
@@ -3319,6 +3358,10 @@
vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
#endif
+ nfsd_iovec_zone = uma_zcreate("nfsd iovec",
+ sizeof(struct nfsd_iovec), NULL /* ctor */,
+ nfsd_iovec_dtor, nfsd_iovec_init, NULL /* fini */,
+ sizeof(void *) - 1 /* alignment mask */, 0 /* flags */);
nfsd_call_servertimer = nfsrv_servertimer;
nfsd_call_nfsd = nfssvc_nfsd;
loaded = 1;
@@ -3347,6 +3390,9 @@
if (nfsrvd_pool != NULL)
svcpool_destroy(nfsrvd_pool);
+ /* Release memory in the iovec zone */
+ uma_zdestroy(nfsd_iovec_zone);
+
/* and get rid of the locks */
for (i = 0; i < NFSRVCACHE_HASHSIZE; i++)
mtx_destroy(&nfsrc_tcpmtx[i]);
More information about the freebsd-net
mailing list