Terrible NFS performance under 9.2-RELEASE?

wollman at freebsd.org wollman at freebsd.org
Tue Jan 28 04:27:33 UTC 2014


In article <1415339672.17282775.1390872779067.JavaMail.root at uoguelph.ca>,
Rick Macklem writes:

>Btw, Garrett Wollman's patch uses m_getm2() to get the mbuf list.

I do two things in my version that should provide an improvement.  The
first is, as you say, using m_getm2() to allocate a list of mbufs.
The second is to use a fixed-size iovec array and a special-purpose
UMA zone to allocate the iovec and a preinitialized uio as a single
allocation.

I haven't tested this approach at all (not even compilation testing),
so I don't know whether it will work or not, and I don't know if it
actually provides the sort of performance improvement I expect.

The real big improvement, which I have not tried to implement, would
be to use physical pages (via sfbufs) by sharing the inner loop of
sendfile(2).  Since I use ZFS as my backing filesystem, I'm not sure
this would have any benefit for me, but it should be a measurable
improvement for UFS-backed NFS servers.

My patch follows.  Note that I haven't even compile-tested it yet, and
there is likely to be some fuzz if you apply it to stock kernel
sources.

-GAWollman

--- nfs_nfsdport.c.orig	2014-01-26 23:38:58.296234939 -0500
+++ nfs_nfsdport.c	2014-01-26 23:46:17.901236792 -0500
@@ -50,6 +50,14 @@
 
 FEATURE(nfsd, "NFSv4 server");
 
+#define NFS_NIOVEC (NFS_SRVMAXDATA / MCLBYTES + 2)	/* iovec slots per item */
+struct nfsd_iovec {
+	struct	uio nfsiov_uio;		/* uio set up to use nfsiov_iov */
+	struct	iovec nfsiov_iov[NFS_NIOVEC];	/* fixed-size iovec array */
+};
+static struct uma_zone *nfsd_iovec_zone;	/* zone of struct nfsd_iovec */
+static void nfsd_iovec_construct(struct uio **, struct mbuf **, struct mbuf **,
+    int);
 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
 extern int nfsrv_useacl;
 extern int newnfs_numnfsd;
@@ -626,7 +634,7 @@
 	struct iovec *iv2;
 	int error = 0, len, left, siz, tlen, ioflag = 0;
 	struct mbuf *m2 = NULL, *m3;
-	struct uio io, *uiop = &io;
+	struct uio *uiop;
 	struct nfsheur *nh;
 
 	len = left = NFSM_RNDUP(cnt);
@@ -634,49 +642,11 @@
 	/*
 	 * Generate the mbuf list with the uio_iov ref. to it.
 	 */
-	i = 0;
-	while (left > 0) {
-		NFSMGET(m);
-		MCLGET(m, M_WAIT);
-		m->m_len = 0;
-		siz = min(M_TRAILINGSPACE(m), left);
-		left -= siz;
-		i++;
-		if (m3)
-			m2->m_next = m;
-		else
-			m3 = m;
-		m2 = m;
-	}
-	MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
-	    M_TEMP, M_WAITOK);
-	uiop->uio_iov = iv2 = iv;
-	m = m3;
-	left = len;
-	i = 0;
-	while (left > 0) {
-		if (m == NULL)
-			panic("nfsvno_read iov");
-		siz = min(M_TRAILINGSPACE(m), left);
-		if (siz > 0) {
-			iv->iov_base = mtod(m, caddr_t) + m->m_len;
-			iv->iov_len = siz;
-			m->m_len += siz;
-			left -= siz;
-			iv++;
-			i++;
-		}
-		m = m->m_next;
-	}
-	uiop->uio_iovcnt = i;
+	nfsd_iovec_construct(&uiop, &m3, &m2, len);
 	uiop->uio_offset = off;
-	uiop->uio_resid = len;
-	uiop->uio_rw = UIO_READ;
-	uiop->uio_segflg = UIO_SYSSPACE;
 	nh = nfsrv_sequential_heuristic(uiop, vp);
 	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
 	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
-	FREE((caddr_t)iv2, M_TEMP);
 	if (error) {
 		m_freem(m3);
 		*mpp = NULL;
@@ -695,6 +665,7 @@
 	*mpendp = m2;
 
 out:
+	uma_zfree(nfsd_iovec_zone, uiop);	/* now safe to free */
 	NFSEXITCODE(error);
 	return (error);
 }
@@ -3284,6 +3255,74 @@
 	}
 }
 
+/*
+ * UMA init hook (also reused by the dtor): reset the embedded uio for reads.
+ */
+static int
+nfsd_iovec_init(void *mem, int size, int flags)
+{
+	struct nfsd_iovec *nfsiov = mem;
+	struct uio *uio = &nfsiov->nfsiov_uio;
+
+	KASSERT(size == sizeof(struct nfsd_iovec),
+	    ("nfsd_iovec_init: unexpected item size %d", size));
+	uio->uio_iov = nfsiov->nfsiov_iov;	/* member is nfsiov_iov */
+	uio->uio_iovcnt = 0;
+	/* don't care about state of uio_offset */
+	uio->uio_resid = 0;
+	uio->uio_segflg = UIO_SYSSPACE;
+	uio->uio_rw = UIO_READ;
+	uio->uio_td = NULL;
+	return (0);
+}
+
+/*
+ * UMA destructor: put the item back into its initialized state by
+ * reusing the initializer.  UMA destructors return void.
+ */
+static void
+nfsd_iovec_dtor(void *mem, int size, void *arg)
+{
+	(void)nfsd_iovec_init(mem, size, 0);
+}
+
+/*
+ * Get a zone uio and an mbuf chain for "left" bytes, one iovec per mbuf.
+ */
+static void
+nfsd_iovec_construct(struct uio **uiop, struct mbuf **mp, struct mbuf **tailp,
+    int left)
+{
+	struct nfsd_iovec *nfsiov;
+	struct iovec *iov;
+	struct mbuf *m, *m2;
+	struct uio *uio;
+
+	/* uma_zalloc is guaranteed to succeed or deadlock with M_WAITOK */
+	nfsiov = uma_zalloc(nfsd_iovec_zone, M_WAITOK);
+	*uiop = uio = &nfsiov->nfsiov_uio;
+	for (;;) {
+		m = m_getm2(NULL, left, M_WAITOK, MT_DATA, 0);
+		if (m != NULL) /* should always be taken with M_WAITOK */
+			break;
+		nfs_catnap(PZERO, 0, "nfsiovec");
+	}
+	*mp = m;			/* hand the chain head to the caller */
+	uio->uio_resid = left;
+	iov = uio->uio_iov;
+	while (m != NULL && left > 0) {
+		if (++uio->uio_iovcnt > NFS_NIOVEC)	/* nfsiov_iov is full */
+			panic("nfsd_iovec_construct: mbuf chain exceeded size");
+		iov->iov_base = mtod(m, char *);
+		m->m_len = iov->iov_len = min(M_TRAILINGSPACE(m), left);
+		left -= m->m_len;
+		iov++;
+		if ((m2 = m->m_next) == NULL && tailp != NULL) /* last one? */
+			*tailp = m;
+		m = m2;
+	}
+}
+
 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
 
 /*
@@ -3319,6 +3358,10 @@
 		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
 		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
 #endif
+		nfsd_iovec_zone = uma_zcreate("nfsd iovec", 
+		    sizeof(struct nfsd_iovec), NULL /* ctor */,
+		    nfsd_iovec_dtor, nfsd_iovec_init, NULL /* fini */,
+		    sizeof(void *) - 1 /* alignment mask */, 0 /* flags */);
 		nfsd_call_servertimer = nfsrv_servertimer;
 		nfsd_call_nfsd = nfssvc_nfsd;
 		loaded = 1;
@@ -3347,6 +3390,9 @@
 		if (nfsrvd_pool != NULL)
 			svcpool_destroy(nfsrvd_pool);
 
+		/* Release memory in the iovec zone */
+		uma_zdestroy(nfsd_iovec_zone);
+
 		/* and get rid of the locks */
 		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++)
 			mtx_destroy(&nfsrc_tcpmtx[i]);


More information about the freebsd-net mailing list