svn commit: r349530 - in head/sys: kern sys

John Baldwin jhb at FreeBSD.org
Sat Jun 29 00:49:36 UTC 2019


Author: jhb
Date: Sat Jun 29 00:49:35 2019
New Revision: 349530
URL: https://svnweb.freebsd.org/changeset/base/349530

Log:
  Add support for using unmapped mbufs with sendfile(2).
  
  This can be enabled at runtime via the kern.ipc.mb_use_ext_pgs sysctl.
  It is disabled by default.
  
  Submitted by:	gallatin (earlier version)
  Reviewed by:	gallatin, hselasky, rrs
  Relnotes:	yes
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D20616

Modified:
  head/sys/kern/kern_mbuf.c
  head/sys/kern/kern_sendfile.c
  head/sys/sys/mbuf.h

Modified: head/sys/kern/kern_mbuf.c
==============================================================================
--- head/sys/kern/kern_mbuf.c	Sat Jun 29 00:48:33 2019	(r349529)
+++ head/sys/kern/kern_mbuf.c	Sat Jun 29 00:49:35 2019	(r349530)
@@ -112,6 +112,11 @@ int nmbjumbop;			/* limits number of page size jumbo c
 int nmbjumbo9;			/* limits number of 9k jumbo clusters */
 int nmbjumbo16;			/* limits number of 16k jumbo clusters */
 
+bool mb_use_ext_pgs;		/* use EXT_PGS mbufs for sendfile */
+SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN,
+    &mb_use_ext_pgs, 0,
+    "Use unmapped mbufs for sendfile(2)");
+
 static quad_t maxmbufmem;	/* overall real memory limit for all mbufs */
 
 SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,

Modified: head/sys/kern/kern_sendfile.c
==============================================================================
--- head/sys/kern/kern_sendfile.c	Sat Jun 29 00:48:33 2019	(r349529)
+++ head/sys/kern/kern_sendfile.c	Sat Jun 29 00:49:35 2019	(r349530)
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/kernel.h>
+#include <netinet/in.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
@@ -62,6 +63,7 @@ __FBSDID("$FreeBSD$");
 
 #define	EXT_FLAG_SYNC		EXT_FLAG_VENDOR1
 #define	EXT_FLAG_NOCACHE	EXT_FLAG_VENDOR2
+#define	EXT_FLAG_CACHE_LAST	EXT_FLAG_VENDOR3
 
 /*
  * Structure describing a single sendfile(2) I/O, which may consist of
@@ -201,6 +203,39 @@ sendfile_free_mext(struct mbuf *m)
 	}
 }
 
+static void
+sendfile_free_mext_pg(struct mbuf *m)	/* given to mb_alloc_ext_pgs() */
+{
+	struct mbuf_ext_pgs *ext_pgs;
+	vm_page_t pg;
+	int i;
+	bool nocache, cache_last;
+
+	KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_PGS,
+	    ("%s: m %p !M_EXT or !EXT_PGS", __func__, m));
+
+	nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE; /* SF_NOCACHE set */
+	cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST;
+	ext_pgs = m->m_ext.ext_pgs;
+
+	for (i = 0; i < ext_pgs->npgs; i++) {	/* one call per page */
+		if (cache_last && i == ext_pgs->npgs - 1)
+			nocache = false;	/* final page: clear nocache */
+		pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); /* pa[] is physical */
+		sendfile_free_page(pg, nocache);
+	}
+
+	if (m->m_ext.ext_flags & EXT_FLAG_SYNC) { /* set when sfs attached */
+		struct sendfile_sync *sfs = m->m_ext.ext_arg2;
+
+		mtx_lock(&sfs->mtx);	/* count is updated under sfs->mtx */
+		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
+		if (--sfs->count == 0)	/* count reached zero: signal cv */
+			cv_signal(&sfs->cv);
+		mtx_unlock(&sfs->mtx);
+	}
+}
+
 /*
  * Helper function to calculate how much data to put into page i of n.
  * Only first and last pages are special.
@@ -283,8 +318,6 @@ sendfile_iodone(void *arg, vm_page_t *pg, int count, i
 
 	CURVNET_SET(so->so_vnet);
 	if (sfio->error) {
-		struct mbuf *m;
-
 		/*
 		 * I/O operation failed.  The state of data in the socket
 		 * is now inconsistent, and all what we can do is to tear
@@ -299,9 +332,7 @@ sendfile_iodone(void *arg, vm_page_t *pg, int count, i
 		so->so_proto->pr_usrreqs->pru_abort(so);
 		so->so_error = EIO;
 
-		m = sfio->m;
-		for (int i = 0; i < sfio->npages; i++)
-			m = m_free(m);
+		mb_free_notready(sfio->m, sfio->npages);
 	} else
 		(void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m,
 		    sfio->npages);
@@ -540,13 +571,15 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *h
 	struct vnode *vp;
 	struct vm_object *obj;
 	struct socket *so;
+	struct mbuf_ext_pgs *ext_pgs;
 	struct mbuf *m, *mh, *mhtail;
 	struct sf_buf *sf;
 	struct shmfd *shmfd;
 	struct sendfile_sync *sfs;
 	struct vattr va;
 	off_t off, sbytes, rem, obj_size;
-	int error, softerr, bsize, hdrlen;
+	int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr;
+	bool use_ext_pgs;
 
 	obj = NULL;
 	so = NULL;
@@ -554,6 +587,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *h
 	sfs = NULL;
 	hdrlen = sbytes = 0;
 	softerr = 0;
+	use_ext_pgs = false;
 
 	error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
 	if (error != 0)
@@ -714,6 +748,17 @@ retry_space:
 
 		if (space > rem)
 			space = rem;
+		else if (space > PAGE_SIZE) {
+			/*
+			 * Use page boundaries when possible for large
+			 * requests.
+			 */
+			if (off & PAGE_MASK)
+				space -= (PAGE_SIZE - (off & PAGE_MASK));
+			space = trunc_page(space);
+			if (off & PAGE_MASK)
+				space += (PAGE_SIZE - (off & PAGE_MASK));
+		}
 
 		npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE);
 
@@ -751,6 +796,22 @@ retry_space:
 		 * dumped into socket buffer.
 		 */
 		pa = sfio->pa;
+
+		/*
+		 * Use unmapped mbufs if enabled for TCP.  Unmapped
+		 * bufs are restricted to TCP as that is what has been
+		 * tested.  In particular, unmapped mbufs have not
+		 * been tested with UNIX-domain sockets.
+		 */
+		if (mb_use_ext_pgs &&
+		    so->so_proto->pr_protocol == IPPROTO_TCP) {
+			use_ext_pgs = true;
+			max_pgs = MBUF_PEXT_MAX_PGS;
+
+			/* Start at last index, to wrap on first use. */
+			ext_pgs_idx = max_pgs - 1;
+		}
+
 		for (int i = 0; i < npages; i++) {
 			struct mbuf *m0;
 
@@ -764,6 +825,66 @@ retry_space:
 				npages = i;
 				softerr = EBUSY;
 				break;
+			}
+
+			if (use_ext_pgs) {
+				off_t xfs;
+
+				ext_pgs_idx++;
+				if (ext_pgs_idx == max_pgs) {
+					m0 = mb_alloc_ext_pgs(M_WAITOK, false,
+					    sendfile_free_mext_pg);
+
+					if (flags & SF_NOCACHE) {
+						m0->m_ext.ext_flags |=
+						    EXT_FLAG_NOCACHE;
+
+						/*
+						 * See comment below regarding
+						 * ignoring SF_NOCACHE for the
+						 * last page.
+						 */
+						if ((npages - i <= max_pgs) &&
+						    ((off + space) & PAGE_MASK) &&
+						    (rem > space || rhpages > 0))
+							m0->m_ext.ext_flags |=
+							    EXT_FLAG_CACHE_LAST;
+					}
+					if (sfs != NULL) {
+						m0->m_ext.ext_flags |=
+						    EXT_FLAG_SYNC;
+						m0->m_ext.ext_arg2 = sfs;
+						mtx_lock(&sfs->mtx);
+						sfs->count++;
+						mtx_unlock(&sfs->mtx);
+					}
+					ext_pgs = m0->m_ext.ext_pgs;
+					if (i == 0)
+						sfio->m = m0;
+					ext_pgs_idx = 0;
+
+					/* Append to mbuf chain. */
+					if (mtail != NULL)
+						mtail->m_next = m0;
+					else
+						m = m0;
+					mtail = m0;
+					ext_pgs->first_pg_off =
+					    vmoff(i, off) & PAGE_MASK;
+				}
+				if (nios) {
+					mtail->m_flags |= M_NOTREADY;
+					ext_pgs->nrdy++;
+				}
+
+				ext_pgs->pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pa[i]);
+				ext_pgs->npgs++;
+				xfs = xfsize(i, npages, off, space);
+				ext_pgs->last_pg_len = xfs;
+				MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs);
+				mtail->m_len += xfs;
+				mtail->m_ext.ext_size += PAGE_SIZE;
+				continue;
 			}
 
 			/*

Modified: head/sys/sys/mbuf.h
==============================================================================
--- head/sys/sys/mbuf.h	Sat Jun 29 00:48:33 2019	(r349529)
+++ head/sys/sys/mbuf.h	Sat Jun 29 00:49:35 2019	(r349530)
@@ -1129,6 +1129,7 @@ extern int		max_hdr;	/* Largest link + protocol header
 extern int		max_linkhdr;	/* Largest link-level header */
 extern int		max_protohdr;	/* Largest protocol header */
 extern int		nmbclusters;	/* Maximum number of clusters */
+extern bool		mb_use_ext_pgs;	/* Use ext_pgs for sendfile */
 
 /*-
  * Network packets may have annotations attached by affixing a list of


More information about the svn-src-all mailing list