svn commit: r262911 - projects/sendfile/sys/kern
Gleb Smirnoff
glebius at FreeBSD.org
Fri Mar 7 22:53:30 UTC 2014
Author: glebius
Date: Fri Mar 7 22:53:29 2014
New Revision: 262911
URL: http://svnweb.freebsd.org/changeset/base/262911
Log:
Provide a hack to work around the following condition: two sendfile(2)
calls are issued on the same object, with ranges overlapping on at
least one page. One of them grabs all pages, schedules I/O and returns.
The second one encounters an overlapping page in a busy state. It
sleeps on the 'pgrbwt' wait channel, and the sleep time is effectively
the I/O time. Thus, the second syscall degenerates into blocking on
disk I/O.
To avoid this, a sysctl kern.ipc.sendfile.pgrabnowait is provided
(defaults to off). When the sysctl is on and userland supplies the
SF_NODISKIO flag, we call vm_page_grab() with VM_ALLOC_NOWAIT. In case
of a failure we return EAGAIN, hinting to userland that it should
continue monitoring the socket via select/kevent/whatever. If the socket
has zero data to send, then we have to notify it immediately, to avoid
a stall.
Sponsored by: Netflix
Sponsored by: Nginx, Inc.
Modified:
projects/sendfile/sys/kern/uipc_syscalls.c
Modified: projects/sendfile/sys/kern/uipc_syscalls.c
==============================================================================
--- projects/sendfile/sys/kern/uipc_syscalls.c Fri Mar 7 22:29:00 2014 (r262910)
+++ projects/sendfile/sys/kern/uipc_syscalls.c Fri Mar 7 22:53:29 2014 (r262911)
@@ -133,6 +133,10 @@ static int filt_sfsync(struct knote *kn,
static SYSCTL_NODE(_kern_ipc, OID_AUTO, sendfile, CTLFLAG_RW, 0,
"sendfile(2) tunables");
+static int sfpgrabnowait = 0;
+SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, pgrabnowait, CTLFLAG_RW,
+ &sfpgrabnowait, 0, "Use VM_ALLOC_NOWAIT when SF_NODISKIO is requested");
+
#ifdef SFSYNC_DEBUG
static int sf_sync_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, sf_sync_debug, CTLFLAG_RW,
@@ -2718,18 +2722,28 @@ sf_io_done(void *arg)
}
static int
-sendfile_swapin(vm_object_t obj, struct sf_io *sfio, off_t off, off_t len)
+sendfile_swapin(vm_object_t obj, struct sf_io *sfio, off_t off, off_t len,
+ int flags)
{
vm_page_t *pa = sfio->pa;
int npages = sfio->npages;
int nios, rv;
nios = 0;
+ if (sfpgrabnowait && (flags & SF_NODISKIO))
+ flags = VM_ALLOC_NOWAIT;
+ else
+ flags = 0;
VM_OBJECT_WLOCK(obj);
- for (int i = 0; i < npages; i++)
+ for (int i = 0; i < npages; i++) {
pa[i] = vm_page_grab(obj, OFF_TO_IDX(vmoff(i, off)),
- VM_ALLOC_WIRED | VM_ALLOC_NORMAL);
+ VM_ALLOC_WIRED | VM_ALLOC_NORMAL | flags);
+ if (pa[i] == NULL) {
+ npages = sfio->npages = i;
+ break;
+ }
+ }
for (int i = 0; i < npages; i++) {
int j, a;
@@ -3079,7 +3093,37 @@ retry_space:
refcount_init(&sfio->nios, 1);
sfio->npages = npages;
- nios = sendfile_swapin(obj, sfio, off, space);
+ nios = sendfile_swapin(obj, sfio, off, space, flags);
+
+ if (sfio->npages != npages) {
+ /*
+ * sendfile_swapin() encountered a busy page,
+ * and was called with SF_NODISKIO. We don't
+ * return EBUSY, like old I/O blocking sendfile
+ * did, because situtation is different. No
+ * extra operation like read(2) or aio_read(2)
+ * is required from userland. We just need it
+ * to retry soonish.
+ * We rely on remote side ACKing our data to
+ * drive this timeout. And in the worst case,
+ * when we do not have data to send, we put
+ * the socket on the notification queue immediately.
+ */
+ error = EAGAIN;
+ if (sfio->npages == 0 && hdrlen == 0) {
+ if (vp != NULL)
+ VOP_UNLOCK(vp, 0);
+ SOCKBUF_LOCK(&so->so_snd);
+ if (!sbused(&so->so_snd))
+ sowwakeup_locked(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_snd);
+ free(sfio, M_TEMP);
+ goto done;
+ }
+ fixspace(npages, sfio->npages, off, &space);
+ npages = sfio->npages;
+ }
/*
* Loop and construct maximum sized mbuf chain to be bulk
@@ -3180,7 +3224,8 @@ retry_space:
mh = NULL;
}
- if (error) {
+ if (m == NULL) {
+ KASSERT(error, ("%s: no mbuf and no error", __func__));
free(sfio, M_TEMP);
goto done;
}
More information about the svn-src-projects
mailing list