svn commit: r194672 - in head/sys: kern netinet sys
Kip Macy
kmacy at freebsd.org
Mon Jun 22 23:56:47 UTC 2009
Who is reviewing and testing these changes?
-Kip
On Mon, Jun 22, 2009 at 4:08 PM, Andre Oppermann<andre at freebsd.org> wrote:
> Author: andre
> Date: Mon Jun 22 23:08:05 2009
> New Revision: 194672
> URL: http://svn.freebsd.org/changeset/base/194672
>
> Log:
> Add soreceive_stream(), an optimized version of soreceive() for
> stream (TCP) sockets.
>
> It is functionally identical to generic soreceive() but has a
> number of stream-specific optimizations:
> o does only one sockbuf unlock/lock per receive, independent of
> the length of data to be moved into the uio, whereas
> soreceive() unlocks/locks per *mbuf*.
> o uses m_mbuftouio() instead of its own copy(out) variant.
> o much more compact code flow as a large number of special
> cases is removed.
> o much improved readability.
>
> It offers significantly reduced CPU usage and lock contention
> when receiving fast TCP streams. Additional gains are obtained
> when the receiving application is using SO_RCVLOWAT to batch up
> some data before a read (and wakeup) is done.
>
> This function was written by "reverse engineering" and is not
> just a stripped down variant of soreceive().
>
> It is not yet enabled by default on TCP sockets. Instead it is
> commented out in the protocol initialization in tcp_usrreq.c
> until more widespread testing has been done.
>
> Testers, especially with 10GigE gear, are welcome.
>
> MFP4: r164817 //depot/user/andre/soreceive_stream/
>
> Modified:
> head/sys/kern/uipc_socket.c
> head/sys/netinet/tcp_usrreq.c
> head/sys/sys/socketvar.h
>
> Modified: head/sys/kern/uipc_socket.c
> ==============================================================================
> --- head/sys/kern/uipc_socket.c Mon Jun 22 22:54:44 2009 (r194671)
> +++ head/sys/kern/uipc_socket.c Mon Jun 22 23:08:05 2009 (r194672)
> @@ -1857,6 +1857,202 @@ release:
> }
>
> /*
> + * Optimized version of soreceive() for stream (TCP) sockets.
> + */
> +int
> +soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
> + struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
> +{
> + int len = 0, error = 0, flags, oresid;
> + struct sockbuf *sb;
> + struct mbuf *m, *n = NULL;
> +
> + /* We only do stream sockets. */
> + if (so->so_type != SOCK_STREAM)
> + return (EINVAL);
> + if (psa != NULL)
> + *psa = NULL;
> + if (controlp != NULL)
> + return (EINVAL);
> + if (flagsp != NULL)
> + flags = *flagsp &~ MSG_EOR;
> + else
> + flags = 0;
> + if (flags & MSG_OOB)
> + return (soreceive_rcvoob(so, uio, flags));
> + if (mp0 != NULL)
> + *mp0 = NULL;
> +
> + sb = &so->so_rcv;
> +
> + /* Prevent other readers from entering the socket. */
> + error = sblock(sb, SBLOCKWAIT(flags));
> + if (error)
> + goto out;
> + SOCKBUF_LOCK(sb);
> +
> + /* Easy one, no space to copyout anything. */
> + if (uio->uio_resid == 0) {
> + error = EINVAL;
> + goto out;
> + }
> + oresid = uio->uio_resid;
> +
> + /* We will never ever get anything unless we are connected. */
> + if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
> + /* When disconnecting there may be still some data left. */
> + if (sb->sb_cc > 0)
> + goto deliver;
> + if (!(so->so_state & SS_ISDISCONNECTED))
> + error = ENOTCONN;
> + goto out;
> + }
> +
> + /* Socket buffer is empty and we shall not block. */
> + if (sb->sb_cc == 0 &&
> + ((sb->sb_flags & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
> + error = EAGAIN;
> + goto out;
> + }
> +
> +restart:
> + SOCKBUF_LOCK_ASSERT(&so->so_rcv);
> +
> + /* Abort if socket has reported problems. */
> + if (so->so_error) {
> + if (sb->sb_cc > 0)
> + goto deliver;
> + if (oresid > uio->uio_resid)
> + goto out;
> + error = so->so_error;
> + if (!(flags & MSG_PEEK))
> + so->so_error = 0;
> + goto out;
> + }
> +
> + /* Door is closed. Deliver what is left, if any. */
> + if (sb->sb_state & SBS_CANTRCVMORE) {
> + if (sb->sb_cc > 0)
> + goto deliver;
> + else
> + goto out;
> + }
> +
> + /* Socket buffer got some data that we shall deliver now. */
> + if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
> + ((sb->sb_flags & SS_NBIO) ||
> + (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
> + sb->sb_cc >= sb->sb_lowat ||
> + sb->sb_cc >= uio->uio_resid ||
> + sb->sb_cc >= sb->sb_hiwat) ) {
> + goto deliver;
> + }
> +
> + /* On MSG_WAITALL we must wait until all data or error arrives. */
> + if ((flags & MSG_WAITALL) &&
> + (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat))
> + goto deliver;
> +
> + /*
> + * Wait and block until (more) data comes in.
> + * NB: Drops the sockbuf lock during wait.
> + */
> + error = sbwait(sb);
> + if (error)
> + goto out;
> + goto restart;
> +
> +deliver:
> + SOCKBUF_LOCK_ASSERT(&so->so_rcv);
> + KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__));
> + KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
> +
> + /* Statistics. */
> + if (uio->uio_td)
> + uio->uio_td->td_ru.ru_msgrcv++;
> +
> + /* Fill uio until full or current end of socket buffer is reached. */
> + len = min(uio->uio_resid, sb->sb_cc);
> + if (mp0 != NULL) {
> + /* Dequeue as many mbufs as possible. */
> + if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
> + for (*mp0 = m = sb->sb_mb;
> + m != NULL && m->m_len <= len;
> + m = m->m_next) {
> + len -= m->m_len;
> + uio->uio_resid -= m->m_len;
> + sbfree(sb, m);
> + n = m;
> + }
> + sb->sb_mb = m;
> + if (sb->sb_mb == NULL)
> + SB_EMPTY_FIXUP(sb);
> + n->m_next = NULL;
> + }
> + /* Copy the remainder. */
> + if (len > 0) {
> + KASSERT(sb->sb_mb != NULL,
> + ("%s: len > 0 && sb->sb_mb empty", __func__));
> +
> + m = m_copym(sb->sb_mb, 0, len, M_DONTWAIT);
> + if (m == NULL)
> + len = 0; /* Don't flush data from sockbuf. */
> + else
> + uio->uio_resid -= m->m_len;
> + if (*mp0 != NULL)
> + n->m_next = m;
> + else
> + *mp0 = m;
> + if (*mp0 == NULL) {
> + error = ENOBUFS;
> + goto out;
> + }
> + }
> + } else {
> + /* NB: Must unlock socket buffer as uiomove may sleep. */
> + SOCKBUF_UNLOCK(sb);
> + error = m_mbuftouio(uio, sb->sb_mb, len);
> + SOCKBUF_LOCK(sb);
> + if (error)
> + goto out;
> + }
> + SBLASTRECORDCHK(sb);
> + SBLASTMBUFCHK(sb);
> +
> + /*
> + * Remove the delivered data from the socket buffer unless we
> + * were only peeking.
> + */
> + if (!(flags & MSG_PEEK)) {
> + if (len > 0)
> + sbdrop_locked(sb, len);
> +
> + /* Notify protocol that we drained some data. */
> + if ((so->so_proto->pr_flags & PR_WANTRCVD) &&
> + (((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
> + !(flags & MSG_SOCALLBCK))) {
> + SOCKBUF_UNLOCK(sb);
> + (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
> + SOCKBUF_LOCK(sb);
> + }
> + }
> +
> + /*
> + * For MSG_WAITALL we may have to loop again and wait for
> + * more data to come in.
> + */
> + if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
> + goto restart;
> +out:
> + SOCKBUF_LOCK_ASSERT(sb);
> + SBLASTRECORDCHK(sb);
> + SBLASTMBUFCHK(sb);
> + SOCKBUF_UNLOCK(sb);
> + sbunlock(sb);
> + return (error);
> +}
> +
> +/*
> * Optimized version of soreceive() for simple datagram cases from userspace.
> * Unlike in the stream case, we're able to drop a datagram if copyout()
> * fails, and because we handle datagrams atomically, we don't need to use a
>
> Modified: head/sys/netinet/tcp_usrreq.c
> ==============================================================================
> --- head/sys/netinet/tcp_usrreq.c Mon Jun 22 22:54:44 2009 (r194671)
> +++ head/sys/netinet/tcp_usrreq.c Mon Jun 22 23:08:05 2009 (r194672)
> @@ -1032,6 +1032,9 @@ struct pr_usrreqs tcp_usrreqs = {
> .pru_send = tcp_usr_send,
> .pru_shutdown = tcp_usr_shutdown,
> .pru_sockaddr = in_getsockaddr,
> +#if 0
> + .pru_soreceive = soreceive_stream,
> +#endif
> .pru_sosetlabel = in_pcbsosetlabel,
> .pru_close = tcp_usr_close,
> };
> @@ -1053,6 +1056,9 @@ struct pr_usrreqs tcp6_usrreqs = {
> .pru_send = tcp_usr_send,
> .pru_shutdown = tcp_usr_shutdown,
> .pru_sockaddr = in6_mapped_sockaddr,
> +#if 0
> + .pru_soreceive = soreceive_stream,
> +#endif
> .pru_sosetlabel = in_pcbsosetlabel,
> .pru_close = tcp_usr_close,
> };
>
> Modified: head/sys/sys/socketvar.h
> ==============================================================================
> --- head/sys/sys/socketvar.h Mon Jun 22 22:54:44 2009 (r194671)
> +++ head/sys/sys/socketvar.h Mon Jun 22 23:08:05 2009 (r194672)
> @@ -345,6 +345,9 @@ int sopoll_generic(struct socket *so, in
> struct ucred *active_cred, struct thread *td);
> int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
> struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
> +int soreceive_stream(struct socket *so, struct sockaddr **paddr,
> + struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
> + int *flagsp);
> int soreceive_dgram(struct socket *so, struct sockaddr **paddr,
> struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
> int *flagsp);
>
--
When bad men combine, the good must associate; else they will fall one
by one, an unpitied sacrifice in a contemptible struggle.
Edmund Burke
More information about the svn-src-all
mailing list