svn commit: r194672 - in head/sys: kern netinet sys

Kip Macy kmacy at freebsd.org
Mon Jun 22 23:56:47 UTC 2009


Who is reviewing and testing these changes?


-Kip

On Mon, Jun 22, 2009 at 4:08 PM, Andre Oppermann<andre at freebsd.org> wrote:
> Author: andre
> Date: Mon Jun 22 23:08:05 2009
> New Revision: 194672
> URL: http://svn.freebsd.org/changeset/base/194672
>
> Log:
>  Add soreceive_stream(), an optimized version of soreceive() for
>  stream (TCP) sockets.
>
>  It is functionally identical to generic soreceive() but has a
>  number of stream-specific optimizations:
>  o does only one sockbuf unlock/lock per receive independent of
>    the length of data to be moved into the uio compared to
>    soreceive() which unlocks/locks per *mbuf*.
>  o uses m_mbuftouio() instead of its own copy(out) variant.
>  o much more compact code flow as a large number of special
>    cases is removed.
>  o much improved readability.
>
>  It offers significantly reduced CPU usage and lock contention
>  when receiving fast TCP streams.  Additional gains are obtained
>  when the receiving application is using SO_RCVLOWAT to batch up
>  some data before a read (and wakeup) is done.
>
>  This function was written by "reverse engineering" and is not
>  just a stripped down variant of soreceive().
>
>  It is not yet enabled by default on TCP sockets.  Instead it is
>  commented out in the protocol initialization in tcp_usrreq.c
>  until more widespread testing has been done.
>
>  Testers, especially with 10GigE gear, are welcome.
>
>  MFP4: r164817 //depot/user/andre/soreceive_stream/
>
> Modified:
>  head/sys/kern/uipc_socket.c
>  head/sys/netinet/tcp_usrreq.c
>  head/sys/sys/socketvar.h
>
> Modified: head/sys/kern/uipc_socket.c
> ==============================================================================
> --- head/sys/kern/uipc_socket.c Mon Jun 22 22:54:44 2009        (r194671)
> +++ head/sys/kern/uipc_socket.c Mon Jun 22 23:08:05 2009        (r194672)
> @@ -1857,6 +1857,202 @@ release:
>  }
>
>  /*
> + * Optimized version of soreceive() for stream (TCP) sockets.
> + */
> +int
> +soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
> +    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
> +{
> +       int len = 0, error = 0, flags, oresid;
> +       struct sockbuf *sb;
> +       struct mbuf *m, *n = NULL;
> +
> +       /* We only do stream sockets. */
> +       if (so->so_type != SOCK_STREAM)
> +               return (EINVAL);
> +       if (psa != NULL)
> +               *psa = NULL;
> +       if (controlp != NULL)
> +               return (EINVAL);
> +       if (flagsp != NULL)
> +               flags = *flagsp &~ MSG_EOR;
> +       else
> +               flags = 0;
> +       if (flags & MSG_OOB)
> +               return (soreceive_rcvoob(so, uio, flags));
> +       if (mp0 != NULL)
> +               *mp0 = NULL;
> +
> +       sb = &so->so_rcv;
> +
> +       /* Prevent other readers from entering the socket. */
> +       error = sblock(sb, SBLOCKWAIT(flags));
> +       if (error)
> +               goto out;
> +       SOCKBUF_LOCK(sb);
> +
> +       /* Easy one, no space to copyout anything. */
> +       if (uio->uio_resid == 0) {
> +               error = EINVAL;
> +               goto out;
> +       }
> +       oresid = uio->uio_resid;
> +
> +       /* We will never ever get anything unless we are connected. */
> +       if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
> +               /* When disconnecting there may be still some data left. */
> +               if (sb->sb_cc > 0)
> +                       goto deliver;
> +               if (!(so->so_state & SS_ISDISCONNECTED))
> +                       error = ENOTCONN;
> +               goto out;
> +       }
> +
> +       /* Socket buffer is empty and we shall not block. */
> +       if (sb->sb_cc == 0 &&
> +           ((sb->sb_flags & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
> +               error = EAGAIN;
> +               goto out;
> +       }
> +
> +restart:
> +       SOCKBUF_LOCK_ASSERT(&so->so_rcv);
> +
> +       /* Abort if socket has reported problems. */
> +       if (so->so_error) {
> +               if (sb->sb_cc > 0)
> +                       goto deliver;
> +               if (oresid > uio->uio_resid)
> +                       goto out;
> +               error = so->so_error;
> +               if (!(flags & MSG_PEEK))
> +                       so->so_error = 0;
> +               goto out;
> +       }
> +
> +       /* Door is closed.  Deliver what is left, if any. */
> +       if (sb->sb_state & SBS_CANTRCVMORE) {
> +               if (sb->sb_cc > 0)
> +                       goto deliver;
> +               else
> +                       goto out;
> +       }
> +
> +       /* Socket buffer got some data that we shall deliver now. */
> +       if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
> +           ((sb->sb_flags & SS_NBIO) ||
> +            (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
> +            sb->sb_cc >= sb->sb_lowat ||
> +            sb->sb_cc >= uio->uio_resid ||
> +            sb->sb_cc >= sb->sb_hiwat) ) {
> +               goto deliver;
> +       }
> +
> +       /* On MSG_WAITALL we must wait until all data or error arrives. */
> +       if ((flags & MSG_WAITALL) &&
> +           (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat))
> +               goto deliver;
> +
> +       /*
> +        * Wait and block until (more) data comes in.
> +        * NB: Drops the sockbuf lock during wait.
> +        */
> +       error = sbwait(sb);
> +       if (error)
> +               goto out;
> +       goto restart;
> +
> +deliver:
> +       SOCKBUF_LOCK_ASSERT(&so->so_rcv);
> +       KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__));
> +       KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
> +
> +       /* Statistics. */
> +       if (uio->uio_td)
> +               uio->uio_td->td_ru.ru_msgrcv++;
> +
> +       /* Fill uio until full or current end of socket buffer is reached. */
> +       len = min(uio->uio_resid, sb->sb_cc);
> +       if (mp0 != NULL) {
> +               /* Dequeue as many mbufs as possible. */
> +               if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
> +                       for (*mp0 = m = sb->sb_mb;
> +                            m != NULL && m->m_len <= len;
> +                            m = m->m_next) {
> +                               len -= m->m_len;
> +                               uio->uio_resid -= m->m_len;
> +                               sbfree(sb, m);
> +                               n = m;
> +                       }
> +                       sb->sb_mb = m;
> +                       if (sb->sb_mb == NULL)
> +                               SB_EMPTY_FIXUP(sb);
> +                       n->m_next = NULL;
> +               }
> +               /* Copy the remainder. */
> +               if (len > 0) {
> +                       KASSERT(sb->sb_mb != NULL,
> +                           ("%s: len > 0 && sb->sb_mb empty", __func__));
> +
> +                       m = m_copym(sb->sb_mb, 0, len, M_DONTWAIT);
> +                       if (m == NULL)
> +                               len = 0;        /* Don't flush data from sockbuf. */
> +                       else
> +                               uio->uio_resid -= m->m_len;
> +                       if (*mp0 != NULL)
> +                               n->m_next = m;
> +                       else
> +                               *mp0 = m;
> +                       if (*mp0 == NULL) {
> +                               error = ENOBUFS;
> +                               goto out;
> +                       }
> +               }
> +       } else {
> +               /* NB: Must unlock socket buffer as uiomove may sleep. */
> +               SOCKBUF_UNLOCK(sb);
> +               error = m_mbuftouio(uio, sb->sb_mb, len);
> +               SOCKBUF_LOCK(sb);
> +               if (error)
> +                       goto out;
> +       }
> +       SBLASTRECORDCHK(sb);
> +       SBLASTMBUFCHK(sb);
> +
> +       /*
> +        * Remove the delivered data from the socket buffer unless we
> +        * were only peeking.
> +        */
> +       if (!(flags & MSG_PEEK)) {
> +               if (len > 0)
> +                       sbdrop_locked(sb, len);
> +
> +               /* Notify protocol that we drained some data. */
> +               if ((so->so_proto->pr_flags & PR_WANTRCVD) &&
> +                   (((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
> +                    !(flags & MSG_SOCALLBCK))) {
> +                       SOCKBUF_UNLOCK(sb);
> +                       (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
> +                       SOCKBUF_LOCK(sb);
> +               }
> +       }
> +
> +       /*
> +        * For MSG_WAITALL we may have to loop again and wait for
> +        * more data to come in.
> +        */
> +       if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
> +               goto restart;
> +out:
> +       SOCKBUF_LOCK_ASSERT(sb);
> +       SBLASTRECORDCHK(sb);
> +       SBLASTMBUFCHK(sb);
> +       SOCKBUF_UNLOCK(sb);
> +       sbunlock(sb);
> +       return (error);
> +}
> +
> +/*
>  * Optimized version of soreceive() for simple datagram cases from userspace.
>  * Unlike in the stream case, we're able to drop a datagram if copyout()
>  * fails, and because we handle datagrams atomically, we don't need to use a
>
> Modified: head/sys/netinet/tcp_usrreq.c
> ==============================================================================
> --- head/sys/netinet/tcp_usrreq.c       Mon Jun 22 22:54:44 2009        (r194671)
> +++ head/sys/netinet/tcp_usrreq.c       Mon Jun 22 23:08:05 2009        (r194672)
> @@ -1032,6 +1032,9 @@ struct pr_usrreqs tcp_usrreqs = {
>        .pru_send =             tcp_usr_send,
>        .pru_shutdown =         tcp_usr_shutdown,
>        .pru_sockaddr =         in_getsockaddr,
> +#if 0
> +       .pru_soreceive =        soreceive_stream,
> +#endif
>        .pru_sosetlabel =       in_pcbsosetlabel,
>        .pru_close =            tcp_usr_close,
>  };
> @@ -1053,6 +1056,9 @@ struct pr_usrreqs tcp6_usrreqs = {
>        .pru_send =             tcp_usr_send,
>        .pru_shutdown =         tcp_usr_shutdown,
>        .pru_sockaddr =         in6_mapped_sockaddr,
> +#if 0
> +       .pru_soreceive =        soreceive_stream,
> +#endif
>        .pru_sosetlabel =       in_pcbsosetlabel,
>        .pru_close =            tcp_usr_close,
>  };
>
> Modified: head/sys/sys/socketvar.h
> ==============================================================================
> --- head/sys/sys/socketvar.h    Mon Jun 22 22:54:44 2009        (r194671)
> +++ head/sys/sys/socketvar.h    Mon Jun 22 23:08:05 2009        (r194672)
> @@ -345,6 +345,9 @@ int sopoll_generic(struct socket *so, in
>            struct ucred *active_cred, struct thread *td);
>  int    soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
>            struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
> +int    soreceive_stream(struct socket *so, struct sockaddr **paddr,
> +           struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
> +           int *flagsp);
>  int    soreceive_dgram(struct socket *so, struct sockaddr **paddr,
>            struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
>            int *flagsp);
>



-- 
When bad men combine, the good must associate; else they will fall one
by one, an unpitied sacrifice in a contemptible struggle.

    Edmund Burke


More information about the svn-src-all mailing list