svn commit: r268029 - head/sys/dev/ixgbe

Adrian Chadd adrian at freebsd.org
Mon Jun 30 04:39:34 UTC 2014


Reviewed by: jfv, gnn

On 29 June 2014 21:38, Adrian Chadd <adrian at freebsd.org> wrote:
> Author: adrian
> Date: Mon Jun 30 04:38:29 2014
> New Revision: 268029
> URL: http://svnweb.freebsd.org/changeset/base/268029
>
> Log:
>   Add initial RSS awareness to the ixgbe(4) driver.
>
>   The ixgbe(4) hardware is capable of RSS hashing RX packets and doing RSS
>   queue selection for up to 8 queues.
>
>   However, even if multi-queue is enabled for ixgbe(4), the RX path doesn't use
>   the RSS flowid from the received descriptor.  It just uses the MSIX queue id.
>
>   This patch does a handful of things if RSS is enabled:
>
>   * Instead of using a random key at boot, fetch the RSS key from the RSS code
>     and program it into the hardware RSS key registers.
>
>     That whole chunk of code should be double checked for endian correctness.
>
>   * Use the RSS bucket to CPU ID mapping to figure out which CPU to pin
>     the RX swi thread and the taskqueue threads for each queue to.
>
>   * The software queue is now really an "RSS bucket".
>
>   * When programming the RSS indirection table, use the RSS code to
>     figure out which RSS bucket each slot in the indirection table maps
>     to.
>
>   * When transmitting, use the flowid RSS mapping if the mbuf has
>     an RSS-aware hash.  The existing method wasn't guaranteed to align
>     correctly with the destination RSS bucket (and thus CPU ID).
>
>   This code warns if the number of RSS buckets isn't the same as the
>   automatically configured number of hardware queues.  The administrator
>   will have to tweak one of them for better performance.
>
>   There's currently no way to re-balance the RSS indirection table after
>   startup.  I'll worry about that later.
>
>   Additionally, it may be worthwhile to always use the full 32 bit flowid if
>   multi-queue is enabled.  It'll make things like lagg(4) behave better with
>   respect to traffic distribution.
>
> Modified:
>   head/sys/dev/ixgbe/ixgbe.c
>
> Modified: head/sys/dev/ixgbe/ixgbe.c
> ==============================================================================
> --- head/sys/dev/ixgbe/ixgbe.c  Mon Jun 30 04:34:59 2014        (r268028)
> +++ head/sys/dev/ixgbe/ixgbe.c  Mon Jun 30 04:38:29 2014        (r268029)
> @@ -35,8 +35,13 @@
>
>  #include "opt_inet.h"
>  #include "opt_inet6.h"
> +#include "opt_rss.h"
>  #include "ixgbe.h"
>
> +#ifdef RSS
> +#include <netinet/in_rss.h>
> +#endif
> +
>  /*********************************************************************
>   *  Set this to one to display debug statistics
>   *********************************************************************/
> @@ -809,12 +814,33 @@ ixgbe_mq_start(struct ifnet *ifp, struct
>         struct ix_queue *que;
>         struct tx_ring  *txr;
>         int             i, err = 0;
> +#ifdef RSS
> +       uint32_t bucket_id;
> +#endif
>
>         /* Which queue to use */
> -       if ((m->m_flags & M_FLOWID) != 0)
> -               i = m->m_pkthdr.flowid % adapter->num_queues;
> -       else
> +       /*
> +        * When doing RSS, map it to the same outbound queue
> +        * as the incoming flow would be mapped to.
> +        *
> +        * If everything is setup correctly, it should be the
> +        * same bucket that the current CPU we're on is.
> +        */
> +       if ((m->m_flags & M_FLOWID) != 0) {
> +#ifdef RSS
> +               if (rss_hash2bucket(m->m_pkthdr.flowid,
> +                   M_HASHTYPE_GET(m), &bucket_id) == 0) {
> +                       /* XXX TODO: spit out something if bucket_id > num_queues? */
> +                       i = bucket_id % adapter->num_queues;
> +               } else {
> +#endif
> +                       i = m->m_pkthdr.flowid % adapter->num_queues;
> +#ifdef RSS
> +               }
> +#endif
> +       } else {
>                 i = curcpu % adapter->num_queues;
> +       }
>
>         txr = &adapter->tx_rings[i];
>         que = &adapter->queues[i];
> @@ -2338,6 +2364,31 @@ ixgbe_allocate_msix(struct adapter *adap
>         struct          ix_queue *que = adapter->queues;
>         struct          tx_ring *txr = adapter->tx_rings;
>         int             error, rid, vector = 0;
> +       int             cpu_id;
> +
> +#ifdef RSS
> +       /*
> +        * If we're doing RSS, the number of queues needs to
> +        * match the number of RSS buckets that are configured.
> +        *
> +        * + If there's more queues than RSS buckets, we'll end
> +        *   up with queues that get no traffic.
> +        *
> +        * + If there's more RSS buckets than queues, we'll end
> +        *   up having multiple RSS buckets map to the same queue,
> +        *   so there'll be some contention.
> +        */
> +       if (adapter->num_queues != rss_getnumbuckets()) {
> +               device_printf(dev,
> +                   "%s: number of queues (%d) != number of RSS buckets (%d)"
> +                   "; performance will be impacted.\n",
> +                   __func__,
> +                   adapter->num_queues,
> +                   rss_getnumbuckets());
> +       }
> +#endif
> +
> +
>
>         for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
>                 rid = vector + 1;
> @@ -2362,12 +2413,37 @@ ixgbe_allocate_msix(struct adapter *adap
>  #endif
>                 que->msix = vector;
>                 adapter->que_mask |= (u64)(1 << que->msix);
> +#ifdef RSS
>                 /*
> -               ** Bind the msix vector, and thus the
> -               ** ring to the corresponding cpu.
> -               */
> +                * The queue ID is used as the RSS layer bucket ID.
> +                * We look up the queue ID -> RSS CPU ID and select
> +                * that.
> +                */
> +               cpu_id = rss_getcpu(i % rss_getnumbuckets());
> +#else
> +               /*
> +                * Bind the msix vector, and thus the
> +                * rings to the corresponding cpu.
> +                *
> +                * This just happens to match the default RSS round-robin
> +                * bucket -> queue -> CPU allocation.
> +                */
>                 if (adapter->num_queues > 1)
> -                       bus_bind_intr(dev, que->res, i);
> +                       cpu_id = i;
> +#endif
> +               if (adapter->num_queues > 1)
> +                       bus_bind_intr(dev, que->res, cpu_id);
> +
> +#ifdef RSS
> +               device_printf(dev,
> +                   "Bound RSS bucket %d to CPU %d\n",
> +                   i, cpu_id);
> +#else
> +               device_printf(dev,
> +                   "Bound queue %d to cpu %d\n",
> +                   i, cpu_id);
> +#endif
> +
>
>  #ifndef IXGBE_LEGACY_TX
>                 TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
> @@ -2375,8 +2451,16 @@ ixgbe_allocate_msix(struct adapter *adap
>                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
>                 que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
>                     taskqueue_thread_enqueue, &que->tq);
> +#ifdef RSS
> +               taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
> +                   cpu_id,
> +                   "%s (bucket %d)",
> +                   device_get_nameunit(adapter->dev),
> +                   cpu_id);
> +#else
>                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
>                     device_get_nameunit(adapter->dev));
> +#endif
>         }
>
>         /* and Link */
> @@ -2450,6 +2534,11 @@ ixgbe_setup_msix(struct adapter *adapter
>
>         /* Figure out a reasonable auto config value */
>         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
> +#ifdef RSS
> +       /* If we're doing RSS, clamp at the number of RSS buckets */
> +       if (queues > rss_getnumbuckets())
> +               queues = rss_getnumbuckets();
> +#endif
>
>         if (ixgbe_num_queues != 0)
>                 queues = ixgbe_num_queues;
> @@ -4107,6 +4196,65 @@ fail:
>         return (ENOBUFS);
>  }
>
> +static void
> +ixgbe_initialise_rss_mapping(struct adapter *adapter)
> +{
> +       struct ixgbe_hw *hw = &adapter->hw;
> +       uint32_t reta;
> +       int i, j, queue_id;
> +       uint32_t rss_key[10];
> +       uint32_t mrqc;
> +
> +       /* Setup RSS */
> +       reta = 0;
> +
> +#ifdef RSS
> +       /* Fetch the configured RSS key */
> +       rss_getkey((uint8_t *) &rss_key);
> +#else
> +       /* set up random bits */
> +       arc4rand(&rss_key, sizeof(rss_key), 0);
> +#endif
> +
> +       /* Set up the redirection table */
> +       for (i = 0, j = 0; i < 128; i++, j++) {
> +               if (j == adapter->num_queues) j = 0;
> +#ifdef RSS
> +               /*
> +                * Fetch the RSS bucket id for the given indirection entry.
> +                * Cap it at the number of configured buckets (which is
> +                * num_queues.)
> +                */
> +               queue_id = rss_get_indirection_to_bucket(i);
> +               queue_id = queue_id % adapter->num_queues;
> +#else
> +               queue_id = (j * 0x11);
> +#endif
> +               /* XXX endian? */
> +               reta = (reta << 8) | queue_id;
> +               if ((i & 3) == 3)
> +                       IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
> +       }
> +
> +       /* Now fill our hash function seeds */
> +       for (int i = 0; i < 10; i++)
> +               IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
> +
> +       /* Perform hash on these packet types */
> +       mrqc = IXGBE_MRQC_RSSEN
> +            | IXGBE_MRQC_RSS_FIELD_IPV4
> +            | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
> +            | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
> +            | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
> +            | IXGBE_MRQC_RSS_FIELD_IPV6_EX
> +            | IXGBE_MRQC_RSS_FIELD_IPV6
> +            | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
> +            | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
> +            | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
> +       IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> +}
> +
> +
>  /*********************************************************************
>   *
>   *  Setup receive registers and features.
> @@ -4123,7 +4271,7 @@ ixgbe_initialize_receive_units(struct ad
>         struct ixgbe_hw *hw = &adapter->hw;
>         struct ifnet   *ifp = adapter->ifp;
>         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
> -       u32             reta, mrqc = 0, hlreg, random[10];
> +       u32             hlreg;
>
>
>         /*
> @@ -4195,39 +4343,9 @@ ixgbe_initialize_receive_units(struct ad
>
>         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
>
> -       /* Setup RSS */
> -       if (adapter->num_queues > 1) {
> -               int i, j;
> -               reta = 0;
> -
> -               /* set up random bits */
> -               arc4rand(&random, sizeof(random), 0);
> -
> -               /* Set up the redirection table */
> -               for (i = 0, j = 0; i < 128; i++, j++) {
> -                       if (j == adapter->num_queues) j = 0;
> -                       reta = (reta << 8) | (j * 0x11);
> -                       if ((i & 3) == 3)
> -                               IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
> -               }
> -
> -               /* Now fill our hash function seeds */
> -               for (int i = 0; i < 10; i++)
> -                       IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
> -
> -               /* Perform hash on these packet types */
> -               mrqc = IXGBE_MRQC_RSSEN
> -                    | IXGBE_MRQC_RSS_FIELD_IPV4
> -                    | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
> -                    | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
> -                    | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
> -                    | IXGBE_MRQC_RSS_FIELD_IPV6_EX
> -                    | IXGBE_MRQC_RSS_FIELD_IPV6
> -                    | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
> -                    | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
> -                    | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
> -               IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> +       ixgbe_initialise_rss_mapping(adapter);
>
> +       if (adapter->num_queues > 1) {
>                 /* RSS and RX IPP Checksum are mutually exclusive */
>                 rxcsum |= IXGBE_RXCSUM_PCSD;
>         }
> @@ -4400,6 +4518,7 @@ ixgbe_rxeof(struct ix_queue *que)
>         u16                     count = rxr->process_limit;
>         union ixgbe_adv_rx_desc *cur;
>         struct ixgbe_rx_buf     *rbuf, *nbuf;
> +       u16                     pkt_info;
>
>         IXGBE_RX_LOCK(rxr);
>
> @@ -4424,6 +4543,7 @@ ixgbe_rxeof(struct ix_queue *que)
>
>                 cur = &rxr->rx_base[i];
>                 staterr = le32toh(cur->wb.upper.status_error);
> +               pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
>
>                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
>                         break;
> @@ -4556,9 +4676,44 @@ ixgbe_rxeof(struct ix_queue *que)
>                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
>                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
>  #if __FreeBSD_version >= 800000
> +#ifdef RSS
> +                       sendmp->m_pkthdr.flowid =
> +                           le32toh(cur->wb.lower.hi_dword.rss);
> +                       sendmp->m_flags |= M_FLOWID;
> +                       switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
> +                       case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
> +                               M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
> +                               break;
> +                       case IXGBE_RXDADV_RSSTYPE_IPV4:
> +                               M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
> +                               break;
> +                       case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
> +                               M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
> +                               break;
> +                       case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
> +                               M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
> +                               break;
> +                       case IXGBE_RXDADV_RSSTYPE_IPV6:
> +                               M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
> +                               break;
> +                       case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
> +                               M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
> +                               break;
> +                       /* XXX no UDP support in RSS just yet */
> +#ifdef notyet
> +                       case IGXBE_RXDADV_RSSTYPE_IPV4_UDP:
> +                       case IGXBE_RXDADV_RSSTYPE_IPV6_UDP:
> +                       case IGXBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
> +#endif /* notyet */
> +                       default:
> +                               /* XXX fallthrough */
> +                               M_HASHTYPE_SET(sendmp, M_HASHTYPE_NONE);
> +                       }
> +#else /* RSS */
>                         sendmp->m_pkthdr.flowid = que->msix;
>                         sendmp->m_flags |= M_FLOWID;
> -#endif
> +#endif /* RSS */
> +#endif /* FreeBSD_version */
>                 }
>  next_desc:
>                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
>


More information about the svn-src-all mailing list