svn commit: r268028 - head/sys/dev/e1000
Adrian Chadd
adrian at freebsd.org
Mon Jun 30 04:39:27 UTC 2014
Reviewed by: jfv, gnn
On 29 June 2014 21:34, Adrian Chadd <adrian at freebsd.org> wrote:
> Author: adrian
> Date: Mon Jun 30 04:34:59 2014
> New Revision: 268028
> URL: http://svnweb.freebsd.org/changeset/base/268028
>
> Log:
> Add initial RSS awareness to the igb(4) driver.
>
> The igb(4) hardware is capable of RSS hashing RX packets and doing RSS
> queue selection for up to 8 queues. (I believe some hardware is limited
> to 4 queues, but I haven't tested on that.)
>
> However, even if multi-queue is enabled for igb(4), the RX path doesn't use
> the RSS flowid from the received descriptor. It just uses the MSIX queue id.
>
> This patch does a handful of things if RSS is enabled:
>
> * Instead of using a random key at boot, fetch the RSS key from the RSS code
> and program that into the RSS redirection table.
>
> That whole chunk of code should be double checked for endian correctness.
>
> * Use the RSS queue mapping to CPU ID to figure out where to pin
> the RX swi thread and the taskqueue threads for each queue.
>
> * The software queue is now really an "RSS bucket".
>
> * When programming the RSS indirection table, use the RSS code to
> figure out which RSS bucket each slot in the indirection table maps
> to.
>
> * When transmitting, use the flowid RSS mapping if the mbuf has
> an RSS aware hash. The existing method wasn't guaranteed to align
> correctly with the destination RSS bucket (and thus CPU ID.)
>
> This code warns if the number of RSS buckets isn't the same as the
> automatically configured number of hardware queues. The administrator
> will have to tweak one of them for better performance.
>
> There's currently no way to re-balance the RSS indirection table after
> startup. I'll worry about that later.
>
> Additionally, it may be worthwhile to always use the full 32 bit flowid if
> multi-queue is enabled. It'll make things like lagg(4) behave better with
> respect to traffic distribution.
>
> Modified:
> head/sys/dev/e1000/if_igb.c
>
> Modified: head/sys/dev/e1000/if_igb.c
> ==============================================================================
> --- head/sys/dev/e1000/if_igb.c Mon Jun 30 04:26:29 2014 (r268027)
> +++ head/sys/dev/e1000/if_igb.c Mon Jun 30 04:34:59 2014 (r268028)
> @@ -35,6 +35,7 @@
>
> #include "opt_inet.h"
> #include "opt_inet6.h"
> +#include "opt_rss.h"
>
> #ifdef HAVE_KERNEL_OPTION_HEADERS
> #include "opt_device_polling.h"
> @@ -84,6 +85,9 @@
> #include <netinet/tcp.h>
> #include <netinet/tcp_lro.h>
> #include <netinet/udp.h>
> +#ifdef RSS
> +#include <netinet/in_rss.h>
> +#endif
>
> #include <machine/in_cksum.h>
> #include <dev/led/led.h>
> @@ -967,12 +971,33 @@ igb_mq_start(struct ifnet *ifp, struct m
> struct igb_queue *que;
> struct tx_ring *txr;
> int i, err = 0;
> +#ifdef RSS
> + uint32_t bucket_id;
> +#endif
>
> /* Which queue to use */
> - if ((m->m_flags & M_FLOWID) != 0)
> - i = m->m_pkthdr.flowid % adapter->num_queues;
> - else
> + /*
> + * When doing RSS, map it to the same outbound queue
> + * as the incoming flow would be mapped to.
> + *
> + * If everything is setup correctly, it should be the
> + * same bucket that the current CPU we're on is.
> + */
> + if ((m->m_flags & M_FLOWID) != 0) {
> +#ifdef RSS
> + if (rss_hash2bucket(m->m_pkthdr.flowid,
> + M_HASHTYPE_GET(m), &bucket_id) == 0) {
> + /* XXX TODO: spit out something if bucket_id > num_queues? */
> + i = bucket_id % adapter->num_queues;
> + } else {
> +#endif
> + i = m->m_pkthdr.flowid % adapter->num_queues;
> +#ifdef RSS
> + }
> +#endif
> + } else {
> i = curcpu % adapter->num_queues;
> + }
> txr = &adapter->tx_rings[i];
> que = &adapter->queues[i];
>
> @@ -2433,11 +2458,34 @@ igb_allocate_msix(struct adapter *adapte
> device_t dev = adapter->dev;
> struct igb_queue *que = adapter->queues;
> int error, rid, vector = 0;
> + int cpu_id;
>
> /* Be sure to start with all interrupts disabled */
> E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
> E1000_WRITE_FLUSH(&adapter->hw);
>
> +#ifdef RSS
> + /*
> + * If we're doing RSS, the number of queues needs to
> + * match the number of RSS buckets that are configured.
> + *
> + * + If there's more queues than RSS buckets, we'll end
> + * up with queues that get no traffic.
> + *
> + * + If there's more RSS buckets than queues, we'll end
> + * up having multiple RSS buckets map to the same queue,
> + * so there'll be some contention.
> + */
> + if (adapter->num_queues != rss_getnumbuckets()) {
> + device_printf(dev,
> + "%s: number of queues (%d) != number of RSS buckets (%d)"
> + "; performance will be impacted.\n",
> + __func__,
> + adapter->num_queues,
> + rss_getnumbuckets());
> + }
> +#endif
> +
> for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
> rid = vector +1;
> que->res = bus_alloc_resource_any(dev,
> @@ -2464,19 +2512,42 @@ igb_allocate_msix(struct adapter *adapte
> que->eims = E1000_EICR_TX_QUEUE0 << i;
> else
> que->eims = 1 << vector;
> +
> +#ifdef RSS
> /*
> - ** Bind the msix vector, and thus the
> - ** rings to the corresponding cpu.
> - */
> + * The queue ID is used as the RSS layer bucket ID.
> + * We look up the queue ID -> RSS CPU ID and select
> + * that.
> + */
> + cpu_id = rss_getcpu(i % rss_getnumbuckets());
> +#else
> + /*
> + * Bind the msix vector, and thus the
> + * rings to the corresponding cpu.
> + *
> + * This just happens to match the default RSS round-robin
> + * bucket -> queue -> CPU allocation.
> + */
> if (adapter->num_queues > 1) {
> if (igb_last_bind_cpu < 0)
> igb_last_bind_cpu = CPU_FIRST();
> - bus_bind_intr(dev, que->res, igb_last_bind_cpu);
> + cpu_id = igb_last_bind_cpu;
> + }
> +#endif
> +
> + if (adapter->num_queues > 1) {
> + bus_bind_intr(dev, que->res, cpu_id);
> +#ifdef RSS
> + device_printf(dev,
> + "Bound queue %d to RSS bucket %d\n",
> + i, cpu_id);
> +#else
> device_printf(dev,
> "Bound queue %d to cpu %d\n",
> - i,igb_last_bind_cpu);
> - igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
> + i, cpu_id);
> +#endif
> }
> +
> #ifndef IGB_LEGACY_TX
> TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
> que->txr);
> @@ -2485,8 +2556,34 @@ igb_allocate_msix(struct adapter *adapte
> TASK_INIT(&que->que_task, 0, igb_handle_que, que);
> que->tq = taskqueue_create("igb_que", M_NOWAIT,
> taskqueue_thread_enqueue, &que->tq);
> - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
> - device_get_nameunit(adapter->dev));
> + if (adapter->num_queues > 1) {
> + /*
> + * Only pin the taskqueue thread to a CPU if
> + * RSS is in use.
> + *
> + * This again just happens to match the default RSS
> + * round-robin bucket -> queue -> CPU allocation.
> + */
> +#ifdef RSS
> + taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
> + cpu_id,
> + "%s que (bucket %d)",
> + device_get_nameunit(adapter->dev),
> + cpu_id);
> +#else
> + taskqueue_start_threads(&que->tq, 1, PI_NET,
> + "%s que (qid %d)",
> + device_get_nameunit(adapter->dev),
> + cpu_id);
> +#endif
> + } else {
> + taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
> + device_get_nameunit(adapter->dev));
> + }
> +
> + /* Finally update the last bound CPU id */
> + if (adapter->num_queues > 1)
> + igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
> }
>
> /* And Link */
> @@ -2763,6 +2860,13 @@ igb_setup_msix(struct adapter *adapter)
> /* Figure out a reasonable auto config value */
> queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
>
> +#ifdef RSS
> + /* If we're doing RSS, clamp at the number of RSS buckets */
> + if (queues > rss_getnumbuckets())
> + queues = rss_getnumbuckets();
> +#endif
> +
> +
> /* Manual override */
> if (igb_num_queues != 0)
> queues = igb_num_queues;
> @@ -4455,6 +4559,99 @@ fail:
> return (ENOBUFS);
> }
>
> +/*
> + * Initialise the RSS mapping for NICs that support multiple transmit/
> + * receive rings.
> + */
> +static void
> +igb_initialise_rss_mapping(struct adapter *adapter)
> +{
> + struct e1000_hw *hw = &adapter->hw;
> + int i;
> + int queue_id;
> +
> + u32 rss_key[10], mrqc, shift = 0;
> + union igb_reta {
> + u32 dword;
> + u8 bytes[4];
> + } reta;
> +
> + /* XXX? */
> + if (adapter->hw.mac.type == e1000_82575)
> + shift = 6;
> +
> + /*
> + * The redirection table controls which destination
> + * queue each bucket redirects traffic to.
> + * Each DWORD represents four queues, with the LSB
> + * being the first queue in the DWORD.
> + *
> + * This just allocates buckets to queues using round-robin
> + * allocation.
> + *
> + * NOTE: It Just Happens to line up with the default
> + * RSS allocation method.
> + */
> +
> + /* Warning FM follows */
> + for (i = 0; i < 128; i++) {
> +#ifdef RSS
> + queue_id = rss_get_indirection_to_bucket(i);
> + /*
> + * If we have more queues than buckets, we'll
> + * end up mapping buckets to a subset of the
> + * queues.
> + *
> + * If we have more buckets than queues, we'll
> + * end up instead assigning multiple buckets
> + * to queues.
> + *
> + * Both are suboptimal, but we need to handle
> + * the case so we don't go out of bounds
> + * indexing arrays and such.
> + */
> + queue_id = queue_id % adapter->num_queues;
> +#else
> + queue_id = (i % adapter->num_queues);
> +#endif
> + reta.bytes[i & 3] = queue_id << shift;
> +
> + if ((i & 3) == 3)
> + E1000_WRITE_REG(hw,
> + E1000_RETA(i >> 2), reta.dword);
> + }
> +
> +
> + /* Now fill in hash table */
> +
> + /* XXX This means RSS enable + 8 queues for my igb (82580.) */
> + mrqc = E1000_MRQC_ENABLE_RSS_4Q;
> +
> +#ifdef RSS
> + /* XXX ew typecasting */
> + rss_getkey((uint8_t *) &rss_key);
> +#else
> + arc4rand(&rss_key, sizeof(rss_key), 0);
> +#endif
> + for (i = 0; i < 10; i++)
> + E1000_WRITE_REG_ARRAY(hw,
> + E1000_RSSRK(0), i, rss_key[i]);
> +
> + /*
> + * Configure the RSS fields to hash upon.
> + */
> + mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
> + E1000_MRQC_RSS_FIELD_IPV4_TCP);
> + mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
> + E1000_MRQC_RSS_FIELD_IPV6_TCP);
> + mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
> + E1000_MRQC_RSS_FIELD_IPV6_UDP);
> + mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
> + E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
> +
> + E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
> +}
> +
> /*********************************************************************
> *
> * Enable receive unit.
> @@ -4538,39 +4735,9 @@ igb_initialize_receive_units(struct adap
> */
> rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
> if (adapter->num_queues >1) {
> - u32 random[10], mrqc, shift = 0;
> - union igb_reta {
> - u32 dword;
> - u8 bytes[4];
> - } reta;
> -
> - arc4rand(&random, sizeof(random), 0);
> - if (adapter->hw.mac.type == e1000_82575)
> - shift = 6;
> - /* Warning FM follows */
> - for (int i = 0; i < 128; i++) {
> - reta.bytes[i & 3] =
> - (i % adapter->num_queues) << shift;
> - if ((i & 3) == 3)
> - E1000_WRITE_REG(hw,
> - E1000_RETA(i >> 2), reta.dword);
> - }
> - /* Now fill in hash table */
> - mrqc = E1000_MRQC_ENABLE_RSS_4Q;
> - for (int i = 0; i < 10; i++)
> - E1000_WRITE_REG_ARRAY(hw,
> - E1000_RSSRK(0), i, random[i]);
> -
> - mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
> - E1000_MRQC_RSS_FIELD_IPV4_TCP);
> - mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
> - E1000_MRQC_RSS_FIELD_IPV6_TCP);
> - mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
> - E1000_MRQC_RSS_FIELD_IPV6_UDP);
> - mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
> - E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
>
> - E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
> + /* rss setup */
> + igb_initialise_rss_mapping(adapter);
>
> /*
> ** NOTE: Receive Full-Packet Checksum Offload
> @@ -4831,7 +4998,7 @@ igb_rxeof(struct igb_queue *que, int cou
> for (i = rxr->next_to_check; count != 0;) {
> struct mbuf *sendmp, *mh, *mp;
> struct igb_rx_buf *rxbuf;
> - u16 hlen, plen, hdr, vtag;
> + u16 hlen, plen, hdr, vtag, pkt_info;
> bool eop = FALSE;
>
> cur = &rxr->rx_base[i];
> @@ -4853,6 +5020,7 @@ igb_rxeof(struct igb_queue *que, int cou
> else
> vtag = le16toh(cur->wb.upper.vlan);
> hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
> + pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
> eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
>
> /* Make sure all segments of a bad packet are discarded */
> @@ -4952,7 +5120,43 @@ igb_rxeof(struct igb_queue *que, int cou
> rxr->fmp->m_pkthdr.ether_vtag = vtag;
> rxr->fmp->m_flags |= M_VLANTAG;
> }
> -#ifndef IGB_LEGACY_TX
> +#ifdef RSS
> + /* XXX set flowtype once this works right */
> + rxr->fmp->m_pkthdr.flowid =
> + le32toh(cur->wb.lower.hi_dword.rss);
> + rxr->fmp->m_flags |= M_FLOWID;
> + switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
> + case E1000_RXDADV_RSSTYPE_IPV4_TCP:
> + M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV4);
> + break;
> + case E1000_RXDADV_RSSTYPE_IPV4:
> + M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV4);
> + break;
> + case E1000_RXDADV_RSSTYPE_IPV6_TCP:
> + M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6);
> + break;
> + case E1000_RXDADV_RSSTYPE_IPV6_EX:
> + M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6_EX);
> + break;
> + case E1000_RXDADV_RSSTYPE_IPV6:
> + M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6);
> + break;
> + case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
> + M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
> + break;
> +
> + /* XXX no UDP support in RSS just yet */
> +#ifdef notyet
> + case E1000_RXDADV_RSSTYPE_IPV4_UDP:
> + case E1000_RXDADV_RSSTYPE_IPV6_UDP:
> + case E1000_RXDADV_RSSTYPE_IPV6_UDP_EX:
> +#endif
> +
> + default:
> + /* XXX fallthrough */
> + M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_NONE);
> + }
> +#elif !defined(IGB_LEGACY_TX)
> rxr->fmp->m_pkthdr.flowid = que->msix;
> rxr->fmp->m_flags |= M_FLOWID;
> #endif
>
More information about the svn-src-all
mailing list