Flow ID, LACP, and igb

Alan Somers asomers at freebsd.org
Wed Aug 28 23:42:59 UTC 2013


On Mon, Aug 26, 2013 at 2:40 PM, Andre Oppermann <andre at freebsd.org> wrote:

> On 26.08.2013 19:18, Justin T. Gibbs wrote:
>
>> Hi Net,
>>
>> I'm an infrequent traveler through the networking code and would
>> appreciate some feedback on some proposed solutions to issues Spectra
>> has seen with outbound LACP traffic.
>>
>> lacp_select_tx_port() uses the flow ID if it is available in the outbound
>> mbuf to select the outbound port.  The igb driver uses the msix queue of
>> the inbound packet to set a packet's flow ID.  This doesn't provide enough
>> bits of information to yield a high quality flow ID.  If, for example, the
>> switch controlling inbound packet distribution does a poor job, the
>> outbound
>> packet distribution will also be poorly distributed.
>>
>
> Please note that inbound and outbound flow ID do not need to be the same
> or symmetric.  It only should stay the same for all packets in a single
> connection to prevent reordering.
>
> Generally it doesn't matter if in- and outbound packets do not use the
> same queue.  Only in sophisticated setups with full affinity, which we
> don't support yet, it could matter.
>
>
>  The majority of the adapters supported by this driver will compute
>> the Toeplitz RSS hash.  Using this data seems to work quite well
>> in our tests (3 member LAGG group).  Is there any reason we shouldn't
>> use the RSS hash for flow ID?
>>
>
> Using the RSS hash is the idea.  The infrastructure and driver adjustments
> haven't been implemented throughout yet.
>
>
>  We also tried disabling the use of flow ID and doing the hash directly in
>> the driver.  Unfortunately, the current hash is pretty weak.  It
>> multiplies
>> by 33, which yield very poor distributions if you need to mod the result
>> by 3 (e.g. LAGG group with 3 members).  Alan modified the driver to use
>> the FNV hash, which is already in the kernel, and this yielded much better
>> results.  He is still benchmarking the impact of this change.  Assuming we
>> can get decent flow ID data, this should only impact outbound UDP, since
>> the
>> stack doesn't provide a flow ID in this case.
>>
>> Are there other checksums we should be looking at in addition to FNV?
>>
>
> siphash24() is fast, keyed and strong.
>
I benchmarked hash32 (the existing hash function) vs fnv_hash using both
TCP and UDP, with 1500 and 9000 byte MTUs.  At 10Gbps, I couldn't measure
any difference in either throughput or cpu utilization.  Given that
siphash24 is definitely slower than hash32, there's no way that I'll find
it to be significantly faster than fnv_hash for this application.  In fact,
I'm guessing that it will be slower due to the function call overhead and
the fact that lagg_hashmbuf calls the hash function on very short buffers.
Therefore I'm going to commit the change using fnv_hash in the next few
days if no one objects.  Here's the diff:

==== //SpectraBSD/stable/sys/net/ieee8023ad_lacp.c#4 (text) ====

@@ -763,7 +763,6 @@
     sc->sc_psc = (caddr_t)lsc;
     lsc->lsc_softc = sc;

-    lsc->lsc_hashkey = arc4random();
     lsc->lsc_active_aggregator = NULL;
     LACP_LOCK_INIT(lsc);
     TAILQ_INIT(&lsc->lsc_aggregators);
@@ -841,7 +840,7 @@
     if (sc->use_flowid && (m->m_flags & M_FLOWID))
         hash = m->m_pkthdr.flowid;
     else
-        hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey);
+        hash = lagg_hashmbuf(sc, m);
     hash %= pm->pm_count;
     lp = pm->pm_map[hash];


==== //SpectraBSD/stable/sys/net/ieee8023ad_lacp.h#2 (text) ====

@@ -244,7 +244,6 @@
     LIST_HEAD(, lacp_port)    lsc_ports;
     struct lacp_portmap    lsc_pmap[2];
     volatile u_int        lsc_activemap;
-    u_int32_t        lsc_hashkey;
 };

 #define    LACP_TYPE_ACTORINFO    1

==== //SpectraBSD/stable/sys/net/if_lagg.c#9 (text) ====

@@ -35,7 +35,7 @@
 #include <sys/priv.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
-#include <sys/hash.h>
+#include <sys/fnv_hash.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/taskqueue.h>
@@ -1588,10 +1588,10 @@
 }

 uint32_t
-lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
+lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m)
 {
     uint16_t etype;
-    uint32_t p = key;
+    uint32_t p = FNV1_32_INIT;
     int off;
     struct ether_header *eh;
     const struct ether_vlan_header *vlan;
@@ -1622,13 +1622,13 @@
     eh = mtod(m, struct ether_header *);
     etype = ntohs(eh->ether_type);
     if (sc->sc_flags & LAGG_F_HASHL2) {
-        p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
-        p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+        p = fnv_32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
+        p = fnv_32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
     }

     /* Special handling for encapsulating VLAN frames */
     if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
-        p = hash32_buf(&m->m_pkthdr.ether_vtag,
+        p = fnv_32_buf(&m->m_pkthdr.ether_vtag,
             sizeof(m->m_pkthdr.ether_vtag), p);
     } else if (etype == ETHERTYPE_VLAN) {
         vlan = lagg_gethdr(m, off,  sizeof(*vlan), &buf);
@@ -1636,7 +1636,7 @@
             goto out;

         if (sc->sc_flags & LAGG_F_HASHL2)
-            p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+            p = fnv_32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
         etype = ntohs(vlan->evl_proto);
         off += sizeof(*vlan) - sizeof(*eh);
     }
@@ -1649,8 +1649,8 @@
             goto out;

         if (sc->sc_flags & LAGG_F_HASHL3) {
-            p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
-            p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+            p = fnv_32_buf(&ip->ip_src, sizeof(struct in_addr), p);
+            p = fnv_32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
         }
         if (!(sc->sc_flags & LAGG_F_HASHL4))
             break;
@@ -1665,7 +1665,7 @@
                 ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
                 if (ports == NULL)
                     break;
-                p = hash32_buf(ports, sizeof(*ports), p);
+                p = fnv_32_buf(ports, sizeof(*ports), p);
                 break;
         }
         break;
@@ -1678,10 +1678,10 @@
         if (ip6 == NULL)
             goto out;

-        p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
-        p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
+        p = fnv_32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
+        p = fnv_32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
         flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
-        p = hash32_buf(&flow, sizeof(flow), p);    /* IPv6 flow label */
+        p = fnv_32_buf(&flow, sizeof(flow), p);    /* IPv6 flow label */
         break;
 #endif
     }
@@ -1904,7 +1904,7 @@
     if (sc->use_flowid && (m->m_flags & M_FLOWID))
         p = m->m_pkthdr.flowid;
     else
-        p = lagg_hashmbuf(sc, m, lb->lb_key);
+        p = lagg_hashmbuf(sc, m);
     p %= sc->sc_count;
     lp = lb->lb_ports[p];


==== //SpectraBSD/stable/sys/net/if_lagg.h#5 (text) ====

@@ -262,7 +262,7 @@
 extern void    (*lagg_linkstate_p)(struct ifnet *, int );

 int        lagg_enqueue(struct ifnet *, struct mbuf *);
-uint32_t    lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t);
+uint32_t    lagg_hashmbuf(struct lagg_softc *, struct mbuf *);

 SYSCTL_DECL(_net_link_lagg);



> --
> Andre
>
>


More information about the freebsd-net mailing list