Flow ID, LACP, and igb
Alan Somers
asomers at freebsd.org
Wed Aug 28 23:42:59 UTC 2013
On Mon, Aug 26, 2013 at 2:40 PM, Andre Oppermann <andre at freebsd.org> wrote:
> On 26.08.2013 19:18, Justin T. Gibbs wrote:
>
>> Hi Net,
>>
>> I'm an infrequent traveler through the networking code and would
>> appreciate some feedback on some proposed solutions to issues Spectra
>> has seen with outbound LACP traffic.
>>
>> lacp_select_tx_port() uses the flow ID if it is available in the outbound
>> mbuf to select the outbound port. The igb driver uses the msix queue of
>> the inbound packet to set a packet's flow ID. This doesn't provide enough
>> bits of information to yield a high quality flow ID. If, for example, the
>> switch controlling inbound packet distribution does a poor job, the
>> outbound
>> packet distribution will also be poorly distributed.
>>
>
> Please note that inbound and outbound flow ID do not need to be the same
> or symmetric. It only should stay the same for all packets in a single
> connection to prevent reordering.
>
> Generally it doesn't matter if in- and outbound packets do not use the
> same queue. Only in sophisticated setups with full affinity, which we
> don't support yet, it could matter.
>
>
> The majority of the adapters supported by this driver will compute
>> the Toeplitz RSS hash. Using this data seems to work quite well
>> in our tests (3 member LAGG group). Is there any reason we shouldn't
>> use the RSS hash for flow ID?
>>
>
> Using the RSS hash is the idea. The infrastructure and driver adjustments
> haven't been implemented throughout yet.
>
>
> We also tried disabling the use of flow ID and doing the hash directly in
>> the driver. Unfortunately, the current hash is pretty weak. It
>> multiplies
>> by 33, which yield very poor distributions if you need to mod the result
>> by 3 (e.g. LAGG group with 3 members). Alan modified the driver to use
>> the FNV hash, which is already in the kernel, and this yielded much better
>> results. He is still benchmarking the impact of this change. Assuming we
>> can get decent flow ID data, this should only impact outbound UDP, since
>> the
>> stack doesn't provide a flow ID in this case.
>>
>> Are there other checksums we should be looking at in addition to FNV?
>>
>
> siphash24() is fast, keyed and strong.
>
I benchmarked hash32 (the existing hash function) vs fnv_hash using both
TCP and UDP, with 1500 and 9000 byte MTUs. At 10Gbps, I couldn't measure
any difference in either throughput or cpu utilization. Given that
siphash24 is definitely slower than hash32, there's no way that I'll find
it to be significantly faster than fnv_hash for this application. In fact,
I'm guessing that it will be slower due to the function call overhead and
the fact that lagg_hashmbuf calls the hash function on very short buffers.
Therefore I'm going to commit the change using fnv_hash in the next few
days if no one objects. Here's the diff:
==== //SpectraBSD/stable/sys/net/ieee8023ad_lacp.c#4 (text) ====
@@ -763,7 +763,6 @@
sc->sc_psc = (caddr_t)lsc;
lsc->lsc_softc = sc;
- lsc->lsc_hashkey = arc4random();
lsc->lsc_active_aggregator = NULL;
LACP_LOCK_INIT(lsc);
TAILQ_INIT(&lsc->lsc_aggregators);
@@ -841,7 +840,7 @@
if (sc->use_flowid && (m->m_flags & M_FLOWID))
hash = m->m_pkthdr.flowid;
else
- hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey);
+ hash = lagg_hashmbuf(sc, m);
hash %= pm->pm_count;
lp = pm->pm_map[hash];
==== //SpectraBSD/stable/sys/net/ieee8023ad_lacp.h#2 (text) ====
@@ -244,7 +244,6 @@
LIST_HEAD(, lacp_port) lsc_ports;
struct lacp_portmap lsc_pmap[2];
volatile u_int lsc_activemap;
- u_int32_t lsc_hashkey;
};
#define LACP_TYPE_ACTORINFO 1
==== //SpectraBSD/stable/sys/net/if_lagg.c#9 (text) ====
@@ -35,7 +35,7 @@
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/proc.h>
-#include <sys/hash.h>
+#include <sys/fnv_hash.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/taskqueue.h>
@@ -1588,10 +1588,10 @@
}
uint32_t
-lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
+lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m)
{
uint16_t etype;
- uint32_t p = key;
+ uint32_t p = FNV1_32_INIT;
int off;
struct ether_header *eh;
const struct ether_vlan_header *vlan;
@@ -1622,13 +1622,13 @@
eh = mtod(m, struct ether_header *);
etype = ntohs(eh->ether_type);
if (sc->sc_flags & LAGG_F_HASHL2) {
- p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
- p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+ p = fnv_32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
+ p = fnv_32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
}
/* Special handling for encapsulating VLAN frames */
if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
- p = hash32_buf(&m->m_pkthdr.ether_vtag,
+ p = fnv_32_buf(&m->m_pkthdr.ether_vtag,
sizeof(m->m_pkthdr.ether_vtag), p);
} else if (etype == ETHERTYPE_VLAN) {
vlan = lagg_gethdr(m, off, sizeof(*vlan), &buf);
@@ -1636,7 +1636,7 @@
goto out;
if (sc->sc_flags & LAGG_F_HASHL2)
- p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+ p = fnv_32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
etype = ntohs(vlan->evl_proto);
off += sizeof(*vlan) - sizeof(*eh);
}
@@ -1649,8 +1649,8 @@
goto out;
if (sc->sc_flags & LAGG_F_HASHL3) {
- p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
- p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+ p = fnv_32_buf(&ip->ip_src, sizeof(struct in_addr), p);
+ p = fnv_32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
}
if (!(sc->sc_flags & LAGG_F_HASHL4))
break;
@@ -1665,7 +1665,7 @@
ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
if (ports == NULL)
break;
- p = hash32_buf(ports, sizeof(*ports), p);
+ p = fnv_32_buf(ports, sizeof(*ports), p);
break;
}
break;
@@ -1678,10 +1678,10 @@
if (ip6 == NULL)
goto out;
- p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
- p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
+ p = fnv_32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
+ p = fnv_32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
- p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
+ p = fnv_32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
break;
#endif
}
@@ -1904,7 +1904,7 @@
if (sc->use_flowid && (m->m_flags & M_FLOWID))
p = m->m_pkthdr.flowid;
else
- p = lagg_hashmbuf(sc, m, lb->lb_key);
+ p = lagg_hashmbuf(sc, m);
p %= sc->sc_count;
lp = lb->lb_ports[p];
==== //SpectraBSD/stable/sys/net/if_lagg.h#5 (text) ====
@@ -262,7 +262,7 @@
extern void (*lagg_linkstate_p)(struct ifnet *, int );
int lagg_enqueue(struct ifnet *, struct mbuf *);
-uint32_t lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t);
+uint32_t lagg_hashmbuf(struct lagg_softc *, struct mbuf *);
SYSCTL_DECL(_net_link_lagg);
> --
> Andre
>
>
More information about the freebsd-net
mailing list