lagg/lacp poor traffic distribution

Gleb Kurtsou gleb.kurtsou at gmail.com
Sun Dec 26 18:17:56 UTC 2010


On (19/12/2010 00:35), Eugene Grosbein wrote:
> Hi!
> 
> I've loaded router using two lagg interfaces in LACP mode.
> lagg0 has an IP address and two ports (em0 and em1) and carries untagged frames.
> lagg1 has no IP address, has two ports (igb0 and igb1) and carries
> about 1000 dot-q vlans with lots of hosts in each vlan.
> 
> For lagg1, lagg distributes outgoing traffic over two ports just fine.
> For lagg0 (untagged ethernet segment with only 2 MAC addresses)
> less than 0.07% (54Mbit/s max) of traffic goes to em0
> and over 99.92% goes to em1, that's bad.
> 
> That's general traffic of several thousands of customers surfing the web,
> using torrents etc.  I've glanced over lagg/lacp sources in src/sys/net/
> and found nothing suspicious, it should extract and use srcIP/dstIP for hash.
> 
> How do I debug this problem?
Could you try the attached patch? It changes the hash function used for
distributing traffic.

Thanks,
Gleb.
> 
> Eugene Grosbein
> _______________________________________________
> freebsd-net at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-net
> To unsubscribe, send any mail to "freebsd-net-unsubscribe at freebsd.org"
-------------- next part --------------
--- /dev/null
+++ b/sys/sys/hash_sfh.h
@@ -0,0 +1,89 @@
+/*-
+ * Copyright (c) 2010, Paul Hsieh
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. My name, Paul Hsieh, and the names of any other contributors to
+ *    the code use may not be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_HASH_SFH_H_
+#define	_SYS_HASH_SFH_H_
+#include <sys/types.h>
+/* Hash len bytes at buf, continuing from the seed value passed in hash. */
+static __inline uint32_t
+hash_sfh_buf(const void *buf, size_t len, uint32_t hash)
+{
+	const uint8_t *data = buf;
+	uint32_t tmp;
+	int rem;
+
+	if (len <= 0 || data == NULL)	/* nothing to hash: seed is discarded */
+		return (0);
+
+	rem = len & 3;		/* bytes left after the last whole 4-byte group */
+	len >>= 2;		/* from here on, len counts 4-byte groups */
+	/* get16bits(d): the two bytes at d, read as a little-endian 16-bit value */
+#define	get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
+		+(uint32_t)(((const uint8_t *)(d))[0]) )
+
+	/* Main loop: mix in two 16-bit words (four bytes) per iteration */
+	for (;len > 0; len--) {
+		hash  += get16bits(data);
+		tmp    = (get16bits(data + 2) << 11) ^ hash;
+		hash   = (hash << 16) ^ tmp;
+		data  += 2 * sizeof(uint16_t);
+		hash  += hash >> 11;
+	}
+
+	/* Handle end cases: fold in the remaining 1-3 bytes, if any */
+	switch (rem) {
+	case 3: hash += get16bits(data);
+		hash ^= hash << 16;
+		hash ^= data[sizeof(uint16_t)] << 18;
+		hash += hash >> 11;
+		break;
+	case 2: hash += get16bits(data);
+		hash ^= hash << 11;
+		hash += hash >> 17;
+		break;
+	case 1: hash += *data;
+		hash ^= hash << 10;
+		hash += hash >> 1;
+	}
+#undef get16bits
+
+	/* Force "avalanching" of final 127 bits */
+	hash ^= hash << 3;
+	hash += hash >> 5;
+	hash ^= hash << 4;
+	hash += hash >> 17;
+	hash ^= hash << 25;
+	hash += hash >> 6;
+
+	return (hash);
+}
+#endif /* !_SYS_HASH_SFH_H_ */
--- a/sys/net/if_lagg.c
+++ b/sys/net/if_lagg.c
@@ -35,7 +35,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/priv.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
-#include <sys/hash.h>
+#include <sys/hash_sfh.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/taskqueue.h>
@@ -1414,19 +1414,19 @@ lagg_hashmbuf(struct mbuf *m, uint32_t key)
 		goto out;
 	eh = mtod(m, struct ether_header *);
 	etype = ntohs(eh->ether_type);
-	p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
-	p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+	p = hash_sfh_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
+	p = hash_sfh_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
 
 	/* Special handling for encapsulating VLAN frames */
 	if (m->m_flags & M_VLANTAG) {
-		p = hash32_buf(&m->m_pkthdr.ether_vtag,
+		p = hash_sfh_buf(&m->m_pkthdr.ether_vtag,
 		    sizeof(m->m_pkthdr.ether_vtag), p);
 	} else if (etype == ETHERTYPE_VLAN) {
 		vlan = lagg_gethdr(m, off,  sizeof(*vlan), &vlanbuf);
 		if (vlan == NULL)
 			goto out;
 
-		p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+		p = hash_sfh_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
 		etype = ntohs(vlan->evl_proto);
 		off += sizeof(*vlan) - sizeof(*eh);
 	}
@@ -1438,8 +1438,8 @@ lagg_hashmbuf(struct mbuf *m, uint32_t key)
 		if (ip == NULL)
 			goto out;
 
-		p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
-		p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+		p = hash_sfh_buf(&ip->ip_src, sizeof(struct in_addr), p);
+		p = hash_sfh_buf(&ip->ip_dst, sizeof(struct in_addr), p);
 		break;
 #endif
 #ifdef INET6
@@ -1448,10 +1448,10 @@ lagg_hashmbuf(struct mbuf *m, uint32_t key)
 		if (ip6 == NULL)
 			goto out;
 
-		p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
-		p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
+		p = hash_sfh_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
+		p = hash_sfh_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
 		flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
-		p = hash32_buf(&flow, sizeof(flow), p);	/* IPv6 flow label */
+		p = hash_sfh_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
 		break;
 #endif
 	}


More information about the freebsd-net mailing list