svn commit: r347055 - in head: sbin/ifconfig sys/net

Andrew Gallatin gallatin at FreeBSD.org
Fri May 3 14:43:24 UTC 2019


Author: gallatin
Date: Fri May  3 14:43:21 2019
New Revision: 347055
URL: https://svnweb.freebsd.org/changeset/base/347055

Log:
  Select lacp egress ports based on NUMA domain
  
  This change creates an array of port maps indexed by numa domain
  for lacp port selection. If we have lacp interfaces in more than
  one domain, then we select the egress port by indexing into the
  numa port maps and picking a port on the appropriate numa domain.
  
  This behavior is controlled by the new ifconfig use_numa flag
  and net.link.lagg.default_use_numa sysctl/tunable (both modeled after the
  existing use_flowid), which default to enabled.
  
  Reviewed by:	bz, hselasky, markj (and scottl, earlier version)
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D20060

Modified:
  head/sbin/ifconfig/ifconfig.8
  head/sbin/ifconfig/iflagg.c
  head/sys/net/ieee8023ad_lacp.c
  head/sys/net/ieee8023ad_lacp.h
  head/sys/net/if_lagg.c
  head/sys/net/if_lagg.h

Modified: head/sbin/ifconfig/ifconfig.8
==============================================================================
--- head/sbin/ifconfig/ifconfig.8	Fri May  3 13:06:46 2019	(r347054)
+++ head/sbin/ifconfig/ifconfig.8	Fri May  3 14:43:21 2019	(r347055)
@@ -28,7 +28,7 @@
 .\"     From: @(#)ifconfig.8	8.3 (Berkeley) 1/5/94
 .\" $FreeBSD$
 .\"
-.Dd June 27, 2018
+.Dd May 3, 2019
 .Dt IFCONFIG 8
 .Os
 .Sh NAME
@@ -2497,6 +2497,22 @@ Use the RSS hash from the network card if available.
 Set a shift parameter for RSS local hash computation.
 Hash is calculated by using flowid bits in a packet header mbuf
 which are shifted by the number of this parameter.
+.It Cm use_numa
+Enable selection of egress ports based on the native
+.Xr NUMA 4
+domain for the packets being transmitted.
+This is currently only implemented for lacp mode.
+This works only on
+.Xr NUMA 4
+hardware, running a kernel compiled with the
+.Xr NUMA 4
+option, and when interfaces from multiple
+.Xr NUMA 4
+domains are ports of the aggregation interface.
+.It Cm -use_numa
+Disable selection of egress ports based on the native
+.Xr NUMA 4
+domain for the packets being transmitted.
 .It Cm lacp_fast_timeout
 Enable lacp fast-timeout on the interface.
 .It Cm -lacp_fast_timeout

Modified: head/sbin/ifconfig/iflagg.c
==============================================================================
--- head/sbin/ifconfig/iflagg.c	Fri May  3 13:06:46 2019	(r347054)
+++ head/sbin/ifconfig/iflagg.c	Fri May  3 14:43:21 2019	(r347055)
@@ -130,6 +130,8 @@ setlaggsetopt(const char *val, int d, int s, const str
 	switch (ro.ro_opts) {
 	case LAGG_OPT_USE_FLOWID:
 	case -LAGG_OPT_USE_FLOWID:
+	case LAGG_OPT_USE_NUMA:
+	case -LAGG_OPT_USE_NUMA:
 	case LAGG_OPT_LACP_STRICT:
 	case -LAGG_OPT_LACP_STRICT:
 	case LAGG_OPT_LACP_TXTEST:
@@ -303,6 +305,8 @@ static struct cmd lagg_cmds[] = {
 	DEF_CMD_ARG("lagghash",		setlagghash),
 	DEF_CMD("use_flowid",	LAGG_OPT_USE_FLOWID,	setlaggsetopt),
 	DEF_CMD("-use_flowid",	-LAGG_OPT_USE_FLOWID,	setlaggsetopt),
+	DEF_CMD("use_numa",	LAGG_OPT_USE_NUMA,	setlaggsetopt),
+	DEF_CMD("-use_numa",	-LAGG_OPT_USE_NUMA,	setlaggsetopt),
 	DEF_CMD("lacp_strict",	LAGG_OPT_LACP_STRICT,	setlaggsetopt),
 	DEF_CMD("-lacp_strict",	-LAGG_OPT_LACP_STRICT,	setlaggsetopt),
 	DEF_CMD("lacp_txtest",	LAGG_OPT_LACP_TXTEST,	setlaggsetopt),

Modified: head/sys/net/ieee8023ad_lacp.c
==============================================================================
--- head/sys/net/ieee8023ad_lacp.c	Fri May  3 13:06:46 2019	(r347054)
+++ head/sys/net/ieee8023ad_lacp.c	Fri May  3 14:43:21 2019	(r347055)
@@ -835,7 +835,9 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf
 	struct lacp_softc *lsc = LACP_SOFTC(sc);
 	struct lacp_portmap *pm;
 	struct lacp_port *lp;
+	struct lacp_port **map;
 	uint32_t hash;
+	int count;
 
 	if (__predict_false(lsc->lsc_suppress_distributing)) {
 		LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
@@ -848,14 +850,32 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf
 		return (NULL);
 	}
 
+#ifdef NUMA
+	if ((sc->sc_opts & LAGG_OPT_USE_NUMA) &&
+	    pm->pm_num_dom > 1 && m->m_pkthdr.numa_domain < MAXMEMDOM) {
+		count = pm->pm_numa[m->m_pkthdr.numa_domain].count;
+		if (count > 0) {
+			map = pm->pm_numa[m->m_pkthdr.numa_domain].map;
+		} else {
+			/* No ports on this domain; use global hash. */
+			map = pm->pm_map;
+			count = pm->pm_count;
+		}
+	} else
+#endif
+	{
+		map = pm->pm_map;
+		count = pm->pm_count;
+	}
 	if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
 	    M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 		hash = m->m_pkthdr.flowid >> sc->flowid_shift;
 	else
 		hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
-	hash %= pm->pm_count;
-	lp = pm->pm_map[hash];
 
+	hash %= count;
+	lp = map[hash];
+
 	KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
 	    ("aggregated port is not distributing"));
 
@@ -1044,6 +1064,10 @@ lacp_update_portmap(struct lacp_softc *lsc)
 	uint64_t speed;
 	u_int newmap;
 	int i;
+#ifdef NUMA
+	int count;
+	uint8_t domain;
+#endif
 
 	newmap = lsc->lsc_activemap == 0 ? 1 : 0;
 	p = &lsc->lsc_pmap[newmap];
@@ -1054,9 +1078,25 @@ lacp_update_portmap(struct lacp_softc *lsc)
 	if (la != NULL && la->la_nports > 0) {
 		p->pm_count = la->la_nports;
 		i = 0;
-		TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q)
+		TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) {
 			p->pm_map[i++] = lp;
+#ifdef NUMA
+			domain = lp->lp_ifp->if_numa_domain;
+			if (domain >= MAXMEMDOM)
+				continue;
+			count = p->pm_numa[domain].count;
+			p->pm_numa[domain].map[count] = lp;
+			p->pm_numa[domain].count++;
+#endif
+		}
 		KASSERT(i == p->pm_count, ("Invalid port count"));
+
+#ifdef NUMA
+		for (i = 0; i < MAXMEMDOM; i++) {
+			if (p->pm_numa[i].count != 0)
+				p->pm_num_dom++;
+		}
+#endif
 		speed = lacp_aggregator_bandwidth(la);
 	}
 	sc->sc_ifp->if_baudrate = speed;

Modified: head/sys/net/ieee8023ad_lacp.h
==============================================================================
--- head/sys/net/ieee8023ad_lacp.h	Fri May  3 13:06:46 2019	(r347054)
+++ head/sys/net/ieee8023ad_lacp.h	Fri May  3 14:43:21 2019	(r347055)
@@ -197,8 +197,15 @@ enum lacp_mux_state {
 
 #define	LACP_MAX_PORTS		32
 
+struct lacp_numa {
+	int			count;
+	struct lacp_port	*map[LACP_MAX_PORTS];
+};
+
 struct lacp_portmap {
 	int			pm_count;
+	int			pm_num_dom;
+	struct lacp_numa	pm_numa[MAXMEMDOM];
 	struct lacp_port	*pm_map[LACP_MAX_PORTS];
 };
 

Modified: head/sys/net/if_lagg.c
==============================================================================
--- head/sys/net/if_lagg.c	Fri May  3 13:06:46 2019	(r347054)
+++ head/sys/net/if_lagg.c	Fri May  3 14:43:21 2019	(r347055)
@@ -264,6 +264,13 @@ SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowi
     &VNET_NAME(def_use_flowid), 0,
     "Default setting for using flow id for load sharing");
 
+/* Default value for using numa */
+VNET_DEFINE_STATIC(int, def_use_numa) = 1;
+#define	V_def_use_numa	VNET(def_use_numa)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN,
+    &VNET_NAME(def_use_numa), 0,
+    "Use numa to steer flows");
+
 /* Default value for flowid shift */
 VNET_DEFINE_STATIC(int, def_flowid_shift) = 16;
 #define	V_def_flowid_shift	VNET(def_flowid_shift)
@@ -491,6 +498,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, cadd
 	LAGG_XLOCK(sc);
 	if (V_def_use_flowid)
 		sc->sc_opts |= LAGG_OPT_USE_FLOWID;
+	if (V_def_use_numa)
+		sc->sc_opts |= LAGG_OPT_USE_NUMA;
 	sc->flowid_shift = V_def_flowid_shift;
 
 	/* Hash all layers by default */
@@ -1247,6 +1256,8 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data
 		switch (ro->ro_opts) {
 		case LAGG_OPT_USE_FLOWID:
 		case -LAGG_OPT_USE_FLOWID:
+		case LAGG_OPT_USE_NUMA:
+		case -LAGG_OPT_USE_NUMA:
 		case LAGG_OPT_FLOWIDSHIFT:
 			valid = 1;
 			lacp = 0;

Modified: head/sys/net/if_lagg.h
==============================================================================
--- head/sys/net/if_lagg.h	Fri May  3 13:06:46 2019	(r347054)
+++ head/sys/net/if_lagg.h	Fri May  3 14:43:21 2019	(r347055)
@@ -143,6 +143,7 @@ struct lagg_reqopts {
 #define	LAGG_OPT_USE_FLOWID		0x01		/* enable use of flowid */
 /* Pseudo flags which are used in ro_opts but not stored into sc_opts. */
 #define	LAGG_OPT_FLOWIDSHIFT		0x02		/* set flowid shift */
+#define	LAGG_OPT_USE_NUMA		0x04		/* enable use of numa */
 #define	LAGG_OPT_FLOWIDSHIFT_MASK	0x1f		/* flowid is uint32_t */
 #define	LAGG_OPT_LACP_STRICT		0x10		/* LACP strict mode */
 #define	LAGG_OPT_LACP_TXTEST		0x20		/* LACP debug: txtest */
@@ -158,8 +159,9 @@ struct lagg_reqopts {
 #define	SIOCGLAGGOPTS		_IOWR('i', 152, struct lagg_reqopts)
 #define	SIOCSLAGGOPTS		 _IOW('i', 153, struct lagg_reqopts)
 
-#define	LAGG_OPT_BITS		"\020\001USE_FLOWID\005LACP_STRICT" \
-				"\006LACP_TXTEST\007LACP_RXTEST"
+#define	LAGG_OPT_BITS		"\020\001USE_FLOWID\003USE_NUMA" \
+				"\005LACP_STRICT\006LACP_TXTEST" \
+				"\007LACP_RXTEST"
 
 #ifdef _KERNEL
 


More information about the svn-src-head mailing list