svn commit: r368229 - in stable/12: sbin/ifconfig share/man/man4 sys/conf sys/net

Hans Petter Selasky hselasky at FreeBSD.org
Tue Dec 1 13:58:17 UTC 2020


Author: hselasky
Date: Tue Dec  1 13:58:14 2020
New Revision: 368229
URL: https://svnweb.freebsd.org/changeset/base/368229

Log:
  MFC r366933 and r366934:
  Add support for IP over infiniband, IPoIB, to lagg(4). Currently only
  the failover protocol is supported due to limitations in the IPoIB
  architecture. Refer to the lagg(4) manual page for how to configure
  and use this new feature. A new network interface type,
  IFT_INFINIBANDLAG, has been added, similar to the existing
  IFT_IEEE8023ADLAG .
  
  ifconfig(8) has been updated to accept a new laggtype argument when
  creating lagg(4) network interfaces. This new argument is used to
  distinguish between ethernet and infiniband type of lagg(4) network
  interface. The laggtype argument is optional and defaults to
  ethernet. The lagg(4) command line syntax is backwards compatible.
  
  Differential Revision:	https://reviews.freebsd.org/D26254
  Reviewed by:		melifaro@
  Sponsored by:		Mellanox Technologies // NVIDIA Networking

Modified:
  stable/12/sbin/ifconfig/ifconfig.8
  stable/12/sbin/ifconfig/iflagg.c
  stable/12/share/man/man4/lagg.4
  stable/12/sys/conf/files
  stable/12/sys/net/ieee8023ad_lacp.c
  stable/12/sys/net/if_ethersubr.c
  stable/12/sys/net/if_infiniband.c
  stable/12/sys/net/if_lagg.c
  stable/12/sys/net/if_lagg.h
  stable/12/sys/net/if_types.h
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sbin/ifconfig/ifconfig.8
==============================================================================
--- stable/12/sbin/ifconfig/ifconfig.8	Tue Dec  1 13:47:16 2020	(r368228)
+++ stable/12/sbin/ifconfig/ifconfig.8	Tue Dec  1 13:58:14 2020	(r368229)
@@ -28,7 +28,7 @@
 .\"     From: @(#)ifconfig.8	8.3 (Berkeley) 1/5/94
 .\" $FreeBSD$
 .\"
-.Dd November 1, 2020
+.Dd December 1, 2020
 .Dt IFCONFIG 8
 .Os
 .Sh NAME
@@ -2429,6 +2429,12 @@ Set to 0 to disable.
 .Pp
 The following parameters are specific to lagg interfaces:
 .Bl -tag -width indent
+.It Cm laggtype Ar type
+When creating a lagg interface the type can be specified as either
+.Cm ethernet
+or
+.Cm infiniband .
+If not specified ethernet is the default lagg type.
 .It Cm laggport Ar interface
 Add the interface named by
 .Ar interface

Modified: stable/12/sbin/ifconfig/iflagg.c
==============================================================================
--- stable/12/sbin/ifconfig/iflagg.c	Tue Dec  1 13:47:16 2020	(r368228)
+++ stable/12/sbin/ifconfig/iflagg.c	Tue Dec  1 13:58:14 2020	(r368229)
@@ -30,8 +30,12 @@ static const char rcsid[] =
 
 #include "ifconfig.h"
 
-char lacpbuf[120];	/* LACP peer '[(a,a,a),(p,p,p)]' */
+static struct iflaggparam params = {
+	.lagg_type = LAGG_TYPE_DEFAULT,
+};
 
+static char lacpbuf[120];	/* LACP peer '[(a,a,a),(p,p,p)]' */
+
 static void
 setlaggport(const char *val, int d, int s, const struct afswtch *afp)
 {
@@ -299,7 +303,31 @@ lagg_status(int s)
 	}
 }
 
+static
+DECL_CMD_FUNC(setlaggtype, arg, d)
+{
+	static const struct lagg_types lt[] = LAGG_TYPES;
+	int i;
+
+	for (i = 0; i < nitems(lt); i++) {
+		if (strcmp(arg, lt[i].lt_name) == 0) {
+			params.lagg_type = lt[i].lt_value;
+			return;
+		}
+	}
+	errx(1, "invalid lagg type: %s", arg);
+}
+
+static void
+lagg_create(int s, struct ifreq *ifr)
+{
+	ifr->ifr_data = (caddr_t) &params;
+	if (ioctl(s, SIOCIFCREATE2, ifr) < 0)
+		err(1, "SIOCIFCREATE2");
+}
+
 static struct cmd lagg_cmds[] = {
+	DEF_CLONE_CMD_ARG("laggtype",   setlaggtype),
 	DEF_CMD_ARG("laggport",		setlaggport),
 	DEF_CMD_ARG("-laggport",	unsetlaggport),
 	DEF_CMD_ARG("laggproto",	setlaggproto),
@@ -331,4 +359,5 @@ lagg_ctor(void)
 	for (i = 0; i < nitems(lagg_cmds);  i++)
 		cmd_register(&lagg_cmds[i]);
 	af_register(&af_lagg);
+	clone_setdefcallback("lagg", lagg_create);
 }

Modified: stable/12/share/man/man4/lagg.4
==============================================================================
--- stable/12/share/man/man4/lagg.4	Tue Dec  1 13:47:16 2020	(r368228)
+++ stable/12/share/man/man4/lagg.4	Tue Dec  1 13:58:14 2020	(r368229)
@@ -16,7 +16,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 18, 2017
+.Dd October 21, 2020
 .Dt LAGG 4
 .Os
 .Sh NAME
@@ -192,6 +192,15 @@ device will be used:
 .Pp
 (Note the mac address of the wireless device is forced to match the wired
 device as a workaround.)
+.Pp
+The following example shows how to create an infiniband failover interface.
+.Bd -literal -offset indent
+# ifconfig ib0 up
+# ifconfig ib1 up
+# ifconfig lagg0 create laggtype infiniband
+# ifconfig lagg0 laggproto failover laggport ib0 laggport ib1 \e
+	1.1.1.1 netmask 255.255.255.0
+.Ed
 .Sh SEE ALSO
 .Xr ng_one2many 4 ,
 .Xr ifconfig 8 ,

Modified: stable/12/sys/conf/files
==============================================================================
--- stable/12/sys/conf/files	Tue Dec  1 13:47:16 2020	(r368228)
+++ stable/12/sys/conf/files	Tue Dec  1 13:58:14 2020	(r368229)
@@ -4578,7 +4578,7 @@ compat/lindebugfs/lindebugfs.c			optional lindebugfs \
 	compile-with "${LINUXKPI_C}"
 
 # OpenFabrics Enterprise Distribution (Infiniband)
-net/if_infiniband.c					optional ofed
+net/if_infiniband.c					optional ofed | lagg
 ofed/drivers/infiniband/core/ib_addr.c			optional ofed	\
 	compile-with "${OFED_C}"
 ofed/drivers/infiniband/core/ib_agent.c			optional ofed	\

Modified: stable/12/sys/net/ieee8023ad_lacp.c
==============================================================================
--- stable/12/sys/net/ieee8023ad_lacp.c	Tue Dec  1 13:47:16 2020	(r368228)
+++ stable/12/sys/net/ieee8023ad_lacp.c	Tue Dec  1 13:58:14 2020	(r368229)
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/ethernet.h>
+#include <net/infiniband.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 

Modified: stable/12/sys/net/if_ethersubr.c
==============================================================================
--- stable/12/sys/net/if_ethersubr.c	Tue Dec  1 13:47:16 2020	(r368228)
+++ stable/12/sys/net/if_ethersubr.c	Tue Dec  1 13:58:14 2020	(r368229)
@@ -110,7 +110,7 @@ void	(*vlan_input_p)(struct ifnet *, struct mbuf *);
 void	(*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
 /* if_lagg(4) support */
-struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); 
+struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); 
 
 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -566,9 +566,9 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m
 
 	/* Handle input from a lagg(4) port */
 	if (ifp->if_type == IFT_IEEE8023ADLAG) {
-		KASSERT(lagg_input_p != NULL,
+		KASSERT(lagg_input_ethernet_p != NULL,
 		    ("%s: if_lagg not loaded!", __func__));
-		m = (*lagg_input_p)(ifp, m);
+		m = (*lagg_input_ethernet_p)(ifp, m);
 		if (m != NULL)
 			ifp = m->m_pkthdr.rcvif;
 		else {

Modified: stable/12/sys/net/if_infiniband.c
==============================================================================
--- stable/12/sys/net/if_infiniband.c	Tue Dec  1 13:47:16 2020	(r368228)
+++ stable/12/sys/net/if_infiniband.c	Tue Dec  1 13:58:14 2020	(r368229)
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
+#include <net/if_lagg.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
@@ -58,10 +59,13 @@ __FBSDID("$FreeBSD$");
 
 #include <security/mac/mac_framework.h>
 
+/* if_lagg(4) support */
+struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
+
 #ifdef INET
 static inline void
-infiniband_ipv4_multicast_map(
-    uint32_t addr, const uint8_t *broadcast, uint8_t *buf)
+infiniband_ipv4_multicast_map(uint32_t addr,
+    const uint8_t *broadcast, uint8_t *buf)
 {
 	uint8_t scope;
 
@@ -93,8 +97,8 @@ infiniband_ipv4_multicast_map(
 
 #ifdef INET6
 static inline void
-infiniband_ipv6_multicast_map(
-    const struct in6_addr *addr, const uint8_t *broadcast, uint8_t *buf)
+infiniband_ipv6_multicast_map(const struct in6_addr *addr,
+    const uint8_t *broadcast, uint8_t *buf)
 {
 	uint8_t scope;
 
@@ -344,6 +348,16 @@ infiniband_input(struct ifnet *ifp, struct mbuf *m)
 	/* Direct packet to correct FIB based on interface config. */
 	M_SETFIB(m, ifp->if_fib);
 
+	/* Handle input from a lagg<N> port */
+	if (ifp->if_type == IFT_INFINIBANDLAG) {
+		KASSERT(lagg_input_infiniband_p != NULL,
+		    ("%s: if_lagg not loaded!", __func__));
+		m = (*lagg_input_infiniband_p)(ifp, m);
+		if (__predict_false(m == NULL))
+			goto done;
+		ifp = m->m_pkthdr.rcvif;
+	}
+
 	/*
 	 * Dispatch frame to upper layer.
 	 */
@@ -392,8 +406,8 @@ done:
 }
 
 static int
-infiniband_resolvemulti(
-    struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa)
+infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+    struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 #ifdef INET

Modified: stable/12/sys/net/if_lagg.c
==============================================================================
--- stable/12/sys/net/if_lagg.c	Tue Dec  1 13:47:16 2020	(r368228)
+++ stable/12/sys/net/if_lagg.c	Tue Dec  1 13:58:14 2020	(r368229)
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
 #include <net/bpf.h>
 #include <net/route.h>
 #include <net/vnet.h>
+#include <net/infiniband.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
@@ -125,7 +126,8 @@ static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link A
 static void	lagg_capabilities(struct lagg_softc *);
 static int	lagg_port_create(struct lagg_softc *, struct ifnet *);
 static int	lagg_port_destroy(struct lagg_port *, int);
-static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
+static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *);
+static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *);
 static void	lagg_linkstate(struct lagg_softc *);
 static void	lagg_port_state(struct ifnet *, int);
 static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
@@ -151,7 +153,8 @@ static	int	lagg_setflag(struct lagg_port *, int, int,
 		    int (*func)(struct ifnet *, int));
 static	int	lagg_setflags(struct lagg_port *, int status);
 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
-static int	lagg_transmit(struct ifnet *, struct mbuf *);
+static int	lagg_transmit_ethernet(struct ifnet *, struct mbuf *);
+static int	lagg_transmit_infiniband(struct ifnet *, struct mbuf *);
 static void	lagg_qflush(struct ifnet *);
 static int	lagg_media_change(struct ifnet *);
 static void	lagg_media_status(struct ifnet *, struct ifmediareq *);
@@ -307,7 +310,8 @@ lagg_modevent(module_t mod, int type, void *data)
 
 	switch (type) {
 	case MOD_LOAD:
-		lagg_input_p = lagg_input;
+		lagg_input_ethernet_p = lagg_input_ethernet;
+		lagg_input_infiniband_p = lagg_input_infiniband;
 		lagg_linkstate_p = lagg_port_state;
 		lagg_detach_cookie = EVENTHANDLER_REGISTER(
 		    ifnet_departure_event, lagg_port_ifdetach, NULL,
@@ -316,7 +320,8 @@ lagg_modevent(module_t mod, int type, void *data)
 	case MOD_UNLOAD:
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    lagg_detach_cookie);
-		lagg_input_p = NULL;
+		lagg_input_ethernet_p = NULL;
+		lagg_input_infiniband_p = NULL;
 		lagg_linkstate_p = NULL;
 		break;
 	default:
@@ -333,6 +338,7 @@ static moduledata_t lagg_mod = {
 
 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_lagg, 1);
+MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1);
 
 static void
 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
@@ -484,18 +490,48 @@ lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_i
 static int
 lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
+	struct iflaggparam iflp;
 	struct lagg_softc *sc;
 	struct ifnet *ifp;
-	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
+	int if_type;
+	int error;
+	static const uint8_t eaddr[LAGG_ADDR_LEN];
+	static const uint8_t ib_bcast_addr[INFINIBAND_ADDR_LEN] = {
+		0x00, 0xff, 0xff, 0xff,
+		0xff, 0x12, 0x40, 0x1b,	0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00,	0xff, 0xff, 0xff, 0xff
+	};
 
+	if (params != NULL) {
+		error = copyin(params, &iflp, sizeof(iflp));
+		if (error)
+			return (error);
+
+		switch (iflp.lagg_type) {
+		case LAGG_TYPE_ETHERNET:
+			if_type = IFT_ETHER;
+			break;
+		case LAGG_TYPE_INFINIBAND:
+			if_type = IFT_INFINIBAND;
+			break;
+		default:
+			return (EINVAL);
+		}
+	} else {
+		if_type = IFT_ETHER;
+	}
+
 	sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO);
-	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
+	ifp = sc->sc_ifp = if_alloc(if_type);
 	if (ifp == NULL) {
 		free(sc, M_LAGG);
 		return (ENOSPC);
 	}
 	LAGG_SX_INIT(sc);
 
+	mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF);
+	callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0);
+
 	LAGG_XLOCK(sc);
 	if (V_def_use_flowid)
 		sc->sc_opts |= LAGG_OPT_USE_FLOWID;
@@ -508,15 +544,25 @@ lagg_clone_create(struct if_clone *ifc, int unit, cadd
 
 	CK_SLIST_INIT(&sc->sc_ports);
 
-	/* Initialise pseudo media types */
-	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
-	    lagg_media_status);
-	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
-	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+	switch (if_type) {
+	case IFT_ETHER:
+		/* Initialise pseudo media types */
+		ifmedia_init(&sc->sc_media, 0, lagg_media_change,
+		    lagg_media_status);
+		ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+		ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
 
-	if_initname(ifp, laggname, unit);
+		if_initname(ifp, laggname, unit);
+		ifp->if_transmit = lagg_transmit_ethernet;
+		break;
+	case IFT_INFINIBAND:
+		if_initname(ifp, laggname, unit);
+		ifp->if_transmit = lagg_transmit_infiniband;
+		break;
+	default:
+		break;
+	}
 	ifp->if_softc = sc;
-	ifp->if_transmit = lagg_transmit;
 	ifp->if_qflush = lagg_qflush;
 	ifp->if_init = lagg_init;
 	ifp->if_ioctl = lagg_ioctl;
@@ -529,9 +575,18 @@ lagg_clone_create(struct if_clone *ifc, int unit, cadd
 
 	/*
 	 * Attach as an ordinary ethernet device, children will be attached
-	 * as special device IFT_IEEE8023ADLAG.
+	 * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG.
 	 */
-	ether_ifattach(ifp, eaddr);
+	switch (if_type) {
+	case IFT_ETHER:
+		ether_ifattach(ifp, eaddr);
+		break;
+	case IFT_INFINIBAND:
+		infiniband_ifattach(ifp, eaddr, ib_bcast_addr);
+		break;
+	default:
+		break;
+	}
 
 	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 		lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
@@ -569,14 +624,24 @@ lagg_clone_destroy(struct ifnet *ifp)
 	lagg_proto_detach(sc);
 	LAGG_XUNLOCK(sc);
 
-	ifmedia_removeall(&sc->sc_media);
-	ether_ifdetach(ifp);
+	switch (ifp->if_type) {
+	case IFT_ETHER:
+		ifmedia_removeall(&sc->sc_media);
+		ether_ifdetach(ifp);
+		break;
+	case IFT_INFINIBAND:
+		infiniband_ifdetach(ifp);
+		break;
+	default:
+		break;
+	}
 	if_free(ifp);
 
 	LAGG_LIST_LOCK();
 	SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
 	LAGG_LIST_UNLOCK();
 
+	mtx_destroy(&sc->sc_mtx);
 	LAGG_SX_DESTROY(sc);
 	free(sc, M_LAGG);
 }
@@ -643,6 +708,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *
 	struct lagg_port *lp, *tlp;
 	struct ifreq ifr;
 	int error, i, oldmtu;
+	int if_type;
 	uint64_t *pval;
 
 	LAGG_XLOCK_ASSERT(sc);
@@ -666,9 +732,22 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *
 		return (EBUSY);
 	}
 
-	/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
-	if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
-		return (EPROTONOSUPPORT);
+	switch (sc->sc_ifp->if_type) {
+	case IFT_ETHER:
+		/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
+		if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
+			return (EPROTONOSUPPORT);
+		if_type = IFT_IEEE8023ADLAG;
+		break;
+	case IFT_INFINIBAND:
+		/* XXX Disallow non-infiniband interfaces */
+		if (ifp->if_type != IFT_INFINIBAND)
+			return (EPROTONOSUPPORT);
+		if_type = IFT_INFINIBANDLAG;
+		break;
+	default:
+		break;
+	}
 
 	/* Allow the first Ethernet member to define the MTU */
 	oldmtu = -1;
@@ -725,14 +804,14 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *
 	if_ref(ifp);
 	lp->lp_ifp = ifp;
 
-	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
+	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen);
 	lp->lp_ifcapenable = ifp->if_capenable;
 	if (CK_SLIST_EMPTY(&sc->sc_ports)) {
-		bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+		bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
 		lagg_proto_lladdr(sc);
 		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
 	} else {
-		if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+		if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
 	}
 	lagg_setflags(lp, 1);
 
@@ -741,7 +820,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *
 
 	/* Change the interface type */
 	lp->lp_iftype = ifp->if_type;
-	ifp->if_type = IFT_IEEE8023ADLAG;
+	ifp->if_type = if_type;
 	ifp->if_lagg = lp;
 	lp->lp_ioctl = ifp->if_ioctl;
 	ifp->if_ioctl = lagg_port_ioctl;
@@ -859,15 +938,15 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport
 
 	/* Update the primary interface */
 	if (lp == sc->sc_primary) {
-		uint8_t lladdr[ETHER_ADDR_LEN];
+		uint8_t lladdr[LAGG_ADDR_LEN];
 
 		if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL)
-			bzero(&lladdr, ETHER_ADDR_LEN);
+			bzero(&lladdr, LAGG_ADDR_LEN);
 		else
-			bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN);
+			bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN);
 		sc->sc_primary = lp0;
 		if (sc->sc_destroying == 0) {
-			bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+			bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen);
 			lagg_proto_lladdr(sc);
 			EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
 		}
@@ -877,7 +956,7 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport
 		 * as well, to switch from old lladdr to its 'real' one)
 		 */
 		CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
-			if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN);
+			if_setlladdr(lp_ptr->lp_ifp, lladdr, lp_ptr->lp_ifp->if_addrlen);
 	}
 
 	if (lp->lp_ifflags)
@@ -886,7 +965,7 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport
 	if (lp->lp_detaching == 0) {
 		lagg_setflags(lp, 0);
 		lagg_setcaps(lp, lp->lp_ifcapenable);
-		if_setlladdr(ifp, lp->lp_lladdr, ETHER_ADDR_LEN);
+		if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen);
 	}
 
 	/*
@@ -910,9 +989,15 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t
 	int error = 0;
 
 	/* Should be checked by the caller */
-	if (ifp->if_type != IFT_IEEE8023ADLAG ||
-	    (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+	switch (ifp->if_type) {
+	case IFT_IEEE8023ADLAG:
+	case IFT_INFINIBANDLAG:
+		if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+			goto fallback;
+		break;
+	default:
 		goto fallback;
+	}
 
 	switch (cmd) {
 	case SIOCGLAGGPORT:
@@ -1104,6 +1189,44 @@ lagg_port2req(struct lagg_port *lp, struct lagg_reqpor
 }
 
 static void
+lagg_watchdog_infiniband(void *arg)
+{
+	struct lagg_softc *sc;
+	struct lagg_port *lp;
+	struct ifnet *ifp;
+	struct ifnet *lp_ifp;
+
+	sc = arg;
+
+	/*
+	 * Because infiniband nodes have a fixed MAC address, which is
+	 * generated by the so-called GID, we need to regularly update
+	 * the link level address of the parent lagg<N> device when
+	 * the active port changes. Possibly we could piggy-back on
+	 * link up/down events aswell, but using a timer also provides
+	 * a guarantee against too frequent events. This operation
+	 * does not have to be atomic.
+	 */
+	LAGG_RLOCK();
+	lp = lagg_link_active(sc, sc->sc_primary);
+	if (lp != NULL) {
+		ifp = sc->sc_ifp;
+		lp_ifp = lp->lp_ifp;
+
+		if (ifp != NULL && lp_ifp != NULL &&
+		    memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0) {
+			memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen);
+			CURVNET_SET(ifp->if_vnet);
+			EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+			CURVNET_RESTORE();
+		}
+	}
+	LAGG_RUNLOCK();
+
+	callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg);
+}
+
+static void
 lagg_init(void *xsc)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)xsc;
@@ -1125,12 +1248,18 @@ lagg_init(void *xsc)
 	 */
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
-		    ETHER_ADDR_LEN) != 0)
-			if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+		    ifp->if_addrlen) != 0)
+			if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen);
 	}
 
 	lagg_proto_init(sc);
 
+	if (ifp->if_type == IFT_INFINIBAND) {
+		mtx_lock(&sc->sc_mtx);
+		lagg_watchdog_infiniband(sc);
+		mtx_unlock(&sc->sc_mtx);
+	}
+
 	LAGG_XUNLOCK(sc);
 }
 
@@ -1147,6 +1276,12 @@ lagg_stop(struct lagg_softc *sc)
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 
 	lagg_proto_stop(sc);
+
+	mtx_lock(&sc->sc_mtx);
+	callout_stop(&sc->sc_watchdog);
+	mtx_unlock(&sc->sc_mtx);
+
+	callout_drain(&sc->sc_watchdog);
 }
 
 static int
@@ -1200,7 +1335,12 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data
 			error = EPROTONOSUPPORT;
 			break;
 		}
-
+		/* Infiniband only supports the failover protocol. */
+		if (ra->ra_proto != LAGG_PROTO_FAILOVER &&
+		    ifp->if_type == IFT_INFINIBAND) {
+			error = EPROTONOSUPPORT;
+			break;
+		}
 		LAGG_XLOCK(sc);
 		lagg_proto_detach(sc);
 		LAGG_UNLOCK_ASSERT();
@@ -1516,7 +1656,10 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data
 		break;
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
-		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
+		if (ifp->if_type == IFT_INFINIBAND)
+			error = EINVAL;
+		else
+			error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
 		break;
 
 	case SIOCSIFCAP:
@@ -1738,7 +1881,7 @@ lagg_setflags(struct lagg_port *lp, int status)
 }
 
 static int
-lagg_transmit(struct ifnet *ifp, struct mbuf *m)
+lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	int error;
@@ -1763,6 +1906,32 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m)
 	return (error);
 }
 
+static int
+lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+	int error;
+
+#if defined(KERN_TLS) || defined(RATELIMIT)
+	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+		MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+#endif
+	LAGG_RLOCK();
+	/* We need a Tx algorithm and at least one port */
+	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
+		LAGG_RUNLOCK();
+		m_freem(m);
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return (ENXIO);
+	}
+
+	INFINIBAND_BPF_MTAP(ifp, m);
+
+	error = lagg_proto_start(sc, m);
+	LAGG_RUNLOCK();
+	return (error);
+}
+
 /*
  * The ifp->if_qflush entry point for lagg(4) is no-op.
  */
@@ -1772,7 +1941,7 @@ lagg_qflush(struct ifnet *ifp __unused)
 }
 
 static struct mbuf *
-lagg_input(struct ifnet *ifp, struct mbuf *m)
+lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m)
 {
 	struct lagg_port *lp = ifp->if_lagg;
 	struct lagg_softc *sc = lp->lp_softc;
@@ -1788,6 +1957,34 @@ lagg_input(struct ifnet *ifp, struct mbuf *m)
 	}
 
 	ETHER_BPF_MTAP(scifp, m);
+
+	m = lagg_proto_input(sc, lp, m);
+	if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {
+		m_freem(m);
+		m = NULL;
+	}
+
+	LAGG_RUNLOCK();
+	return (m);
+}
+
+static struct mbuf *
+lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m)
+{
+	struct lagg_port *lp = ifp->if_lagg;
+	struct lagg_softc *sc = lp->lp_softc;
+	struct ifnet *scifp = sc->sc_ifp;
+
+	LAGG_RLOCK();
+	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+	    lp->lp_detaching != 0 ||
+	    sc->sc_proto == LAGG_PROTO_NONE) {
+		LAGG_RUNLOCK();
+		m_freem(m);
+		return (NULL);
+	}
+
+	INFINIBAND_BPF_MTAP(scifp, m);
 
 	m = lagg_proto_input(sc, lp, m);
 	if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {

Modified: stable/12/sys/net/if_lagg.h
==============================================================================
--- stable/12/sys/net/if_lagg.h	Tue Dec  1 13:47:16 2020	(r368228)
+++ stable/12/sys/net/if_lagg.h	Tue Dec  1 13:58:14 2020	(r368229)
@@ -72,7 +72,34 @@ struct lagg_protos {
 	{ "default",		LAGG_PROTO_DEFAULT }			\
 }
 
+/* Supported lagg TYPEs */
+typedef enum {
+	LAGG_TYPE_ETHERNET = 0, /* ethernet (default) */
+	LAGG_TYPE_INFINIBAND,	/* infiniband */
+	LAGG_TYPE_MAX,
+} lagg_type;
+
+struct lagg_types {
+	const char		*lt_name;
+	lagg_type		lt_value;
+};
+
+#define	LAGG_TYPE_DEFAULT	LAGG_TYPE_ETHERNET
+#define LAGG_TYPES	{						\
+	{ "ethernet",		LAGG_TYPE_ETHERNET },			\
+	{ "infiniband",		LAGG_TYPE_INFINIBAND },			\
+}
+
 /*
+ * lagg create clone params
+ */
+struct iflaggparam {
+	uint8_t lagg_type;	/* see LAGG_TYPE_XXX */
+	uint8_t reserved_8[3];
+	uint32_t reserved_32[3];
+};
+
+/*
  * lagg ioctls.
  */
 
@@ -205,7 +232,7 @@ struct lagg_counters {
 
 struct lagg_softc {
 	struct ifnet			*sc_ifp;	/* virtual interface */
-	struct rmlock			sc_mtx;
+	struct mtx			sc_mtx;		/* watchdog mutex */
 	struct sx			sc_sx;
 	int				sc_proto;	/* lagg protocol */
 	u_int				sc_count;	/* number of ports */
@@ -229,12 +256,15 @@ struct lagg_softc {
 	u_int				sc_opts;
 	int				flowid_shift;	/* shift the flowid */
 	struct lagg_counters		detached_counters; /* detached ports sum */
+	struct callout			sc_watchdog;	/* watchdog timer */
 };
 
 struct lagg_port {
 	struct ifnet			*lp_ifp;	/* physical interface */
 	struct lagg_softc		*lp_softc;	/* parent lagg */
-	uint8_t				lp_lladdr[ETHER_ADDR_LEN];
+#define	LAGG_ADDR_LEN \
+	MAX(INFINIBAND_ADDR_LEN, ETHER_ADDR_LEN)
+	uint8_t				lp_lladdr[LAGG_ADDR_LEN];
 
 	u_char				lp_iftype;	/* interface type */
 	uint32_t			lp_prio;	/* port priority */
@@ -256,7 +286,8 @@ struct lagg_port {
 	struct epoch_context	lp_epoch_ctx;
 };
 
-extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
+extern struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *);
+extern struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
 extern void	(*lagg_linkstate_p)(struct ifnet *, int );
 
 int		lagg_enqueue(struct ifnet *, struct mbuf *);

Modified: stable/12/sys/net/if_types.h
==============================================================================
--- stable/12/sys/net/if_types.h	Tue Dec  1 13:47:16 2020	(r368228)
+++ stable/12/sys/net/if_types.h	Tue Dec  1 13:58:14 2020	(r368229)
@@ -242,6 +242,7 @@ typedef enum {
 	IFT_OPTICALCHANNEL = 0xc3,	/* Optical Channel */
 	IFT_OPTICALTRANSPORT = 0xc4,	/* Optical Transport */
 	IFT_INFINIBAND	= 0xc7,		/* Infiniband */
+	IFT_INFINIBANDLAG = 0xc8,	/* Infiniband Link Aggregate */
 	IFT_BRIDGE	= 0xd1,		/* Transparent bridge interface */
 	IFT_STF		= 0xd7,		/* 6to4 interface */
 


More information about the svn-src-all mailing list