svn commit: r325614 - in head/sys/ofed: drivers/infiniband/core include/rdma

Hans Petter Selasky hselasky at FreeBSD.org
Thu Nov 9 19:22:45 UTC 2017


Author: hselasky
Date: Thu Nov  9 19:22:43 2017
New Revision: 325614
URL: https://svnweb.freebsd.org/changeset/base/325614

Log:
  Multiple fixes for using IPv6 link-local addresses with RDMA in ibcore.
  
  1) Fail to resolve RDMA address if rtalloc1() returns the loopback
  device, lo0, as the gateway interface. Currently RDMA loopback is
  not supported.
  
  2) Use ip_dev_find() and ip6_dev_find() to look up network interfaces
  with matching IPv4 and IPv6 addresses, respectively.
  
  3) In addr_resolve() make sure the "ifa" pointer is always set, also when
  the "ifp" is NULL. Else a NULL pointer access might happen trying to
  read from the "ifa" pointer later on.
  
  4) In rdma_addr_find_dmac_by_grh() make sure the "bound_dev_if" field
  gets set properly instead of passing the scope ID through the IPv6
  socket address structure. This is more in line with upstream OFED
  in Linux.
  
  5) In rdma_addr_find_smac_by_sgid() there is no need to pass the
  scope ID for IPv6. Either it is stored in the "bound_dev_if" field
  or ip6_dev_find() will find the correct network device regardless
  of the scope ID.
  
  Sponsored by:	Mellanox Technologies
  MFC after:	1 week

Modified:
  head/sys/ofed/drivers/infiniband/core/addr.c
  head/sys/ofed/drivers/infiniband/core/cma.c
  head/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
  head/sys/ofed/drivers/infiniband/core/verbs.c
  head/sys/ofed/include/rdma/ib_addr.h

Modified: head/sys/ofed/drivers/infiniband/core/addr.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/addr.c	Thu Nov  9 19:15:28 2017	(r325613)
+++ head/sys/ofed/drivers/infiniband/core/addr.c	Thu Nov  9 19:22:43 2017	(r325614)
@@ -110,14 +110,6 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, str
 }
 EXPORT_SYMBOL(rdma_copy_addr);
 
-#define	SCOPE_ID_CACHE(_scope_id, _addr6) do {		\
-	(_addr6)->sin6_addr.s6_addr[3] = (_scope_id);	\
-	(_addr6)->sin6_scope_id = 0; } while (0)
-
-#define	SCOPE_ID_RESTORE(_scope_id, _addr6) do {	\
-	(_addr6)->sin6_scope_id = (_scope_id);		\
-	(_addr6)->sin6_addr.s6_addr[3] = 0; } while (0)
-
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
 		      u16 *vlan_id)
 {
@@ -149,34 +141,17 @@ int rdma_translate_ip(struct sockaddr *addr, struct rd
 
 #if defined(INET6)
 	case AF_INET6:
-		{
-			struct sockaddr_in6 *sin6;
-			struct ifaddr *ifa;
-			in_port_t port;
-			uint32_t scope_id;
+		dev = ip6_dev_find(&init_net,
+			((const struct sockaddr_in6 *)addr)->sin6_addr);
 
-			sin6 = (struct sockaddr_in6 *)addr;
-			port = sin6->sin6_port;
-			sin6->sin6_port = 0;
-			scope_id = sin6->sin6_scope_id;
-			if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
-				SCOPE_ID_CACHE(scope_id, sin6);
-			CURVNET_SET_QUIET(&init_net);
-			ifa = ifa_ifwithaddr(addr);
-			CURVNET_RESTORE();
-			sin6->sin6_port = port;
-			if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
-				SCOPE_ID_RESTORE(scope_id, sin6);
-			if (ifa == NULL) {
-				ret = -ENODEV;
-				break;
-			}
-			ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
-			if (vlan_id)
-				*vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp);
-			ifa_free(ifa);
-			break;
-		}
+		if (!dev)
+			return ret;
+
+		ret = rdma_copy_addr(dev_addr, dev, NULL);
+		if (vlan_id)
+			*vlan_id = rdma_vlan_dev_vlan_id(dev);
+		dev_put(dev);
+		break;
 #endif
 	default:
 		break;
@@ -222,12 +197,9 @@ static int addr_resolve(struct sockaddr *src_in,
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 	struct rtentry *rte;
-#if defined(INET) || defined(INET6)
-	in_port_t port;
+#if defined(INET6)
+	struct sockaddr_in6 dstv6_tmp;
 #endif
-#ifdef INET6
-	uint32_t scope_id;
-#endif
 	u_char edst[MAX_ADDR_LEN];
 	int multi;
 	int bcast;
@@ -247,11 +219,7 @@ static int addr_resolve(struct sockaddr *src_in,
 	ifp = NULL;
 	rte = NULL;
 	ifa = NULL;
-	ifp = NULL;
 	memset(edst, 0, sizeof(edst));
-#ifdef INET6
-	scope_id = -1U;
-#endif
 
 	switch (dst_in->sa_family) {
 #ifdef INET
@@ -263,29 +231,11 @@ static int addr_resolve(struct sockaddr *src_in,
 			multi = 1;
 		sin = (struct sockaddr_in *)src_in;
 		if (sin->sin_addr.s_addr != INADDR_ANY) {
-			/*
-			 * Address comparison fails if the port is set
-			 * cache it here to be restored later.
-			 */
-			port = sin->sin_port;
-			sin->sin_port = 0;
-			memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
-
-			/*
-			 * If we have a source address to use look it
-			 * up first and verify that it is a local
-			 * interface:
-			 */
-			CURVNET_SET_QUIET(&init_net);
-			ifa = ifa_ifwithaddr(src_in);
-			CURVNET_RESTORE();
-			sin->sin_port = port;
-			if (ifa == NULL) {
+			ifp = ip_dev_find(&init_net, sin->sin_addr.s_addr);
+			if (ifp == NULL) {
 				error = ENETUNREACH;
 				goto done;
 			}
-			ifp = ifa->ifa_ifp;
-			ifa_free(ifa);
 			if (bcast || multi)
 				goto mcast;
 		}
@@ -293,42 +243,26 @@ static int addr_resolve(struct sockaddr *src_in,
 #endif
 #ifdef INET6
 	case AF_INET6:
+		/* Make destination socket address writeable */
+		dstv6_tmp = *(struct sockaddr_in6 *)dst_in;
+		dst_in = (struct sockaddr *)&dstv6_tmp;
 		sin6 = (struct sockaddr_in6 *)dst_in;
 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 			multi = 1;
-		if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
-			/*
-			 * The IB address comparison fails if the
-			 * scope ID is set and not part of the addr:
-			 */
-			scope_id = sin6->sin6_scope_id;
-			if (scope_id < 256)
-				SCOPE_ID_CACHE(scope_id, sin6);
-		}
+		/*
+		 * Make sure the scope ID gets embedded, else rtalloc1() will
+		 * resolve to the loopback interface.
+		 */
+		sin6->sin6_scope_id = addr->bound_dev_if;
+		sa6_embedscope(sin6, 0);
+
 		sin6 = (struct sockaddr_in6 *)src_in;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
-			port = sin6->sin6_port;
-			sin6->sin6_port = 0;
-			if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
-				if (scope_id < 256)
-					SCOPE_ID_CACHE(scope_id, sin6);
-			}
-
-			/*
-			 * If we have a source address to use look it
-			 * up first and verify that it is a local
-			 * interface:
-			 */
-			CURVNET_SET_QUIET(&init_net);
-			ifa = ifa_ifwithaddr(src_in);
-			CURVNET_RESTORE();
-			sin6->sin6_port = port;
-			if (ifa == NULL) {
+			ifp = ip6_dev_find(&init_net, sin6->sin6_addr);
+			if (ifp == NULL) {
 				error = ENETUNREACH;
 				goto done;
 			}
-			ifp = ifa->ifa_ifp;
-			ifa_free(ifa);
 			if (bcast || multi)
 				goto mcast;
 		}
@@ -342,9 +276,13 @@ static int addr_resolve(struct sockaddr *src_in,
 	 * Make sure the route exists and has a valid link.
 	 */
 	rte = rtalloc1(dst_in, 1, 0);
-	if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) {
-		if (rte)
+	if (rte == NULL || rte->rt_ifp == NULL ||
+	    RT_LINK_IS_UP(rte->rt_ifp) == 0 ||
+	    rte->rt_ifp == V_loif) {
+		if (rte != NULL) {
 			RTFREE_LOCKED(rte);
+			rte = NULL;
+		}
 		error = EHOSTUNREACH;
 		goto done;
 	}
@@ -356,20 +294,27 @@ static int addr_resolve(struct sockaddr *src_in,
 	 * correct interface pointer and unlock the route.
 	 */
 	if (multi || bcast) {
+		/* rt_ifa holds the route answer source address */
+		ifa = rte->rt_ifa;
+
 		if (ifp == NULL) {
 			ifp = rte->rt_ifp;
-			/* rt_ifa holds the route answer source address */
-			ifa = rte->rt_ifa;
+			dev_hold(ifp);
 		}
 		RTFREE_LOCKED(rte);
-	} else if (ifp && ifp != rte->rt_ifp) {
+		rte = NULL;
+	} else if (ifp != NULL && ifp != rte->rt_ifp) {
 		RTFREE_LOCKED(rte);
+		rte = NULL;
 		error = ENETUNREACH;
 		goto done;
 	} else {
+		/* rt_ifa holds the route answer source address */
+		ifa = rte->rt_ifa;
+
 		if (ifp == NULL) {
 			ifp = rte->rt_ifp;
-			ifa = rte->rt_ifa;
+			dev_hold(ifp);
 		}
 		RT_UNLOCK(rte);
 	}
@@ -418,23 +363,17 @@ mcast:
 		error = EINVAL;
 		break;
 	}
-	RTFREE(rte);
 done:
 	if (error == 0)
 		error = -rdma_copy_addr(addr, ifp, edst);
 	if (error == 0)
 		memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
-#ifdef INET6
-	if (scope_id < 256) {
-		sin6 = (struct sockaddr_in6 *)src_in;
-		if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
-			SCOPE_ID_RESTORE(scope_id, sin6);
-		sin6 = (struct sockaddr_in6 *)dst_in;
-		SCOPE_ID_RESTORE(scope_id, sin6);
-	}
-#endif
 	if (error == EWOULDBLOCK)
 		error = ENODATA;
+	if (rte != NULL)
+		RTFREE(rte);
+	if (ifp != NULL)
+		dev_put(ifp);
 
 	CURVNET_RESTORE();
 	return -error;
@@ -567,7 +506,7 @@ static void resolve_cb(int status, struct sockaddr *sr
 }
 
 int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
-			       u16 *vlan_id, u32 scope_id)
+			       u16 *vlan_id, int *if_index)
 {
 	int ret = 0;
 	struct rdma_dev_addr dev_addr;
@@ -580,16 +519,17 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, uni
 		struct sockaddr_in6 _sockaddr_in6;
 	} sgid_addr, dgid_addr;
 
-
-	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid, scope_id);
+	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
 	if (ret)
 		return ret;
 
-	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid, scope_id);
+	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 	if (ret)
 		return ret;
 
 	memset(&dev_addr, 0, sizeof(dev_addr));
+	if (if_index)
+		dev_addr.bound_dev_if = *if_index;
 
 	ctx.addr = &dev_addr;
 	init_completion(&ctx.comp);
@@ -611,24 +551,8 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, uni
 }
 EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
 
-u32 rdma_get_ipv6_scope_id(struct ib_device *ib, u8 port_num)
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
 {
-#ifdef INET6
-	struct ifnet *ifp;
-	if (ib->get_netdev == NULL)
-		return (-1U);
-	ifp = ib->get_netdev(ib, port_num);
-	if (ifp == NULL)
-		return (-1U);
-	return (in6_getscopezone(ifp, IPV6_ADDR_SCOPE_LINKLOCAL));
-#else
-	return (-1U);
-#endif
-}
-
-int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id,
-    u32 scope_id)
-{
 	int ret = 0;
 	struct rdma_dev_addr dev_addr;
 	union {
@@ -637,7 +561,7 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8
 		struct sockaddr_in6 _sockaddr_in6;
 	} gid_addr;
 
-	ret = rdma_gid2ip(&gid_addr._sockaddr, sgid, scope_id);
+	ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
 	if (ret)
 		return ret;
 	memset(&dev_addr, 0, sizeof(dev_addr));

Modified: head/sys/ofed/drivers/infiniband/core/cma.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/cma.c	Thu Nov  9 19:15:28 2017	(r325613)
+++ head/sys/ofed/drivers/infiniband/core/cma.c	Thu Nov  9 19:22:43 2017	(r325614)
@@ -51,6 +51,9 @@
 #include <net/tcp.h>
 #include <net/ipv6.h>
 
+#include <netinet6/scope6_var.h>
+#include <netinet6/ip6_var.h>
+
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_cm_ib.h>
 #include <rdma/ib_cache.h>
@@ -710,11 +713,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *i
 	    == RDMA_TRANSPORT_IB &&
 	    rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
 	    == IB_LINK_LAYER_ETHERNET) {
-		u32 scope_id = rdma_get_ipv6_scope_id(id_priv->id.device,
-		    id_priv->id.port_num);
-
-		ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL,
-		    scope_id);
+		ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
 		if (ret)
 			goto out;
 	}
@@ -1452,19 +1451,16 @@ static int cma_req_handler(struct ib_cm_id *cm_id, str
 		goto err3;
 
 	if (is_iboe && !is_sidr) {
-		u32 scope_id = rdma_get_ipv6_scope_id(cm_id->device,
-		    ib_event->param.req_rcvd.port);
-
 		if (ib_event->param.req_rcvd.primary_path != NULL)
 			rdma_addr_find_smac_by_sgid(
 				&ib_event->param.req_rcvd.primary_path->sgid,
-				psmac, NULL, scope_id);
+				psmac, NULL);
 		else
 			psmac = NULL;
 		if (ib_event->param.req_rcvd.alternate_path != NULL)
 			rdma_addr_find_smac_by_sgid(
 				&ib_event->param.req_rcvd.alternate_path->sgid,
-				palt_smac, NULL, scope_id);
+				palt_smac, NULL);
 		else
 			palt_smac = NULL;
 	}
@@ -2311,8 +2307,12 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct
 		src_addr->sa_family = dst_addr->sa_family;
 #ifdef INET6
 		if (dst_addr->sa_family == AF_INET6) {
-			((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
-				((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+			struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
+			struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
+			src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+			if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr) ||
+			    IN6_IS_ADDR_MC_INTFACELOCAL(&dst_addr6->sin6_addr))
+				id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
 		}
 #endif
 	}
@@ -2666,20 +2666,23 @@ out:
 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
 			       struct sockaddr *addr)
 {
-#if defined(INET6)
-	struct sockaddr_in6 *sin6;
+#ifdef INET6
+	struct sockaddr_in6 sin6;
 
 	if (addr->sa_family != AF_INET6)
 		return 0;
 
-	sin6 = (struct sockaddr_in6 *) addr;
-	if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) &&
-	    !sin6->sin6_scope_id)
-			return -EINVAL;
+	sin6 = *(struct sockaddr_in6 *)addr;
 
-	dev_addr->bound_dev_if = sin6->sin6_scope_id;
+	if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr) ||
+	    IN6_IS_ADDR_MC_INTFACELOCAL(&sin6.sin6_addr)) {
+		/* check if IPv6 scope ID is set */
+		if (sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0)
+			return -EINVAL;
+		dev_addr->bound_dev_if = sin6.sin6_scope_id;
+	}
 #endif
-	return 0;
+	return (0);
 }
 
 int rdma_listen(struct rdma_cm_id *id, int backlog)

Modified: head/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/uverbs_cmd.c	Thu Nov  9 19:15:28 2017	(r325613)
+++ head/sys/ofed/drivers/infiniband/core/uverbs_cmd.c	Thu Nov  9 19:22:43 2017	(r325614)
@@ -2094,13 +2094,23 @@ static ssize_t __uverbs_modify_qp(struct ib_uverbs_fil
 					attr->smac);
 			attr->vlan_id = rdma_get_vlan_id(&sgid);
 		} else {
+			struct net_device *idev;
+			int if_index;
+
+			if (qp->device->get_netdev != NULL &&
+			    (idev = qp->device->get_netdev(qp->device, port_num)) != NULL)
+				if_index = idev->if_index;
+			else
+				if_index = 0;
+
 			ret = rdma_addr_find_dmac_by_grh(&sgid, dgid,
 							 attr->ah_attr.dmac,
-							 &attr->vlan_id, -1U);
+							 &attr->vlan_id,
+							 &if_index);
 			if (ret)
 				goto out;
 			ret = rdma_addr_find_smac_by_sgid(&sgid, attr->smac,
-							  NULL, -1U);
+							  NULL);
 			if (ret)
 				goto out;
 		}

Modified: head/sys/ofed/drivers/infiniband/core/verbs.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/verbs.c	Thu Nov  9 19:15:28 2017	(r325613)
+++ head/sys/ofed/drivers/infiniband/core/verbs.c	Thu Nov  9 19:22:43 2017	(r325614)
@@ -41,6 +41,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/netdevice.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
@@ -207,10 +208,18 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 po
 			memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
 			ah_attr->vlan_id = wc->vlan_id;
 		} else {
-			u32 scope_id = rdma_get_ipv6_scope_id(device, port_num);
+			struct net_device *idev;
+			int if_index;
+
+			if (device->get_netdev != NULL &&
+			    (idev = device->get_netdev(device, port_num)) != NULL)
+				if_index = idev->if_index;
+			else
+				if_index = 0;
+
 			ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
 					 ah_attr->dmac, &ah_attr->vlan_id,
-					 scope_id);
+					 &if_index);
 			if (ret)
 				return ret;
 		}

Modified: head/sys/ofed/include/rdma/ib_addr.h
==============================================================================
--- head/sys/ofed/include/rdma/ib_addr.h	Thu Nov  9 19:15:28 2017	(r325613)
+++ head/sys/ofed/include/rdma/ib_addr.h	Thu Nov  9 19:22:43 2017	(r325614)
@@ -105,10 +105,9 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr);
 
 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 	      const unsigned char *dst_dev_addr);
-int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id,
-				u32 scope_id);
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
 int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac,
-			       u16 *vlan_id, u32 scope_id);
+			       u16 *vlan_id, int *if_index);
 
 static inline int ip_addr_size(struct sockaddr *addr)
 {
@@ -171,8 +170,7 @@ static inline int rdma_ip2gid(struct sockaddr *addr, u
 }
 
 /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */
-static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid,
-    uint32_t scope_id)
+static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
 {
 	if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
 		struct sockaddr_in *out_in = (struct sockaddr_in *)out;
@@ -186,14 +184,9 @@ static inline int rdma_gid2ip(struct sockaddr *out, un
 		out_in->sin6_len = sizeof(*out_in);
 		out_in->sin6_family = AF_INET6;
 		memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16);
-		if (scope_id < 256 &&
-		    IN6_IS_SCOPE_LINKLOCAL(&out_in->sin6_addr))
-			out_in->sin6_scope_id = scope_id;
 	}
 	return 0;
 }
-
-u32 rdma_get_ipv6_scope_id(struct ib_device *ib, u8 port_num);
 
 /* This func is called only in loopback ip address (127.0.0.1)
  * case in which sgid is not relevant


More information about the svn-src-head mailing list