git: 48f38f47b105 - stable/13 - lltable: Add support for "child" LLEs holding encap for IPv4oIPv6 entries.

Alexander V. Chernikov melifaro at FreeBSD.org
Tue Sep 7 21:13:00 UTC 2021


The branch stable/13 has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=48f38f47b1051695e2ad7021798edf61495b7030

commit 48f38f47b1051695e2ad7021798edf61495b7030
Author:     Alexander V. Chernikov <melifaro at FreeBSD.org>
AuthorDate: 2021-08-21 14:13:32 +0000
Commit:     Alexander V. Chernikov <melifaro at FreeBSD.org>
CommitDate: 2021-09-07 21:02:58 +0000

    lltable: Add support for "child" LLEs holding encap for IPv4oIPv6 entries.
    
    Currently we use pre-calculated headers inside LLE entries as prepend data
     for `if_output` functions. Using these headers allows saving some
     CPU cycles/memory accesses on the fast path.
    
    However, this approach makes adding an L2 header for IPv4 traffic with IPv6
     nexthops more complex, as it is not possible to store multiple
     pre-calculated headers inside an LLE. Additionally, the solution space is
     limited by the fact that PCB caching saves LLEs in addition to the nexthop.
    
    Thus, add support for creating special "child" LLEs for the purpose of holding
     custom family encaps and storing mbufs pending resolution. To simplify handling
     of those LLEs, store them in a linked list inside a "parent" (i.e. normal) LLE.
     Such LLEs are not visible when iterating the LLE table. Their lifecycle is bound
     to the "parent" LLE - it is not possible to delete a "child" while its parent is
     alive. Furthermore, "child" LLEs are static (RTF_STATIC), avoiding the complex
     state machine used by the standard LLEs.
    
    nd6_lookup() and nd6_resolve() now accept an additional argument, family,
     allowing them to return such child LLEs. This change uses the `LLE_SF()` macro,
     which packs family and flags into a single int field. This is done to simplify
     merging back to stable/. Once this code lands, most of the cases will be
     converted to use a dedicated `family` parameter.
    
    Differential Revision: https://reviews.freebsd.org/D31379
    
    (cherry picked from commit c541bd368f863bbf5c08dd5c1ecce0166ad47389)
---
 sys/net/if_ethersubr.c                     |   4 +-
 sys/net/if_fwsubr.c                        |   4 +-
 sys/net/if_infiniband.c                    |   3 +-
 sys/net/if_llatbl.c                        |  70 +++++++++++-
 sys/net/if_llatbl.h                        |  12 +-
 sys/netinet/toecore.c                      |   2 +-
 sys/netinet6/icmp6.c                       |   2 +-
 sys/netinet6/in6.c                         |   5 +
 sys/netinet6/nd6.c                         | 176 +++++++++++++++++++++++------
 sys/netinet6/nd6.h                         |   1 +
 sys/netinet6/nd6_nbr.c                     |   6 +-
 sys/ofed/drivers/infiniband/core/ib_addr.c |   5 +-
 12 files changed, 241 insertions(+), 49 deletions(-)

diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 718de9625044..70a75a3f5ad4 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -236,8 +236,8 @@ ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
 #ifdef INET6
 	case AF_INET6:
 		if ((m->m_flags & M_MCAST) == 0)
-			error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags,
-			    plle);
+			error = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), m, dst, phdr,
+			    &lleflags, plle);
 		else {
 			const struct in6_addr *a6;
 			a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c
index 29ca2f713e8e..a6c43d4d05a4 100644
--- a/sys/net/if_fwsubr.c
+++ b/sys/net/if_fwsubr.c
@@ -176,8 +176,8 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 #ifdef INET6
 	case AF_INET6:
 		if (unicast) {
-			error = nd6_resolve(fc->fc_ifp, is_gw, m, dst,
-			    (u_char *) destfw, NULL, NULL);
+			error = nd6_resolve(fc->fc_ifp, LLE_SF(AF_INET6, is_gw),
+			    m, dst, (u_char *) destfw, NULL, NULL);
 			if (error)
 				return (error == EWOULDBLOCK ? 0 : error);
 		}
diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c
index 528f20b7c98d..244b2a5ba117 100644
--- a/sys/net/if_infiniband.c
+++ b/sys/net/if_infiniband.c
@@ -253,7 +253,8 @@ infiniband_resolve_addr(struct ifnet *ifp, struct mbuf *m,
 #ifdef INET6
 	case AF_INET6:
 		if ((m->m_flags & M_MCAST) == 0) {
-			error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags, plle);
+			error = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), m, dst,
+			    phdr, &lleflags, plle);
 		} else {
 			infiniband_ipv6_multicast_map(
 			    &((const struct sockaddr_in6 *)dst)->sin6_addr,
diff --git a/sys/net/if_llatbl.c b/sys/net/if_llatbl.c
index c656974c80ee..e4dfc45705a8 100644
--- a/sys/net/if_llatbl.c
+++ b/sys/net/if_llatbl.c
@@ -398,6 +398,26 @@ lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
 	return (error);
 }
 
+/*
+ * Searches for the child entry matching @family inside @lle.
+ * Returns the entry or NULL.
+ */
+struct llentry *
+llentry_lookup_family(struct llentry *lle, int family)
+{
+	struct llentry *child_lle;
+
+	if (lle == NULL)
+		return (NULL);
+
+	CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
+		if (child_lle->r_family == family)
+			return (child_lle);
+	}
+
+	return (NULL);
+}
+
 /*
  * Requests feedback from the datapath.
  * First packet using @lle should result in
@@ -407,9 +427,17 @@ lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
 void
 llentry_request_feedback(struct llentry *lle)
 {
+	struct llentry *child_lle;
+
 	LLE_REQ_LOCK(lle);
 	lle->r_skip_req = 1;
 	LLE_REQ_UNLOCK(lle);
+
+	CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
+		LLE_REQ_LOCK(child_lle);
+		child_lle->r_skip_req = 1;
+		LLE_REQ_UNLOCK(child_lle);
+	}
 }
 
 /*
@@ -431,8 +459,8 @@ llentry_mark_used(struct llentry *lle)
  * Return 0 if the entry was not used, relevant time_uptime
  *  otherwise.
  */
-time_t
-llentry_get_hittime(struct llentry *lle)
+static time_t
+llentry_get_hittime_raw(struct llentry *lle)
 {
 	time_t lle_hittime = 0;
 
@@ -444,6 +472,23 @@ llentry_get_hittime(struct llentry *lle)
 	return (lle_hittime);
 }
 
+time_t
+llentry_get_hittime(struct llentry *lle)
+{
+	time_t lle_hittime = 0;
+	struct llentry *child_lle;
+
+	lle_hittime = llentry_get_hittime_raw(lle);
+
+	CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
+		time_t hittime = llentry_get_hittime_raw(child_lle);
+		if (hittime > lle_hittime)
+			lle_hittime = hittime;
+	}
+
+	return (lle_hittime);
+}
+
 /*
  * Update link-layer header for given @lle after
  * interface lladdr was changed.
@@ -585,7 +630,7 @@ lltable_delete_addr(struct lltable *llt, u_int flags,
 
 	ifp = llt->llt_ifp;
 	IF_AFDATA_WLOCK(ifp);
-	lle = lla_lookup(llt, LLE_EXCLUSIVE, l3addr);
+	lle = lla_lookup(llt, LLE_SF(l3addr->sa_family, LLE_EXCLUSIVE), l3addr);
 
 	if (lle == NULL) {
 		IF_AFDATA_WUNLOCK(ifp);
@@ -700,6 +745,25 @@ lltable_link_entry(struct lltable *llt, struct llentry *lle)
 	return (llt->llt_link_entry(llt, lle));
 }
 
+void
+lltable_link_child_entry(struct llentry *lle, struct llentry *child_lle)
+{
+	child_lle->lle_parent = lle;
+	child_lle->lle_tbl = lle->lle_tbl;
+	child_lle->la_flags |= LLE_LINKED;
+	CK_SLIST_INSERT_HEAD(&lle->lle_children, child_lle, lle_child_next);
+}
+
+void
+lltable_unlink_child_entry(struct llentry *child_lle)
+{
+	struct llentry *lle = child_lle->lle_parent;
+
+	child_lle->la_flags &= ~LLE_LINKED;
+	child_lle->lle_parent = NULL;
+	CK_SLIST_REMOVE(&lle->lle_children, child_lle, llentry, lle_child_next);
+}
+
 int
 lltable_unlink_entry(struct lltable *llt, struct llentry *lle)
 {
diff --git a/sys/net/if_llatbl.h b/sys/net/if_llatbl.h
index ffbaa7a946bb..7ad9d59a1a0e 100644
--- a/sys/net/if_llatbl.h
+++ b/sys/net/if_llatbl.h
@@ -58,7 +58,8 @@ struct llentry {
 	} r_l3addr;
 	char			r_linkdata[LLE_MAX_LINKHDR]; /* L2 data */
 	uint8_t			r_hdrlen;	/* length for LL header */
-	uint8_t			spare0[3];
+	uint8_t			r_family;	/* Upper layer proto family */
+	uint8_t			spare0[2];
 	uint16_t		r_flags;	/* LLE runtime flags */
 	uint16_t		r_skip_req;	/* feedback from fast path */
 
@@ -78,6 +79,9 @@ struct llentry {
 	time_t			lle_hittime;	/* Time when r_skip_req was unset */
 	int			 lle_refcnt;
 	char			*ll_addr;	/* link-layer address */
+	CK_SLIST_HEAD(llentry_children_head,llentry)	lle_children;	/* child encaps */
+	CK_SLIST_ENTRY(llentry)	lle_child_next;	/* child encaps */
+	struct llentry		*lle_parent;	/* parent for a child */
 
 	CK_LIST_ENTRY(llentry)	lle_chain;	/* chain of deleted items */
 	struct callout		lle_timer;
@@ -104,6 +108,8 @@ struct llentry {
 
 #define LLE_IS_VALID(lle)	(((lle) != NULL) && ((lle) != (void *)-1))
 
+#define	LLE_SF(_fam, _flags)	(((_flags) & 0xFFFF) | ((_fam) << 16))
+
 #define	LLE_ADDREF(lle) do {					\
 	LLE_WLOCK_ASSERT(lle);					\
 	KASSERT((lle)->lle_refcnt >= 0,				\
@@ -195,6 +201,7 @@ MALLOC_DECLARE(M_LLTABLE);
 #define	LLE_REDIRECT	0x0010	/* installed by redirect; has host rtentry */
 #define	LLE_PUB		0x0020	/* publish entry ??? */
 #define	LLE_LINKED	0x0040	/* linked to lookup structure */
+#define	LLE_CHILD	0x0080	/* Child LLE storing different AF encap */
 /* LLE request flags */
 #define	LLE_EXCLUSIVE	0x2000	/* return lle xlocked  */
 #define	LLE_UNLOCKED	0x4000	/* return lle unlocked */
@@ -234,6 +241,8 @@ int lltable_delete_addr(struct lltable *llt, u_int flags,
     const struct sockaddr *l3addr);
 int lltable_link_entry(struct lltable *llt, struct llentry *lle);
 int lltable_unlink_entry(struct lltable *llt, struct llentry *lle);
+void lltable_link_child_entry(struct llentry *parent_lle, struct llentry *child_lle);
+void lltable_unlink_child_entry(struct llentry *child_lle);
 void lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa);
 struct ifnet *lltable_get_ifp(const struct lltable *llt);
 int lltable_get_af(const struct lltable *llt);
@@ -267,6 +276,7 @@ llentry_provide_feedback(struct llentry *lle)
 		return;
 	llentry_mark_used(lle);
 }
+struct llentry *llentry_lookup_family(struct llentry *lle, int family);
 
 int		lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
 
diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c
index 6e59fa4dd90d..a8f9eb79817d 100644
--- a/sys/netinet/toecore.c
+++ b/sys/netinet/toecore.c
@@ -474,7 +474,7 @@ toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
 #endif
 #ifdef INET6
 	case AF_INET6:
-		rc = nd6_resolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
+		rc = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), NULL, sa, lladdr, NULL, NULL);
 		break;
 #endif
 	default:
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c
index 6b8f0f7be5bb..f4a5574084fd 100644
--- a/sys/netinet6/icmp6.c
+++ b/sys/netinet6/icmp6.c
@@ -2546,7 +2546,7 @@ icmp6_redirect_output(struct mbuf *m0, struct nhop_object *nh)
 		struct nd_opt_hdr *nd_opt;
 		char *lladdr;
 
-		ln = nd6_lookup(router_ll6, 0, ifp);
+		ln = nd6_lookup(router_ll6, LLE_SF(AF_INET6,  0), ifp);
 		if (ln == NULL)
 			goto nolladdropt;
 
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index d5b3452c0b06..142a05ded2b6 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -2335,6 +2335,11 @@ in6_lltable_lookup(struct lltable *llt, u_int flags,
 	lle = in6_lltable_find_dst(llt, &sin6->sin6_addr);
 	if (lle == NULL)
 		return (NULL);
+
+	int family = flags >> 16;
+	if (__predict_false(family != AF_INET6))
+		lle = llentry_lookup_family(lle, family);
+
 	if (flags & LLE_UNLOCKED)
 		return (lle);
 
diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
index ea64b3a6c14c..6a9e2a4fdd7c 100644
--- a/sys/netinet6/nd6.c
+++ b/sys/netinet6/nd6.c
@@ -139,7 +139,7 @@ static void nd6_free_redirect(const struct llentry *);
 static void nd6_llinfo_timer(void *);
 static void nd6_llinfo_settimer_locked(struct llentry *, long);
 static void clear_llinfo_pqueue(struct llentry *);
-static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
+static int nd6_resolve_slow(struct ifnet *, int, int, struct mbuf *,
     const struct sockaddr_in6 *, u_char *, uint32_t *, struct llentry **);
 static int nd6_need_cache(struct ifnet *);
 
@@ -530,6 +530,10 @@ nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
 
 	LLE_WLOCK_ASSERT(ln);
 
+	/* Do not schedule timers for child LLEs. */
+	if (ln->la_flags & LLE_CHILD)
+		return;
+
 	if (tick < 0) {
 		ln->la_expire = 0;
 		ln->ln_ntick = 0;
@@ -1375,40 +1379,76 @@ nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
 	 * Even if the address matches none of our addresses, it might be
 	 * in the neighbor cache.
 	 */
-	if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) {
+	if ((lle = nd6_lookup(&addr->sin6_addr, LLE_SF(AF_INET6, 0), ifp)) != NULL) {
 		LLE_RUNLOCK(lle);
 		rc = 1;
 	}
 	return (rc);
 }
 
+static __noinline void
+nd6_free_children(struct llentry *lle)
+{
+	struct llentry *child_lle;
+
+	NET_EPOCH_ASSERT();
+	LLE_WLOCK_ASSERT(lle);
+
+	while ((child_lle = CK_SLIST_FIRST(&lle->lle_children)) != NULL) {
+		LLE_WLOCK(child_lle);
+		lltable_unlink_child_entry(child_lle);
+		llentry_free(child_lle);
+	}
+}
+
 /*
  * Tries to update @lle address/prepend data with new @lladdr.
  *
  * Returns true on success.
  * In any case, @lle is returned wlocked.
  */
-bool
-nd6_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, char *lladdr)
+static __noinline bool
+nd6_try_set_entry_addr_locked(struct ifnet *ifp, struct llentry *lle, char *lladdr)
 {
-	u_char linkhdr[LLE_MAX_LINKHDR];
-	size_t linkhdrsize;
-	int lladdr_off;
-
-	LLE_WLOCK_ASSERT(lle);
+	u_char buf[LLE_MAX_LINKHDR];
+	int fam, off;
+	size_t sz;
 
-	linkhdrsize = sizeof(linkhdr);
-	if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
-	    linkhdr, &linkhdrsize, &lladdr_off) != 0) {
+	sz = sizeof(buf);
+	if (lltable_calc_llheader(ifp, AF_INET6, lladdr, buf, &sz, &off) != 0)
 		return (false);
+
+	/* Update data */
+	lltable_set_entry_addr(ifp, lle, buf, sz, off);
+
+	struct llentry *child_lle;
+	CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
+		LLE_WLOCK(child_lle);
+		fam = child_lle->r_family;
+		sz = sizeof(buf);
+		if (lltable_calc_llheader(ifp, fam, lladdr, buf, &sz, &off) == 0) {
+			/* success */
+			lltable_set_entry_addr(ifp, child_lle, buf, sz, off);
+			child_lle->ln_state = ND6_LLINFO_REACHABLE;
+		}
+		LLE_WUNLOCK(child_lle);
 	}
 
+	return (true);
+}
+
+bool
+nd6_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, char *lladdr)
+{
+	NET_EPOCH_ASSERT();
+	LLE_WLOCK_ASSERT(lle);
+
 	if (!lltable_acquire_wlock(ifp, lle))
 		return (false);
-	lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off);
+	bool ret = nd6_try_set_entry_addr_locked(ifp, lle, lladdr);
 	IF_AFDATA_WUNLOCK(ifp);
 
-	return (true);
+	return (ret);
 }
 
 /*
@@ -1432,6 +1472,8 @@ nd6_free(struct llentry **lnp, int gc)
 	LLE_WLOCK_ASSERT(ln);
 	ND6_RLOCK_ASSERT();
 
+	KASSERT((ln->la_flags & LLE_CHILD) == 0, ("child lle"));
+
 	ifp = lltable_get_ifp(ln->lle_tbl);
 	if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0)
 		dr = defrouter_lookup_locked(&ln->r_l3addr.addr6, ifp);
@@ -1553,6 +1595,8 @@ nd6_free(struct llentry **lnp, int gc)
 	}
 	IF_AFDATA_UNLOCK(ifp);
 
+	nd6_free_children(ln);
+
 	llentry_free(ln);
 	if (dr != NULL)
 		defrouter_rele(dr);
@@ -1827,7 +1871,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 			return (error);
 
 		NET_EPOCH_ENTER(et);
-		ln = nd6_lookup(&nb_addr, 0, ifp);
+		ln = nd6_lookup(&nb_addr, LLE_SF(AF_INET6, 0), ifp);
 		NET_EPOCH_EXIT(et);
 
 		if (ln == NULL) {
@@ -1977,7 +2021,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
 	 * description on it in NS section (RFC 2461 7.2.3).
 	 */
 	flags = lladdr ? LLE_EXCLUSIVE : 0;
-	ln = nd6_lookup(from, flags, ifp);
+	ln = nd6_lookup(from, LLE_SF(AF_INET6, flags), ifp);
 	is_newentry = 0;
 	if (ln == NULL) {
 		flags |= LLE_EXCLUSIVE;
@@ -2001,7 +2045,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
 		IF_AFDATA_WLOCK(ifp);
 		LLE_WLOCK(ln);
 		/* Prefer any existing lle over newly-created one */
-		ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp);
+		ln_tmp = nd6_lookup(from, LLE_SF(AF_INET6, LLE_EXCLUSIVE), ifp);
 		if (ln_tmp == NULL)
 			lltable_link_entry(LLTABLE6(ifp), ln);
 		IF_AFDATA_WUNLOCK(ifp);
@@ -2086,6 +2130,8 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
 
 	if (chain != NULL)
 		nd6_flush_holdchain(ifp, ln, chain);
+	if (do_update)
+		nd6_flush_children_holdchain(ifp, ln);
 
 	/*
 	 * When the link-layer address of a router changes, select the
@@ -2227,7 +2273,7 @@ nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
  * - other errors (alloc failure, etc)
  */
 int
-nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
+nd6_resolve(struct ifnet *ifp, int gw_flags, struct mbuf *m,
     const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags,
     struct llentry **plle)
 {
@@ -2261,8 +2307,9 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
 		}
 	}
 
-	ln = nd6_lookup(&dst6->sin6_addr, plle ? LLE_EXCLUSIVE : LLE_UNLOCKED,
-	    ifp);
+	int family = gw_flags >> 16;
+	int lookup_flags = plle ? LLE_EXCLUSIVE : LLE_UNLOCKED;
+	ln = nd6_lookup(&dst6->sin6_addr, LLE_SF(family, lookup_flags), ifp);
 	if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) {
 		/* Entry found, let's copy lle info */
 		bcopy(ln->r_linkdata, desten, ln->r_hdrlen);
@@ -2278,19 +2325,39 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
 	} else if (plle && ln)
 		LLE_WUNLOCK(ln);
 
-	return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags, plle));
+	return (nd6_resolve_slow(ifp, family, 0, m, dst6, desten, pflags, plle));
 }
 
 /*
- * Finds or creates a new llentry for @addr.
+ * Finds or creates a new llentry for @addr and @family.
  * Returns wlocked llentry or NULL.
+ *
+ *
+ * Child LLEs.
+ *
+ * Do not have their own state machine (gets marked as static)
+ *  settimer bails out for child LLEs just in case.
+ *
+ * Locking order: parent lle gets locked first, then goes the child.
  */
 static __noinline struct llentry *
-nd6_get_llentry(struct ifnet *ifp, const struct in6_addr *addr)
+nd6_get_llentry(struct ifnet *ifp, const struct in6_addr *addr, int family)
 {
+	struct llentry *child_lle = NULL;
 	struct llentry *lle, *lle_tmp;
 
 	lle = nd6_alloc(addr, 0, ifp);
+	if (lle != NULL && family != AF_INET6) {
+		child_lle = nd6_alloc(addr, 0, ifp);
+		if (child_lle == NULL) {
+			lltable_free_entry(LLTABLE6(ifp), lle);
+			return (NULL);
+		}
+		child_lle->r_family = family;
+		child_lle->la_flags |= LLE_CHILD | LLE_STATIC;
+		child_lle->ln_state = ND6_LLINFO_INCOMPLETE;
+	}
+
 	if (lle == NULL) {
 		char ip6buf[INET6_ADDRSTRLEN];
 		log(LOG_DEBUG,
@@ -2303,15 +2370,30 @@ nd6_get_llentry(struct ifnet *ifp, const struct in6_addr *addr)
 	IF_AFDATA_WLOCK(ifp);
 	LLE_WLOCK(lle);
 	/* Prefer any existing entry over newly-created one */
-	lle_tmp = nd6_lookup(addr, LLE_EXCLUSIVE, ifp);
+	lle_tmp = nd6_lookup(addr, LLE_SF(AF_INET6, LLE_EXCLUSIVE), ifp);
 	if (lle_tmp == NULL)
 		lltable_link_entry(LLTABLE6(ifp), lle);
-	IF_AFDATA_WUNLOCK(ifp);
-	if (lle_tmp != NULL) {
+	else {
 		lltable_free_entry(LLTABLE6(ifp), lle);
-		return (lle_tmp);
-	} else
-		return (lle);
+		lle = lle_tmp;
+	}
+	if (child_lle != NULL) {
+		/* Check if child lle for the same family exists */
+		lle_tmp = llentry_lookup_family(lle, child_lle->r_family);
+		LLE_WLOCK(child_lle);
+		if (lle_tmp == NULL) {
+			/* Attach */
+			lltable_link_child_entry(lle, child_lle);
+		} else {
+			/* child lle already exists, free newly-created one */
+			lltable_free_entry(LLTABLE6(ifp), child_lle);
+			child_lle = lle_tmp;
+		}
+		LLE_WUNLOCK(lle);
+		lle = child_lle;
+	}
+	IF_AFDATA_WUNLOCK(ifp);
+	return (lle);
 }
 
 /*
@@ -2326,7 +2408,7 @@ nd6_get_llentry(struct ifnet *ifp, const struct in6_addr *addr)
  * Set noinline to be dtrace-friendly
  */
 static __noinline int
-nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
+nd6_resolve_slow(struct ifnet *ifp, int family, int flags, struct mbuf *m,
     const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags,
     struct llentry **plle)
 {
@@ -2343,14 +2425,14 @@ nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
 	 * At this point, the destination of the packet must be a unicast
 	 * or an anycast address(i.e. not a multicast).
 	 */
-	lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
+	lle = nd6_lookup(&dst->sin6_addr, LLE_SF(family, LLE_EXCLUSIVE), ifp);
 	if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
 		/*
 		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
 		 * the condition below is not very efficient.  But we believe
 		 * it is tolerable, because this should be a rare case.
 		 */
-		lle = nd6_get_llentry(ifp, &dst->sin6_addr);
+		lle = nd6_get_llentry(ifp, &dst->sin6_addr, family);
 	}
 
 	if (lle == NULL) {
@@ -2367,7 +2449,7 @@ nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
 	 * neighbor unreachability detection on expiration.
 	 * (RFC 2461 7.3.3)
 	 */
-	if (lle->ln_state == ND6_LLINFO_STALE)
+	if ((!(lle->la_flags & LLE_CHILD)) && (lle->ln_state == ND6_LLINFO_STALE))
 		nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY);
 
 	/*
@@ -2432,6 +2514,14 @@ nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
 	 */
 	psrc = NULL;
 	send_ns = 0;
+
+	/* If we have child lle, switch to the parent to send NS */
+	if (lle->la_flags & LLE_CHILD) {
+		struct llentry *lle_parent = lle->lle_parent;
+		LLE_WUNLOCK(lle);
+		lle = lle_parent;
+		LLE_WLOCK(lle);
+	}
 	if (lle->la_asked == 0) {
 		lle->la_asked++;
 		send_ns = 1;
@@ -2463,7 +2553,7 @@ nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
 	int error;
 
 	flags |= LLE_ADDRONLY;
-	error = nd6_resolve_slow(ifp, flags, NULL,
+	error = nd6_resolve_slow(ifp, AF_INET6, flags, NULL,
 	    (const struct sockaddr_in6 *)dst, desten, pflags, NULL);
 	return (error);
 }
@@ -2499,6 +2589,22 @@ nd6_flush_holdchain(struct ifnet *ifp, struct llentry *lle, struct mbuf *chain)
 	return (error);
 }
 
+__noinline void
+nd6_flush_children_holdchain(struct ifnet *ifp, struct llentry *lle)
+{
+	struct llentry *child_lle;
+	struct mbuf *chain;
+
+	NET_EPOCH_ASSERT();
+
+	CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
+		LLE_WLOCK(child_lle);
+		chain = nd6_grab_holdchain(child_lle);
+		LLE_WUNLOCK(child_lle);
+		nd6_flush_holdchain(ifp, child_lle, chain);
+	}
+}
+
 static int
 nd6_need_cache(struct ifnet *ifp)
 {
@@ -2552,7 +2658,7 @@ nd6_add_ifa_lle(struct in6_ifaddr *ia)
 	IF_AFDATA_WLOCK(ifp);
 	LLE_WLOCK(ln);
 	/* Unlink any entry if exists */
-	ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst);
+	ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_SF(AF_INET6, LLE_EXCLUSIVE), dst);
 	if (ln_tmp != NULL)
 		lltable_unlink_entry(LLTABLE6(ifp), ln_tmp);
 	lltable_link_entry(LLTABLE6(ifp), ln);
diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h
index fe0f2b22cc48..3f9f8219b018 100644
--- a/sys/netinet6/nd6.h
+++ b/sys/netinet6/nd6.h
@@ -379,6 +379,7 @@ void nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
 bool nd6_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, char *lladdr);
 struct mbuf *nd6_grab_holdchain(struct llentry *);
 int nd6_flush_holdchain(struct ifnet *, struct llentry *, struct mbuf *);
+void nd6_flush_children_holdchain(struct ifnet *, struct llentry *);
 int nd6_add_ifa_lle(struct in6_ifaddr *);
 void nd6_rem_ifa_lle(struct in6_ifaddr *, int);
 int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *,
diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c
index 974c454e93a5..30d73f9d71a9 100644
--- a/sys/netinet6/nd6_nbr.c
+++ b/sys/netinet6/nd6_nbr.c
@@ -630,6 +630,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
 	size_t linkhdrsize;
 	int flags, is_override, is_router, is_solicited;
 	int lladdr_off, lladdrlen, checklink;
+	bool flush_holdchain = false;
 
 	NET_EPOCH_ASSERT();
 
@@ -747,7 +748,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
 	 * If no neighbor cache entry is found, NA SHOULD silently be
 	 * discarded.
 	 */
-	ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp);
+	ln = nd6_lookup(&taddr6, LLE_SF(AF_INET6, LLE_EXCLUSIVE), ifp);
 	if (ln == NULL) {
 		goto freeit;
 	}
@@ -773,6 +774,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
 		if (!nd6_try_set_entry_addr(ifp, ln, lladdr))
 			goto freeit;
 
+		flush_holdchain = true;
 		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
 		if (is_solicited)
 			nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
@@ -899,6 +901,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
 
 	if (chain != NULL)
 		nd6_flush_holdchain(ifp, ln, chain);
+	if (flush_holdchain)
+		nd6_flush_children_holdchain(ifp, ln);
 
 	if (checklink)
 		pfxlist_onlink_check();
diff --git a/sys/ofed/drivers/infiniband/core/ib_addr.c b/sys/ofed/drivers/infiniband/core/ib_addr.c
index a8e951721b8d..297469bd4d87 100644
--- a/sys/ofed/drivers/infiniband/core/ib_addr.c
+++ b/sys/ofed/drivers/infiniband/core/ib_addr.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/module.h>
+#include <net/if_llatbl.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/netevent.h>
@@ -584,8 +585,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 	} else {
 		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
 		memset(edst, 0, MAX_ADDR_LEN);
-		error = nd6_resolve(ifp, is_gw, NULL, is_gw ?
-		    &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
+		error = nd6_resolve(ifp, LLE_SF(AF_INET6, is_gw), NULL,
+		    is_gw ? &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
 		    edst, NULL, NULL);
 		if (error != 0)
 			goto error_put_ifp;


More information about the dev-commits-src-branches mailing list