svn commit: r316714 - in stable/11/sys: netinet netinet6

Mark Johnston markj at FreeBSD.org
Tue Apr 11 18:48:19 UTC 2017


Author: markj
Date: Tue Apr 11 18:48:17 2017
New Revision: 316714
URL: https://svnweb.freebsd.org/changeset/base/316714

Log:
  MFC r306829, r310286, r311695:
  Lock the ND prefix list and add refcounting for prefixes.

Modified:
  stable/11/sys/netinet/sctp_output.c
  stable/11/sys/netinet6/in6.c
  stable/11/sys/netinet6/in6_ifattach.c
  stable/11/sys/netinet6/nd6.c
  stable/11/sys/netinet6/nd6.h
  stable/11/sys/netinet6/nd6_rtr.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/netinet/sctp_output.c
==============================================================================
--- stable/11/sys/netinet/sctp_output.c	Tue Apr 11 17:36:19 2017	(r316713)
+++ stable/11/sys/netinet/sctp_output.c	Tue Apr 11 18:48:17 2017	(r316714)
@@ -13775,6 +13775,7 @@ sctp_v6src_match_nexthop(struct sockaddr
 		return (0);
 
 	/* get prefix entry of address */
+	ND6_RLOCK();
 	LIST_FOREACH(pfx, &MODULE_GLOBAL(nd_prefix), ndpr_entry) {
 		if (pfx->ndpr_stateflags & NDPRF_DETACHED)
 			continue;
@@ -13784,6 +13785,7 @@ sctp_v6src_match_nexthop(struct sockaddr
 	}
 	/* no prefix entry in the prefix list */
 	if (pfx == NULL) {
+		ND6_RUNLOCK();
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefix entry for ");
 		SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
 		return (0);
@@ -13805,9 +13807,11 @@ sctp_v6src_match_nexthop(struct sockaddr
 		if (sctp_cmpaddr((struct sockaddr *)&gw6, ro->ro_rt->rt_gateway)) {
 			ND6_RUNLOCK();
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n");
+			ND6_RUNLOCK();
 			return (1);
 		}
 	}
+	ND6_RUNLOCK();
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is not installed\n");
 	return (0);
 }

Modified: stable/11/sys/netinet6/in6.c
==============================================================================
--- stable/11/sys/netinet6/in6.c	Tue Apr 11 17:36:19 2017	(r316713)
+++ stable/11/sys/netinet6/in6.c	Tue Apr 11 18:48:17 2017	(r316714)
@@ -647,6 +647,7 @@ in6_control(struct socket *so, u_long cm
 				}
 			}
 		}
+		nd6_prefix_rele(pr);
 
 		/*
 		 * this might affect the status of autoconfigured addresses,
@@ -694,8 +695,12 @@ aifaddr_out:
 		 */
 		pr = ia->ia6_ndpr;
 		in6_purgeaddr(&ia->ia_ifa);
-		if (pr && pr->ndpr_addrcnt == 0)
-			prelist_remove(pr);
+		if (pr != NULL && pr->ndpr_addrcnt == 0) {
+			ND6_WLOCK();
+			nd6_prefix_unlink(pr, NULL);
+			ND6_WUNLOCK();
+			nd6_prefix_del(pr);
+		}
 		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 		break;
 	}

Modified: stable/11/sys/netinet6/in6_ifattach.c
==============================================================================
--- stable/11/sys/netinet6/in6_ifattach.c	Tue Apr 11 17:36:19 2017	(r316713)
+++ stable/11/sys/netinet6/in6_ifattach.c	Tue Apr 11 18:48:17 2017	(r316714)
@@ -453,6 +453,7 @@ in6_ifattach_linklocal(struct ifnet *ifp
 	struct in6_ifaddr *ia;
 	struct in6_aliasreq ifra;
 	struct nd_prefixctl pr0;
+	struct nd_prefix *pr;
 	int error;
 
 	/*
@@ -535,10 +536,11 @@ in6_ifattach_linklocal(struct ifnet *ifp
 	 * address, and then reconfigure another one, the prefix is still
 	 * valid with referring to the old link-local address.
 	 */
-	if (nd6_prefix_lookup(&pr0) == NULL) {
+	if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
 		if ((error = nd6_prelist_add(&pr0, NULL, NULL)) != 0)
 			return (error);
-	}
+	} else
+		nd6_prefix_rele(pr);
 
 	return 0;
 }
@@ -778,15 +780,6 @@ _in6_ifdetach(struct ifnet *ifp, int pur
 		return;
 
 	/*
-	 * Remove neighbor management table.
-	 * Enabling the nd6_purge will panic on vmove for interfaces on VNET
-	 * teardown as the IPv6 layer is cleaned up already and the locks
-	 * are destroyed.
-	 */
-	if (purgeulp)
-		nd6_purge(ifp);
-
-	/*
 	 * nuke any of IPv6 addresses we have
 	 * XXX: all addresses should be already removed
 	 */
@@ -804,12 +797,10 @@ _in6_ifdetach(struct ifnet *ifp, int pur
 	in6_purgemaddrs(ifp);
 
 	/*
-	 * remove neighbor management table.  we call it twice just to make
-	 * sure we nuke everything.  maybe we need just one call.
-	 * XXX: since the first call did not release addresses, some prefixes
-	 * might remain.  We should call nd6_purge() again to release the
-	 * prefixes after removing all addresses above.
-	 * (Or can we just delay calling nd6_purge until at this point?)
+	 * Remove neighbor management table.
+	 * Enabling the nd6_purge will panic on vmove for interfaces on VNET
+	 * teardown as the IPv6 layer is cleaned up already and the locks
+	 * are destroyed.
 	 */
 	if (purgeulp)
 		nd6_purge(ifp);

Modified: stable/11/sys/netinet6/nd6.c
==============================================================================
--- stable/11/sys/netinet6/nd6.c	Tue Apr 11 17:36:19 2017	(r316713)
+++ stable/11/sys/netinet6/nd6.c	Tue Apr 11 18:48:17 2017	(r316714)
@@ -38,8 +38,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
@@ -47,7 +49,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/protosw.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
-#include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/queue.h>
 #include <sys/sdt.h>
@@ -118,6 +119,8 @@ static eventhandler_tag lle_event_eh, if
 VNET_DEFINE(struct nd_drhead, nd_defrouter);
 VNET_DEFINE(struct nd_prhead, nd_prefix);
 VNET_DEFINE(struct rwlock, nd6_lock);
+VNET_DEFINE(uint64_t, nd6_list_genid);
+VNET_DEFINE(struct mtx, nd6_onlink_mtx);
 
 VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
 #define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
@@ -209,11 +212,10 @@ void
 nd6_init(void)
 {
 
-	rw_init(&V_nd6_lock, "nd6");
+	mtx_init(&V_nd6_onlink_mtx, "nd6 onlink", NULL, MTX_DEF);
+	rw_init(&V_nd6_lock, "nd6 list");
 
 	LIST_INIT(&V_nd_prefix);
-
-	/* initialization of the default router list */
 	TAILQ_INIT(&V_nd_defrouter);
 
 	/* Start timers. */
@@ -245,6 +247,7 @@ nd6_destroy()
 		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
 	}
 	rw_destroy(&V_nd6_lock);
+	mtx_destroy(&V_nd6_onlink_mtx);
 }
 #endif
 
@@ -903,13 +906,15 @@ nd6_timer(void *arg)
 {
 	CURVNET_SET((struct vnet *) arg);
 	struct nd_drhead drq;
+	struct nd_prhead prl;
 	struct nd_defrouter *dr, *ndr;
 	struct nd_prefix *pr, *npr;
 	struct in6_ifaddr *ia6, *nia6;
+	uint64_t genid;
 
 	TAILQ_INIT(&drq);
+	LIST_INIT(&prl);
 
-	/* expire default router list */
 	ND6_WLOCK();
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr)
 		if (dr->expire && dr->expire < time_uptime)
@@ -1016,22 +1021,46 @@ nd6_timer(void *arg)
 		}
 	}
 
-	/* expire prefix list */
+	ND6_WLOCK();
+restart:
 	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
 		/*
-		 * check prefix lifetime.
-		 * since pltime is just for autoconf, pltime processing for
-		 * prefix is not necessary.
+		 * Expire prefixes. Since the pltime is only used for
+		 * autoconfigured addresses, pltime processing for prefixes is
+		 * not necessary.
+		 *
+		 * Only unlink after all derived addresses have expired. This
+		 * may not occur until two hours after the prefix has expired
+		 * per RFC 4862. If the prefix expires before its derived
+		 * addresses, mark it off-link. This will be done automatically
+		 * after unlinking if no address references remain.
 		 */
-		if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
-		    time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) {
+		if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME ||
+		    time_uptime - pr->ndpr_lastupdate <= pr->ndpr_vltime)
+			continue;
 
-			/*
-			 * address expiration and prefix expiration are
-			 * separate.  NEVER perform in6_purgeaddr here.
-			 */
-			prelist_remove(pr);
+		if (pr->ndpr_addrcnt == 0) {
+			nd6_prefix_unlink(pr, &prl);
+			continue;
 		}
+		if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
+			genid = V_nd6_list_genid;
+			nd6_prefix_ref(pr);
+			ND6_WUNLOCK();
+			ND6_ONLINK_LOCK();
+			(void)nd6_prefix_offlink(pr);
+			ND6_ONLINK_UNLOCK();
+			ND6_WLOCK();
+			nd6_prefix_rele(pr);
+			if (genid != V_nd6_list_genid)
+				goto restart;
+		}
+	}
+	ND6_WUNLOCK();
+
+	while ((pr = LIST_FIRST(&prl)) != NULL) {
+		LIST_REMOVE(pr, ndpr_entry);
+		nd6_prefix_del(pr);
 	}
 
 	callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
@@ -1118,10 +1147,12 @@ void
 nd6_purge(struct ifnet *ifp)
 {
 	struct nd_drhead drq;
+	struct nd_prhead prl;
 	struct nd_defrouter *dr, *ndr;
 	struct nd_prefix *pr, *npr;
 
 	TAILQ_INIT(&drq);
+	LIST_INIT(&prl);
 
 	/*
 	 * Nuke default router list entries toward ifp.
@@ -1136,33 +1167,31 @@ nd6_purge(struct ifnet *ifp)
 		if (dr->ifp == ifp)
 			defrouter_unlink(dr, &drq);
 	}
-
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
 		if (!dr->installed)
 			continue;
 		if (dr->ifp == ifp)
 			defrouter_unlink(dr, &drq);
 	}
+
+	/*
+	 * Remove prefixes on ifp. We should have already removed addresses on
+	 * this interface, so no addresses should be referencing these prefixes.
+	 */
+	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
+		if (pr->ndpr_ifp == ifp)
+			nd6_prefix_unlink(pr, &prl);
+	}
 	ND6_WUNLOCK();
 
+	/* Delete the unlinked router and prefix objects. */
 	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
 		TAILQ_REMOVE(&drq, dr, dr_entry);
 		defrouter_del(dr);
 	}
-
-	/* Nuke prefix list entries toward ifp */
-	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
-		if (pr->ndpr_ifp == ifp) {
-			/*
-			 * Because if_detach() does *not* release prefixes
-			 * while purging addresses the reference count will
-			 * still be above zero. We therefore reset it to
-			 * make sure that the prefix really gets purged.
-			 */
-			pr->ndpr_addrcnt = 0;
-
-			prelist_remove(pr);
-		}
+	while ((pr = LIST_FIRST(&prl)) != NULL) {
+		LIST_REMOVE(pr, ndpr_entry);
+		nd6_prefix_del(pr);
 	}
 
 	/* cancel default outgoing interface setting */
@@ -1228,7 +1257,8 @@ nd6_is_new_addr_neighbor(const struct so
 	struct rt_addrinfo info;
 	struct sockaddr_in6 rt_key;
 	const struct sockaddr *dst6;
-	int fibnum;
+	uint64_t genid;
+	int error, fibnum;
 
 	/*
 	 * A link-local address is always a neighbor.
@@ -1266,19 +1296,29 @@ nd6_is_new_addr_neighbor(const struct so
 	 * If the address matches one of our on-link prefixes, it should be a
 	 * neighbor.
 	 */
+	ND6_RLOCK();
+restart:
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		if (pr->ndpr_ifp != ifp)
 			continue;
 
-		if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
-
+		if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 			/* Always use the default FIB here. */
 			dst6 = (const struct sockaddr *)&pr->ndpr_prefix;
 
+			genid = V_nd6_list_genid;
+			ND6_RUNLOCK();
+
 			/* Restore length field before retrying lookup */
 			rt_key.sin6_len = sizeof(rt_key);
-			if (rib_lookup_info(fibnum, dst6, 0, 0, &info) != 0)
+			error = rib_lookup_info(fibnum, dst6, 0, 0, &info);
+
+			ND6_RLOCK();
+			if (genid != V_nd6_list_genid)
+				goto restart;
+			if (error != 0)
 				continue;
+
 			/*
 			 * This is the case where multiple interfaces
 			 * have the same prefix, but only one is installed 
@@ -1290,14 +1330,17 @@ nd6_is_new_addr_neighbor(const struct so
 			 * differ.
 			 */
 			if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
-			       &rt_key.sin6_addr))
+			    &rt_key.sin6_addr))
 				continue;
 		}
 
 		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
-		    &addr->sin6_addr, &pr->ndpr_mask))
+		    &addr->sin6_addr, &pr->ndpr_mask)) {
+			ND6_RUNLOCK();
 			return (1);
+		}
 	}
+	ND6_RUNLOCK();
 
 	/*
 	 * If the address is assigned on the node of the other side of
@@ -1728,15 +1771,22 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
 	case SIOCSPFXFLUSH_IN6:
 	{
 		/* flush all the prefix advertised by routers */
+		struct in6_ifaddr *ia, *ia_next;
 		struct nd_prefix *pr, *next;
+		struct nd_prhead prl;
 
-		LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
-			struct in6_ifaddr *ia, *ia_next;
+		LIST_INIT(&prl);
 
+		ND6_WLOCK();
+		LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue; /* XXX */
+			nd6_prefix_unlink(pr, &prl);
+		}
+		ND6_WUNLOCK();
 
-			/* do we really have to remove addresses as well? */
+		while ((pr = LIST_FIRST(&prl)) != NULL) {
+			LIST_REMOVE(pr, ndpr_entry);
 			/* XXXRW: in6_ifaddrhead locking. */
 			TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link,
 			    ia_next) {
@@ -1746,7 +1796,7 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
 				if (ia->ia6_ndpr == pr)
 					in6_purgeaddr(&ia->ia_ifa);
 			}
-			prelist_remove(pr);
+			nd6_prefix_del(pr);
 		}
 		break;
 	}
@@ -2690,9 +2740,10 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
 				    ip6_sprintf(ip6buf, &pfr->router->rtaddr));
 			error = SYSCTL_OUT(req, &s6, sizeof(s6));
 			if (error != 0)
-				break;
+				goto out;
 		}
 	}
+out:
 	ND6_RUNLOCK();
 	return (error);
 }

Modified: stable/11/sys/netinet6/nd6.h
==============================================================================
--- stable/11/sys/netinet6/nd6.h	Tue Apr 11 17:36:19 2017	(r316713)
+++ stable/11/sys/netinet6/nd6.h	Tue Apr 11 18:48:17 2017	(r316714)
@@ -256,7 +256,7 @@ struct nd_prefixctl {
 	struct prf_ra ndpr_flags;
 };
 
-
+LIST_HEAD(nd_prhead, nd_prefix);
 struct nd_prefix {
 	struct ifnet *ndpr_ifp;
 	LIST_ENTRY(nd_prefix) ndpr_entry;
@@ -276,6 +276,7 @@ struct nd_prefix {
 	LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs;
 	u_char	ndpr_plen;
 	int	ndpr_addrcnt;	/* count of derived addresses */
+	volatile u_int ndpr_refcnt;
 };
 
 #define ndpr_raf		ndpr_flags
@@ -314,8 +315,6 @@ struct nd_pfxrouter {
 	struct nd_defrouter *router;
 };
 
-LIST_HEAD(nd_prhead, nd_prefix);
-
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_IP6NDP);
 #endif
@@ -346,17 +345,30 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861)
 
 /* Lock for the prefix and default router lists. */
 VNET_DECLARE(struct rwlock, nd6_lock);
+VNET_DECLARE(uint64_t, nd6_list_genid);
 #define	V_nd6_lock			VNET(nd6_lock)
+#define	V_nd6_list_genid		VNET(nd6_list_genid)
 
 #define	ND6_RLOCK()			rw_rlock(&V_nd6_lock)
 #define	ND6_RUNLOCK()			rw_runlock(&V_nd6_lock)
 #define	ND6_WLOCK()			rw_wlock(&V_nd6_lock)
 #define	ND6_WUNLOCK()			rw_wunlock(&V_nd6_lock)
+#define	ND6_TRY_UPGRADE()		rw_try_upgrade(&V_nd6_lock)
 #define	ND6_WLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_WLOCKED)
 #define	ND6_RLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_RLOCKED)
 #define	ND6_LOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_LOCKED)
 #define	ND6_UNLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_UNLOCKED)
 
+/* Mutex for prefix onlink/offlink transitions. */
+VNET_DECLARE(struct mtx, nd6_onlink_mtx);
+#define	V_nd6_onlink_mtx		VNET(nd6_onlink_mtx)
+
+#define	ND6_ONLINK_LOCK()		mtx_lock(&V_nd6_onlink_mtx)
+#define	ND6_ONLINK_TRYLOCK()		mtx_trylock(&V_nd6_onlink_mtx)
+#define	ND6_ONLINK_UNLOCK()		mtx_unlock(&V_nd6_onlink_mtx)
+#define	ND6_ONLINK_LOCK_ASSERT()	mtx_assert(&V_nd6_onlink_mtx, MA_OWNED)
+#define	ND6_ONLINK_UNLOCK_ASSERT()	mtx_assert(&V_nd6_onlink_mtx, MA_NOTOWNED)
+
 #define nd6log(x)	do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
 
 /* nd6_rtr.c */
@@ -463,9 +475,14 @@ void defrouter_rele(struct nd_defrouter 
 bool defrouter_remove(struct in6_addr *, struct ifnet *);
 void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *);
 void defrouter_del(struct nd_defrouter *);
-void prelist_remove(struct nd_prefix *);
 int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *,
-	struct nd_prefix **);
+    struct nd_prefix **);
+void nd6_prefix_unlink(struct nd_prefix *, struct nd_prhead *);
+void nd6_prefix_del(struct nd_prefix *);
+void nd6_prefix_ref(struct nd_prefix *);
+void nd6_prefix_rele(struct nd_prefix *);
+int nd6_prefix_onlink(struct nd_prefix *);
+int nd6_prefix_offlink(struct nd_prefix *);
 void pfxlist_onlink_check(void);
 struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
 struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *);

Modified: stable/11/sys/netinet6/nd6_rtr.c
==============================================================================
--- stable/11/sys/netinet6/nd6_rtr.c	Tue Apr 11 17:36:19 2017	(r316713)
+++ stable/11/sys/netinet6/nd6_rtr.c	Tue Apr 11 18:48:17 2017	(r316714)
@@ -87,9 +87,6 @@ static int in6_init_prefix_ltimes(struct
 static void in6_init_address_ltimes(struct nd_prefix *,
     struct in6_addrlifetime *);
 
-static int nd6_prefix_onlink(struct nd_prefix *);
-static int nd6_prefix_offlink(struct nd_prefix *);
-
 static int rt6_deleteroute(const struct rtentry *, void *);
 
 VNET_DECLARE(int, nd6_recalc_reachtm_interval);
@@ -661,6 +658,7 @@ defrouter_unlink(struct nd_defrouter *dr
 
 	ND6_WLOCK_ASSERT();
 	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+	V_nd6_list_genid++;
 	if (drq != NULL)
 		TAILQ_INSERT_TAIL(drq, dr, dr_entry);
 }
@@ -670,6 +668,7 @@ defrouter_del(struct nd_defrouter *dr)
 {
 	struct nd_defrouter *deldr = NULL;
 	struct nd_prefix *pr;
+	struct nd_pfxrouter *pfxrtr;
 
 	ND6_UNLOCK_ASSERT();
 
@@ -688,11 +687,13 @@ defrouter_del(struct nd_defrouter *dr)
 	/*
 	 * Also delete all the pointers to the router in each prefix lists.
 	 */
+	ND6_WLOCK();
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
-		struct nd_pfxrouter *pfxrtr;
 		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
 			pfxrtr_del(pfxrtr);
 	}
+	ND6_WUNLOCK();
+
 	pfxlist_onlink_check();
 
 	/*
@@ -852,14 +853,18 @@ static struct nd_defrouter *
 defrtrlist_update(struct nd_defrouter *new)
 {
 	struct nd_defrouter *dr, *n;
+	uint64_t genid;
 	int oldpref;
+	bool writelocked;
 
 	if (new->rtlifetime == 0) {
 		defrouter_remove(&new->rtaddr, new->ifp);
 		return (NULL);
 	}
 
-	ND6_WLOCK();
+	ND6_RLOCK();
+	writelocked = false;
+restart:
 	dr = defrouter_lookup_locked(&new->rtaddr, new->ifp);
 	if (dr != NULL) {
 		oldpref = rtpref(dr);
@@ -875,10 +880,32 @@ defrtrlist_update(struct nd_defrouter *n
 		 * router is still installed in the kernel.
 		 */
 		if (dr->installed && rtpref(new) == oldpref) {
-			ND6_WUNLOCK();
+			if (writelocked)
+				ND6_WUNLOCK();
+			else
+				ND6_RUNLOCK();
 			return (dr);
 		}
+	}
 
+	/*
+	 * The router needs to be reinserted into the default router
+	 * list, so upgrade to a write lock. If that fails and the list
+	 * has potentially changed while the lock was dropped, we'll
+	 * redo the lookup with the write lock held.
+	 */
+	if (!writelocked) {
+		writelocked = true;
+		if (!ND6_TRY_UPGRADE()) {
+			genid = V_nd6_list_genid;
+			ND6_RUNLOCK();
+			ND6_WLOCK();
+			if (genid != V_nd6_list_genid)
+				goto restart;
+		}
+	}
+
+	if (dr != NULL) {
 		/*
 		 * The preferred router may have changed, so relocate this
 		 * router.
@@ -912,6 +939,7 @@ defrtrlist_update(struct nd_defrouter *n
 		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
 	else
 		TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
+	V_nd6_list_genid++;
 	ND6_WUNLOCK();
 
 	defrouter_select();
@@ -924,11 +952,12 @@ pfxrtr_lookup(struct nd_prefix *pr, stru
 {
 	struct nd_pfxrouter *search;
 
+	ND6_LOCK_ASSERT();
+
 	LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) {
 		if (search->router == dr)
 			break;
 	}
-
 	return (search);
 }
 
@@ -936,55 +965,110 @@ static void
 pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
 {
 	struct nd_pfxrouter *new;
+	bool update;
+
+	ND6_UNLOCK_ASSERT();
+
+	ND6_RLOCK();
+	if (pfxrtr_lookup(pr, dr) != NULL) {
+		ND6_RUNLOCK();
+		return;
+	}
+	ND6_RUNLOCK();
 
 	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
 	if (new == NULL)
 		return;
-	new->router = dr;
 	defrouter_ref(dr);
+	new->router = dr;
 
-	LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
+	ND6_WLOCK();
+	if (pfxrtr_lookup(pr, dr) == NULL) {
+		LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
+		update = true;
+	} else {
+		/* We lost a race to add the reference. */
+		defrouter_rele(dr);
+		free(new, M_IP6NDP);
+		update = false;
+	}
+	ND6_WUNLOCK();
 
-	pfxlist_onlink_check();
+	if (update)
+		pfxlist_onlink_check();
 }
 
 static void
 pfxrtr_del(struct nd_pfxrouter *pfr)
 {
 
+	ND6_WLOCK_ASSERT();
+
 	LIST_REMOVE(pfr, pfr_entry);
 	defrouter_rele(pfr->router);
 	free(pfr, M_IP6NDP);
 }
 
-struct nd_prefix *
-nd6_prefix_lookup(struct nd_prefixctl *key)
+static struct nd_prefix *
+nd6_prefix_lookup_locked(struct nd_prefixctl *key)
 {
 	struct nd_prefix *search;
 
+	ND6_LOCK_ASSERT();
+
 	LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) {
 		if (key->ndpr_ifp == search->ndpr_ifp &&
 		    key->ndpr_plen == search->ndpr_plen &&
 		    in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
 		    &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) {
+			nd6_prefix_ref(search);
 			break;
 		}
 	}
+	return (search);
+}
 
+struct nd_prefix *
+nd6_prefix_lookup(struct nd_prefixctl *key)
+{
+	struct nd_prefix *search;
+
+	ND6_RLOCK();
+	search = nd6_prefix_lookup_locked(key);
+	ND6_RUNLOCK();
 	return (search);
 }
 
+void
+nd6_prefix_ref(struct nd_prefix *pr)
+{
+
+	refcount_acquire(&pr->ndpr_refcnt);
+}
+
+void
+nd6_prefix_rele(struct nd_prefix *pr)
+{
+
+	if (refcount_release(&pr->ndpr_refcnt)) {
+		KASSERT(LIST_EMPTY(&pr->ndpr_advrtrs),
+		    ("prefix %p has advertising routers", pr));
+		free(pr, M_IP6NDP);
+	}
+}
+
 int
 nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
     struct nd_prefix **newp)
 {
-	struct nd_prefix *new = NULL;
-	int error = 0;
+	struct nd_prefix *new;
 	char ip6buf[INET6_ADDRSTRLEN];
+	int error;
 
 	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
 	if (new == NULL)
 		return (ENOMEM);
+	refcount_init(&new->ndpr_refcnt, newp != NULL ? 2 : 1);
 	new->ndpr_ifp = pr->ndpr_ifp;
 	new->ndpr_prefix = pr->ndpr_prefix;
 	new->ndpr_plen = pr->ndpr_plen;
@@ -1003,20 +1087,22 @@ nd6_prelist_add(struct nd_prefixctl *pr,
 	/* make prefix in the canonical form */
 	IN6_MASK_ADDR(&new->ndpr_prefix.sin6_addr, &new->ndpr_mask);
 
-	/* link ndpr_entry to nd_prefix list */
+	ND6_WLOCK();
 	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
+	V_nd6_list_genid++;
+	ND6_WUNLOCK();
 
 	/* ND_OPT_PI_FLAG_ONLINK processing */
 	if (new->ndpr_raf_onlink) {
-		int e;
-
-		if ((e = nd6_prefix_onlink(new)) != 0) {
+		ND6_ONLINK_LOCK();
+		if ((error = nd6_prefix_onlink(new)) != 0) {
 			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
 			    "the prefix %s/%d on-link on %s (errno=%d)\n",
 			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
-			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
+			    pr->ndpr_plen, if_name(pr->ndpr_ifp), error));
 			/* proceed anyway. XXX: is it correct? */
 		}
+		ND6_ONLINK_UNLOCK();
 	}
 
 	if (dr != NULL)
@@ -1026,51 +1112,67 @@ nd6_prelist_add(struct nd_prefixctl *pr,
 	return (0);
 }
 
+/*
+ * Remove a prefix from the prefix list and optionally stash it in a
+ * caller-provided list.
+ *
+ * The ND6 lock must be held.
+ */
 void
-prelist_remove(struct nd_prefix *pr)
+nd6_prefix_unlink(struct nd_prefix *pr, struct nd_prhead *list)
+{
+
+	ND6_WLOCK_ASSERT();
+
+	LIST_REMOVE(pr, ndpr_entry);
+	V_nd6_list_genid++;
+	if (list != NULL)
+		LIST_INSERT_HEAD(list, pr, ndpr_entry);
+}
+
+/*
+ * Free an unlinked prefix, first marking it off-link if necessary.
+ */
+void
+nd6_prefix_del(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *pfr, *next;
 	int e;
 	char ip6buf[INET6_ADDRSTRLEN];
 
-	/* make sure to invalidate the prefix until it is really freed. */
-	pr->ndpr_vltime = 0;
-	pr->ndpr_pltime = 0;
+	KASSERT(pr->ndpr_addrcnt == 0,
+	    ("prefix %p has referencing addresses", pr));
+	ND6_UNLOCK_ASSERT();
 
 	/*
 	 * Though these flags are now meaningless, we'd rather keep the value
 	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
 	 * when executing "ndp -p".
 	 */
-
-	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
-	    (e = nd6_prefix_offlink(pr)) != 0) {
-		nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
-		    "on %s, errno=%d\n",
-		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
-		    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
-		/* what should we do? */
+	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
+		ND6_ONLINK_LOCK();
+		if ((e = nd6_prefix_offlink(pr)) != 0) {
+			nd6log((LOG_ERR,
+			    "nd6_prefix_del: failed to make %s/%d offlink "
+			    "on %s, errno=%d\n",
+			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
+			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
+			/* what should we do? */
+		}
+		ND6_ONLINK_UNLOCK();
 	}
 
-	if (pr->ndpr_addrcnt > 0)
-		return;		/* notice here? */
-
-	/* unlink ndpr_entry from nd_prefix list */
-	LIST_REMOVE(pr, ndpr_entry);
-
-	/* free list of routers that advertised the prefix */
-	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) {
+	/* Release references to routers that have advertised this prefix. */
+	ND6_WLOCK();
+	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next)
 		pfxrtr_del(pfr);
-	}
-	free(pr, M_IP6NDP);
+	ND6_WUNLOCK();
+
+	nd6_prefix_rele(pr);
 
 	pfxlist_onlink_check();
 }
 
-/*
- * dr - may be NULL
- */
-
 static int
 prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
     struct mbuf *m, int mcast)
@@ -1120,21 +1222,22 @@ prelist_update(struct nd_prefixctl *new,
 
 		if (new->ndpr_raf_onlink &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
-			int e;
-
-			if ((e = nd6_prefix_onlink(pr)) != 0) {
+			ND6_ONLINK_LOCK();
+			if ((error = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "prelist_update: failed to make "
 				    "the prefix %s/%d on-link on %s "
 				    "(errno=%d)\n",
 				    ip6_sprintf(ip6buf,
-					    &pr->ndpr_prefix.sin6_addr),
-				    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
+				        &pr->ndpr_prefix.sin6_addr),
+				    pr->ndpr_plen, if_name(pr->ndpr_ifp),
+				    error));
 				/* proceed anyway. XXX: is it correct? */
 			}
+			ND6_ONLINK_UNLOCK();
 		}
 
-		if (dr && pfxrtr_lookup(pr, dr) == NULL)
+		if (dr != NULL)
 			pfxrtr_add(pr, dr);
 	} else {
 		if (new->ndpr_vltime == 0)
@@ -1393,8 +1496,10 @@ prelist_update(struct nd_prefixctl *new,
 		}
 	}
 
- end:
-	return error;
+end:
+	if (pr != NULL)
+		nd6_prefix_rele(pr);
+	return (error);
 }
 
 /*
@@ -1409,6 +1514,8 @@ find_pfxlist_reachable_router(struct nd_
 	struct llentry *ln;
 	int canreach;
 
+	ND6_LOCK_ASSERT();
+
 	LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) {
 		IF_AFDATA_RLOCK(pfxrtr->router->ifp);
 		ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
@@ -1444,6 +1551,11 @@ pfxlist_onlink_check(void)
 	struct nd_defrouter *dr;
 	struct nd_pfxrouter *pfxrtr = NULL;
 	struct rm_priotracker in6_ifa_tracker;
+	uint64_t genid;
+	uint32_t flags;
+
+	ND6_ONLINK_LOCK();
+	ND6_RLOCK();
 
 	/*
 	 * Check if there is a prefix that has a reachable advertising
@@ -1459,7 +1571,6 @@ pfxlist_onlink_check(void)
 	 * that does not advertise any prefixes.
 	 */
 	if (pr == NULL) {
-		ND6_RLOCK();
 		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 			struct nd_prefix *pr0;
 
@@ -1470,7 +1581,6 @@ pfxlist_onlink_check(void)
 			if (pfxrtr != NULL)
 				break;
 		}
-		ND6_RUNLOCK();
 	}
 	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
 		/*
@@ -1515,6 +1625,7 @@ pfxlist_onlink_check(void)
 	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
 	 * so we don't have to care about them.
 	 */
+restart:
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		char ip6buf[INET6_ADDRSTRLEN];
 		int e;
@@ -1524,21 +1635,20 @@ pfxlist_onlink_check(void)
 		    pr->ndpr_raf_auto == 0)
 			continue;
 
-		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
-		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
-			if ((e = nd6_prefix_offlink(pr)) != 0) {
+		flags = pr->ndpr_stateflags & (NDPRF_DETACHED | NDPRF_ONLINK);
+		if (flags == 0 || flags == (NDPRF_DETACHED | NDPRF_ONLINK)) {
+			genid = V_nd6_list_genid;
+			ND6_RUNLOCK();
+			if ((flags & NDPRF_ONLINK) != 0 &&
+			    (e = nd6_prefix_offlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d offlink, errno=%d\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 					    pr->ndpr_plen, e));
-			}
-		}
-		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
-		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
-		    pr->ndpr_raf_onlink) {
-			if ((e = nd6_prefix_onlink(pr)) != 0) {
+			} else if ((flags & NDPRF_ONLINK) == 0 &&
+			    (e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d onlink, errno=%d\n",
@@ -1546,6 +1656,9 @@ pfxlist_onlink_check(void)
 					    &pr->ndpr_prefix.sin6_addr),
 					    pr->ndpr_plen, e));
 			}
+			ND6_RLOCK();
+			if (genid != V_nd6_list_genid)
+				goto restart;
 		}
 	}
 
@@ -1606,6 +1719,8 @@ pfxlist_onlink_check(void)
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
+	ND6_RUNLOCK();
+	ND6_ONLINK_UNLOCK();
 }
 
 static int
@@ -1686,23 +1801,20 @@ nd6_prefix_onlink_rtrequest(struct nd_pr
 	return (a_failure);
 }
 
-static int
+int
 nd6_prefix_onlink(struct nd_prefix *pr)
 {
 	struct ifaddr *ifa;
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct nd_prefix *opr;
-	int error = 0;
 	char ip6buf[INET6_ADDRSTRLEN];
+	int error;
 
-	/* sanity check */
-	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
-		nd6log((LOG_ERR,
-		    "nd6_prefix_onlink: %s/%d is already on-link\n",
-		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list