svn commit: r306829 - in head/sys: netinet netinet6

Mark Johnston markj at FreeBSD.org
Fri Oct 7 21:10:55 UTC 2016


Author: markj
Date: Fri Oct  7 21:10:53 2016
New Revision: 306829
URL: https://svnweb.freebsd.org/changeset/base/306829

Log:
  Lock the ND prefix list and add refcounting for prefixes.
  
  This change extends the nd6 lock to protect the ND prefix list as well
  as the list of advertising routers associated with each prefix. To handle
  cases where the nd6 lock must be dropped while iterating over either the
  prefix or default router lists, a generation counter is used to track
  modifications to the lists. Additionally, a new mutex is used to serialize
  prefix on-link/off-link transitions. This mutex must be acquired before
  the nd6 lock and is held while updating the routing table in
  nd6_prefix_onlink() and nd6_prefix_offlink().
  
  Reviewed by:	ae, tuexen (SCTP bits)
  Tested by:	Jason Wolfe <jason at llnw.com>,
  		Larry Rosenman <ler at lerctr.org>
  MFC after:	2 months
  Differential Revision:	https://reviews.freebsd.org/D8125

Modified:
  head/sys/netinet/sctp_output.c
  head/sys/netinet6/in6.c
  head/sys/netinet6/in6_ifattach.c
  head/sys/netinet6/nd6.c
  head/sys/netinet6/nd6.h
  head/sys/netinet6/nd6_rtr.c

Modified: head/sys/netinet/sctp_output.c
==============================================================================
--- head/sys/netinet/sctp_output.c	Fri Oct  7 21:03:18 2016	(r306828)
+++ head/sys/netinet/sctp_output.c	Fri Oct  7 21:10:53 2016	(r306829)
@@ -13747,6 +13747,7 @@ sctp_v6src_match_nexthop(struct sockaddr
 		return (0);
 
 	/* get prefix entry of address */
+	ND6_RLOCK();
 	LIST_FOREACH(pfx, &MODULE_GLOBAL(nd_prefix), ndpr_entry) {
 		if (pfx->ndpr_stateflags & NDPRF_DETACHED)
 			continue;
@@ -13756,6 +13757,7 @@ sctp_v6src_match_nexthop(struct sockaddr
 	}
 	/* no prefix entry in the prefix list */
 	if (pfx == NULL) {
+		ND6_RUNLOCK();
 		SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefix entry for ");
 		SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
 		return (0);
@@ -13777,9 +13779,11 @@ sctp_v6src_match_nexthop(struct sockaddr
 		if (sctp_cmpaddr((struct sockaddr *)&gw6,
 		    ro->ro_rt->rt_gateway)) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n");
+			ND6_RUNLOCK();
 			return (1);
 		}
 	}
+	ND6_RUNLOCK();
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is not installed\n");
 	return (0);
 }

Modified: head/sys/netinet6/in6.c
==============================================================================
--- head/sys/netinet6/in6.c	Fri Oct  7 21:03:18 2016	(r306828)
+++ head/sys/netinet6/in6.c	Fri Oct  7 21:10:53 2016	(r306829)
@@ -647,6 +647,7 @@ in6_control(struct socket *so, u_long cm
 				}
 			}
 		}
+		nd6_prefix_rele(pr);
 
 		/*
 		 * this might affect the status of autoconfigured addresses,
@@ -694,8 +695,12 @@ aifaddr_out:
 		 */
 		pr = ia->ia6_ndpr;
 		in6_purgeaddr(&ia->ia_ifa);
-		if (pr && pr->ndpr_addrcnt == 0)
-			prelist_remove(pr);
+		if (pr != NULL && pr->ndpr_addrcnt == 0) {
+			ND6_WLOCK();
+			nd6_prefix_unlink(pr, NULL);
+			ND6_WUNLOCK();
+			nd6_prefix_del(pr);
+		}
 		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 		break;
 	}

Modified: head/sys/netinet6/in6_ifattach.c
==============================================================================
--- head/sys/netinet6/in6_ifattach.c	Fri Oct  7 21:03:18 2016	(r306828)
+++ head/sys/netinet6/in6_ifattach.c	Fri Oct  7 21:10:53 2016	(r306829)
@@ -453,6 +453,7 @@ in6_ifattach_linklocal(struct ifnet *ifp
 	struct in6_ifaddr *ia;
 	struct in6_aliasreq ifra;
 	struct nd_prefixctl pr0;
+	struct nd_prefix *pr;
 	int error;
 
 	/*
@@ -535,10 +536,11 @@ in6_ifattach_linklocal(struct ifnet *ifp
 	 * address, and then reconfigure another one, the prefix is still
 	 * valid with referring to the old link-local address.
 	 */
-	if (nd6_prefix_lookup(&pr0) == NULL) {
+	if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
 		if ((error = nd6_prelist_add(&pr0, NULL, NULL)) != 0)
 			return (error);
-	}
+	} else
+		nd6_prefix_rele(pr);
 
 	return 0;
 }
@@ -778,15 +780,6 @@ _in6_ifdetach(struct ifnet *ifp, int pur
 		return;
 
 	/*
-	 * Remove neighbor management table.
-	 * Enabling the nd6_purge will panic on vmove for interfaces on VNET
-	 * teardown as the IPv6 layer is cleaned up already and the locks
-	 * are destroyed.
-	 */
-	if (purgeulp)
-		nd6_purge(ifp);
-
-	/*
 	 * nuke any of IPv6 addresses we have
 	 * XXX: all addresses should be already removed
 	 */
@@ -804,12 +797,10 @@ _in6_ifdetach(struct ifnet *ifp, int pur
 	in6_purgemaddrs(ifp);
 
 	/*
-	 * remove neighbor management table.  we call it twice just to make
-	 * sure we nuke everything.  maybe we need just one call.
-	 * XXX: since the first call did not release addresses, some prefixes
-	 * might remain.  We should call nd6_purge() again to release the
-	 * prefixes after removing all addresses above.
-	 * (Or can we just delay calling nd6_purge until at this point?)
+	 * Remove neighbor management table.
+	 * Enabling the nd6_purge will panic on vmove for interfaces on VNET
+	 * teardown as the IPv6 layer is cleaned up already and the locks
+	 * are destroyed.
 	 */
 	if (purgeulp)
 		nd6_purge(ifp);

Modified: head/sys/netinet6/nd6.c
==============================================================================
--- head/sys/netinet6/nd6.c	Fri Oct  7 21:03:18 2016	(r306828)
+++ head/sys/netinet6/nd6.c	Fri Oct  7 21:10:53 2016	(r306829)
@@ -38,8 +38,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
@@ -47,7 +49,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/protosw.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
-#include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/queue.h>
 #include <sys/sdt.h>
@@ -118,6 +119,8 @@ static eventhandler_tag lle_event_eh, if
 VNET_DEFINE(struct nd_drhead, nd_defrouter);
 VNET_DEFINE(struct nd_prhead, nd_prefix);
 VNET_DEFINE(struct rwlock, nd6_lock);
+VNET_DEFINE(uint64_t, nd6_list_genid);
+VNET_DEFINE(struct mtx, nd6_onlink_mtx);
 
 VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
 #define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
@@ -209,11 +212,10 @@ void
 nd6_init(void)
 {
 
-	rw_init(&V_nd6_lock, "nd6");
+	mtx_init(&V_nd6_onlink_mtx, "nd6 onlink", NULL, MTX_DEF);
+	rw_init(&V_nd6_lock, "nd6 list");
 
 	LIST_INIT(&V_nd_prefix);
-
-	/* initialization of the default router list */
 	TAILQ_INIT(&V_nd_defrouter);
 
 	/* Start timers. */
@@ -245,6 +247,7 @@ nd6_destroy()
 		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
 	}
 	rw_destroy(&V_nd6_lock);
+	mtx_destroy(&V_nd6_onlink_mtx);
 }
 #endif
 
@@ -903,13 +906,15 @@ nd6_timer(void *arg)
 {
 	CURVNET_SET((struct vnet *) arg);
 	struct nd_drhead drq;
+	struct nd_prhead prl;
 	struct nd_defrouter *dr, *ndr;
 	struct nd_prefix *pr, *npr;
 	struct in6_ifaddr *ia6, *nia6;
+	bool onlink_locked;
 
 	TAILQ_INIT(&drq);
+	LIST_INIT(&prl);
 
-	/* expire default router list */
 	ND6_WLOCK();
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr)
 		if (dr->expire && dr->expire < time_uptime)
@@ -1016,23 +1021,51 @@ nd6_timer(void *arg)
 		}
 	}
 
-	/* expire prefix list */
+	ND6_WLOCK();
+	onlink_locked = false;
+restart:
 	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
 		/*
-		 * check prefix lifetime.
-		 * since pltime is just for autoconf, pltime processing for
-		 * prefix is not necessary.
+		 * Expire prefixes. Since the pltime is only used for
+		 * autoconfigured addresses, pltime processing for prefixes is
+		 * not necessary.
+		 *
+		 * Only unlink after all derived addresses have expired. This
+		 * may not occur until two hours after the prefix has expired
+		 * per RFC 4862. If the prefix expires before its derived
+		 * addresses, mark it off-link. This will be done automatically
+		 * after unlinking if no address references remain.
 		 */
-		if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
-		    time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) {
+		if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME ||
+		    time_uptime - pr->ndpr_lastupdate <= pr->ndpr_vltime)
+			continue;
 
-			/*
-			 * address expiration and prefix expiration are
-			 * separate.  NEVER perform in6_purgeaddr here.
-			 */
-			prelist_remove(pr);
+		if (pr->ndpr_addrcnt == 0) {
+			nd6_prefix_unlink(pr, &prl);
+			continue;
+		}
+		if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
+			if (!onlink_locked) {
+				onlink_locked = ND6_ONLINK_TRYLOCK();
+				if (!onlink_locked) {
+					ND6_WUNLOCK();
+					ND6_ONLINK_LOCK();
+					onlink_locked = true;
+					ND6_WLOCK();
+					goto restart;
+				}
+			}
+			(void)nd6_prefix_offlink(pr);
 		}
 	}
+	ND6_WUNLOCK();
+	if (onlink_locked)
+		ND6_ONLINK_UNLOCK();
+
+	while ((pr = LIST_FIRST(&prl)) != NULL) {
+		LIST_REMOVE(pr, ndpr_entry);
+		nd6_prefix_del(pr);
+	}
 
 	callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
 	    nd6_timer, curvnet);
@@ -1118,10 +1151,12 @@ void
 nd6_purge(struct ifnet *ifp)
 {
 	struct nd_drhead drq;
+	struct nd_prhead prl;
 	struct nd_defrouter *dr, *ndr;
 	struct nd_prefix *pr, *npr;
 
 	TAILQ_INIT(&drq);
+	LIST_INIT(&prl);
 
 	/*
 	 * Nuke default router list entries toward ifp.
@@ -1136,33 +1171,31 @@ nd6_purge(struct ifnet *ifp)
 		if (dr->ifp == ifp)
 			defrouter_unlink(dr, &drq);
 	}
-
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
 		if (!dr->installed)
 			continue;
 		if (dr->ifp == ifp)
 			defrouter_unlink(dr, &drq);
 	}
+
+	/*
+	 * Remove prefixes on ifp. We should have already removed addresses on
+	 * this interface, so no addresses should be referencing these prefixes.
+	 */
+	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
+		if (pr->ndpr_ifp == ifp)
+			nd6_prefix_unlink(pr, &prl);
+	}
 	ND6_WUNLOCK();
 
+	/* Delete the unlinked router and prefix objects. */
 	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
 		TAILQ_REMOVE(&drq, dr, dr_entry);
 		defrouter_del(dr);
 	}
-
-	/* Nuke prefix list entries toward ifp */
-	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
-		if (pr->ndpr_ifp == ifp) {
-			/*
-			 * Because if_detach() does *not* release prefixes
-			 * while purging addresses the reference count will
-			 * still be above zero. We therefore reset it to
-			 * make sure that the prefix really gets purged.
-			 */
-			pr->ndpr_addrcnt = 0;
-
-			prelist_remove(pr);
-		}
+	while ((pr = LIST_FIRST(&prl)) != NULL) {
+		LIST_REMOVE(pr, ndpr_entry);
+		nd6_prefix_del(pr);
 	}
 
 	/* cancel default outgoing interface setting */
@@ -1228,7 +1261,8 @@ nd6_is_new_addr_neighbor(const struct so
 	struct rt_addrinfo info;
 	struct sockaddr_in6 rt_key;
 	const struct sockaddr *dst6;
-	int fibnum;
+	uint64_t genid;
+	int error, fibnum;
 
 	/*
 	 * A link-local address is always a neighbor.
@@ -1266,19 +1300,29 @@ nd6_is_new_addr_neighbor(const struct so
 	 * If the address matches one of our on-link prefixes, it should be a
 	 * neighbor.
 	 */
+	ND6_RLOCK();
+restart:
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		if (pr->ndpr_ifp != ifp)
 			continue;
 
-		if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
-
+		if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 			/* Always use the default FIB here. */
 			dst6 = (const struct sockaddr *)&pr->ndpr_prefix;
 
+			genid = V_nd6_list_genid;
+			ND6_RUNLOCK();
+
 			/* Restore length field before retrying lookup */
 			rt_key.sin6_len = sizeof(rt_key);
-			if (rib_lookup_info(fibnum, dst6, 0, 0, &info) != 0)
+			error = rib_lookup_info(fibnum, dst6, 0, 0, &info);
+
+			ND6_RLOCK();
+			if (genid != V_nd6_list_genid)
+				goto restart;
+			if (error != 0)
 				continue;
+
 			/*
 			 * This is the case where multiple interfaces
 			 * have the same prefix, but only one is installed 
@@ -1290,14 +1334,17 @@ nd6_is_new_addr_neighbor(const struct so
 			 * differ.
 			 */
 			if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
-			       &rt_key.sin6_addr))
+			    &rt_key.sin6_addr))
 				continue;
 		}
 
 		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
-		    &addr->sin6_addr, &pr->ndpr_mask))
+		    &addr->sin6_addr, &pr->ndpr_mask)) {
+			ND6_RUNLOCK();
 			return (1);
+		}
 	}
+	ND6_RUNLOCK();
 
 	/*
 	 * If the address is assigned on the node of the other side of
@@ -1728,15 +1775,22 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
 	case SIOCSPFXFLUSH_IN6:
 	{
 		/* flush all the prefix advertised by routers */
+		struct in6_ifaddr *ia, *ia_next;
 		struct nd_prefix *pr, *next;
+		struct nd_prhead prl;
 
-		LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
-			struct in6_ifaddr *ia, *ia_next;
+		LIST_INIT(&prl);
 
+		ND6_WLOCK();
+		LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue; /* XXX */
+			nd6_prefix_unlink(pr, &prl);
+		}
+		ND6_WUNLOCK();
 
-			/* do we really have to remove addresses as well? */
+		while ((pr = LIST_FIRST(&prl)) != NULL) {
+			LIST_REMOVE(pr, ndpr_entry);
 			/* XXXRW: in6_ifaddrhead locking. */
 			TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link,
 			    ia_next) {
@@ -1746,7 +1800,7 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
 				if (ia->ia6_ndpr == pr)
 					in6_purgeaddr(&ia->ia_ifa);
 			}
-			prelist_remove(pr);
+			nd6_prefix_del(pr);
 		}
 		break;
 	}
@@ -2690,9 +2744,10 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
 				    ip6_sprintf(ip6buf, &pfr->router->rtaddr));
 			error = SYSCTL_OUT(req, &s6, sizeof(s6));
 			if (error != 0)
-				break;
+				goto out;
 		}
 	}
+out:
 	ND6_RUNLOCK();
 	return (error);
 }

Modified: head/sys/netinet6/nd6.h
==============================================================================
--- head/sys/netinet6/nd6.h	Fri Oct  7 21:03:18 2016	(r306828)
+++ head/sys/netinet6/nd6.h	Fri Oct  7 21:10:53 2016	(r306829)
@@ -256,7 +256,7 @@ struct nd_prefixctl {
 	struct prf_ra ndpr_flags;
 };
 
-
+LIST_HEAD(nd_prhead, nd_prefix);
 struct nd_prefix {
 	struct ifnet *ndpr_ifp;
 	LIST_ENTRY(nd_prefix) ndpr_entry;
@@ -276,6 +276,7 @@ struct nd_prefix {
 	LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs;
 	u_char	ndpr_plen;
 	int	ndpr_addrcnt;	/* count of derived addresses */
+	volatile u_int ndpr_refcnt;
 };
 
 #define ndpr_raf		ndpr_flags
@@ -314,8 +315,6 @@ struct nd_pfxrouter {
 	struct nd_defrouter *router;
 };
 
-LIST_HEAD(nd_prhead, nd_prefix);
-
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_IP6NDP);
 #endif
@@ -346,17 +345,30 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861)
 
 /* Lock for the prefix and default router lists. */
 VNET_DECLARE(struct rwlock, nd6_lock);
+VNET_DECLARE(uint64_t, nd6_list_genid);
 #define	V_nd6_lock			VNET(nd6_lock)
+#define	V_nd6_list_genid		VNET(nd6_list_genid)
 
 #define	ND6_RLOCK()			rw_rlock(&V_nd6_lock)
 #define	ND6_RUNLOCK()			rw_runlock(&V_nd6_lock)
 #define	ND6_WLOCK()			rw_wlock(&V_nd6_lock)
 #define	ND6_WUNLOCK()			rw_wunlock(&V_nd6_lock)
+#define	ND6_TRY_UPGRADE()		rw_try_upgrade(&V_nd6_lock)
 #define	ND6_WLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_WLOCKED)
 #define	ND6_RLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_RLOCKED)
 #define	ND6_LOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_LOCKED)
 #define	ND6_UNLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_UNLOCKED)
 
+/* Mutex for prefix onlink/offlink transitions. */
+VNET_DECLARE(struct mtx, nd6_onlink_mtx);
+#define	V_nd6_onlink_mtx		VNET(nd6_onlink_mtx)
+
+#define	ND6_ONLINK_LOCK()		mtx_lock(&V_nd6_onlink_mtx)
+#define	ND6_ONLINK_TRYLOCK()		mtx_trylock(&V_nd6_onlink_mtx)
+#define	ND6_ONLINK_UNLOCK()		mtx_unlock(&V_nd6_onlink_mtx)
+#define	ND6_ONLINK_LOCK_ASSERT()	mtx_assert(&V_nd6_onlink_mtx, MA_OWNED)
+#define	ND6_ONLINK_UNLOCK_ASSERT()	mtx_assert(&V_nd6_onlink_mtx, MA_NOTOWNED)
+
 #define nd6log(x)	do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
 
 /* nd6_rtr.c */
@@ -463,9 +475,14 @@ void defrouter_rele(struct nd_defrouter 
 bool defrouter_remove(struct in6_addr *, struct ifnet *);
 void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *);
 void defrouter_del(struct nd_defrouter *);
-void prelist_remove(struct nd_prefix *);
 int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *,
-	struct nd_prefix **);
+    struct nd_prefix **);
+void nd6_prefix_unlink(struct nd_prefix *, struct nd_prhead *);
+void nd6_prefix_del(struct nd_prefix *);
+void nd6_prefix_ref(struct nd_prefix *);
+void nd6_prefix_rele(struct nd_prefix *);
+int nd6_prefix_onlink(struct nd_prefix *);
+int nd6_prefix_offlink(struct nd_prefix *);
 void pfxlist_onlink_check(void);
 struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
 struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *);

Modified: head/sys/netinet6/nd6_rtr.c
==============================================================================
--- head/sys/netinet6/nd6_rtr.c	Fri Oct  7 21:03:18 2016	(r306828)
+++ head/sys/netinet6/nd6_rtr.c	Fri Oct  7 21:10:53 2016	(r306829)
@@ -87,9 +87,6 @@ static int in6_init_prefix_ltimes(struct
 static void in6_init_address_ltimes(struct nd_prefix *,
     struct in6_addrlifetime *);
 
-static int nd6_prefix_onlink(struct nd_prefix *);
-static int nd6_prefix_offlink(struct nd_prefix *);
-
 static int rt6_deleteroute(const struct rtentry *, void *);
 
 VNET_DECLARE(int, nd6_recalc_reachtm_interval);
@@ -661,6 +658,7 @@ defrouter_unlink(struct nd_defrouter *dr
 
 	ND6_WLOCK_ASSERT();
 	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+	V_nd6_list_genid++;
 	if (drq != NULL)
 		TAILQ_INSERT_TAIL(drq, dr, dr_entry);
 }
@@ -670,6 +668,7 @@ defrouter_del(struct nd_defrouter *dr)
 {
 	struct nd_defrouter *deldr = NULL;
 	struct nd_prefix *pr;
+	struct nd_pfxrouter *pfxrtr;
 
 	ND6_UNLOCK_ASSERT();
 
@@ -688,11 +687,13 @@ defrouter_del(struct nd_defrouter *dr)
 	/*
 	 * Also delete all the pointers to the router in each prefix lists.
 	 */
+	ND6_WLOCK();
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
-		struct nd_pfxrouter *pfxrtr;
 		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
 			pfxrtr_del(pfxrtr);
 	}
+	ND6_WUNLOCK();
+
 	pfxlist_onlink_check();
 
 	/*
@@ -852,14 +853,18 @@ static struct nd_defrouter *
 defrtrlist_update(struct nd_defrouter *new)
 {
 	struct nd_defrouter *dr, *n;
+	uint64_t genid;
 	int oldpref;
+	bool writelocked;
 
 	if (new->rtlifetime == 0) {
 		defrouter_remove(&new->rtaddr, new->ifp);
 		return (NULL);
 	}
 
-	ND6_WLOCK();
+	ND6_RLOCK();
+	writelocked = false;
+restart:
 	dr = defrouter_lookup_locked(&new->rtaddr, new->ifp);
 	if (dr != NULL) {
 		oldpref = rtpref(dr);
@@ -875,10 +880,32 @@ defrtrlist_update(struct nd_defrouter *n
 		 * router is still installed in the kernel.
 		 */
 		if (dr->installed && rtpref(new) == oldpref) {
-			ND6_WUNLOCK();
+			if (writelocked)
+				ND6_WUNLOCK();
+			else
+				ND6_RUNLOCK();
 			return (dr);
 		}
+	}
 
+	/*
+	 * The router needs to be reinserted into the default router
+	 * list, so upgrade to a write lock. If that fails and the list
+	 * has potentially changed while the lock was dropped, we'll
+	 * redo the lookup with the write lock held.
+	 */
+	if (!writelocked) {
+		writelocked = true;
+		if (!ND6_TRY_UPGRADE()) {
+			genid = V_nd6_list_genid;
+			ND6_RUNLOCK();
+			ND6_WLOCK();
+			if (genid != V_nd6_list_genid)
+				goto restart;
+		}
+	}
+
+	if (dr != NULL) {
 		/*
 		 * The preferred router may have changed, so relocate this
 		 * router.
@@ -912,6 +939,7 @@ defrtrlist_update(struct nd_defrouter *n
 		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
 	else
 		TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
+	V_nd6_list_genid++;
 	ND6_WUNLOCK();
 
 	defrouter_select();
@@ -924,11 +952,12 @@ pfxrtr_lookup(struct nd_prefix *pr, stru
 {
 	struct nd_pfxrouter *search;
 
+	ND6_LOCK_ASSERT();
+
 	LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) {
 		if (search->router == dr)
 			break;
 	}
-
 	return (search);
 }
 
@@ -936,55 +965,110 @@ static void
 pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
 {
 	struct nd_pfxrouter *new;
+	bool update;
+
+	ND6_UNLOCK_ASSERT();
+
+	ND6_RLOCK();
+	if (pfxrtr_lookup(pr, dr) != NULL) {
+		ND6_RUNLOCK();
+		return;
+	}
+	ND6_RUNLOCK();
 
 	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
 	if (new == NULL)
 		return;
-	new->router = dr;
 	defrouter_ref(dr);
+	new->router = dr;
 
-	LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
+	ND6_WLOCK();
+	if (pfxrtr_lookup(pr, dr) == NULL) {
+		LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
+		update = true;
+	} else {
+		/* We lost a race to add the reference. */
+		defrouter_rele(dr);
+		free(new, M_IP6NDP);
+		update = false;
+	}
+	ND6_WUNLOCK();
 
-	pfxlist_onlink_check();
+	if (update)
+		pfxlist_onlink_check();
 }
 
 static void
 pfxrtr_del(struct nd_pfxrouter *pfr)
 {
 
+	ND6_WLOCK_ASSERT();
+
 	LIST_REMOVE(pfr, pfr_entry);
 	defrouter_rele(pfr->router);
 	free(pfr, M_IP6NDP);
 }
 
-struct nd_prefix *
-nd6_prefix_lookup(struct nd_prefixctl *key)
+static struct nd_prefix *
+nd6_prefix_lookup_locked(struct nd_prefixctl *key)
 {
 	struct nd_prefix *search;
 
+	ND6_LOCK_ASSERT();
+
 	LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) {
 		if (key->ndpr_ifp == search->ndpr_ifp &&
 		    key->ndpr_plen == search->ndpr_plen &&
 		    in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
 		    &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) {
+			nd6_prefix_ref(search);
 			break;
 		}
 	}
+	return (search);
+}
 
+struct nd_prefix *
+nd6_prefix_lookup(struct nd_prefixctl *key)
+{
+	struct nd_prefix *search;
+
+	ND6_RLOCK();
+	search = nd6_prefix_lookup_locked(key);
+	ND6_RUNLOCK();
 	return (search);
 }
 
+void
+nd6_prefix_ref(struct nd_prefix *pr)
+{
+
+	refcount_acquire(&pr->ndpr_refcnt);
+}
+
+void
+nd6_prefix_rele(struct nd_prefix *pr)
+{
+
+	if (refcount_release(&pr->ndpr_refcnt)) {
+		KASSERT(LIST_EMPTY(&pr->ndpr_advrtrs),
+		    ("prefix %p has advertising routers", pr));
+		free(pr, M_IP6NDP);
+	}
+}
+
 int
 nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
     struct nd_prefix **newp)
 {
-	struct nd_prefix *new = NULL;
-	int error = 0;
+	struct nd_prefix *new;
 	char ip6buf[INET6_ADDRSTRLEN];
+	int error;
 
 	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
 	if (new == NULL)
 		return (ENOMEM);
+	refcount_init(&new->ndpr_refcnt, newp != NULL ? 2 : 1);
 	new->ndpr_ifp = pr->ndpr_ifp;
 	new->ndpr_prefix = pr->ndpr_prefix;
 	new->ndpr_plen = pr->ndpr_plen;
@@ -1003,20 +1087,22 @@ nd6_prelist_add(struct nd_prefixctl *pr,
 	/* make prefix in the canonical form */
 	IN6_MASK_ADDR(&new->ndpr_prefix.sin6_addr, &new->ndpr_mask);
 
-	/* link ndpr_entry to nd_prefix list */
+	ND6_WLOCK();
 	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
+	V_nd6_list_genid++;
+	ND6_WUNLOCK();
 
 	/* ND_OPT_PI_FLAG_ONLINK processing */
 	if (new->ndpr_raf_onlink) {
-		int e;
-
-		if ((e = nd6_prefix_onlink(new)) != 0) {
+		ND6_ONLINK_LOCK();
+		if ((error = nd6_prefix_onlink(new)) != 0) {
 			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
 			    "the prefix %s/%d on-link on %s (errno=%d)\n",
 			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
-			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
+			    pr->ndpr_plen, if_name(pr->ndpr_ifp), error));
 			/* proceed anyway. XXX: is it correct? */
 		}
+		ND6_ONLINK_UNLOCK();
 	}
 
 	if (dr != NULL)
@@ -1026,51 +1112,69 @@ nd6_prelist_add(struct nd_prefixctl *pr,
 	return (0);
 }
 
+/*
+ * Remove a prefix from the prefix list and optionally stash it in a
+ * caller-provided list.
+ *
+ * The ND6 lock must be held for writing.
+ */
 void
-prelist_remove(struct nd_prefix *pr)
+nd6_prefix_unlink(struct nd_prefix *pr, struct nd_prhead *list)
+{
+
+	KASSERT(pr->ndpr_addrcnt == 0,
+	    ("prefix %p has referencing addresses", pr));
+	ND6_WLOCK_ASSERT();
+
+	LIST_REMOVE(pr, ndpr_entry);
+	V_nd6_list_genid++;
+	if (list != NULL)
+		LIST_INSERT_HEAD(list, pr, ndpr_entry);
+}
+
+/*
+ * Drop a reference to an unlinked prefix, first marking it off-link if needed.
+ */
+void
+nd6_prefix_del(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *pfr, *next;
 	int e;
 	char ip6buf[INET6_ADDRSTRLEN];
 
-	/* make sure to invalidate the prefix until it is really freed. */
-	pr->ndpr_vltime = 0;
-	pr->ndpr_pltime = 0;
+	KASSERT(pr->ndpr_addrcnt == 0,
+	    ("prefix %p has referencing addresses", pr));
+	ND6_UNLOCK_ASSERT();
 
 	/*
 	 * Though these flags are now meaningless, we'd rather keep the value
 	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
 	 * when executing "ndp -p".
 	 */
-
-	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
-	    (e = nd6_prefix_offlink(pr)) != 0) {
-		nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
-		    "on %s, errno=%d\n",
-		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
-		    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
-		/* what should we do? */
+	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
+		ND6_ONLINK_LOCK();
+		if ((e = nd6_prefix_offlink(pr)) != 0) {
+			nd6log((LOG_ERR,
+			    "nd6_prefix_del: failed to make %s/%d offlink "
+			    "on %s, errno=%d\n",
+			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
+			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
+			/* what should we do? */
+		}
+		ND6_ONLINK_UNLOCK();
 	}
 
-	if (pr->ndpr_addrcnt > 0)
-		return;		/* notice here? */
-
-	/* unlink ndpr_entry from nd_prefix list */
-	LIST_REMOVE(pr, ndpr_entry);
-
-	/* free list of routers that advertised the prefix */
-	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) {
+	/* Release references to routers that have advertised this prefix. */
+	ND6_WLOCK();
+	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next)
 		pfxrtr_del(pfr);
-	}
-	free(pr, M_IP6NDP);
+	ND6_WUNLOCK();
+
+	nd6_prefix_rele(pr);
 
 	pfxlist_onlink_check();
 }
 
-/*
- * dr - may be NULL
- */
-
 static int
 prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
     struct mbuf *m, int mcast)
@@ -1120,21 +1224,22 @@ prelist_update(struct nd_prefixctl *new,
 
 		if (new->ndpr_raf_onlink &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
-			int e;
-
-			if ((e = nd6_prefix_onlink(pr)) != 0) {
+			ND6_ONLINK_LOCK();
+			if ((error = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "prelist_update: failed to make "
 				    "the prefix %s/%d on-link on %s "
 				    "(errno=%d)\n",
 				    ip6_sprintf(ip6buf,
-					    &pr->ndpr_prefix.sin6_addr),
-				    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
+				        &pr->ndpr_prefix.sin6_addr),
+				    pr->ndpr_plen, if_name(pr->ndpr_ifp),
+				    error));
 				/* proceed anyway. XXX: is it correct? */
 			}
+			ND6_ONLINK_UNLOCK();
 		}
 
-		if (dr && pfxrtr_lookup(pr, dr) == NULL)
+		if (dr != NULL)
 			pfxrtr_add(pr, dr);
 	} else {
 		if (new->ndpr_vltime == 0)
@@ -1393,8 +1498,10 @@ prelist_update(struct nd_prefixctl *new,
 		}
 	}
 
- end:
-	return error;
+end:
+	if (pr != NULL)
+		nd6_prefix_rele(pr);
+	return (error);
 }
 
 /*
@@ -1409,6 +1516,8 @@ find_pfxlist_reachable_router(struct nd_
 	struct llentry *ln;
 	int canreach;
 
+	ND6_LOCK_ASSERT();
+
 	LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) {
 		IF_AFDATA_RLOCK(pfxrtr->router->ifp);
 		ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
@@ -1444,6 +1553,11 @@ pfxlist_onlink_check(void)
 	struct nd_defrouter *dr;
 	struct nd_pfxrouter *pfxrtr = NULL;
 	struct rm_priotracker in6_ifa_tracker;
+	uint64_t genid;
+	uint32_t flags;
+
+	ND6_ONLINK_LOCK();
+	ND6_RLOCK();
 
 	/*
 	 * Check if there is a prefix that has a reachable advertising
@@ -1459,7 +1573,6 @@ pfxlist_onlink_check(void)
 	 * that does not advertise any prefixes.
 	 */
 	if (pr == NULL) {
-		ND6_RLOCK();
 		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 			struct nd_prefix *pr0;
 
@@ -1470,7 +1583,6 @@ pfxlist_onlink_check(void)
 			if (pfxrtr != NULL)
 				break;
 		}
-		ND6_RUNLOCK();
 	}
 	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
 		/*
@@ -1515,6 +1627,7 @@ pfxlist_onlink_check(void)
 	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
 	 * so we don't have to care about them.
 	 */
+restart:
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		char ip6buf[INET6_ADDRSTRLEN];
 		int e;
@@ -1524,21 +1637,20 @@ pfxlist_onlink_check(void)
 		    pr->ndpr_raf_auto == 0)
 			continue;
 
-		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
-		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
-			if ((e = nd6_prefix_offlink(pr)) != 0) {
+		flags = pr->ndpr_stateflags & (NDPRF_DETACHED | NDPRF_ONLINK);
+		if (flags == 0 || flags == (NDPRF_DETACHED | NDPRF_ONLINK)) {
+			genid = V_nd6_list_genid;
+			ND6_RUNLOCK();
+			if ((flags & NDPRF_ONLINK) != 0 &&
+			    (e = nd6_prefix_offlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d offlink, errno=%d\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 					    pr->ndpr_plen, e));
-			}
-		}
-		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
-		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
-		    pr->ndpr_raf_onlink) {
-			if ((e = nd6_prefix_onlink(pr)) != 0) {
+			} else if ((flags & NDPRF_ONLINK) == 0 &&
+			    (e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d onlink, errno=%d\n",
@@ -1546,6 +1658,9 @@ pfxlist_onlink_check(void)
 					    &pr->ndpr_prefix.sin6_addr),
 					    pr->ndpr_plen, e));
 			}
+			ND6_RLOCK();
+			if (genid != V_nd6_list_genid)
+				goto restart;
 		}
 	}
 
@@ -1606,6 +1721,8 @@ pfxlist_onlink_check(void)
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
+	ND6_RUNLOCK();
+	ND6_ONLINK_UNLOCK();
 }
 
 static int
@@ -1686,23 +1803,20 @@ nd6_prefix_onlink_rtrequest(struct nd_pr
 	return (a_failure);
 }
 
-static int
+int
 nd6_prefix_onlink(struct nd_prefix *pr)
 {
 	struct ifaddr *ifa;
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct nd_prefix *opr;
-	int error = 0;
 	char ip6buf[INET6_ADDRSTRLEN];

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list