svn commit: r216022 - head/sys/netinet6

Bjoern A. Zeeb bz at FreeBSD.org
Mon Nov 29 00:04:09 UTC 2010


Author: bz
Date: Mon Nov 29 00:04:08 2010
New Revision: 216022
URL: http://svn.freebsd.org/changeset/base/216022

Log:
  Plug well observed races on la_hold entries with the callout handler.
  
  Call the handler function with the lock held, return unlocked as we
  might free the entry.  Rework functions later in the call graph to be
  either called with the lock held or, only if needed, unlocked.
  
  Place asserts to document and tighten assumptions on various lle locking,
  which were not always true before.
  
  We call nd6_ns_output() unlocked and the assignment of ip6->ip6_src was
  decentralized to minimize possible complexity introduced with the formerly
  missing locking there.  This also resulted in a push down of local
  variable scopes into smaller blocks.
  
  Reported by:	many
  PR:		kern/148857
  Submitted by:	Dmitrij Tejblum (tejblum yandex-team.ru) (original version)
  MFC After:	4 days

Modified:
  head/sys/netinet6/in6.c
  head/sys/netinet6/nd6.c
  head/sys/netinet6/nd6_nbr.c

Modified: head/sys/netinet6/in6.c
==============================================================================
--- head/sys/netinet6/in6.c	Sun Nov 28 23:34:20 2010	(r216021)
+++ head/sys/netinet6/in6.c	Mon Nov 29 00:04:08 2010	(r216022)
@@ -2349,10 +2349,12 @@ in6_lltable_new(const struct sockaddr *l
 	if (lle == NULL)		/* NB: caller generates msg */
 		return NULL;
 
-	callout_init(&lle->base.ln_timer_ch, CALLOUT_MPSAFE);
 	lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr;
 	lle->base.lle_refcnt = 1;
 	LLE_LOCK_INIT(&lle->base);
+	callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock,
+	    CALLOUT_RETURNUNLOCKED);
+
 	return &lle->base;
 }
 

Modified: head/sys/netinet6/nd6.c
==============================================================================
--- head/sys/netinet6/nd6.c	Sun Nov 28 23:34:20 2010	(r216021)
+++ head/sys/netinet6/nd6.c	Mon Nov 29 00:04:08 2010	(r216022)
@@ -411,6 +411,8 @@ nd6_llinfo_settimer_locked(struct llentr
 {
 	int canceled;
 
+	LLE_WLOCK_ASSERT(ln);
+
 	if (tick < 0) {
 		ln->la_expire = 0;
 		ln->ln_ntick = 0;
@@ -451,6 +453,7 @@ nd6_llinfo_timer(void *arg)
 
 	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
 	ln = (struct llentry *)arg;
+	LLE_WLOCK_ASSERT(ln);
 	ifp = ln->lle_tbl->llt_ifp;
 
 	CURVNET_SET(ifp->if_vnet);
@@ -458,10 +461,10 @@ nd6_llinfo_timer(void *arg)
 	if (ln->ln_ntick > 0) {
 		if (ln->ln_ntick > INT_MAX) {
 			ln->ln_ntick -= INT_MAX;
-			nd6_llinfo_settimer(ln, INT_MAX);
+			nd6_llinfo_settimer_locked(ln, INT_MAX);
 		} else {
 			ln->ln_ntick = 0;
-			nd6_llinfo_settimer(ln, ln->ln_ntick);
+			nd6_llinfo_settimer_locked(ln, ln->ln_ntick);
 		}
 		goto done;
 	}
@@ -482,8 +485,10 @@ nd6_llinfo_timer(void *arg)
 	case ND6_LLINFO_INCOMPLETE:
 		if (ln->la_asked < V_nd6_mmaxtries) {
 			ln->la_asked++;
-			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
+			nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
+			LLE_WUNLOCK(ln);
 			nd6_ns_output(ifp, NULL, dst, ln, 0);
+			LLE_WLOCK(ln);
 		} else {
 			struct mbuf *m = ln->la_hold;
 			if (m) {
@@ -491,24 +496,24 @@ nd6_llinfo_timer(void *arg)
 
 				/*
 				 * assuming every packet in la_hold has the
-				 * same IP header
+				 * same IP header.  Send error after unlock.
 				 */
 				m0 = m->m_nextpkt;
 				m->m_nextpkt = NULL;
-				icmp6_error2(m, ICMP6_DST_UNREACH,
-				    ICMP6_DST_UNREACH_ADDR, 0, ifp);
-
 				ln->la_hold = m0;
 				clear_llinfo_pqueue(ln);
 			}
 			(void)nd6_free(ln, 0);
 			ln = NULL;
+			if (m != NULL)
+				icmp6_error2(m, ICMP6_DST_UNREACH,
+				    ICMP6_DST_UNREACH_ADDR, 0, ifp);
 		}
 		break;
 	case ND6_LLINFO_REACHABLE:
 		if (!ND6_LLINFO_PERMANENT(ln)) {
 			ln->ln_state = ND6_LLINFO_STALE;
-			nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
+			nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
 		}
 		break;
 
@@ -525,27 +530,34 @@ nd6_llinfo_timer(void *arg)
 			/* We need NUD */
 			ln->la_asked = 1;
 			ln->ln_state = ND6_LLINFO_PROBE;
-			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
+			nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
+			LLE_WUNLOCK(ln);
 			nd6_ns_output(ifp, dst, dst, ln, 0);
+			LLE_WLOCK(ln);
 		} else {
 			ln->ln_state = ND6_LLINFO_STALE; /* XXX */
-			nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
+			nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
 		}
 		break;
 	case ND6_LLINFO_PROBE:
 		if (ln->la_asked < V_nd6_umaxtries) {
 			ln->la_asked++;
-			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
+			nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
+			LLE_WUNLOCK(ln);
 			nd6_ns_output(ifp, dst, dst, ln, 0);
+			LLE_WLOCK(ln);
 		} else {
 			(void)nd6_free(ln, 0);
 			ln = NULL;
 		}
 		break;
+	default:
+		panic("%s: paths in a dark night can be confusing: %d",
+		    __func__, ln->ln_state);
 	}
 done:
 	if (ln != NULL)
-		LLE_FREE(ln);
+		LLE_FREE_LOCKED(ln);
 	CURVNET_RESTORE();
 }
 
@@ -996,7 +1008,9 @@ nd6_free(struct llentry *ln, int gc)
 {
         struct llentry *next;
 	struct nd_defrouter *dr;
-	struct ifnet *ifp=NULL;
+	struct ifnet *ifp;
+
+	LLE_WLOCK_ASSERT(ln);
 
 	/*
 	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
@@ -1004,12 +1018,13 @@ nd6_free(struct llentry *ln, int gc)
 	 */
 
 	/* cancel timer */
-	nd6_llinfo_settimer(ln, -1);
+	nd6_llinfo_settimer_locked(ln, -1);
+
+	ifp = ln->lle_tbl->llt_ifp;
 
 	if (!V_ip6_forwarding) {
-		int s;
-		s = splnet();
-		dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp);
+
+		dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
 
 		if (dr != NULL && dr->expire &&
 		    ln->ln_state == ND6_LLINFO_STALE && gc) {
@@ -1026,15 +1041,16 @@ nd6_free(struct llentry *ln, int gc)
 			 *      but we intentionally keep it just in case.
 			 */
 			if (dr->expire > time_second)
-				nd6_llinfo_settimer(ln,
+				nd6_llinfo_settimer_locked(ln,
 				    (dr->expire - time_second) * hz);
 			else
-				nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
-			splx(s);
-			LLE_WLOCK(ln);
+				nd6_llinfo_settimer_locked(ln,
+				    (long)V_nd6_gctimer * hz);
+
+			next = LIST_NEXT(ln, lle_next);
 			LLE_REMREF(ln);
 			LLE_WUNLOCK(ln);
-			return (LIST_NEXT(ln, lle_next));
+			return (next);
 		}
 
 		if (ln->ln_router || dr) {
@@ -1043,7 +1059,7 @@ nd6_free(struct llentry *ln, int gc)
 			 * is in the Default Router List.
 			 * See a corresponding comment in nd6_na_input().
 			 */
-			rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp);
+			rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
 		}
 
 		if (dr) {
@@ -1071,11 +1087,13 @@ nd6_free(struct llentry *ln, int gc)
 			pfxlist_onlink_check();
 
 			/*
-			 * refresh default router list
+			 * Refresh default router list.  Have to unlock as
+			 * it calls into nd6_lookup(), still holding a ref.
 			 */
+			LLE_WUNLOCK(ln);
 			defrouter_select();
+			LLE_WLOCK(ln);
 		}
-		splx(s);
 	}
 
 	/*
@@ -1086,7 +1104,11 @@ nd6_free(struct llentry *ln, int gc)
 	 */
 	next = LIST_NEXT(ln, lle_next);
 
-	ifp = ln->lle_tbl->llt_ifp;
+	/*
+	 * Save to unlock. We still hold an extra reference and will not
+	 * free(9) in llentry_free() if someone else holds one as well.
+	 */
+	LLE_WUNLOCK(ln);
 	IF_AFDATA_LOCK(ifp);
 	LLE_WLOCK(ln);
 	LLE_REMREF(ln);
@@ -1875,6 +1897,9 @@ nd6_output_lle(struct ifnet *ifp, struct
 		LLE_RUNLOCK(ln);
 		goto retry;
 	}
+
+	LLE_WLOCK_ASSERT(ln);
+
 	if (ln->la_hold) {
 		struct mbuf *m_hold;
 		int i;
@@ -1896,17 +1921,7 @@ nd6_output_lle(struct ifnet *ifp, struct
 	} else {
 		ln->la_hold = m;
 	}
-	/*
-	 * We did the lookup (no lle arg) so we
-	 * need to do the unlock here
-	 */
-	if (lle == NULL) {
-		if (flags & LLE_EXCLUSIVE)
-			LLE_WUNLOCK(ln);
-		else
-			LLE_RUNLOCK(ln);
-	}
-	
+
 	/*
 	 * If there has been no NS for the neighbor after entering the
 	 * INCOMPLETE state, send the first solicitation.
@@ -1914,10 +1929,21 @@ nd6_output_lle(struct ifnet *ifp, struct
 	if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
 		ln->la_asked++;
 		
-		nd6_llinfo_settimer(ln,
+		nd6_llinfo_settimer_locked(ln,
 		    (long)ND_IFINFO(ifp)->retrans * hz / 1000);
+		LLE_WUNLOCK(ln);
 		nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
+		if (lle != NULL && ln == lle)
+			LLE_WLOCK(lle);
+
+	} else if (lle == NULL || ln != lle) {
+		/*
+		 * We did the lookup (no lle arg) so we
+		 * need to do the unlock here.
+		 */
+		LLE_WUNLOCK(ln);
 	}
+
 	return (0);
 
   sendpkt:

Modified: head/sys/netinet6/nd6_nbr.c
==============================================================================
--- head/sys/netinet6/nd6_nbr.c	Sun Nov 28 23:34:20 2010	(r216021)
+++ head/sys/netinet6/nd6_nbr.c	Mon Nov 29 00:04:08 2010	(r216022)
@@ -383,7 +383,6 @@ nd6_ns_output(struct ifnet *ifp, const s
 	struct m_tag *mtag;
 	struct ip6_hdr *ip6;
 	struct nd_neighbor_solicit *nd_ns;
-	struct in6_addr *src, src_in;
 	struct ip6_moptions im6o;
 	int icmp6len;
 	int maxlen;
@@ -467,28 +466,35 @@ nd6_ns_output(struct ifnet *ifp, const s
 		 * - saddr6 belongs to the outgoing interface.
 		 * Otherwise, we perform the source address selection as usual.
 		 */
-		struct ip6_hdr *hip6;		/* hold ip6 */
-		struct in6_addr *hsrc = NULL;
+		struct in6_addr *hsrc;
 
-		if ((ln != NULL) && ln->la_hold) {
-			/*
-			 * assuming every packet in la_hold has the same IP
-			 * header
-			 */
-			hip6 = mtod(ln->la_hold, struct ip6_hdr *);
-			/* XXX pullup? */
-			if (sizeof(*hip6) < ln->la_hold->m_len)
-				hsrc = &hip6->ip6_src;
-			else
-				hsrc = NULL;
+		hsrc = NULL;
+		if (ln != NULL) {
+			LLE_RLOCK(ln);
+			if (ln->la_hold != NULL) {
+				struct ip6_hdr *hip6;		/* hold ip6 */
+
+				/*
+				 * assuming every packet in la_hold has the same IP
+				 * header
+				 */
+				hip6 = mtod(ln->la_hold, struct ip6_hdr *);
+				/* XXX pullup? */
+				if (sizeof(*hip6) < ln->la_hold->m_len) {
+					ip6->ip6_src = hip6->ip6_src;
+					hsrc = &hip6->ip6_src;
+				}
+			}
+			LLE_RUNLOCK(ln);
 		}
 		if (hsrc && (ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
 		    hsrc)) != NULL) {
-			src = hsrc;
+			/* ip6_src set already. */
 			ifa_free(ifa);
 		} else {
 			int error;
 			struct sockaddr_in6 dst_sa;
+			struct in6_addr src_in;
 
 			bzero(&dst_sa, sizeof(dst_sa));
 			dst_sa.sin6_family = AF_INET6;
@@ -506,7 +512,7 @@ nd6_ns_output(struct ifnet *ifp, const s
 				    error));
 				goto bad;
 			}
-			src = &src_in;
+			ip6->ip6_src = src_in;
 		}
 	} else {
 		/*
@@ -516,10 +522,8 @@ nd6_ns_output(struct ifnet *ifp, const s
 		 * above), but we do so here explicitly to make the intention
 		 * clearer.
 		 */
-		bzero(&src_in, sizeof(src_in));
-		src = &src_in;
+		bzero(&ip6->ip6_src, sizeof(ip6->ip6_src));
 	}
-	ip6->ip6_src = *src;
 	nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1);
 	nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
 	nd_ns->nd_ns_code = 0;


More information about the svn-src-head mailing list