svn commit: r216118 - stable/8/sys/netinet6

Bjoern A. Zeeb bz at FreeBSD.org
Thu Dec 2 10:39:45 UTC 2010


Author: bz
Date: Thu Dec  2 10:39:44 2010
New Revision: 216118
URL: http://svn.freebsd.org/changeset/base/216118

Log:
  MFC r216022:
  
    Plug well observed races on la_hold entries with the callout handler.
  
    Call the handler function with the lock held, return unlocked as we
    might free the entry. Rework functions later in the call graph to be
    either called with the lock held or, only if needed, unlocked.
  
    Place asserts to document and tighten assumptions on various lle locking,
    which were not always true before.
  
    We call nd6_ns_output() unlocked and the assignment of ip6->ip6_src was
    decentralized to minimize possible complexity introduced with the formerly
    missing locking there. This also resulted in a push down of local
    variable scopes into smaller blocks.
  
    Reported by:	many
    Submitted by:	Dmitrij Tejblum (tejblum yandex-team.ru) (original version)
  Tested by:	remko
  PR:		kern/148857
  Approved by:	re (kib)

Modified:
  stable/8/sys/netinet6/in6.c
  stable/8/sys/netinet6/nd6.c
  stable/8/sys/netinet6/nd6_nbr.c
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)

Modified: stable/8/sys/netinet6/in6.c
==============================================================================
--- stable/8/sys/netinet6/in6.c	Thu Dec  2 09:10:45 2010	(r216117)
+++ stable/8/sys/netinet6/in6.c	Thu Dec  2 10:39:44 2010	(r216118)
@@ -2325,10 +2325,12 @@ in6_lltable_new(const struct sockaddr *l
 	if (lle == NULL)		/* NB: caller generates msg */
 		return NULL;
 
-	callout_init(&lle->base.ln_timer_ch, CALLOUT_MPSAFE);
 	lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr;
 	lle->base.lle_refcnt = 1;
 	LLE_LOCK_INIT(&lle->base);
+	callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock,
+	    CALLOUT_RETURNUNLOCKED);
+
 	return &lle->base;
 }
 

Modified: stable/8/sys/netinet6/nd6.c
==============================================================================
--- stable/8/sys/netinet6/nd6.c	Thu Dec  2 09:10:45 2010	(r216117)
+++ stable/8/sys/netinet6/nd6.c	Thu Dec  2 10:39:44 2010	(r216118)
@@ -403,6 +403,8 @@ nd6_llinfo_settimer_locked(struct llentr
 {
 	int canceled;
 
+	LLE_WLOCK_ASSERT(ln);
+
 	if (tick < 0) {
 		ln->la_expire = 0;
 		ln->ln_ntick = 0;
@@ -443,6 +445,7 @@ nd6_llinfo_timer(void *arg)
 
 	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
 	ln = (struct llentry *)arg;
+	LLE_WLOCK_ASSERT(ln);
 	ifp = ln->lle_tbl->llt_ifp;
 
 	CURVNET_SET(ifp->if_vnet);
@@ -450,10 +453,10 @@ nd6_llinfo_timer(void *arg)
 	if (ln->ln_ntick > 0) {
 		if (ln->ln_ntick > INT_MAX) {
 			ln->ln_ntick -= INT_MAX;
-			nd6_llinfo_settimer(ln, INT_MAX);
+			nd6_llinfo_settimer_locked(ln, INT_MAX);
 		} else {
 			ln->ln_ntick = 0;
-			nd6_llinfo_settimer(ln, ln->ln_ntick);
+			nd6_llinfo_settimer_locked(ln, ln->ln_ntick);
 		}
 		goto done;
 	}
@@ -474,8 +477,10 @@ nd6_llinfo_timer(void *arg)
 	case ND6_LLINFO_INCOMPLETE:
 		if (ln->la_asked < V_nd6_mmaxtries) {
 			ln->la_asked++;
-			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
+			nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
+			LLE_WUNLOCK(ln);
 			nd6_ns_output(ifp, NULL, dst, ln, 0);
+			LLE_WLOCK(ln);
 		} else {
 			struct mbuf *m = ln->la_hold;
 			if (m) {
@@ -483,24 +488,24 @@ nd6_llinfo_timer(void *arg)
 
 				/*
 				 * assuming every packet in la_hold has the
-				 * same IP header
+				 * same IP header.  Send error after unlock.
 				 */
 				m0 = m->m_nextpkt;
 				m->m_nextpkt = NULL;
-				icmp6_error2(m, ICMP6_DST_UNREACH,
-				    ICMP6_DST_UNREACH_ADDR, 0, ifp);
-
 				ln->la_hold = m0;
 				clear_llinfo_pqueue(ln);
 			}
 			(void)nd6_free(ln, 0);
 			ln = NULL;
+			if (m != NULL)
+				icmp6_error2(m, ICMP6_DST_UNREACH,
+				    ICMP6_DST_UNREACH_ADDR, 0, ifp);
 		}
 		break;
 	case ND6_LLINFO_REACHABLE:
 		if (!ND6_LLINFO_PERMANENT(ln)) {
 			ln->ln_state = ND6_LLINFO_STALE;
-			nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
+			nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
 		}
 		break;
 
@@ -517,27 +522,34 @@ nd6_llinfo_timer(void *arg)
 			/* We need NUD */
 			ln->la_asked = 1;
 			ln->ln_state = ND6_LLINFO_PROBE;
-			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
+			nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
+			LLE_WUNLOCK(ln);
 			nd6_ns_output(ifp, dst, dst, ln, 0);
+			LLE_WLOCK(ln);
 		} else {
 			ln->ln_state = ND6_LLINFO_STALE; /* XXX */
-			nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
+			nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
 		}
 		break;
 	case ND6_LLINFO_PROBE:
 		if (ln->la_asked < V_nd6_umaxtries) {
 			ln->la_asked++;
-			nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
+			nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
+			LLE_WUNLOCK(ln);
 			nd6_ns_output(ifp, dst, dst, ln, 0);
+			LLE_WLOCK(ln);
 		} else {
 			(void)nd6_free(ln, 0);
 			ln = NULL;
 		}
 		break;
+	default:
+		panic("%s: paths in a dark night can be confusing: %d",
+		    __func__, ln->ln_state);
 	}
 done:
 	if (ln != NULL)
-		LLE_FREE(ln);
+		LLE_FREE_LOCKED(ln);
 	CURVNET_RESTORE();
 }
 
@@ -992,7 +1004,9 @@ nd6_free(struct llentry *ln, int gc)
 {
         struct llentry *next;
 	struct nd_defrouter *dr;
-	struct ifnet *ifp=NULL;
+	struct ifnet *ifp;
+
+	LLE_WLOCK_ASSERT(ln);
 
 	/*
 	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
@@ -1000,12 +1014,13 @@ nd6_free(struct llentry *ln, int gc)
 	 */
 
 	/* cancel timer */
-	nd6_llinfo_settimer(ln, -1);
+	nd6_llinfo_settimer_locked(ln, -1);
+
+	ifp = ln->lle_tbl->llt_ifp;
 
 	if (!V_ip6_forwarding) {
-		int s;
-		s = splnet();
-		dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp);
+
+		dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
 
 		if (dr != NULL && dr->expire &&
 		    ln->ln_state == ND6_LLINFO_STALE && gc) {
@@ -1022,15 +1037,16 @@ nd6_free(struct llentry *ln, int gc)
 			 *      but we intentionally keep it just in case.
 			 */
 			if (dr->expire > time_second)
-				nd6_llinfo_settimer(ln,
+				nd6_llinfo_settimer_locked(ln,
 				    (dr->expire - time_second) * hz);
 			else
-				nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
-			splx(s);
-			LLE_WLOCK(ln);
+				nd6_llinfo_settimer_locked(ln,
+				    (long)V_nd6_gctimer * hz);
+
+			next = LIST_NEXT(ln, lle_next);
 			LLE_REMREF(ln);
 			LLE_WUNLOCK(ln);
-			return (LIST_NEXT(ln, lle_next));
+			return (next);
 		}
 
 		if (ln->ln_router || dr) {
@@ -1039,7 +1055,7 @@ nd6_free(struct llentry *ln, int gc)
 			 * is in the Default Router List.
 			 * See a corresponding comment in nd6_na_input().
 			 */
-			rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp);
+			rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
 		}
 
 		if (dr) {
@@ -1067,11 +1083,13 @@ nd6_free(struct llentry *ln, int gc)
 			pfxlist_onlink_check();
 
 			/*
-			 * refresh default router list
+			 * Refresh default router list.  Have to unlock as
+			 * it calls into nd6_lookup(), still holding a ref.
 			 */
+			LLE_WUNLOCK(ln);
 			defrouter_select();
+			LLE_WLOCK(ln);
 		}
-		splx(s);
 	}
 
 	/*
@@ -1082,7 +1100,11 @@ nd6_free(struct llentry *ln, int gc)
 	 */
 	next = LIST_NEXT(ln, lle_next);
 
-	ifp = ln->lle_tbl->llt_ifp;
+	/*
+	 * Save to unlock. We still hold an extra reference and will not
+	 * free(9) in llentry_free() if someone else holds one as well.
+	 */
+	LLE_WUNLOCK(ln);
 	IF_AFDATA_LOCK(ifp);
 	LLE_WLOCK(ln);
 	LLE_REMREF(ln);
@@ -1804,6 +1826,9 @@ nd6_output_lle(struct ifnet *ifp, struct
 		LLE_RUNLOCK(ln);
 		goto retry;
 	}
+
+	LLE_WLOCK_ASSERT(ln);
+
 	if (ln->la_hold) {
 		struct mbuf *m_hold;
 		int i;
@@ -1825,17 +1850,7 @@ nd6_output_lle(struct ifnet *ifp, struct
 	} else {
 		ln->la_hold = m;
 	}
-	/*
-	 * We did the lookup (no lle arg) so we
-	 * need to do the unlock here
-	 */
-	if (lle == NULL) {
-		if (flags & LLE_EXCLUSIVE)
-			LLE_WUNLOCK(ln);
-		else
-			LLE_RUNLOCK(ln);
-	}
-	
+
 	/*
 	 * If there has been no NS for the neighbor after entering the
 	 * INCOMPLETE state, send the first solicitation.
@@ -1843,10 +1858,21 @@ nd6_output_lle(struct ifnet *ifp, struct
 	if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
 		ln->la_asked++;
 		
-		nd6_llinfo_settimer(ln,
+		nd6_llinfo_settimer_locked(ln,
 		    (long)ND_IFINFO(ifp)->retrans * hz / 1000);
+		LLE_WUNLOCK(ln);
 		nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
+		if (lle != NULL && ln == lle)
+			LLE_WLOCK(lle);
+
+	} else if (lle == NULL || ln != lle) {
+		/*
+		 * We did the lookup (no lle arg) so we
+		 * need to do the unlock here.
+		 */
+		LLE_WUNLOCK(ln);
 	}
+
 	return (0);
 
   sendpkt:

Modified: stable/8/sys/netinet6/nd6_nbr.c
==============================================================================
--- stable/8/sys/netinet6/nd6_nbr.c	Thu Dec  2 09:10:45 2010	(r216117)
+++ stable/8/sys/netinet6/nd6_nbr.c	Thu Dec  2 10:39:44 2010	(r216118)
@@ -381,7 +381,6 @@ nd6_ns_output(struct ifnet *ifp, const s
 	struct mbuf *m;
 	struct ip6_hdr *ip6;
 	struct nd_neighbor_solicit *nd_ns;
-	struct in6_addr *src, src_in;
 	struct ip6_moptions im6o;
 	int icmp6len;
 	int maxlen;
@@ -465,28 +464,35 @@ nd6_ns_output(struct ifnet *ifp, const s
 		 * - saddr6 belongs to the outgoing interface.
 		 * Otherwise, we perform the source address selection as usual.
 		 */
-		struct ip6_hdr *hip6;		/* hold ip6 */
-		struct in6_addr *hsrc = NULL;
+		struct in6_addr *hsrc;
 
-		if ((ln != NULL) && ln->la_hold) {
-			/*
-			 * assuming every packet in la_hold has the same IP
-			 * header
-			 */
-			hip6 = mtod(ln->la_hold, struct ip6_hdr *);
-			/* XXX pullup? */
-			if (sizeof(*hip6) < ln->la_hold->m_len)
-				hsrc = &hip6->ip6_src;
-			else
-				hsrc = NULL;
+		hsrc = NULL;
+		if (ln != NULL) {
+			LLE_RLOCK(ln);
+			if (ln->la_hold != NULL) {
+				struct ip6_hdr *hip6;		/* hold ip6 */
+
+				/*
+				 * assuming every packet in la_hold has the same IP
+				 * header
+				 */
+				hip6 = mtod(ln->la_hold, struct ip6_hdr *);
+				/* XXX pullup? */
+				if (sizeof(*hip6) < ln->la_hold->m_len) {
+					ip6->ip6_src = hip6->ip6_src;
+					hsrc = &hip6->ip6_src;
+				}
+			}
+			LLE_RUNLOCK(ln);
 		}
 		if (hsrc && (ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
 		    hsrc)) != NULL) {
-			src = hsrc;
+			/* ip6_src set already. */
 			ifa_free(ifa);
 		} else {
 			int error;
 			struct sockaddr_in6 dst_sa;
+			struct in6_addr src_in;
 
 			bzero(&dst_sa, sizeof(dst_sa));
 			dst_sa.sin6_family = AF_INET6;
@@ -504,7 +510,7 @@ nd6_ns_output(struct ifnet *ifp, const s
 				    error));
 				goto bad;
 			}
-			src = &src_in;
+			ip6->ip6_src = src_in;
 		}
 	} else {
 		/*
@@ -514,10 +520,8 @@ nd6_ns_output(struct ifnet *ifp, const s
 		 * above), but we do so here explicitly to make the intention
 		 * clearer.
 		 */
-		bzero(&src_in, sizeof(src_in));
-		src = &src_in;
+		bzero(&ip6->ip6_src, sizeof(ip6->ip6_src));
 	}
-	ip6->ip6_src = *src;
 	nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1);
 	nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
 	nd_ns->nd_ns_code = 0;


More information about the svn-src-stable mailing list