kern/165863
    Andrey Zonov 
    andrey at zonov.org
       
    Tue Jul 31 18:43:46 UTC 2012
    
    
  
On 4/11/12 12:26 AM, Ryan Stone wrote:
> 2012/4/10 Gleb Smirnoff <glebius at freebsd.org>:
>>   Thanks, Ryan!
> (snip)
>> Looks okay from my viewpoint. Have you stress tested it? My snap patch
>> definitely had problems, AFAIR.
>>
>> If this patch fixes panics observed by kern/165863 and passes stress
>> testing, then it should be committed ASAP, and shouldn't depend on
>> IPv6 part.
>>
>> --
>> Totus tuus, Glebius.
>
> Hm, I was all ready to commit this, but I've realized that there is a
> problem.  According to callout_reset(9), any caller to callout_reset
> must hold any mutex associated with the callout, but the two places
> that call callout_reset on the la_timer are not done with the
> if_afdata_lock held, and changing that looks to be non-trivial without
> making the if_afdata_lock a huge contention point.
>
> At this point I'm not sure what the best way to proceed is.
>
Hi,
Please review attached patch.  I used Gleb's ideas.  He almost fixed the 
issue, but he didn't observe that entry can be safely unlocked in 
arptimer() because it has refcnt incremented and cannot be removed.  I 
also fixed entry removing based on refcnt that was totally broken.
With my patch system doesn't panic in arp code anymore.  Stress-test as 
follows:
for h in $hosts; do
     ssh $h sysctl net.inet.icmp.icmplim=100000
     ping -f $h > /dev/null 2>&1 &
done
sysctl net.link.ether.inet.max_age=1
while :; do
     ifconfig $if $ip/$mask
done &
IPv6 was not fix so far.  It still didn't free lltable on IP deletion, 
but I will look at this sometime later.
# vmstat -m | grep llt
       lltable   117    31K       -      117  256,512
# for i in `jot 1000`; do ifconfig lo0 inet6 fe80::2/128; done
# vmstat -m | grep llt
       lltable  1117   281K       -     1117  256,512
-- 
Andrey Zonov
-------------- next part --------------
Index: sys/net/if_var.h
===================================================================
--- sys/net/if_var.h	(revision 238945)
+++ sys/net/if_var.h	(working copy)
@@ -415,6 +415,8 @@ EVENTHANDLER_DECLARE(group_change_event, group_cha
 #define	IF_AFDATA_DESTROY(ifp)	rw_destroy(&(ifp)->if_afdata_lock)
 
 #define	IF_AFDATA_LOCK_ASSERT(ifp)	rw_assert(&(ifp)->if_afdata_lock, RA_LOCKED)
+#define	IF_AFDATA_RLOCK_ASSERT(ifp)	rw_assert(&(ifp)->if_afdata_lock, RA_RLOCKED)
+#define	IF_AFDATA_WLOCK_ASSERT(ifp)	rw_assert(&(ifp)->if_afdata_lock, RA_WLOCKED)
 #define	IF_AFDATA_UNLOCK_ASSERT(ifp)	rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED)
 
 int	if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
Index: sys/net/if_llatbl.c
===================================================================
--- sys/net/if_llatbl.c	(revision 238945)
+++ sys/net/if_llatbl.c	(working copy)
@@ -106,10 +106,10 @@ llentry_free(struct llentry *lle)
 	size_t pkts_dropped;
 	struct mbuf *next;
 
-	pkts_dropped = 0;
 	LLE_WLOCK_ASSERT(lle);
-	LIST_REMOVE(lle, lle_next);
+	IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
 
+	pkts_dropped = 0;
 	while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
 		next = lle->la_hold->m_nextpkt;
 		m_freem(lle->la_hold);
@@ -182,17 +182,16 @@ lltable_free(struct lltable *llt)
 	SLIST_REMOVE(&V_lltables, llt, lltable, llt_link);
 	LLTABLE_WUNLOCK();
 
+	IF_AFDATA_WLOCK(llt->llt_ifp);
 	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
 		LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
-			int canceled;
-
-			canceled = callout_drain(&lle->la_timer);
 			LLE_WLOCK(lle);
-			if (canceled)
+			if (callout_stop(&lle->la_timer))
 				LLE_REMREF(lle);
 			llentry_free(lle);
 		}
 	}
+	IF_AFDATA_WUNLOCK(llt->llt_ifp);
 
 	free(llt, M_LLTABLE);
 }
Index: sys/net/if_llatbl.h
===================================================================
--- sys/net/if_llatbl.h	(revision 238945)
+++ sys/net/if_llatbl.h	(working copy)
@@ -102,7 +102,7 @@ struct llentry {
 
 #define	LLE_ADDREF(lle) do {					\
 	LLE_WLOCK_ASSERT(lle);					\
-	KASSERT((lle)->lle_refcnt >= 0,				\
+	KASSERT((lle)->lle_refcnt > 0,				\
 		("negative refcnt %d", (lle)->lle_refcnt));	\
 	(lle)->lle_refcnt++;					\
 } while (0)
@@ -115,12 +115,14 @@ struct llentry {
 } while (0)
 
 #define	LLE_FREE_LOCKED(lle) do {				\
-	if ((lle)->lle_refcnt <= 1)				\
-		(lle)->lle_free((lle)->lle_tbl, (lle));\
-	else {							\
-		(lle)->lle_refcnt--;				\
+	KASSERT((lle)->lle_refcnt > 0,				\
+		("negative refcnt %d", (lle)->lle_refcnt));	\
+	(lle)->lle_refcnt--;					\
+	if ((lle)->lle_refcnt == 0) {				\
+		LIST_REMOVE((lle), lle_next);			\
+		(lle)->lle_free((lle)->lle_tbl, (lle));		\
+	} else							\
 		LLE_WUNLOCK(lle);				\
-	}							\
 	/* guard against invalid refs */			\
 	lle = NULL;						\
 } while (0)
@@ -172,7 +174,7 @@ MALLOC_DECLARE(M_LLTABLE);
 #define	LLE_VALID	0x0008	/* ll_addr is valid */
 #define	LLE_PROXY	0x0010	/* proxy entry ??? */
 #define	LLE_PUB		0x0020	/* publish entry ??? */
-#define	LLE_EXCLUSIVE	0x2000	/* return lle xlocked  */
+#define	LLE_EXCLUSIVE	0x2000	/* return lle xlocked */
 #define	LLE_DELETE	0x4000	/* delete on a lookup - match LLE_IFADDR */
 #define	LLE_CREATE	0x8000	/* create on a lookup miss */
 
Index: sys/netinet/in.c
===================================================================
--- sys/netinet/in.c	(revision 238945)
+++ sys/netinet/in.c	(working copy)
@@ -1280,7 +1280,6 @@ in_lltable_new(const struct sockaddr *l3addr, u_in
 	if (lle == NULL)		/* NB: caller generates msg */
 		return NULL;
 
-	callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
 	/*
 	 * For IPv4 this will trigger "arpresolve" to generate
 	 * an ARP request.
@@ -1290,7 +1289,10 @@ in_lltable_new(const struct sockaddr *l3addr, u_in
 	lle->base.lle_refcnt = 1;
 	lle->base.lle_free = in_lltable_free;
 	LLE_LOCK_INIT(&lle->base);
-	return &lle->base;
+	callout_init_rw(&lle->base.la_timer, &lle->base.lle_lock,
+	    CALLOUT_RETURNUNLOCKED);
+
+	return (&lle->base);
 }
 
 #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m)	(			\
@@ -1306,26 +1308,24 @@ in_lltable_prefix_free(struct lltable *llt, const
 	int i;
 	size_t pkts_dropped;
 
+	IF_AFDATA_WLOCK(llt->llt_ifp);
 	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
 		LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
 			/*
 			 * (flags & LLE_STATIC) means deleting all entries
 			 * including static ARP entries.
 			 */
-			if (IN_ARE_MASKED_ADDR_EQUAL(satosin(L3_ADDR(lle)),
-			    pfx, msk) && ((flags & LLE_STATIC) ||
-			    !(lle->la_flags & LLE_STATIC))) {
-				int canceled;
-
-				canceled = callout_drain(&lle->la_timer);
+			if (IN_ARE_MASKED_ADDR_EQUAL(satosin(L3_ADDR(lle)), pfx, msk) &&
+			    ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) {
 				LLE_WLOCK(lle);
-				if (canceled)
+				if (callout_stop(&lle->la_timer))
 					LLE_REMREF(lle);
 				pkts_dropped = llentry_free(lle);
 				ARPSTAT_ADD(dropped, pkts_dropped);
 			}
 		}
 	}
+	IF_AFDATA_WUNLOCK(llt->llt_ifp);
 }
 
 
@@ -1431,7 +1431,8 @@ in_lltable_lookup(struct lltable *llt, u_int flags
 	if (lle == NULL) {
 #ifdef DIAGNOSTIC
 		if (flags & LLE_DELETE)
-			log(LOG_INFO, "interface address is missing from cache = %p  in delete\n", lle);	
+			log(LOG_INFO, "interface address is missing from "
+			    "cache = %p in delete\n", lle);
 #endif
 		if (!(flags & LLE_CREATE))
 			return (NULL);
Index: sys/netinet/if_ether.c
===================================================================
--- sys/netinet/if_ether.c	(revision 238945)
+++ sys/netinet/if_ether.c	(working copy)
@@ -163,49 +163,37 @@ arp_ifscrub(struct ifnet *ifp, uint32_t addr)
 static void
 arptimer(void *arg)
 {
+	struct llentry *lle = (struct llentry *)arg;
 	struct ifnet *ifp;
-	struct llentry   *lle;
-	int pkts_dropped;
+	size_t pkts_dropped;
 
-	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
-	lle = (struct llentry *)arg;
+	if (lle->la_flags & LLE_STATIC) {
+		LLE_WUNLOCK(lle);
+		return;
+	}
+
 	ifp = lle->lle_tbl->llt_ifp;
 	CURVNET_SET(ifp->if_vnet);
-	IF_AFDATA_LOCK(ifp);
-	LLE_WLOCK(lle);
-	if (lle->la_flags & LLE_STATIC)
-		LLE_WUNLOCK(lle);
-	else {
-		if (!callout_pending(&lle->la_timer) &&
-		    callout_active(&lle->la_timer)) {
-			callout_stop(&lle->la_timer);
-			LLE_REMREF(lle);
 
-			if (lle->la_flags != LLE_DELETED) {
-				int evt;
+	if (lle->la_flags != LLE_DELETED) {
+		int evt;
 
-				if (lle->la_flags & LLE_VALID)
-					evt = LLENTRY_EXPIRED;
-				else
-					evt = LLENTRY_TIMEDOUT;
-				EVENTHANDLER_INVOKE(lle_event, lle, evt);
-			}
+		if (lle->la_flags & LLE_VALID)
+			evt = LLENTRY_EXPIRED;
+		else
+			evt = LLENTRY_TIMEDOUT;
+		EVENTHANDLER_INVOKE(lle_event, lle, evt);
+	}
 
-			pkts_dropped = llentry_free(lle);
-			ARPSTAT_ADD(dropped, pkts_dropped);
-			ARPSTAT_INC(timeouts);
-		} else {
-#ifdef DIAGNOSTIC
-			struct sockaddr *l3addr = L3_ADDR(lle);
-			log(LOG_INFO,
-			    "arptimer issue: %p, IPv4 address: \"%s\"\n", lle,
-			    inet_ntoa(
-			        ((const struct sockaddr_in *)l3addr)->sin_addr));
-#endif
-			LLE_WUNLOCK(lle);
-		}
-	}
+	callout_stop(&lle->la_timer);
+	LLE_WUNLOCK(lle);
+	IF_AFDATA_LOCK(ifp);
+	LLE_WLOCK(lle);
+	LLE_REMREF(lle);
+	pkts_dropped = llentry_free(lle);
 	IF_AFDATA_UNLOCK(ifp);
+	ARPSTAT_ADD(dropped, pkts_dropped);
+	ARPSTAT_INC(timeouts);
 	CURVNET_RESTORE();
 }
 
Index: sys/netinet6/in6.c
===================================================================
--- sys/netinet6/in6.c	(revision 238945)
+++ sys/netinet6/in6.c	(working copy)
@@ -2497,12 +2497,12 @@ in6_lltable_prefix_free(struct lltable *llt, const
 	 * (flags & LLE_STATIC) means deleting all entries
 	 * including static ND6 entries.
 	 */
+	IF_AFDATA_WLOCK(llt->llt_ifp);
 	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
 		LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
 			if (IN6_ARE_MASKED_ADDR_EQUAL(
-				    &((struct sockaddr_in6 *)L3_ADDR(lle))->sin6_addr,
-				    &pfx->sin6_addr,
-				    &msk->sin6_addr) &&
+			    &satosin6(L3_ADDR(lle))->sin6_addr,
+			    &pfx->sin6_addr, &msk->sin6_addr) &&
 			    ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) {
 				int canceled;
 
@@ -2514,6 +2514,7 @@ in6_lltable_prefix_free(struct lltable *llt, const
 			}
 		}
 	}
+	IF_AFDATA_WUNLOCK(llt->llt_ifp);
 }
 
 static int
    
    
More information about the freebsd-net
mailing list