svn commit: r333175 - in head/sys: kern net netinet netinet6 sys

Stephen Hurd shurd at FreeBSD.org
Wed May 2 19:36:31 UTC 2018


Author: shurd
Date: Wed May  2 19:36:29 2018
New Revision: 333175
URL: https://svnweb.freebsd.org/changeset/base/333175

Log:
  Separate list manipulation locking from state change in multicast
  
  Multicast incorrectly calls in to drivers with a mutex held causing drivers
  to have to go through all manner of contortions to use a non sleepable lock.
  Serialize multicast updates instead.
  
  Submitted by:	mmacy <mmacy at mattmacy.io>
  Reviewed by:	shurd, sbruno
  Sponsored by:	Limelight Networks
  Differential Revision:	https://reviews.freebsd.org/D14969

Modified:
  head/sys/kern/subr_gtaskqueue.c
  head/sys/kern/subr_witness.c
  head/sys/net/if.c
  head/sys/netinet/igmp.c
  head/sys/netinet/igmp_var.h
  head/sys/netinet/in.c
  head/sys/netinet/in_mcast.c
  head/sys/netinet/in_pcb.c
  head/sys/netinet/in_var.h
  head/sys/netinet/ip_carp.c
  head/sys/netinet6/in6.c
  head/sys/netinet6/in6_ifattach.c
  head/sys/netinet6/in6_mcast.c
  head/sys/netinet6/in6_pcb.c
  head/sys/netinet6/in6_var.h
  head/sys/netinet6/mld6.c
  head/sys/netinet6/mld6_var.h
  head/sys/sys/gtaskqueue.h

Modified: head/sys/kern/subr_gtaskqueue.c
==============================================================================
--- head/sys/kern/subr_gtaskqueue.c	Wed May  2 17:41:00 2018	(r333174)
+++ head/sys/kern/subr_gtaskqueue.c	Wed May  2 19:36:29 2018	(r333175)
@@ -53,6 +53,7 @@ static void	gtaskqueue_thread_enqueue(void *);
 static void	gtaskqueue_thread_loop(void *arg);
 
 TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
+TASKQGROUP_DEFINE(config, 1, 1);
 
 struct gtaskqueue_busy {
 	struct gtask	*tb_running;
@@ -662,7 +663,7 @@ SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_F
 
 void
 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
-    void *uniq, int irq, char *name)
+    void *uniq, int irq, const char *name)
 {
 	cpuset_t mask;
 	int qid, error;
@@ -976,4 +977,13 @@ void
 taskqgroup_destroy(struct taskqgroup *qgroup)
 {
 
+}
+
+void
+taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
+	const char *name)
+{
+
+	GROUPTASK_INIT(gtask, 0, fn, ctx);
+	taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
 }

Modified: head/sys/kern/subr_witness.c
==============================================================================
--- head/sys/kern/subr_witness.c	Wed May  2 17:41:00 2018	(r333174)
+++ head/sys/kern/subr_witness.c	Wed May  2 19:36:29 2018	(r333175)
@@ -532,18 +532,22 @@ static struct witness_order_list_entry order_lists[] =
 	 * IPv4 multicast:
 	 * protocol locks before interface locks, after UDP locks.
 	 */
+	{ "in_multi_sx", &lock_class_sx },
 	{ "udpinp", &lock_class_rw },
-	{ "in_multi_mtx", &lock_class_mtx_sleep },
+	{ "in_multi_list_mtx", &lock_class_mtx_sleep },
 	{ "igmp_mtx", &lock_class_mtx_sleep },
+	{ "ifnet_rw", &lock_class_rw },
 	{ "if_addr_lock", &lock_class_rw },
 	{ NULL, NULL },
 	/*
 	 * IPv6 multicast:
 	 * protocol locks before interface locks, after UDP locks.
 	 */
+	{ "in6_multi_sx", &lock_class_sx },
 	{ "udpinp", &lock_class_rw },
-	{ "in6_multi_mtx", &lock_class_mtx_sleep },
+	{ "in6_multi_list_mtx", &lock_class_mtx_sleep },
 	{ "mld_mtx", &lock_class_mtx_sleep },
+	{ "ifnet_rw", &lock_class_rw },
 	{ "if_addr_lock", &lock_class_rw },
 	{ NULL, NULL },
 	/*

Modified: head/sys/net/if.c
==============================================================================
--- head/sys/net/if.c	Wed May  2 17:41:00 2018	(r333174)
+++ head/sys/net/if.c	Wed May  2 19:36:29 2018	(r333175)
@@ -985,11 +985,13 @@ static void
 if_purgemaddrs(struct ifnet *ifp)
 {
 	struct ifmultiaddr *ifma;
-	struct ifmultiaddr *next;
 
 	IF_ADDR_WLOCK(ifp);
-	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
+	while (!TAILQ_EMPTY(&ifp->if_multiaddrs)) {
+		ifma = TAILQ_FIRST(&ifp->if_multiaddrs);
+		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
 		if_delmulti_locked(ifp, ifma, 1);
+	}
 	IF_ADDR_WUNLOCK(ifp);
 }
 
@@ -3429,6 +3431,12 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
 	struct sockaddr_dl sdl;
 	int error;
 
+#ifdef INET
+	IN_MULTI_LIST_UNLOCK_ASSERT();
+#endif
+#ifdef INET6
+	IN6_MULTI_LIST_UNLOCK_ASSERT();
+#endif
 	/*
 	 * If the address is already present, return a new reference to it;
 	 * otherwise, allocate storage and set up a new address.
@@ -3610,6 +3618,9 @@ if_delmulti_ifma(struct ifmultiaddr *ifma)
 	struct ifnet *ifp;
 	int lastref;
 
+#ifdef INET
+	IN_MULTI_LIST_UNLOCK_ASSERT();
+#endif
 	ifp = ifma->ifma_ifp;
 #ifdef DIAGNOSTIC
 	if (ifp == NULL) {
@@ -3711,8 +3722,7 @@ if_delmulti_locked(struct ifnet *ifp, struct ifmultiad
 			if_freemulti(ll_ifma);
 		}
 	}
-
-	if (ifp != NULL)
+	if (ifp != NULL && detaching == 0)
 		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
 
 	if_freemulti(ifma);

Modified: head/sys/netinet/igmp.c
==============================================================================
--- head/sys/netinet/igmp.c	Wed May  2 17:41:00 2018	(r333174)
+++ head/sys/netinet/igmp.c	Wed May  2 19:36:29 2018	(r333175)
@@ -136,7 +136,7 @@ static int	igmp_v3_enqueue_group_record(struct mbufq *
 		    struct in_multi *, const int, const int, const int);
 static int	igmp_v3_enqueue_filter_change(struct mbufq *,
 		    struct in_multi *);
-static void	igmp_v3_process_group_timers(struct igmp_ifsoftc *,
+static void	igmp_v3_process_group_timers(struct in_multi_head *,
 		    struct mbufq *, struct mbufq *, struct in_multi *,
 		    const int);
 static int	igmp_v3_merge_state_changes(struct in_multi *,
@@ -162,12 +162,12 @@ static const struct netisr_handler igmp_nh = {
  * themselves are not virtualized.
  *
  * Locking:
- *  * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
+ *  * The permitted lock order is: IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
  *    Any may be taken independently; if any are held at the same
  *    time, the above lock order must be followed.
  *  * All output is delegated to the netisr.
  *    Now that Giant has been eliminated, the netisr may be inlined.
- *  * IN_MULTI_LOCK covers in_multi.
+ *  * IN_MULTI_LIST_LOCK covers in_multi.
  *  * IGMP_LOCK covers igmp_ifsoftc and any global variables in this file,
  *    including the output queue.
  *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
@@ -441,7 +441,7 @@ sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
 	if (error)
 		return (error);
 
-	IN_MULTI_LOCK();
+	IN_MULTI_LIST_LOCK();
 	IGMP_LOCK();
 
 	if (name[0] <= 0 || name[0] > V_if_index) {
@@ -475,7 +475,7 @@ sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
 
 out_locked:
 	IGMP_UNLOCK();
-	IN_MULTI_UNLOCK();
+	IN_MULTI_LIST_UNLOCK();
 	return (error);
 }
 
@@ -586,7 +586,6 @@ igi_alloc_locked(/*const*/ struct ifnet *ifp)
 	igi->igi_qi = IGMP_QI_INIT;
 	igi->igi_qri = IGMP_QRI_INIT;
 	igi->igi_uri = IGMP_URI_INIT;
-	SLIST_INIT(&igi->igi_relinmhead);
 	mbufq_init(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);
 
 	LIST_INSERT_HEAD(&V_igi_head, igi, igi_link);
@@ -612,11 +611,12 @@ igmp_ifdetach(struct ifnet *ifp)
 {
 	struct igmp_ifsoftc	*igi;
 	struct ifmultiaddr	*ifma;
-	struct in_multi		*inm, *tinm;
-
+	struct in_multi		*inm;
+	struct in_multi_head inm_free_tmp;
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp,
 	    ifp->if_xname);
 
+	SLIST_INIT(&inm_free_tmp);
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
@@ -631,24 +631,15 @@ igmp_ifdetach(struct ifnet *ifp)
 			    ("%s: ifma_protospec is NULL", __func__));
 #endif
 			inm = (struct in_multi *)ifma->ifma_protospec;
-			if (inm->inm_state == IGMP_LEAVING_MEMBER) {
-				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
-				    inm, inm_nrele);
-			}
+			if (inm->inm_state == IGMP_LEAVING_MEMBER)
+				inm_rele_locked(&inm_free_tmp, inm);
 			inm_clear_recorded(inm);
 		}
 		IF_ADDR_RUNLOCK(ifp);
-		/*
-		 * Free the in_multi reference(s) for this IGMP lifecycle.
-		 */
-		SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele,
-		    tinm) {
-			SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
-			inm_release_locked(inm);
-		}
+		inm_release_list_deferred(&inm_free_tmp);
 	}
-
 	IGMP_UNLOCK();
+
 }
 
 /*
@@ -684,11 +675,6 @@ igi_delete_locked(const struct ifnet *ifp)
 			mbufq_drain(&igi->igi_gq);
 
 			LIST_REMOVE(igi, igi_link);
-
-			KASSERT(SLIST_EMPTY(&igi->igi_relinmhead),
-			    ("%s: there are dangling in_multi references",
-			    __func__));
-
 			free(igi, M_IGMP);
 			return;
 		}
@@ -722,7 +708,7 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip
 	}
 	IGMPSTAT_INC(igps_rcv_gen_queries);
 
-	IN_MULTI_LOCK();
+	IN_MULTI_LIST_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
@@ -778,7 +764,7 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip
 
 out_locked:
 	IGMP_UNLOCK();
-	IN_MULTI_UNLOCK();
+	IN_MULTI_LIST_UNLOCK();
 
 	return (0);
 }
@@ -816,7 +802,7 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip
 		IGMPSTAT_INC(igps_rcv_group_queries);
 	}
 
-	IN_MULTI_LOCK();
+	IN_MULTI_LIST_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
@@ -872,7 +858,7 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip
 
 out_locked:
 	IGMP_UNLOCK();
-	IN_MULTI_UNLOCK();
+	IN_MULTI_LIST_UNLOCK();
 
 	return (0);
 }
@@ -899,7 +885,7 @@ igmp_v2_update_group(struct in_multi *inm, const int t
 	CTR4(KTR_IGMPV3, "0x%08x: %s/%s timer=%d", __func__,
 	    ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname, timer);
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 
 	switch (inm->inm_state) {
 	case IGMP_NOT_MEMBER:
@@ -1011,7 +997,7 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip
 			IGMPSTAT_INC(igps_rcv_gsr_queries);
 	}
 
-	IN_MULTI_LOCK();
+	IN_MULTI_LIST_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
@@ -1092,7 +1078,7 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip
 
 out_locked:
 	IGMP_UNLOCK();
-	IN_MULTI_UNLOCK();
+	IN_MULTI_LIST_UNLOCK();
 
 	return (0);
 }
@@ -1109,7 +1095,7 @@ igmp_input_v3_group_query(struct in_multi *inm, struct
 	int			 retval;
 	uint16_t		 nsrc;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	retval = 0;
@@ -1246,7 +1232,7 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ stru
 	 * If we are a member of this group, and our membership should be
 	 * reported, stop our group timer and transition to the 'lazy' state.
 	 */
-	IN_MULTI_LOCK();
+	IN_MULTI_LIST_LOCK();
 	inm = inm_lookup(ifp, igmp->igmp_group);
 	if (inm != NULL) {
 		struct igmp_ifsoftc *igi;
@@ -1305,7 +1291,7 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ stru
 	}
 
 out_locked:
-	IN_MULTI_UNLOCK();
+	IN_MULTI_LIST_UNLOCK();
 
 	return (0);
 }
@@ -1373,7 +1359,7 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ stru
 	 * reported, and our group timer is pending or about to be reset,
 	 * stop our group timer by transitioning to the 'lazy' state.
 	 */
-	IN_MULTI_LOCK();
+	IN_MULTI_LIST_LOCK();
 	inm = inm_lookup(ifp, igmp->igmp_group);
 	if (inm != NULL) {
 		struct igmp_ifsoftc *igi;
@@ -1418,7 +1404,7 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ stru
 	}
 
 out_locked:
-	IN_MULTI_UNLOCK();
+	IN_MULTI_LIST_UNLOCK();
 
 	return (0);
 }
@@ -1647,6 +1633,7 @@ igmp_fasttimo_vnet(void)
 	struct igmp_ifsoftc	*igi;
 	struct ifmultiaddr	*ifma;
 	struct in_multi		*inm;
+	struct in_multi_head inm_free_tmp;
 	int			 loop, uri_fasthz;
 
 	loop = 0;
@@ -1662,7 +1649,8 @@ igmp_fasttimo_vnet(void)
 	    !V_state_change_timers_running)
 		return;
 
-	IN_MULTI_LOCK();
+	SLIST_INIT(&inm_free_tmp);
+	IN_MULTI_LIST_LOCK();
 	IGMP_LOCK();
 
 	/*
@@ -1720,7 +1708,7 @@ igmp_fasttimo_vnet(void)
 				    igi->igi_version);
 				break;
 			case IGMP_VERSION_3:
-				igmp_v3_process_group_timers(igi, &qrq,
+				igmp_v3_process_group_timers(&inm_free_tmp, &qrq,
 				    &scq, inm, uri_fasthz);
 				break;
 			}
@@ -1728,8 +1716,6 @@ igmp_fasttimo_vnet(void)
 		IF_ADDR_RUNLOCK(ifp);
 
 		if (igi->igi_version == IGMP_VERSION_3) {
-			struct in_multi		*tinm;
-
 			igmp_dispatch_queue(&qrq, 0, loop);
 			igmp_dispatch_queue(&scq, 0, loop);
 
@@ -1737,18 +1723,13 @@ igmp_fasttimo_vnet(void)
 			 * Free the in_multi reference(s) for this
 			 * IGMP lifecycle.
 			 */
-			SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead,
-			    inm_nrele, tinm) {
-				SLIST_REMOVE_HEAD(&igi->igi_relinmhead,
-				    inm_nrele);
-				inm_release_locked(inm);
-			}
+			inm_release_list_deferred(&inm_free_tmp);
 		}
 	}
 
 out_locked:
 	IGMP_UNLOCK();
-	IN_MULTI_UNLOCK();
+	IN_MULTI_LIST_UNLOCK();
 }
 
 /*
@@ -1760,7 +1741,7 @@ igmp_v1v2_process_group_timer(struct in_multi *inm, co
 {
 	int report_timer_expired;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	if (inm->inm_timer == 0) {
@@ -1802,14 +1783,14 @@ igmp_v1v2_process_group_timer(struct in_multi *inm, co
  * Note: Unlocked read from igi.
  */
 static void
-igmp_v3_process_group_timers(struct igmp_ifsoftc *igi,
+igmp_v3_process_group_timers(struct in_multi_head *inmh,
     struct mbufq *qrq, struct mbufq *scq,
     struct in_multi *inm, const int uri_fasthz)
 {
 	int query_response_timer_expired;
 	int state_change_retransmit_timer_expired;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	query_response_timer_expired = 0;
@@ -1907,8 +1888,7 @@ igmp_v3_process_group_timers(struct igmp_ifsoftc *igi,
 			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
 			    inm->inm_scrv == 0) {
 				inm->inm_state = IGMP_NOT_MEMBER;
-				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
-				    inm, inm_nrele);
+				inm_rele_locked(inmh, inm);
 			}
 		}
 		break;
@@ -1929,7 +1909,7 @@ static void
 igmp_v3_suppress_group_record(struct in_multi *inm)
 {
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 
 	KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3,
 		("%s: not IGMPv3 mode on link", __func__));
@@ -2003,13 +1983,15 @@ igmp_v3_cancel_link_timers(struct igmp_ifsoftc *igi)
 {
 	struct ifmultiaddr	*ifma;
 	struct ifnet		*ifp;
-	struct in_multi		*inm, *tinm;
+	struct in_multi		*inm;
+	struct in_multi_head inm_free_tmp;
 
 	CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__,
 	    igi->igi_ifp, igi->igi_ifp->if_xname);
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
+	SLIST_INIT(&inm_free_tmp);
 
 	/*
 	 * Stop the v3 General Query Response on this link stone dead.
@@ -2050,7 +2032,7 @@ igmp_v3_cancel_link_timers(struct igmp_ifsoftc *igi)
 			 * message is sent upstream to the old querier --
 			 * transition to NOT would lose the leave and race.
 			 */
-			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
+			inm_rele_locked(&inm_free_tmp, inm);
 			/* FALLTHROUGH */
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
@@ -2069,10 +2051,8 @@ igmp_v3_cancel_link_timers(struct igmp_ifsoftc *igi)
 		mbufq_drain(&inm->inm_scq);
 	}
 	IF_ADDR_RUNLOCK(ifp);
-	SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, tinm) {
-		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
-		inm_release_locked(inm);
-	}
+
+	inm_release_list_deferred(&inm_free_tmp);
 }
 
 /*
@@ -2199,7 +2179,7 @@ igmp_v1v2_queue_report(struct in_multi *inm, const int
 	struct ip		*ip;
 	struct mbuf		*m;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	ifp = inm->inm_ifp;
@@ -2276,10 +2256,8 @@ igmp_change_state(struct in_multi *inm)
 	struct ifnet *ifp;
 	int error;
 
-	IN_MULTI_LOCK_ASSERT();
-
 	error = 0;
-
+	IN_MULTI_LOCK_ASSERT();
 	/*
 	 * Try to detect if the upper layer just asked us to change state
 	 * for an interface which has now gone away.
@@ -2379,9 +2357,10 @@ igmp_initial_join(struct in_multi *inm, struct igmp_if
 		 * group around for the final INCLUDE {} enqueue.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3 &&
-		    inm->inm_state == IGMP_LEAVING_MEMBER)
-			inm_release_locked(inm);
-
+		    inm->inm_state == IGMP_LEAVING_MEMBER) {
+			MPASS(inm->inm_refcount > 1);
+			inm_rele_locked(NULL, inm);
+		}
 		inm->inm_state = IGMP_REPORTING_MEMBER;
 
 		switch (igi->igi_version) {
@@ -2473,7 +2452,7 @@ igmp_handle_state_change(struct in_multi *inm, struct 
 
 	ifp = inm->inm_ifp;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
@@ -2531,7 +2510,7 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifs
 	    __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp,
 	    inm->inm_ifp->if_xname);
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	switch (inm->inm_state) {
@@ -2658,7 +2637,7 @@ igmp_v3_enqueue_group_record(struct mbufq *mq, struct 
 	in_addr_t		 naddr;
 	uint8_t			 mode;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 
 	error = 0;
 	ifp = inm->inm_ifp;
@@ -3018,7 +2997,7 @@ igmp_v3_enqueue_filter_change(struct mbufq *mq, struct
 	uint8_t			 mode, now, then;
 	rectype_t		 crt, drt, nrt;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 
 	if (inm->inm_nsrc == 0 ||
 	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
@@ -3221,7 +3200,7 @@ igmp_v3_merge_state_changes(struct in_multi *inm, stru
 	domerge = 0;
 	recslen = 0;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	/*
@@ -3320,7 +3299,7 @@ igmp_v3_dispatch_general_query(struct igmp_ifsoftc *ig
 	struct in_multi		*inm;
 	int			 retval, loop;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	KASSERT(igi->igi_version == IGMP_VERSION_3,
@@ -3632,7 +3611,6 @@ DB_SHOW_COMMAND(igi_list, db_show_igi_list)
 		db_printf("    qi %u\n", igi->igi_qi);
 		db_printf("    qri %u\n", igi->igi_qri);
 		db_printf("    uri %u\n", igi->igi_uri);
-		/* SLIST_HEAD(,in_multi)   igi_relinmhead */
 		/* struct mbufq    igi_gq; */
 		db_printf("\n");
 	}

Modified: head/sys/netinet/igmp_var.h
==============================================================================
--- head/sys/netinet/igmp_var.h	Wed May  2 17:41:00 2018	(r333174)
+++ head/sys/netinet/igmp_var.h	Wed May  2 19:36:29 2018	(r333175)
@@ -214,7 +214,6 @@ struct igmp_ifsoftc {
 	uint32_t igi_qi;	/* IGMPv3 Query Interval (s) */
 	uint32_t igi_qri;	/* IGMPv3 Query Response Interval (s) */
 	uint32_t igi_uri;	/* IGMPv3 Unsolicited Report Interval (s) */
-	SLIST_HEAD(,in_multi)	igi_relinmhead; /* released groups */
 	struct mbufq	igi_gq;		/* general query responses queue */
 };
 

Modified: head/sys/netinet/in.c
==============================================================================
--- head/sys/netinet/in.c	Wed May  2 17:41:00 2018	(r333174)
+++ head/sys/netinet/in.c	Wed May  2 19:36:29 2018	(r333175)
@@ -632,12 +632,10 @@ in_difaddr_ioctl(u_long cmd, caddr_t data, struct ifne
 		struct in_ifinfo *ii;
 
 		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
-		IN_MULTI_LOCK();
 		if (ii->ii_allhosts) {
-			(void)in_leavegroup_locked(ii->ii_allhosts, NULL);
+			(void)in_leavegroup(ii->ii_allhosts, NULL);
 			ii->ii_allhosts = NULL;
 		}
-		IN_MULTI_UNLOCK();
 	}
 
 	IF_ADDR_WLOCK(ifp);
@@ -994,11 +992,12 @@ in_broadcast(struct in_addr in, struct ifnet *ifp)
 void
 in_ifdetach(struct ifnet *ifp)
 {
-
+	IN_MULTI_LOCK();
 	in_pcbpurgeif0(&V_ripcbinfo, ifp);
 	in_pcbpurgeif0(&V_udbinfo, ifp);
 	in_pcbpurgeif0(&V_ulitecbinfo, ifp);
 	in_purgemaddrs(ifp);
+	IN_MULTI_UNLOCK();
 }
 
 /*
@@ -1011,12 +1010,12 @@ in_ifdetach(struct ifnet *ifp)
 static void
 in_purgemaddrs(struct ifnet *ifp)
 {
-	LIST_HEAD(,in_multi) purgeinms;
-	struct in_multi		*inm, *tinm;
+	struct in_multi_head purgeinms;
+	struct in_multi		*inm;
 	struct ifmultiaddr	*ifma;
 
-	LIST_INIT(&purgeinms);
-	IN_MULTI_LOCK();
+	SLIST_INIT(&purgeinms);
+	IN_MULTI_LIST_LOCK();
 
 	/*
 	 * Extract list of in_multi associated with the detaching ifp
@@ -1034,17 +1033,13 @@ in_purgemaddrs(struct ifnet *ifp)
 		    ("%s: ifma_protospec is NULL", __func__));
 #endif
 		inm = (struct in_multi *)ifma->ifma_protospec;
-		LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
+		inm_rele_locked(&purgeinms, inm);
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
-	LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
-		LIST_REMOVE(inm, inm_link);
-		inm_release_locked(inm);
-	}
+	inm_release_list_deferred(&purgeinms);
 	igmp_ifdetach(ifp);
-
-	IN_MULTI_UNLOCK();
+	IN_MULTI_LIST_UNLOCK();
 }
 
 struct in_llentry {

Modified: head/sys/netinet/in_mcast.c
==============================================================================
--- head/sys/netinet/in_mcast.c	Wed May  2 17:41:00 2018	(r333174)
+++ head/sys/netinet/in_mcast.c	Wed May  2 19:36:29 2018	(r333175)
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/ktr.h>
 #include <sys/taskqueue.h>
+#include <sys/gtaskqueue.h>
 #include <sys/tree.h>
 
 #include <net/if.h>
@@ -59,6 +60,8 @@ __FBSDID("$FreeBSD$");
 #include <net/route.h>
 #include <net/vnet.h>
 
+#include <net/ethernet.h>
+
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_fib.h>
@@ -91,18 +94,24 @@ static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
 
 /*
  * Locking:
- * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
+ * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
  * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
  *   it can be taken by code in net/if.c also.
  * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
  *
- * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
+ * struct in_multi is covered by IN_MULTI_LIST_LOCK. There isn't strictly
  * any need for in_multi itself to be virtualized -- it is bound to an ifp
  * anyway no matter what happens.
  */
-struct mtx in_multi_mtx;
-MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
+struct mtx in_multi_list_mtx;
+MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF);
 
+struct mtx in_multi_free_mtx;
+MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF);
+
+struct sx in_multi_sx;
+SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx");
+
 /*
  * Functions with non-static linkage defined in this file should be
  * declared in in_var.h:
@@ -151,6 +160,7 @@ static int	inm_is_ifp_detached(const struct in_multi *
 static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
 static void	inm_purge(struct in_multi *);
 static void	inm_reap(struct in_multi *);
+static void inm_release(struct in_multi *);
 static struct ip_moptions *
 		inp_findmoptions(struct inpcb *);
 static void	inp_freemoptions_internal(struct ip_moptions *);
@@ -216,6 +226,65 @@ inm_is_ifp_detached(const struct in_multi *inm)
 }
 #endif
 
+static struct grouptask free_gtask;
+static struct in_multi_head inm_free_list;
+static void inm_release_task(void *arg __unused);
+static void inm_init(void)
+{
+	SLIST_INIT(&inm_free_list);
+	taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task");
+}
+
+SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST,
+	inm_init, NULL);
+
+
+void
+inm_release_list_deferred(struct in_multi_head *inmh)
+{
+
+	if (SLIST_EMPTY(inmh))
+		return;
+	mtx_lock(&in_multi_free_mtx);
+	SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele);
+	mtx_unlock(&in_multi_free_mtx);
+	GROUPTASK_ENQUEUE(&free_gtask);
+}
+
+void
+inm_release_deferred(struct in_multi *inm)
+{
+	struct in_multi_head tmp;
+
+	IN_MULTI_LIST_LOCK_ASSERT();
+	MPASS(inm->inm_refcount > 0);
+	if (--inm->inm_refcount == 0) {
+		SLIST_INIT(&tmp);
+		inm->inm_ifma->ifma_protospec = NULL;
+		SLIST_INSERT_HEAD(&tmp, inm, inm_nrele);
+		inm_release_list_deferred(&tmp);
+	}
+}
+
+static void
+inm_release_task(void *arg __unused)
+{
+	struct in_multi_head inm_free_tmp;
+	struct in_multi *inm, *tinm;
+
+	SLIST_INIT(&inm_free_tmp);
+	mtx_lock(&in_multi_free_mtx);
+	SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele);
+	mtx_unlock(&in_multi_free_mtx);
+	IN_MULTI_LOCK();
+	SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) {
+		SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele);
+		MPASS(inm);
+		inm_release(inm);
+	}
+	IN_MULTI_UNLOCK();
+}
+
 /*
  * Initialize an in_mfilter structure to a known state at t0, t1
  * with an empty source filter list.
@@ -232,7 +301,7 @@ imf_init(struct in_mfilter *imf, const int st0, const 
 /*
  * Function for looking up an in_multi record for an IPv4 multicast address
  * on a given interface. ifp must be valid. If no record found, return NULL.
- * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held.
+ * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held.
  */
 struct in_multi *
 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
@@ -240,7 +309,7 @@ inm_lookup_locked(struct ifnet *ifp, const struct in_a
 	struct ifmultiaddr *ifma;
 	struct in_multi *inm;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IF_ADDR_LOCK_ASSERT(ifp);
 
 	inm = NULL;
@@ -264,7 +333,7 @@ inm_lookup(struct ifnet *ifp, const struct in_addr ina
 {
 	struct in_multi *inm;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 	IF_ADDR_RLOCK(ifp);
 	inm = inm_lookup_locked(ifp, ina);
 	IF_ADDR_RUNLOCK(ifp);
@@ -451,7 +520,7 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *g
 	IN_MULTI_LOCK_ASSERT();
 
 	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
-
+	IN_MULTI_LIST_LOCK();
 	inm = inm_lookup(ifp, *group);
 	if (inm != NULL) {
 		/*
@@ -460,11 +529,13 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *g
 		 */
 		KASSERT(inm->inm_refcount >= 1,
 		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
-		++inm->inm_refcount;
+		inm_acquire_locked(inm);
 		*pinm = inm;
-		return (0);
 	}
-
+	IN_MULTI_LIST_UNLOCK();
+	if (inm != NULL)
+		return (0);
+	
 	memset(&gsin, 0, sizeof(gsin));
 	gsin.sin_family = AF_INET;
 	gsin.sin_len = sizeof(struct sockaddr_in);
@@ -479,6 +550,7 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *g
 		return (error);
 
 	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
+	IN_MULTI_LIST_LOCK();
 	IF_ADDR_WLOCK(ifp);
 
 	/*
@@ -504,10 +576,9 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *g
 			    __func__, ifma, inm, inet_ntoa_r(*group, addrbuf));
 		}
 #endif
-		++inm->inm_refcount;
+		inm_acquire_locked(inm);
 		*pinm = inm;
-		IF_ADDR_WUNLOCK(ifp);
-		return (0);
+		goto out_locked;
 	}
 
 	IF_ADDR_WLOCK_ASSERT(ifp);
@@ -522,6 +593,7 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *g
 	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
 	if (inm == NULL) {
 		IF_ADDR_WUNLOCK(ifp);
+		IN_MULTI_LIST_UNLOCK();
 		if_delmulti_ifma(ifma);
 		return (ENOMEM);
 	}
@@ -539,8 +611,9 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *g
 	ifma->ifma_protospec = inm;
 
 	*pinm = inm;
-
+ out_locked:
 	IF_ADDR_WUNLOCK(ifp);
+	IN_MULTI_LIST_UNLOCK();
 	return (0);
 }
 
@@ -550,36 +623,29 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *g
  * If the refcount drops to 0, free the in_multi record and
  * delete the underlying link-layer membership.
  */
-void
-inm_release_locked(struct in_multi *inm)
+static void
+inm_release(struct in_multi *inm)
 {
 	struct ifmultiaddr *ifma;
+	struct ifnet *ifp;
 
-	IN_MULTI_LOCK_ASSERT();
-
 	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
-
-	if (--inm->inm_refcount > 0) {
-		CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
-		    inm->inm_refcount);
-		return;
-	}
-
+	MPASS(inm->inm_refcount == 0);
 	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
 
 	ifma = inm->inm_ifma;
+	ifp = inm->inm_ifp;
 
 	/* XXX this access is not covered by IF_ADDR_LOCK */
 	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
-	KASSERT(ifma->ifma_protospec == inm,
-	    ("%s: ifma_protospec != inm", __func__));
-	ifma->ifma_protospec = NULL;
-
+	if (ifp)
+		CURVNET_SET(ifp->if_vnet);
 	inm_purge(inm);
-
 	free(inm, M_IPMADDR);
 
 	if_delmulti_ifma(ifma);
+	if (ifp)
+		CURVNET_RESTORE();
 }
 
 /*
@@ -592,7 +658,7 @@ inm_clear_recorded(struct in_multi *inm)
 {
 	struct ip_msource	*ims;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 
 	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
 		if (ims->ims_stp) {
@@ -632,7 +698,7 @@ inm_record_source(struct in_multi *inm, const in_addr_
 	struct ip_msource	 find;
 	struct ip_msource	*ims, *nims;
 
-	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_LOCK_ASSERT();
 
 	find.ims_haddr = ntohl(naddr);
 	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
@@ -959,6 +1025,7 @@ inm_merge(struct in_multi *inm, /*const*/ struct in_mf
 	schanged = 0;
 	error = 0;
 	nsrc1 = nsrc0 = 0;
+	IN_MULTI_LIST_LOCK_ASSERT();
 
 	/*
 	 * Update the source filters first, as this may fail.
@@ -1165,6 +1232,7 @@ in_joingroup_locked(struct ifnet *ifp, const struct in
 	int			 error;
 
 	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_UNLOCK_ASSERT();
 
 	CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__,
 	    ntohl(gina->s_addr), ifp, ifp->if_xname);
@@ -1186,7 +1254,7 @@ in_joingroup_locked(struct ifnet *ifp, const struct in
 		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
 		return (error);
 	}
-
+	IN_MULTI_LIST_LOCK();
 	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 	error = inm_merge(inm, imf);
 	if (error) {
@@ -1201,10 +1269,12 @@ in_joingroup_locked(struct ifnet *ifp, const struct in
 		goto out_inm_release;
 	}
 
-out_inm_release:
+ out_inm_release:
+	IN_MULTI_LIST_UNLOCK();
 	if (error) {
+
 		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
-		inm_release_locked(inm);
+		inm_release_deferred(inm);
 	} else {
 		*pinm = inm;
 	}
@@ -1249,6 +1319,7 @@ in_leavegroup_locked(struct in_multi *inm, /*const*/ s
 	error = 0;
 
 	IN_MULTI_LOCK_ASSERT();
+	IN_MULTI_LIST_UNLOCK_ASSERT();
 
 	CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__,
 	    inm, ntohl(inm->inm_addr.s_addr),
@@ -1272,18 +1343,20 @@ in_leavegroup_locked(struct in_multi *inm, /*const*/ s
 	 * the transaction, it MUST NOT fail.
 	 */
 	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+	IN_MULTI_LIST_LOCK();
 	error = inm_merge(inm, imf);
 	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
 
 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 	CURVNET_SET(inm->inm_ifp->if_vnet);
 	error = igmp_change_state(inm);
+	inm_release_deferred(inm);
+	IN_MULTI_LIST_UNLOCK();
 	CURVNET_RESTORE();
 	if (error)
 		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
 
 	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
-	inm_release_locked(inm);
 
 	return (error);
 }
@@ -1315,18 +1388,6 @@ in_addmulti(struct in_addr *ap, struct ifnet *ifp)
 }
 
 /*
- * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
- * This KPI is for legacy kernel consumers only.
- */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list