svn commit: r191040 - in user/kmacy/releng_7_net_backport/sys: netinet sys

Kip Macy kmacy at FreeBSD.org
Mon Apr 13 21:43:26 PDT 2009


Author: kmacy
Date: Tue Apr 14 04:43:25 2009
New Revision: 191040
URL: http://svn.freebsd.org/changeset/base/191040

Log:
  update netinet with igmp changes

Modified:
  user/kmacy/releng_7_net_backport/sys/netinet/igmp.c
  user/kmacy/releng_7_net_backport/sys/netinet/igmp.h
  user/kmacy/releng_7_net_backport/sys/netinet/igmp_var.h
  user/kmacy/releng_7_net_backport/sys/netinet/in.c
  user/kmacy/releng_7_net_backport/sys/netinet/in_mcast.c
  user/kmacy/releng_7_net_backport/sys/netinet/in_proto.c
  user/kmacy/releng_7_net_backport/sys/netinet/in_var.h
  user/kmacy/releng_7_net_backport/sys/netinet/ip_input.c
  user/kmacy/releng_7_net_backport/sys/netinet/ip_var.h
  user/kmacy/releng_7_net_backport/sys/netinet/udp_usrreq.c
  user/kmacy/releng_7_net_backport/sys/netinet/udp_var.h
  user/kmacy/releng_7_net_backport/sys/netinet/vinet.h
  user/kmacy/releng_7_net_backport/sys/sys/mbuf.h
  user/kmacy/releng_7_net_backport/sys/sys/tree.h

Modified: user/kmacy/releng_7_net_backport/sys/netinet/igmp.c
==============================================================================
--- user/kmacy/releng_7_net_backport/sys/netinet/igmp.c	Tue Apr 14 04:15:56 2009	(r191039)
+++ user/kmacy/releng_7_net_backport/sys/netinet/igmp.c	Tue Apr 14 04:43:25 2009	(r191040)
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2007-2009 Bruce Simpson.
  * Copyright (c) 1988 Stephen Deering.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -35,11 +36,13 @@
 
 /*
  * Internet Group Management Protocol (IGMP) routines.
+ * [RFC1112, RFC2236, RFC3376]
  *
  * Written by Steve Deering, Stanford, May 1988.
  * Modified by Rosen Sharma, Stanford, Aug 1994.
  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
+ * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
  *
  * MULTICAST Revision: 3.5.1.4
  */
@@ -52,6 +55,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/module.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
@@ -59,8 +63,11 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/vimage.h>
+#include <sys/ktr.h>
+#include <sys/condvar.h>
 
 #include <net/if.h>
+#include <net/netisr.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
@@ -78,464 +85,3626 @@ __FBSDID("$FreeBSD$");
 
 #include <security/mac/mac_framework.h>
 
-static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
+#ifndef KTR_IGMPV3
+#define KTR_IGMPV3 KTR_SUBSYS
+#endif
+
+static struct igmp_ifinfo *
+		igi_alloc_locked(struct ifnet *);
+static void	igi_delete_locked(const struct ifnet *);
+static void	igmp_dispatch_queue(struct ifqueue *, int, const int);
+static void	igmp_fasttimo_vnet(void);
+static void	igmp_final_leave(struct in_multi *, struct igmp_ifinfo *);
+static int	igmp_handle_state_change(struct in_multi *,
+		    struct igmp_ifinfo *);
+static int	igmp_initial_join(struct in_multi *, struct igmp_ifinfo *);
+static int	igmp_input_v1_query(struct ifnet *, const struct ip *);
+static int	igmp_input_v2_query(struct ifnet *, const struct ip *,
+		    const struct igmp *);
+static int	igmp_input_v3_query(struct ifnet *, const struct ip *,
+		    /*const*/ struct igmpv3 *);
+static int	igmp_input_v3_group_query(struct in_multi *,
+		    struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *);
+static int	igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
+		    /*const*/ struct igmp *);
+static int	igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
+		    /*const*/ struct igmp *);
+static void	igmp_intr(struct mbuf *);
+static int	igmp_isgroupreported(const struct in_addr);
+static struct mbuf *
+		igmp_ra_alloc(void);
+#ifdef KTR
+static char *	igmp_rec_type_to_str(const int);
+#endif
+static void	igmp_set_version(struct igmp_ifinfo *, const int);
+static void	igmp_slowtimo_vnet(void);
+static void	igmp_sysinit(void);
+static int	igmp_v1v2_queue_report(struct in_multi *, const int);
+static void	igmp_v1v2_process_group_timer(struct in_multi *, const int);
+static void	igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
+static void	igmp_v2_update_group(struct in_multi *, const int);
+static void	igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
+static void	igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
+static struct mbuf *
+		igmp_v3_encap_report(struct ifnet *, struct mbuf *);
+static int	igmp_v3_enqueue_group_record(struct ifqueue *,
+		    struct in_multi *, const int, const int, const int);
+static int	igmp_v3_enqueue_filter_change(struct ifqueue *,
+		    struct in_multi *);
+static void	igmp_v3_process_group_timers(struct igmp_ifinfo *,
+		    struct ifqueue *, struct ifqueue *, struct in_multi *,
+		    const int);
+static int	igmp_v3_merge_state_changes(struct in_multi *,
+		    struct ifqueue *);
+static void	igmp_v3_suppress_group_record(struct in_multi *);
+static int	sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS);
+static int	sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS);
+static int	sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS);
+
+#ifdef VIMAGE
+static vnet_attach_fn	vnet_igmp_iattach;
+static vnet_detach_fn	vnet_igmp_idetach;
+#else
+static int	vnet_igmp_iattach(const void *);
+static int	vnet_igmp_idetach(const void *);
+#endif /* VIMAGE */
+
+/*
+ * System-wide globals.
+ *
+ * Unlocked access to these is OK, except for the global IGMP output
+ * queue. The IGMP subsystem lock ends up being system-wide for the moment,
+ * because all VIMAGEs have to share a global output queue, as netisrs
+ * themselves are not virtualized.
+ *
+ * Locking:
+ *  * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
+ *    Any may be taken independently; if any are held at the same
+ *    time, the above lock order must be followed.
+ *  * All output is delegated to the netisr to handle IFF_NEEDSGIANT.
+ *    Most of the time, direct dispatch will be fine.
+ *  * IN_MULTI_LOCK covers in_multi.
+ *  * IGMP_LOCK covers igmp_ifinfo and any global variables in this file,
+ *    including the output queue.
+ *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
+ *    per-link state iterators.
+ *  * igmp_ifinfo is valid as long as PF_INET is attached to the interface,
+ *    therefore it is not refcounted.
+ *    We allow unlocked reads of igmp_ifinfo when accessed via in_multi.
+ *
+ * Reference counting
+ *  * IGMP acquires its own reference every time an in_multi is passed to
+ *    it and the group is being joined for the first time.
+ *  * IGMP releases its reference(s) on in_multi in a deferred way,
+ *    because the operations which process the release run as part of
+ *    a loop whose control variables are directly affected by the release
+ *    (that, and not recursing on the IF_ADDR_LOCK).
+ *
+ * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds
+ * to a vnet in ifp->if_vnet.
+ *
+ * SMPng: XXX We may potentially race operations on ifma_protospec.
+ * The problem is that we currently lack a clean way of taking the
+ * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing,
+ * as anything which modifies ifma needs to be covered by that lock.
+ * So check for ifma_protospec being NULL before proceeding.
+ */
+struct mtx		 igmp_mtx;
+int			 mpsafe_igmp = 0;
+SYSCTL_INT(_debug, OID_AUTO, mpsafe_igmp, CTLFLAG_RDTUN, &mpsafe_igmp, 0,
+    "Enable SMP-safe IGMPv3");
+
+struct mbuf		*m_raopt;		 /* Router Alert option */
+MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
+
+/*
+ * Global netisr output queue.
+ * This is only used as a last resort if we cannot directly dispatch.
+ * As IN_MULTI_LOCK is no longer in the bottom half of IP, we can do
+ * this, providing mpsafe_igmp is set. If it is not, we take Giant,
+ * and queueing is forced.
+ */
+struct ifqueue		 igmpoq;
+
+/*
+ * VIMAGE-wide globals.
+ *
+ * The IGMPv3 timers themselves need to run per-image, however,
+ * protosw timers run globally (see tcp).
+ * An ifnet can only be in one vimage at a time, and the loopback
+ * ifnet, loif, is itself virtualized.
+ * It would otherwise be possible to seriously hose IGMP state,
+ * and create inconsistencies in upstream multicast routing, if you have
+ * multiple VIMAGEs running on the same link joining different multicast
+ * groups, UNLESS the "primary IP address" is different. This is because
+ * IGMP for IPv4 does not force link-local addresses to be used for each
+ * node, unlike MLD for IPv6.
+ * Obviously the IGMPv3 per-interface state has per-vimage granularity
+ * also as a result.
+ *
+ * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
+ * policy to control the address used by IGMP on the link.
+ */
+#ifdef VIMAGE_GLOBALS
+int	 interface_timers_running;	 /* IGMPv3 general query response */
+int	 state_change_timers_running;	 /* IGMPv3 state-change retransmit */
+int	 current_state_timers_running;	 /* IGMPv1/v2 host report;
+					  * IGMPv3 g/sg query response */
+
+LIST_HEAD(, igmp_ifinfo)	 igi_head;
+struct igmpstat			 igmpstat;
+struct timeval			 igmp_gsrdelay;
+
+int	 igmp_recvifkludge;
+int	 igmp_sendra;
+int	 igmp_sendlocal;
+int	 igmp_v1enable;
+int	 igmp_v2enable;
+int	 igmp_legacysupp;
+int	 igmp_default_version;
+#endif /* VIMAGE_GLOBALS */
+
+/*
+ * Virtualized sysctls.
+ */
+SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_igmp, IGMPCTL_STATS, stats,
+    CTLFLAG_RW, igmpstat, igmpstat, "");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, recvifkludge,
+    CTLFLAG_RW, igmp_recvifkludge, 0,
+    "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendra,
+    CTLFLAG_RW, igmp_sendra, 0,
+    "Send IP Router Alert option in IGMPv2/v3 messages");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendlocal,
+    CTLFLAG_RW, igmp_sendlocal, 0,
+    "Send IGMP membership reports for 224.0.0.0/24 groups");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, v1enable,
+    CTLFLAG_RW, igmp_v1enable, 0,
+    "Enable backwards compatibility with IGMPv1");
+SYSCTL_V_INT(V_NET, vnet_inet,  _net_inet_igmp, OID_AUTO, v2enable,
+    CTLFLAG_RW, igmp_v2enable, 0,
+    "Enable backwards compatibility with IGMPv2");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, legacysupp,
+    CTLFLAG_RW, igmp_legacysupp, 0,
+    "Allow v1/v2 reports to suppress v3 group responses");
+SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, default_version,
+    CTLTYPE_INT | CTLFLAG_RW , igmp_default_version, 0,
+    sysctl_igmp_default_version, "I",
+    "Default version of IGMP to run on each interface");
+SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, gsrdelay,
+    CTLTYPE_INT | CTLFLAG_RW , igmp_gsrdelay.tv_sec, 0,
+    sysctl_igmp_gsr, "I",
+    "Rate limit for IGMPv3 Group-and-Source queries in seconds");
+
+/*
+ * Non-virtualized sysctls.
+ */
+SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD,
+    sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
+
+/*
+ * Record transmit context in an mbuf packet header so it can be
+ * recovered later: the interface's vnet goes into m_pkthdr.header
+ * (VIMAGE kernels only) and the interface index into m_pkthdr.flowid.
+ */
+static __inline void
+igmp_save_context(struct mbuf *m, struct ifnet *ifp)
+{
+
+#ifdef VIMAGE
+	m->m_pkthdr.header = ifp->if_vnet;
+#endif /* VIMAGE */
+	m->m_pkthdr.flowid = ifp->if_index;
+}
+
+/*
+ * Clear the two packet header fields (header pointer and flowid) that
+ * this file uses to carry saved context.
+ */
+static __inline void
+igmp_scrub_context(struct mbuf *m)
+{
+
+	m->m_pkthdr.header = NULL;
+	m->m_pkthdr.flowid = 0;
+}
+
+#ifdef KTR
+/*
+ * Tracing helper, only compiled when KTR is defined.
+ * Takes an IPv4 address in host byte order, converts it to network
+ * byte order and returns the string produced by inet_ntoa().
+ * NOTE(review): the lifetime of the returned string is whatever
+ * inet_ntoa() provides -- confirm before holding on to the pointer.
+ */
+static __inline char *
+inet_ntoa_haddr(in_addr_t haddr)
+{
+	struct in_addr ia;
+
+	ia.s_addr = htonl(haddr);
+	return (inet_ntoa(ia));
+}
+#endif
+
+/*
+ * Restore context from a queued IGMP output chain.
+ * Return saved ifindex.
+ *
+ * VIMAGE: The assertion is there to make sure that we
+ * actually called CURVNET_SET() with what's in the mbuf chain.
+ */
+static __inline uint32_t
+igmp_restore_context(struct mbuf *m)
+{
+
+	/*
+	 * The curvnet sanity check is disabled for now: it is only
+	 * compiled when "notyet" is defined, and then only for
+	 * VIMAGE + INVARIANTS kernels.
+	 */
+#ifdef notyet
+#if defined(VIMAGE) && defined(INVARIANTS)
+	KASSERT(curvnet == (m->m_pkthdr.header),
+	    ("%s: called when curvnet was not restored", __func__));
+#endif
+#endif
+	/* The value is read back from the packet header's flowid field. */
+	return (m->m_pkthdr.flowid);
+}
+
+/*
+ * Retrieve or set default IGMP version.
+ *
+ * VIMAGE: Assume curvnet set by caller.
+ * SMPng: NOTE: Serialized by IGMP lock.
+ */
+static int
+sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS)
+{
+	int	 error;
+	int	 version;
+
+	/* Wire the caller's output buffer before taking the IGMP lock. */
+	error = sysctl_wire_old_buffer(req, sizeof(int));
+	if (error != 0)
+		return (error);
+
+	IGMP_LOCK();
+
+	/* Work on a local copy; the global is only updated once validated. */
+	version = V_igmp_default_version;
+	error = sysctl_handle_int(oidp, &version, 0, req);
+
+	/* Nothing more to do unless this was a successful write request. */
+	if (error == 0 && req->newptr != NULL) {
+		if (version >= IGMP_VERSION_1 && version <= IGMP_VERSION_3) {
+			CTR2(KTR_IGMPV3,
+			    "change igmp_default_version from %d to %d",
+			    V_igmp_default_version, version);
+			V_igmp_default_version = version;
+		} else {
+			/* Reject anything outside IGMPv1..IGMPv3. */
+			error = EINVAL;
+		}
+	}
+
+	IGMP_UNLOCK();
+	return (error);
+}
+
+/*
+ * Retrieve or set threshold between group-source queries in seconds.
+ *
+ * VIMAGE: Assume curvnet set by caller.
+ * SMPng: NOTE: Serialized by IGMP lock.
+ */
+static int
+sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	int delay;
+
+	/* Wire the caller's output buffer before taking the IGMP lock. */
+	error = sysctl_wire_old_buffer(req, sizeof(int));
+	if (error != 0)
+		return (error);
+
+	IGMP_LOCK();
+
+	/* Expose the seconds field through a plain int copy. */
+	delay = V_igmp_gsrdelay.tv_sec;
+	error = sysctl_handle_int(oidp, &delay, 0, req);
+
+	/* Nothing more to do unless this was a successful write request. */
+	if (error == 0 && req->newptr != NULL) {
+		if (delay >= -1 && delay < 60) {
+			CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d",
+			    V_igmp_gsrdelay.tv_sec, delay);
+			V_igmp_gsrdelay.tv_sec = delay;
+		} else {
+			/* Accepted range is -1 .. 59 seconds inclusive. */
+			error = EINVAL;
+		}
+	}
+
+	IGMP_UNLOCK();
+	return (error);
+}
+
+/*
+ * Expose struct igmp_ifinfo to userland, keyed by ifindex.
+ * For use by ifmcstat(8).
+ *
+ * SMPng: NOTE: Does an unlocked ifindex space read.
+ * VIMAGE: Assume curvnet set by caller. The node handler itself
+ * is not directly virtualized.
+ */
+static int
+sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
+{
+	INIT_VNET_NET(curvnet);
+	int			*name;
+	int			 error;
+	u_int			 namelen;
+	struct ifnet		*ifp;
+	struct igmp_ifinfo	*igi;
+
+	name = (int *)arg1;
+	namelen = arg2;
+
+	/* This node is read-only. */
+	if (req->newptr != NULL)
+		return (EPERM);
+
+	/* Exactly one name component is expected: the interface index. */
+	if (namelen != 1)
+		return (EINVAL);
+
+	error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo));
+	if (error != 0)
+		return (error);
+
+	IN_MULTI_LOCK();
+	IGMP_LOCK();
+
+	/* ENOENT unless a matching igmp_ifinfo is found and copied out. */
+	error = ENOENT;
+	if (name[0] > 0 && name[0] <= V_if_index) {
+		ifp = ifnet_byindex(name[0]);
+		if (ifp != NULL) {
+			LIST_FOREACH(igi, &V_igi_head, igi_link) {
+				if (igi->igi_ifp != ifp)
+					continue;
+				error = SYSCTL_OUT(req, igi,
+				    sizeof(struct igmp_ifinfo));
+				break;
+			}
+		}
+	}
+
+	IGMP_UNLOCK();
+	IN_MULTI_UNLOCK();
+	return (error);
+}
+
+/*
+ * Dispatch an entire queue of pending packet chains
+ * using the netisr.
+ * VIMAGE: Assumes the vnet pointer has been set.
+ */
+static void
+igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop)
+{
+	struct mbuf *m;
+
+	/*
+	 * Drain the queue one packet chain at a time.
+	 * "limit" is decremented after each dispatch and the loop stops
+	 * when it reaches zero, so a positive limit caps the number of
+	 * chains handed to the netisr; a limit of zero never hits the
+	 * stop condition and the queue is drained completely.
+	 */
+	for (;;) {
+		_IF_DEQUEUE(ifq, m);
+		if (m == NULL)
+			break;
+		/* Trace arguments follow the format: packet first, then queue. */
+		CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, m, ifq);
+		/* Tag the chain when the caller asked for loopback. */
+		if (loop)
+			m->m_flags |= M_IGMP_LOOP;
+		netisr_dispatch(NETISR_IGMP, m);
+		if (--limit == 0)
+			break;
+	}
+}
+
+/*
+ * Filter outgoing IGMP report state by group.
+ *
+ * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
+ * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
+ * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
+ * this may break certain IGMP snooping switches which rely on the old
+ * report behaviour.
+ *
+ * Return zero if the given group is one for which IGMP reports
+ * should be suppressed, or non-zero if reports should be issued.
+ */
+static __inline int
+igmp_isgroupreported(const struct in_addr addr)
+{
+
+	/* The all-hosts group is never reported. */
+	if (in_allhosts(addr))
+		return (0);
+
+	/* Local-scope groups are reported only when sendlocal is enabled. */
+	if (!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Construct a Router Alert option to use in outgoing packets.
+ */
+static struct mbuf *
+igmp_ra_alloc(void)
+{
+	struct mbuf	*m;
+	struct ipoption	*p;
+
+	/*
+	 * M_DONTWAIT allocations can fail; hand the failure back to the
+	 * caller as NULL instead of dereferencing a NULL mbuf below.
+	 */
+	MGET(m, M_DONTWAIT, MT_DATA);
+	if (m == NULL)
+		return (NULL);
+
+	/*
+	 * Lay out a struct ipoption in the mbuf data area: an unspecified
+	 * first-hop destination followed by the 4-byte option list.
+	 */
+	p = mtod(m, struct ipoption *);
+	p->ipopt_dst.s_addr = INADDR_ANY;
+	p->ipopt_list[0] = IPOPT_RA;	/* Router Alert Option */
+	p->ipopt_list[1] = 0x04;	/* 4 bytes long */
+	p->ipopt_list[2] = IPOPT_EOL;	/* End of IP option list */
+	p->ipopt_list[3] = 0x00;	/* pad byte */
+	/* Length covers the destination field plus the option bytes. */
+	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
+
+	return (m);
+}
+
+/*
+ * Attach IGMP when PF_INET is attached to an interface.
+ *
+ * VIMAGE: Currently we set the vnet pointer, although it is
+ * likely that it was already set by our caller.
+ */
+struct igmp_ifinfo *
+igmp_domifattach(struct ifnet *ifp)
+{
+	struct igmp_ifinfo *igi;
+
+	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
+	    __func__, ifp, ifp->if_xname);
+
+	CURVNET_SET(ifp->if_vnet);
+	IGMP_LOCK();
+
+	/*
+	 * igi_alloc_locked() allocates with M_NOWAIT and returns NULL on
+	 * failure, so only touch the new record if we actually got one.
+	 * Interfaces without IFF_MULTICAST are marked silent.
+	 */
+	igi = igi_alloc_locked(ifp);
+	if (igi != NULL && !(ifp->if_flags & IFF_MULTICAST))
+		igi->igi_flags |= IGIF_SILENT;
+
+	IGMP_UNLOCK();
+	CURVNET_RESTORE();
+
+	/* NULL is returned if the allocation failed. */
+	return (igi);
+}
+
+/*
+ * VIMAGE: assume curvnet set by caller.
+ */
+static struct igmp_ifinfo *
+igi_alloc_locked(/*const*/ struct ifnet *ifp)
+{
+	struct igmp_ifinfo *newigi;
+
+	IGMP_LOCK_ASSERT();
+
+	/* Non-sleeping, zero-filled allocation; the caller sees NULL on failure. */
+	newigi = malloc(sizeof(struct igmp_ifinfo), M_IGMP, M_NOWAIT|M_ZERO);
+	if (newigi == NULL)
+		return (NULL);
+
+	/* Bind the record to its interface and load the protocol defaults. */
+	newigi->igi_ifp = ifp;
+	newigi->igi_version = V_igmp_default_version;
+	newigi->igi_flags = 0;
+	newigi->igi_rv = IGMP_RV_INIT;
+	newigi->igi_qi = IGMP_QI_INIT;
+	newigi->igi_qri = IGMP_QRI_INIT;
+	newigi->igi_uri = IGMP_URI_INIT;
+
+	/* Start with an empty list of in_multi entries pending release. */
+	SLIST_INIT(&newigi->igi_relinmhead);
+
+	/* Bound the queue of general query responses. */
+	IFQ_SET_MAXLEN(&newigi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);
+
+	/* Publish the record on the per-vnet list. */
+	LIST_INSERT_HEAD(&V_igi_head, newigi, igi_link);
+
+	CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)",
+	     ifp, ifp->if_xname);
+
+	return (newigi);
+}
+
+/*
+ * Hook for ifdetach.
+ *
+ * NOTE: Some finalization tasks need to run before the protocol domain
+ * is detached, but also before the link layer does its cleanup.
+ *
+ * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK().
+ * XXX This is also bitten by unlocked ifma_protospec access.
+ *
+ * VIMAGE: curvnet should have been set by caller, but let's not assume
+ * that for now.
+ */
+void
+igmp_ifdetach(struct ifnet *ifp)
+{
+	struct igmp_ifinfo	*igi;
+	struct ifmultiaddr	*ifma;
+	struct in_multi		*inm, *tinm;
+
+	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp,
+	    ifp->if_xname);
+
+	CURVNET_SET(ifp->if_vnet);
+
+	IGMP_LOCK();
+
+	/* Per-link IGMP state hangs off the interface's AF_INET afdata. */
+	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+	if (igi->igi_version == IGMP_VERSION_3) {
+		/*
+		 * Pass 1, under IF_ADDR_LOCK: walk the interface's IPv4
+		 * multicast memberships, queue every group that is in the
+		 * LEAVING state on the deferred-release list, and clear the
+		 * recorded sources of each group.
+		 * Entries with a NULL ifma_protospec are skipped.
+		 */
+		IF_ADDR_LOCK(ifp);
+		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+			if (ifma->ifma_addr->sa_family != AF_INET ||
+			    ifma->ifma_protospec == NULL)
+				continue;
+#if 0
+			KASSERT(ifma->ifma_protospec != NULL,
+			    ("%s: ifma_protospec is NULL", __func__));
+#endif
+			inm = (struct in_multi *)ifma->ifma_protospec;
+			if (inm->inm_state == IGMP_LEAVING_MEMBER) {
+				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
+				    inm, inm_nrele);
+			}
+			inm_clear_recorded(inm);
+		}
+		IF_ADDR_UNLOCK(ifp);
+		/*
+		 * Free the in_multi reference(s) for this IGMP lifecycle.
+		 * Pass 2 runs after IF_ADDR_LOCK has been dropped; each
+		 * entry is unlinked from the head before it is released.
+		 */
+		SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele,
+		    tinm) {
+			SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
+			inm_release_locked(inm);
+		}
+	}
+
+	IGMP_UNLOCK();
+
+#ifdef VIMAGE
+	/*
+	 * Plug the potential race which may occur when a VIMAGE
+	 * is detached and we are forced to queue pending IGMP output for
+	 * output netisr processing due to !mpsafe_igmp. In this case it
+	 * is possible that igmp_intr() is about to see mbuf chains with
+	 * invalid cached curvnet pointers.
+	 * This is a rare condition, so just blow them all away.
+	 * FUTURE: This may in fact not be needed, because IFF_NEEDSGIANT
+	 * is being removed in 8.x and the netisr may then be eliminated;
+	 * it is needed only if VIMAGE and IFF_NEEDSGIANT need to co-exist
+	 */
+	if (!mpsafe_igmp) {
+		int drops;
+
+		/* Snapshot the queue length, then drain, under the queue lock. */
+		IF_LOCK(&igmpoq);
+		drops = igmpoq.ifq_len;
+		_IF_DRAIN(&igmpoq);
+		IF_UNLOCK(&igmpoq);
+		/* Only report the drop count on verbose boots. */
+		if (bootverbose && drops) {
+			printf("%s: dropped %d pending IGMP output packets\n",
+			    __func__, drops);
+		}
+	}
+#endif /* VIMAGE */
+
+	CURVNET_RESTORE();
+}
+
+/*
+ * Hook for domifdetach.
+ *
+ * VIMAGE: curvnet should have been set by caller, but let's not assume
+ * that for now.
+ */
+void
+igmp_domifdetach(struct ifnet *ifp)
+{
+
+	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
+	    __func__, ifp, ifp->if_xname);
+
+	CURVNET_SET(ifp->if_vnet);
+	IGMP_LOCK();
+
+	/*
+	 * igi_delete_locked() finds the igmp_ifinfo by scanning the
+	 * per-vnet list for this ifp, so there is no need to fetch it
+	 * through if_afdata[] here.
+	 */
+	igi_delete_locked(ifp);
+
+	IGMP_UNLOCK();
+	CURVNET_RESTORE();
+}
+
+static void
+igi_delete_locked(const struct ifnet *ifp)
+{
+	struct igmp_ifinfo *cur, *next;
+
+	CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)",
+	    __func__, ifp, ifp->if_xname);
+
+	IGMP_LOCK_ASSERT();
+
+	/* Find the record bound to this interface and tear it down. */
+	LIST_FOREACH_SAFE(cur, &V_igi_head, igi_link, next) {
+		if (cur->igi_ifp != ifp)
+			continue;
+
+		/* Discard any deferred General Query responses. */
+		_IF_DRAIN(&cur->igi_gq);
+
+		LIST_REMOVE(cur, igi_link);
+
+		/* All deferred in_multi releases must already have run. */
+		KASSERT(SLIST_EMPTY(&cur->igi_relinmhead),
+		    ("%s: there are dangling in_multi references",
+		    __func__));
+
+		free(cur, M_IGMP);
+		return;
+	}
+
+	/* Reaching this point means no record matched the interface. */
+#ifdef INVARIANTS
+	panic("%s: igmp_ifinfo not found for ifp %p\n", __func__, ifp);
+#endif
+}
+
+/*
+ * Process a received IGMPv1 query.
+ * Return non-zero if the message should be dropped.
+ *
+ * VIMAGE: The curvnet pointer is derived from the input ifp.
+ */
+static int
+igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip)
+{
+	INIT_VNET_INET(ifp->if_vnet);
+	struct ifmultiaddr	*ifma;
+	struct igmp_ifinfo	*igi;
+	struct in_multi		*inm;
+
+	/*
+	 * IGMPv1 General Queries SHOULD always be addressed to 224.0.0.1.
+	 * igmp_group is always ignored. Do not drop the message, as a
+	 * userland daemon may wish to see it; just count it as a bad
+	 * query and return without touching any group state.
+	 */
+	if (!in_allhosts(ip->ip_dst)) {
+		IGMPSTAT_INC(igps_rcv_badqueries);
+		return (0);
+	}
+
+	IGMPSTAT_INC(igps_rcv_gen_queries);
+
+	/*
+	 * Switch to IGMPv1 host compatibility mode.
+	 * Locks are taken in the order IN_MULTI, IGMP, then IF_ADDR below.
+	 */
+	IN_MULTI_LOCK();
+	IGMP_LOCK();
+
+	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+
+	/* Queries are ignored on links flagged IGIF_LOOPBACK. */
+	if (igi->igi_flags & IGIF_LOOPBACK) {
+		CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)",
+		    ifp, ifp->if_xname);
+		goto out_locked;
+	}
+
+	igmp_set_version(igi, IGMP_VERSION_1);
+
+	CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname);
+
+	/*
+	 * Start the timers in all of our group records
+	 * for the interface on which the query arrived,
+	 * except those which are already running.
+	 */
+	IF_ADDR_LOCK(ifp);
+	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+		/* Skip non-IPv4 entries and entries with no in_multi attached. */
+		if (ifma->ifma_addr->sa_family != AF_INET ||
+		    ifma->ifma_protospec == NULL)
+			continue;
+		inm = (struct in_multi *)ifma->ifma_protospec;
+		/* A non-zero timer is already running; leave it alone. */
+		if (inm->inm_timer != 0)
+			continue;
+		switch (inm->inm_state) {
+		case IGMP_NOT_MEMBER:
+		case IGMP_SILENT_MEMBER:
+			/* These states do not report. */
+			break;
+		case IGMP_G_QUERY_PENDING_MEMBER:
+		case IGMP_SG_QUERY_PENDING_MEMBER:
+		case IGMP_REPORTING_MEMBER:
+		case IGMP_IDLE_MEMBER:
+		case IGMP_LAZY_MEMBER:
+		case IGMP_SLEEPING_MEMBER:
+		case IGMP_AWAKENING_MEMBER:
+			/*
+			 * Move to REPORTING, arm the group timer via
+			 * IGMP_RANDOM_DELAY() and flag that current-state
+			 * timers are running.
+			 */
+			inm->inm_state = IGMP_REPORTING_MEMBER;
+			inm->inm_timer = IGMP_RANDOM_DELAY(
+			    IGMP_V1V2_MAX_RI * PR_FASTHZ);
+			V_current_state_timers_running = 1;
+			break;
+		case IGMP_LEAVING_MEMBER:
+			/* Groups being left are not touched. */
+			break;
+		}
+	}
+	IF_ADDR_UNLOCK(ifp);
+
+out_locked:
+	IGMP_UNLOCK();
+	IN_MULTI_UNLOCK();
+
+	/* Always 0: a v1 query is never dropped by this routine. */
+	return (0);
+}
+
+/*
+ * Process a received IGMPv2 general or group-specific query.
+ */
+static int
+igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
+    const struct igmp *igmp)
+{
+	struct ifmultiaddr	*ifma;
+	struct igmp_ifinfo	*igi;
+	struct in_multi		*inm;
+	uint16_t		 timer;
+
+	/*
+	 * Look up the IGMP link info for this interface (it is asserted
+	 * to exist; nothing is allocated here) and switch to IGMPv2
+	 * host compatibility mode.
+	 */
+	IN_MULTI_LOCK();
+	IGMP_LOCK();
+
+	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+
+	/* Queries are ignored on links flagged IGIF_LOOPBACK. */
+	if (igi->igi_flags & IGIF_LOOPBACK) {
+		CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)",
+		    ifp, ifp->if_xname);
+		goto out_locked;
+	}
+
+	igmp_set_version(igi, IGMP_VERSION_2);
+
+	/*
+	 * Rescale the query's igmp_code field from IGMP_TIMER_SCALE units
+	 * to PR_FASTHZ ticks, and never let the result be zero.
+	 */
+	timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
+	if (timer == 0)
+		timer = 1;
+
+	if (!in_nullhost(igmp->igmp_group)) {
+		/*
+		 * IGMPv2 Group-Specific Query.
+		 * If this is a group-specific IGMPv2 query, we need only
+		 * look up the single group to process it.
+		 * The query is counted even if we are not a member.
+		 */
+		inm = inm_lookup(ifp, igmp->igmp_group);
+		if (inm != NULL) {
+			CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)",
+			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
+			igmp_v2_update_group(inm, timer);
+		}
+		IGMPSTAT_INC(igps_rcv_group_queries);
+	} else {
+		/*
+		 * IGMPv2 General Query.
+		 * If this was not sent to the all-hosts group, ignore it
+		 * (it is still counted as a general query below).
+		 */
+		if (in_allhosts(ip->ip_dst)) {
+			/*
+			 * For each reporting group joined on this
+			 * interface, kick the report timer.
+			 */
+			CTR2(KTR_IGMPV3,
+			    "process v2 general query on ifp %p(%s)",
+			    ifp, ifp->if_xname);
+
+			IF_ADDR_LOCK(ifp);
+			TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+				/* Skip non-IPv4 and unattached entries. */
+				if (ifma->ifma_addr->sa_family != AF_INET ||
+				    ifma->ifma_protospec == NULL)
+					continue;
+				inm = (struct in_multi *)ifma->ifma_protospec;
+				igmp_v2_update_group(inm, timer);
+			}
+			IF_ADDR_UNLOCK(ifp);
+		}
+		IGMPSTAT_INC(igps_rcv_gen_queries);
+	}
+
+out_locked:
+	IGMP_UNLOCK();
+	IN_MULTI_UNLOCK();
+
+	/* Always 0: this routine never asks for the message to be dropped. */
+	return (0);
+}
+
+/*
+ * Update the report timer on a group in response to an IGMPv2 query.
+ *
+ * If we are becoming the reporting member for this group, start the timer.
+ * If we already are the reporting member for this group, and timer is
+ * below the threshold, reset it.
+ *
+ * We may be updating the group for the first time since we switched
+ * to IGMPv3. If we are, then we must clear any recorded source lists,
+ * and transition to REPORTING state; the group timer is overloaded
+ * for group and group-source query responses. 
+ *
+ * Unlike IGMPv3, the delay per group should be jittered
+ * to avoid bursts of IGMPv2 reports.
+ */
+static void
+igmp_v2_update_group(struct in_multi *inm, const int timer)
+{
+
+	CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__,
+	    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer);
+
+	/* The caller must hold the IN_MULTI lock. */
+	IN_MULTI_LOCK_ASSERT();
+
+	switch (inm->inm_state) {
+	case IGMP_NOT_MEMBER:
+	case IGMP_SILENT_MEMBER:
+		/* These states do not report. */
+		break;
+	case IGMP_REPORTING_MEMBER:
+		/*
+		 * A report timer is already running and will expire no
+		 * later than the newly requested deadline: keep it.
+		 * Otherwise fall through and re-arm the timer.
+		 */
+		if (inm->inm_timer != 0 &&
+		    inm->inm_timer <= timer) {
+			CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, "
+			    "skipping.", __func__);
+			break;
+		}
+		/* FALLTHROUGH */
+	case IGMP_SG_QUERY_PENDING_MEMBER:
+	case IGMP_G_QUERY_PENDING_MEMBER:
+	case IGMP_IDLE_MEMBER:
+	case IGMP_LAZY_MEMBER:
+	case IGMP_AWAKENING_MEMBER:
+		/*
+		 * Become the reporting member: set the group timer from
+		 * IGMP_RANDOM_DELAY(timer) and flag that current-state
+		 * timers are running.
+		 */
+		CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__);
+		inm->inm_state = IGMP_REPORTING_MEMBER;
+		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
+		V_current_state_timers_running = 1;
+		break;
+	case IGMP_SLEEPING_MEMBER:
+		/* Only the state changes here; no timer is armed. */
+		CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__);
+		inm->inm_state = IGMP_AWAKENING_MEMBER;
+		break;
+	case IGMP_LEAVING_MEMBER:
+		/* Groups being left are not touched. */
+		break;
+	}
+}
+
+/*
+ * Process a received IGMPv3 general, group-specific or
+ * group-and-source-specific query.
+ * Assumes m has already been pulled up to the full IGMP message length.
+ * Return 0 if successful, otherwise an appropriate error code is returned.
+ */
+static int
+igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
+    /*const*/ struct igmpv3 *igmpv3)
+{
+	struct igmp_ifinfo	*igi;
+	struct in_multi		*inm;
+	uint32_t		 maxresp, nsrc, qqi;
+	uint16_t		 timer;
+	uint8_t			 qrv;
+
+	CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname);
+
+	maxresp = igmpv3->igmp_code;	/* in 1/10ths of a second */
+	if (maxresp >= 128) {
+		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
+			  (IGMP_EXP(igmpv3->igmp_code) + 3);
+	}
+
+	/*
+	 * Robustness must never be less than 2 for on-wire IGMPv3.
+	 * FIXME: Check if ifp has IGIF_LOOPBACK set, as we make
+	 * an exception for interfaces whose IGMPv3 state changes
+	 * are redirected to loopback (e.g. MANET).
+	 */
+	qrv = IGMP_QRV(igmpv3->igmp_misc);
+	if (qrv < 2) {
+		CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__,
+		    qrv, IGMP_RV_INIT);
+		qrv = IGMP_RV_INIT;
+	}
+
+	qqi = igmpv3->igmp_qqi;
+	if (qqi >= 128) {
+		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
+		     (IGMP_EXP(igmpv3->igmp_qqi) + 3);
+	}
+
+	timer = maxresp * PR_FASTHZ / IGMP_TIMER_SCALE;
+	if (timer == 0)
+		timer = 1;
+
+	nsrc = ntohs(igmpv3->igmp_numsrc);
+
+	IN_MULTI_LOCK();
+	IGMP_LOCK();
+
+	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
+	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+
+	if (igi->igi_flags & IGIF_LOOPBACK) {
+		CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)",
+		    ifp, ifp->if_xname);
+		goto out_locked;
+	}
+
+	igmp_set_version(igi, IGMP_VERSION_3);
+
+	igi->igi_rv = qrv;
+	igi->igi_qi = qqi;
+	igi->igi_qri = maxresp;
+
+	CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi,
+	    maxresp);
+
+	if (in_nullhost(igmpv3->igmp_group)) {
+		/*
+		 * IGMPv3 General Query.

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-user mailing list