svn commit: r191672 - in head: . sys/conf sys/kern sys/netinet sys/netinet6 sys/sys usr.sbin/ifmcstat

Bruce M Simpson bms at FreeBSD.org
Wed Apr 29 19:19:14 UTC 2009


Author: bms
Date: Wed Apr 29 19:19:13 2009
New Revision: 191672
URL: http://svn.freebsd.org/changeset/base/191672

Log:
  Bite the bullet, and make the IPv6 SSM and MLDv2 mega-commit:
  import from p4 bms_netdev.  Summary of changes:
  
   * Connect netinet6/in6_mcast.c to build.
     The legacy KAME KPIs are mostly preserved.
   * Eliminate now dead code from ip6_output.c.
     Don't do mbuf bingo, we are not going to do RFC 2292 style
     CMSG tricks for multicast options as they are not required
     by any current IPv6 normative reference.
   * Refactor transports (UDP, raw_ip6) to do own mcast filtering.
     SCTP, TCP unaffected by this change.
   * Add ip6_msource, in6_msource structs to in6_var.h.
   * Hookup mld_ifinfo state to in6_ifextra, allocate from
     domifattach path.
   * Eliminate IN6_LOOKUP_MULTI(), it is no longer referenced.
     Kernel consumers which need this should use in6m_lookup().
   * Refactor IPv6 socket group memberships to use a vector (like IPv4).
   * Update ifmcstat(8) for IPv6 SSM.
   * Add witness lock order for IN6_MULTI_LOCK.
   * Move IN6_MULTI_LOCK out of lower ip6_output()/ip6_input() paths.
   * Introduce IP6STAT_ADD/SUB/INC/DEC as per rwatson's IPv4 cleanup.
   * Update carp(4) for new IPv6 SSM KPIs.
   * Virtualize ip6_mrouter socket.
     Changes mostly localized to IPv6 MROUTING.
   * Don't do a local group lookup in MROUTING.
   * Kill unused KAME prototypes in6_purgemkludge(), in6_restoremkludge().
   * Preserve KAME DAD timer jitter behaviour in MLDv1 compatibility mode.
   * Bump __FreeBSD_version to 800084.
   * Update UPDATING.
  
  NOTE WELL:
   * This code hasn't been tested against real MLDv2 queriers
     (yet), although the on-wire protocol has been verified in Wireshark.
   * There are a few unresolved issues in the socket layer APIs to
     do with scope ID propagation.
   * There is a LOR present in ip6_output()'s use of
     in6_setscope() which needs to be resolved. See comments in mld6.c.
     This is believed to be benign and can't be avoided for the moment
     without re-introducing an indirect netisr.
  
  This work was mostly derived from the IGMPv3 implementation, and
  has been sponsored by a third party.

Modified:
  head/UPDATING
  head/sys/conf/files
  head/sys/kern/subr_witness.c
  head/sys/netinet/ip_carp.c
  head/sys/netinet6/icmp6.c
  head/sys/netinet6/in6.c
  head/sys/netinet6/in6_ifattach.c
  head/sys/netinet6/in6_mcast.c
  head/sys/netinet6/in6_pcb.c
  head/sys/netinet6/in6_proto.c
  head/sys/netinet6/in6_var.h
  head/sys/netinet6/ip6_input.c
  head/sys/netinet6/ip6_mroute.c
  head/sys/netinet6/ip6_output.c
  head/sys/netinet6/ip6_var.h
  head/sys/netinet6/mld6.c
  head/sys/netinet6/mld6_var.h
  head/sys/netinet6/raw_ip6.c
  head/sys/netinet6/udp6_usrreq.c
  head/sys/netinet6/vinet6.h
  head/sys/sys/param.h
  head/usr.sbin/ifmcstat/ifmcstat.c

Modified: head/UPDATING
==============================================================================
--- head/UPDATING	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/UPDATING	Wed Apr 29 19:19:13 2009	(r191672)
@@ -22,6 +22,92 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.
 	to maximize performance.  (To disable malloc debugging, run
 	ln -s aj /etc/malloc.conf.)
 
+20090429:
+	MLDv2 and Source-Specific Multicast (SSM) have been merged
+	to the IPv6 stack. VIMAGE hooks are in but not yet used.
+	The implementation of SSM within FreeBSD's IPv6 stack closely
+	follows the IPv4 implementation.
+
+	For kernel developers:
+
+	* The most important changes are that the ip6_output() and
+	  ip6_input() paths no longer take the IN6_MULTI_LOCK,
+	  and this lock has been downgraded to a non-recursive mutex.
+
+	* As with the changes to the IPv4 stack to support SSM, filtering
+	  of inbound multicast traffic must now be performed by transport
+	  protocols within the IPv6 stack. This does not apply to TCP and
+	  SCTP, however, it does apply to UDP in IPv6 and raw IPv6.
+
+	* The KPIs used by IPv6 multicast are similar to those used by
+	  the IPv4 stack, with the following differences:
+	   * im6o_mc_filter() is analogous to imo_multicast_filter().
+	   * The legacy KAME entry points in6_joingroup() and in6_leavegroup()
+	     are shimmed to in6_mc_join() and in6_mc_leave() respectively.
+	   * IN6_LOOKUP_MULTI() has been deprecated and removed.
+	   * IPv6 relies on MLD for the DAD mechanism. KAME's internal KPIs
+	     for MLDv1 have an additional 'timer' argument which is used to
+	     jitter the initial membership report for the solicited-node
+	     multicast membership on-link.
+	   * This is not strictly needed for MLDv2, which already jitters
+	     its report transmissions.  However, the 'timer' argument is
+	     preserved in case MLDv1 is active on the interface.
+
+	* The KAME linked-list based IPv6 membership implementation has
+	  been refactored to use a vector similar to that used by the IPv4
+	  stack.
+	  Code which maintains a list of its own multicast memberships
+	  internally, e.g. carp, has been updated to reflect the new
+	  semantics.
+
+	* There is a known Lock Order Reversal (LOR) due to in6_setscope()
+	  acquiring the IF_AFDATA_LOCK and being called within ip6_output().
+	  Whilst MLDv2 tries to avoid this otherwise benign LOR, it is an
+	  implementation constraint which needs to be addressed in HEAD.
+
+	For application developers:
+
+	* The changes are broadly similar to those made for the IPv4
+	  stack.
+
+	* The use of IPv4 and IPv6 multicast socket options on the same
+	  socket, using mapped addresses, is neither tested nor supported.
+
+	* There are a number of issues with the implementation of various
+	  IPv6 multicast APIs which need to be resolved in the API surface
+	  before the implementation is fully compatible with KAME userland
+	  use, and these are mostly to do with interface index treatment.
+
+	* The literature available discusses the use of either the delta / ASM
+	  API with setsockopt(2)/getsockopt(2), or the full-state / ASM API
+	  using setsourcefilter(3)/getsourcefilter(3). For more information
+	  please refer to RFC 3678, 'Socket Interface Extensions for
+	  Multicast Source Filters'.
+
+	* Applications which use the published RFC 3678 APIs should be fine.
+
+	For systems administrators:
+
+	* The mtest(8) utility has been refactored to support IPv6, in
+	  addition to IPv4. Interface addresses are no longer accepted
+	  as arguments; interface names must be used instead. The utility
+	  will map the interface name to its first IPv4 address as
+	  returned by getifaddrs(3).
+
+	* The ifmcstat(8) utility has also been updated to print the MLDv2
+	  endpoint state and source filter lists via sysctl(3).
+
+	* The net.inet6.ip6.mcast.loop sysctl may be tuned to 0 to disable
+	  loopback of IPv6 multicast datagrams by default; it defaults to 1
+	  to preserve the existing behaviour. Disabling multicast loopback is
+	  recommended for optimal system performance.
+
+	* The IPv6 MROUTING code has been changed to examine this sysctl
+	  instead of attempting to perform a group lookup before looping
+	  back forwarded datagrams.
+
+	Bump __FreeBSD_version to 800084.
+
 20090422:
 	Implement low-level Bluetooth HCI API.
 	Bump __FreeBSD_version to 800083.

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/sys/conf/files	Wed Apr 29 19:19:13 2009	(r191672)
@@ -2381,6 +2381,7 @@ netinet6/in6.c			optional inet6
 netinet6/in6_cksum.c		optional inet6
 netinet6/in6_gif.c		optional gif inet6
 netinet6/in6_ifattach.c		optional inet6
+netinet6/in6_mcast.c		optional inet6
 netinet6/in6_pcb.c		optional inet6
 netinet6/in6_proto.c		optional inet6
 netinet6/in6_rmx.c		optional inet6

Modified: head/sys/kern/subr_witness.c
==============================================================================
--- head/sys/kern/subr_witness.c	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/sys/kern/subr_witness.c	Wed Apr 29 19:19:13 2009	(r191672)
@@ -512,7 +512,8 @@ static struct witness_order_list_entry o
 	{ "ifaddr", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
-	 * Multicast - protocol locks before interface locks, after UDP locks.
+	 * IPv4 multicast:
+	 * protocol locks before interface locks, after UDP locks.
 	 */
 	{ "udpinp", &lock_class_rw },
 	{ "in_multi_mtx", &lock_class_mtx_sleep },
@@ -520,6 +521,15 @@ static struct witness_order_list_entry o
 	{ "if_addr_mtx", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
+	 * IPv6 multicast:
+	 * protocol locks before interface locks, after UDP locks.
+	 */
+	{ "udpinp", &lock_class_rw },
+	{ "in6_multi_mtx", &lock_class_mtx_sleep },
+	{ "mld_mtx", &lock_class_mtx_sleep },
+	{ "if_addr_mtx", &lock_class_mtx_sleep },
+	{ NULL, NULL },
+	/*
 	 * UNIX Domain Sockets
 	 */
 	{ "unp_global_rwlock", &lock_class_rw },

Modified: head/sys/netinet/ip_carp.c
==============================================================================
--- head/sys/netinet/ip_carp.c	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/sys/netinet/ip_carp.c	Wed Apr 29 19:19:13 2009	(r191672)
@@ -400,15 +400,20 @@ carp_clone_create(struct if_clone *ifc, 
 	sc->sc_advskew = 0;
 	sc->sc_init_counter = 1;
 	sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
-#ifdef INET6
-	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
-#endif
 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
 	    M_WAITOK);
 	sc->sc_imo.imo_mfilters = NULL;
 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
 	sc->sc_imo.imo_multicast_vif = -1;
+#ifdef INET6
+	sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc(
+	    (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
+	    M_WAITOK);
+	sc->sc_im6o.im6o_mfilters = NULL;
+	sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
+	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
+#endif
 
 	callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
 	callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
@@ -448,6 +453,9 @@ carp_clone_destroy(struct ifnet *ifp)
 	if_detach(ifp);
 	if_free_type(ifp, IFT_ETHER);
 	free(sc->sc_imo.imo_membership, M_CARP);
+#ifdef INET6
+	free(sc->sc_im6o.im6o_membership, M_CARP);
+#endif
 	free(sc, M_CARP);
 }
 
@@ -1449,14 +1457,17 @@ static void
 carp_multicast6_cleanup(struct carp_softc *sc)
 {
 	struct ip6_moptions *im6o = &sc->sc_im6o;
+	u_int16_t n = im6o->im6o_num_memberships;
 
-	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
-		struct in6_multi_mship *imm =
-		    LIST_FIRST(&im6o->im6o_memberships);
-
-		LIST_REMOVE(imm, i6mm_chain);
-		in6_leavegroup(imm);
+	while (n-- > 0) {
+		if (im6o->im6o_membership[n] != NULL) {
+			in6_mc_leave(im6o->im6o_membership[n], NULL);
+			im6o->im6o_membership[n] = NULL;
+		}
 	}
+	KASSERT(im6o->im6o_mfilters == NULL,
+	   ("%s: im6o_mfilters != NULL", __func__));
+	im6o->im6o_num_memberships = 0;
 	im6o->im6o_multicast_ifp = NULL;
 }
 #endif
@@ -1635,10 +1646,11 @@ carp_set_addr6(struct carp_softc *sc, st
 	struct carp_if *cif;
 	struct in6_ifaddr *ia, *ia_if;
 	struct ip6_moptions *im6o = &sc->sc_im6o;
-	struct in6_multi_mship *imm;
 	struct in6_addr in6;
 	int own, error;
 
+	error = 0;
+
 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 		if (!(SC2IFP(sc)->if_flags & IFF_UP))
 			carp_set_state(sc, INIT);
@@ -1686,6 +1698,8 @@ carp_set_addr6(struct carp_softc *sc, st
 		return (EADDRNOTAVAIL);
 
 	if (!sc->sc_naddrs6) {
+		struct in6_multi *in6m;
+
 		im6o->im6o_multicast_ifp = ifp;
 
 		/* join CARP multicast address */
@@ -1694,9 +1708,12 @@ carp_set_addr6(struct carp_softc *sc, st
 		in6.s6_addr8[15] = 0x12;
 		if (in6_setscope(&in6, ifp, NULL) != 0)
 			goto cleanup;
-		if ((imm = in6_joingroup(ifp, &in6, &error, 0)) == NULL)
+		in6m = NULL;
+		error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
+		if (error)
 			goto cleanup;
-		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
+		im6o->im6o_membership[0] = in6m;
+		im6o->im6o_num_memberships++;
 
 		/* join solicited multicast address */
 		bzero(&in6, sizeof(in6));
@@ -1707,9 +1724,12 @@ carp_set_addr6(struct carp_softc *sc, st
 		in6.s6_addr8[12] = 0xff;
 		if (in6_setscope(&in6, ifp, NULL) != 0)
 			goto cleanup;
-		if ((imm = in6_joingroup(ifp, &in6, &error, 0)) == NULL)
+		in6m = NULL;
+		error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
+		if (error)
 			goto cleanup;
-		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
+		im6o->im6o_membership[1] = in6m;
+		im6o->im6o_num_memberships++;
 	}
 
 	if (!ifp->if_carp) {
@@ -1781,14 +1801,8 @@ carp_set_addr6(struct carp_softc *sc, st
 	return (0);
 
 cleanup:
-	/* clean up multicast memberships */
-	if (!sc->sc_naddrs6) {
-		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
-			imm = LIST_FIRST(&im6o->im6o_memberships);
-			LIST_REMOVE(imm, i6mm_chain);
-			in6_leavegroup(imm);
-		}
-	}
+	if (!sc->sc_naddrs6)
+		carp_multicast6_cleanup(sc);
 	return (error);
 }
 
@@ -1799,21 +1813,13 @@ carp_del_addr6(struct carp_softc *sc, st
 
 	if (!--sc->sc_naddrs6) {
 		struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
-		struct ip6_moptions *im6o = &sc->sc_im6o;
 
 		CARP_LOCK(cif);
 		callout_stop(&sc->sc_ad_tmo);
 		SC2IFP(sc)->if_flags &= ~IFF_UP;
 		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 		sc->sc_vhid = -1;
-		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
-			struct in6_multi_mship *imm =
-			    LIST_FIRST(&im6o->im6o_memberships);
-
-			LIST_REMOVE(imm, i6mm_chain);
-			in6_leavegroup(imm);
-		}
-		im6o->im6o_multicast_ifp = NULL;
+		carp_multicast6_cleanup(sc);
 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
 		if (!--cif->vhif_nvrs) {
 			CARP_LOCK_DESTROY(cif);

Modified: head/sys/netinet6/icmp6.c
==============================================================================
--- head/sys/netinet6/icmp6.c	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/sys/netinet6/icmp6.c	Wed Apr 29 19:19:13 2009	(r191672)
@@ -147,8 +147,6 @@ icmp6_init(void)
 	INIT_VNET_INET6(curvnet);
 
 	V_icmp6errpps_count = 0;
-
-	mld6_init();
 }
 
 static void
@@ -429,6 +427,23 @@ icmp6_input(struct mbuf **mp, int *offp,
 	}
 
 	/*
+	 * Check multicast group membership.
+	 * Note: SSM filters are not applied for ICMPv6 traffic.
+	 */
+	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+		struct ifnet *ifp;
+		struct in6_multi *inm;
+
+		ifp = m->m_pkthdr.rcvif;
+		inm = in6m_lookup(ifp, &ip6->ip6_dst);
+		if (inm == NULL) {
+			IP6STAT_INC(ip6s_notmember);
+			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
+			goto freeit;
+		}
+	}
+
+	/*
 	 * calculate the checksum
 	 */
 #ifndef PULLDOWN_TEST
@@ -615,34 +630,20 @@ icmp6_input(struct mbuf **mp, int *offp,
 
 	case MLD_LISTENER_QUERY:
 	case MLD_LISTENER_REPORT:
-		if (icmp6len < sizeof(struct mld_hdr))
-			goto badlen;
-		if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
-			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
-		else
-			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
-		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
-			/* give up local */
-			mld6_input(m, off);
-			m = NULL;
+	case MLD_LISTENER_DONE:
+	case MLDV2_LISTENER_REPORT:
+		/*
+		 * Drop MLD traffic which is not link-local.
+		 * XXX Should we also sanity check that these messages
+		 * were directed to a link-local multicast prefix?
+		 */
+		if (ip6->ip6_hlim != 1)
 			goto freeit;
-		}
-		mld6_input(n, off);
+		if (mld_input(m, off, icmp6len) != 0)
+			return (IPPROTO_DONE);
 		/* m stays. */
 		break;
 
-	case MLD_LISTENER_DONE:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone);
-		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
-			goto badlen;
-		break;		/* nothing to be done in kernel */
-
-	case MLD_MTRACE_RESP:
-	case MLD_MTRACE:
-		/* XXX: these two are experimental.  not officially defined. */
-		/* XXX: per-interface statistics? */
-		break;		/* just pass it to applications */
-
 	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
 	    {
 		enum { WRU, FQDN } mode;
@@ -2050,7 +2051,7 @@ icmp6_rip6_input(struct mbuf **mp, int o
 		INP_RUNLOCK(last);
 	} else {
 		m_freem(m);
-		V_ip6stat.ip6s_delivered--;
+		IP6STAT_DEC(ip6s_delivered);
 	}
 	return IPPROTO_DONE;
 }
@@ -2222,7 +2223,14 @@ void
 icmp6_fasttimo(void)
 {
 
-	return;
+	mld_fasttimo();
+}
+
+void
+icmp6_slowtimo(void)
+{
+
+	mld_slowtimo();
 }
 
 static const char *

Modified: head/sys/netinet6/in6.c
==============================================================================
--- head/sys/netinet6/in6.c	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/sys/netinet6/in6.c	Wed Apr 29 19:19:13 2009	(r191672)
@@ -106,8 +106,6 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/in6_pcb.h>
 #include <netinet6/vinet6.h>
 
-MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "internet multicast address");
-
 /*
  * Definitions of some costant IP6 addresses.
  */
@@ -119,6 +117,8 @@ const struct in6_addr in6addr_linklocal_
 	IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allrouters =
 	IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_linklocal_allv2routers =
+	IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT;
 
 const struct in6_addr in6mask0 = IN6MASK0;
 const struct in6_addr in6mask32 = IN6MASK32;
@@ -135,7 +135,6 @@ static int in6_ifinit __P((struct ifnet 
 	struct sockaddr_in6 *, int));
 static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
 
-struct in6_multihead in6_multihead;	/* XXX BSS initialization */
 int	(*faithprefix_p)(struct in6_addr *);
 
 
@@ -1110,10 +1109,12 @@ in6_update_ifa(struct ifnet *ifp, struct
 			 * should be larger than the MLD delay (this could be
 			 * relaxed a bit, but this simple logic is at least
 			 * safe).
+			 * XXX: Break data hiding guidelines and look at
+			 * state for the solicited multicast group.
 			 */
 			mindelay = 0;
 			if (in6m_sol != NULL &&
-			    in6m_sol->in6m_state == MLD_REPORTPENDING) {
+			    in6m_sol->in6m_state == MLD_REPORTING_MEMBER) {
 				mindelay = in6m_sol->in6m_timer;
 			}
 			maxdelay = MAX_RTR_SOLICITATION_DELAY * hz;
@@ -1590,36 +1591,6 @@ in6_ifinit(struct ifnet *ifp, struct in6
 	return (error);
 }
 
-struct in6_multi_mship *
-in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
-    int *errorp, int delay)
-{
-	struct in6_multi_mship *imm;
-
-	imm = malloc(sizeof(*imm), M_IP6MADDR, M_NOWAIT);
-	if (!imm) {
-		*errorp = ENOBUFS;
-		return NULL;
-	}
-	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, delay);
-	if (!imm->i6mm_maddr) {
-		/* *errorp is alrady set */
-		free(imm, M_IP6MADDR);
-		return NULL;
-	}
-	return imm;
-}
-
-int
-in6_leavegroup(struct in6_multi_mship *imm)
-{
-
-	if (imm->i6mm_maddr)
-		in6_delmulti(imm->i6mm_maddr);
-	free(imm,  M_IP6MADDR);
-	return 0;
-}
-
 /*
  * Find an IPv6 interface link-local address specific to an interface.
  */
@@ -2328,6 +2299,9 @@ in6_domifattach(struct ifnet *ifp)
 		ext->lltable->llt_lookup = in6_lltable_lookup;
 		ext->lltable->llt_dump = in6_lltable_dump;
 	}
+
+	ext->mld_ifinfo = mld_domifattach(ifp);
+
 	return ext;
 }
 
@@ -2336,6 +2310,7 @@ in6_domifdetach(struct ifnet *ifp, void 
 {
 	struct in6_ifextra *ext = (struct in6_ifextra *)aux;
 
+	mld_domifdetach(ifp);
 	scope6_ifdetach(ext->scope6_id);
 	nd6_ifdetach(ext->nd_ifinfo);
 	lltable_free(ext->lltable);

Modified: head/sys/netinet6/in6_ifattach.c
==============================================================================
--- head/sys/netinet6/in6_ifattach.c	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/sys/netinet6/in6_ifattach.c	Wed Apr 29 19:19:13 2009	(r191672)
@@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
+#include <netinet6/mld6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/vinet6.h>
 
@@ -918,11 +919,35 @@ in6_tmpaddrtimer(void *ignored_arg)
 static void
 in6_purgemaddrs(struct ifnet *ifp)
 {
-	struct in6_multi *in6m;
-	struct in6_multi *oin6m;
+	INIT_VNET_INET6(ifp->if_vnet);
+	LIST_HEAD(,in6_multi)	 purgeinms;
+	struct in6_multi	*inm, *tinm;
+	struct ifmultiaddr	*ifma;
+
+	LIST_INIT(&purgeinms);
+	IN6_MULTI_LOCK();
 
-	LIST_FOREACH_SAFE(in6m, &in6_multihead, in6m_entry, oin6m) {
-		if (in6m->in6m_ifp == ifp)
-			in6_delmulti(in6m);
+	/*
+	 * Extract list of in6_multi associated with the detaching ifp
+	 * which the PF_INET6 layer is about to release.
+	 * We need to do this as IF_ADDR_LOCK() may be re-acquired
+	 * by code further down.
+	 */
+	IF_ADDR_LOCK(ifp);
+	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+		if (ifma->ifma_addr->sa_family != AF_INET6 ||
+		    ifma->ifma_protospec == NULL)
+			continue;
+		inm = (struct in6_multi *)ifma->ifma_protospec;
+		LIST_INSERT_HEAD(&purgeinms, inm, in6m_entry);
 	}
+	IF_ADDR_UNLOCK(ifp);
+
+	LIST_FOREACH_SAFE(inm, &purgeinms, in6m_entry, tinm) {
+		LIST_REMOVE(inm, in6m_entry);
+		in6m_release_locked(inm);
+	}
+	mld_ifdetach(ifp);
+
+	IN6_MULTI_UNLOCK();
 }

Modified: head/sys/netinet6/in6_mcast.c
==============================================================================
--- head/sys/netinet6/in6_mcast.c	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/sys/netinet6/in6_mcast.c	Wed Apr 29 19:19:13 2009	(r191672)
@@ -29,6 +29,7 @@
 
 /*
  * IPv6 multicast socket, group, and socket option processing module.
+ * Normative references: RFC 2292, RFC 3493, RFC 3542, RFC 3678, RFC 3810.
  */
 
 #include <sys/cdefs.h>
@@ -142,6 +143,9 @@ static struct ip6_moptions *
 static int	in6p_get_source_filters(struct inpcb *, struct sockopt *);
 static int	in6p_join_group(struct inpcb *, struct sockopt *);
 static int	in6p_leave_group(struct inpcb *, struct sockopt *);
+static struct ifnet *
+		in6p_lookup_mcast_ifp(const struct inpcb *,
+		    const struct sockaddr_in6 *);
 static int	in6p_block_unblock_source(struct inpcb *, struct sockopt *);
 static int	in6p_set_multicast_if(struct inpcb *, struct sockopt *);
 static int	in6p_set_source_filters(struct inpcb *, struct sockopt *);
@@ -1655,12 +1659,12 @@ int
 ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt)
 {
 	INIT_VNET_INET6(curvnet);
-	struct ip6_moptions	*imo;
-	int			 error, optval;
-	u_char			 coptval;
+	struct ip6_moptions	*im6o;
+	int			 error;
+	u_int			 optval;
 
 	INP_WLOCK(inp);
-	imo = inp->in6p_moptions;
+	im6o = inp->in6p_moptions;
 	/*
 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
 	 * or is a divert socket, reject it.
@@ -1674,38 +1678,36 @@ ip6_getmoptions(struct inpcb *inp, struc
 
 	error = 0;
 	switch (sopt->sopt_name) {
-#if 0 /* XXX FIXME */
 	case IPV6_MULTICAST_IF:
-		if (imo == NULL || imo->im6o_multicast_ifp == NULL) {
+		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) {
 			optval = 0;
 		} else {
-			optval = imo->im6o_multicast_ifp->if_index;
+			optval = im6o->im6o_multicast_ifp->if_index;
 		}
 		INP_WUNLOCK(inp);
-		error = sooptcopyout(sopt, &ifindex, sizeof(u_int));
+		error = sooptcopyout(sopt, &optval, sizeof(u_int));
 		break;
-#endif
 
 	case IPV6_MULTICAST_HOPS:
-		if (imo == 0)
-			optval = coptval = V_ip6_defmcasthlim;
+		if (im6o == NULL)
+			optval = V_ip6_defmcasthlim;
 		else
-			optval = coptval = imo->im6o_multicast_loop;
+			optval = im6o->im6o_multicast_loop;
 		INP_WUNLOCK(inp);
 		error = sooptcopyout(sopt, &optval, sizeof(u_int));
 		break;
 
 	case IPV6_MULTICAST_LOOP:
-		if (imo == 0)
-			optval = coptval = IPV6_DEFAULT_MULTICAST_LOOP;
+		if (im6o == NULL)
+			optval = in6_mcast_loop; /* XXX VIMAGE */
 		else
-			optval = coptval = imo->im6o_multicast_loop;
+			optval = im6o->im6o_multicast_loop;
 		INP_WUNLOCK(inp);
 		error = sooptcopyout(sopt, &optval, sizeof(u_int));
 		break;
 
 	case IPV6_MSFILTER:
-		if (imo == NULL) {
+		if (im6o == NULL) {
 			error = EADDRNOTAVAIL;
 			INP_WUNLOCK(inp);
 		} else {
@@ -1725,7 +1727,57 @@ ip6_getmoptions(struct inpcb *inp, struc
 }
 
 /*
+ * Look up the ifnet to use for a multicast group membership,
+ * given the address of an IPv6 group.
+ *
+ * This routine exists to support legacy IPv6 multicast applications.
+ *
+ * If inp is non-NULL, use this socket's current FIB number for any
+ * required FIB lookup. Look up the group address in the unicast FIB,
+ * and use its ifp; usually, this points to the default next-hop.
+ * If the FIB lookup fails, return NULL.
+ *
+ * FUTURE: Support multiple forwarding tables for IPv6.
+ *
+ * Returns NULL if no ifp could be found.
+ */
+static struct ifnet *
+in6p_lookup_mcast_ifp(const struct inpcb *in6p __unused,
+    const struct sockaddr_in6 *gsin6)
+{
+	INIT_VNET_INET6(curvnet);
+	struct route_in6	 ro6;
+	struct ifnet		*ifp;
+
+	KASSERT(in6p->inp_vflag & INP_IPV6,
+	    ("%s: not INP_IPV6 inpcb", __func__));
+	KASSERT(gsin6->sin6_family == AF_INET6,
+	    ("%s: not AF_INET6 group", __func__));
+	KASSERT(IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr),
+	    ("%s: not multicast", __func__));
+
+	ifp = NULL;
+	memset(&ro6, 0, sizeof(struct route_in6));
+	memcpy(&ro6.ro_dst, gsin6, sizeof(struct sockaddr_in6));
+#ifdef notyet
+	rtalloc_ign_fib(&ro6, 0, inp ? inp->inp_inc.inc_fibnum : 0);
+#else
+	rtalloc_ign((struct route *)&ro6, 0);
+#endif
+	if (ro6.ro_rt != NULL) {
+		ifp = ro6.ro_rt->rt_ifp;
+		KASSERT(ifp != NULL, ("%s: null ifp", __func__));
+		RTFREE(ro6.ro_rt);
+	}
+
+	return (ifp);
+}
+
+/*
  * Join an IPv6 multicast group, possibly with a source.
+ *
+ * FIXME: The KAME use of the unspecified address (::)
+ * to join *all* multicast groups is currently unsupported.
  */
 static int
 in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
@@ -1765,8 +1817,14 @@ in6p_join_group(struct inpcb *inp, struc
 		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
 		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
 
-		ifp = ifnet_byindex(mreq.ipv6mr_interface);
-
+		if (mreq.ipv6mr_interface == 0) {
+			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
+		} else {
+			if (mreq.ipv6mr_interface < 0 ||
+			    V_if_index < mreq.ipv6mr_interface)
+				return (EADDRNOTAVAIL);
+			ifp = ifnet_byindex(mreq.ipv6mr_interface);
+		}
 		CTR3(KTR_MLD, "%s: ipv6mr_interface = %d, ifp = %p",
 		    __func__, mreq.ipv6mr_interface, ifp);
 	} break;
@@ -1813,12 +1871,35 @@ in6p_join_group(struct inpcb *inp, struc
 		break;
 	}
 
+#ifdef notyet
+	/*
+	 * FIXME: Check for unspecified address (all groups).
+	 * Do we have a normative reference for this 'feature'?
+	 *
+	 * We use the unspecified address to specify to accept
+	 * all multicast addresses. Only super user is allowed
+	 * to do this.
+	 * XXX-BZ might need a better PRIV_NETINET_x for this
+	 */
+	if (IN6_IS_ADDR_UNSPECIFIED(&gsa->sin6.sin6_addr)) {
+		error = priv_check(curthread, PRIV_NETINET_MROUTE);
+		if (error)
+		break;
+	} else
+#endif
 	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
 		return (EADDRNOTAVAIL);
 
+#ifdef notyet
+	/*
+	 * FIXME: Set interface scope in group address.
+	 */
+	(void)in6_setscope(&gsa->sin6.sin_addr, ifp, NULL);
+#endif
+
 	/*
 	 * MCAST_JOIN_SOURCE on an exclusive membership is an error.
 	 * On an existing inclusive membership, it just adds the
@@ -1987,7 +2068,23 @@ in6p_leave_group(struct inpcb *inp, stru
 		gsa->sin6.sin6_family = AF_INET6;
 		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
 		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
-		ifp = ifnet_byindex(mreq.ipv6mr_interface);
+
+		if (mreq.ipv6mr_interface == 0) {
+#ifdef notyet
+			/*
+			 * FIXME: Resolve scope ambiguity when interface
+			 * index is unspecified.
+			 */
+			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
+#else
+			return (EADDRNOTAVAIL);
+#endif
+		} else {
+			if (mreq.ipv6mr_interface < 0 ||
+			    V_if_index < mreq.ipv6mr_interface)
+				return (EADDRNOTAVAIL);
+			ifp = ifnet_byindex(mreq.ipv6mr_interface);
+		}
 
 		CTR3(KTR_MLD, "%s: ipv6mr_interface = %d, ifp = %p",
 		    __func__, mreq.ipv6mr_interface, ifp);
@@ -2033,6 +2130,15 @@ in6p_leave_group(struct inpcb *inp, stru
 	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
+#ifdef notyet
+	/*
+	 * FIXME: Need to embed ifp's scope ID in the address
+	 * handed down to MLD.
+	 * See KAME IPV6_LEAVE_GROUP implementation.
+	 */
+	(void)in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL);
+#endif
+
 	/*
 	 * Find the membership in the membership array.
 	 */
@@ -2348,7 +2454,7 @@ out_in6p_locked:
 int
 ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt)
 {
-	struct ip6_moptions	*imo;
+	struct ip6_moptions	*im6o;
 	int			 error;
 
 	error = 0;
@@ -2364,7 +2470,6 @@ ip6_setmoptions(struct inpcb *inp, struc
 
 	switch (sopt->sopt_name) {
 	case IPV6_MULTICAST_IF:
-		/* XXX in v6 this one is far more involved */
 		error = in6p_set_multicast_if(inp, sopt);
 		break;
 
@@ -2381,9 +2486,11 @@ ip6_setmoptions(struct inpcb *inp, struc
 		if (hlim < -1 || hlim > 255) {
 			error = EINVAL;
 			break;
+		} else if (hlim == -1) {
+			hlim = V_ip6_defmcasthlim;
 		}
-		imo = in6p_findmoptions(inp);
-		imo->im6o_multicast_hlim = hlim;
+		im6o = in6p_findmoptions(inp);
+		im6o->im6o_multicast_hlim = hlim;
 		INP_WUNLOCK(inp);
 		break;
 	}
@@ -2393,9 +2500,7 @@ ip6_setmoptions(struct inpcb *inp, struc
 
 		/*
 		 * Set the loopback flag for outgoing multicast packets.
-		 * Must be zero or one.  The orimcaddrl multicast API required a
-		 * char argument, which is inconsistent with the rest
-		 * of the socket API.  We allow either a char or an int.
+		 * Must be zero or one.
 		 */
 		if (sopt->sopt_valsize != sizeof(u_int)) {
 			error = EINVAL;
@@ -2404,8 +2509,12 @@ ip6_setmoptions(struct inpcb *inp, struc
 		error = sooptcopyin(sopt, &loop, sizeof(u_int), sizeof(u_int));
 		if (error)
 			break;
-		imo = in6p_findmoptions(inp);
-		imo->im6o_multicast_loop = loop;
+		if (loop > 1) {
+			error = EINVAL;
+			break;
+		}
+		im6o = in6p_findmoptions(inp);
+		im6o->im6o_multicast_loop = loop;
 		INP_WUNLOCK(inp);
 		break;
 	}

Modified: head/sys/netinet6/in6_pcb.c
==============================================================================
--- head/sys/netinet6/in6_pcb.c	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/sys/netinet6/in6_pcb.c	Wed Apr 29 19:19:13 2009	(r191672)
@@ -733,36 +733,36 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinf
 {
 	struct inpcb *in6p;
 	struct ip6_moptions *im6o;
-	struct in6_multi_mship *imm, *nimm;
+	int i, gap;
 
 	INP_INFO_RLOCK(pcbinfo);
 	LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
 		INP_WLOCK(in6p);
 		im6o = in6p->in6p_moptions;
-		if ((in6p->inp_vflag & INP_IPV6) &&
-		    im6o) {
+		if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) {
 			/*
-			 * Unselect the outgoing interface if it is being
-			 * detached.
+			 * Unselect the outgoing ifp for multicast if it
+			 * is being detached.
 			 */
 			if (im6o->im6o_multicast_ifp == ifp)
 				im6o->im6o_multicast_ifp = NULL;
-
 			/*
 			 * Drop multicast group membership if we joined
 			 * through the interface being detached.
-			 * XXX controversial - is it really legal for kernel
-			 * to force this?
 			 */
-			for (imm = im6o->im6o_memberships.lh_first;
-			     imm != NULL; imm = nimm) {
-				nimm = imm->i6mm_chain.le_next;
-				if (imm->i6mm_maddr->in6m_ifp == ifp) {
-					LIST_REMOVE(imm, i6mm_chain);
-					in6_delmulti(imm->i6mm_maddr);
-					free(imm, M_IP6MADDR);
+			gap = 0;
+			for (i = 0; i < im6o->im6o_num_memberships; i++) {
+				if (im6o->im6o_membership[i]->in6m_ifp ==
+				    ifp) {
+					in6_mc_leave(im6o->im6o_membership[i],
+					    NULL);
+					gap++;
+				} else if (gap != 0) {
+					im6o->im6o_membership[i - gap] =
+					    im6o->im6o_membership[i];
 				}
 			}
+			im6o->im6o_num_memberships -= gap;
 		}
 		INP_WUNLOCK(in6p);
 	}

Modified: head/sys/netinet6/in6_proto.c
==============================================================================
--- head/sys/netinet6/in6_proto.c	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/sys/netinet6/in6_proto.c	Wed Apr 29 19:19:13 2009	(r191672)
@@ -236,6 +236,7 @@ struct ip6protosw inet6sw[] = {
 	.pr_ctloutput =		rip6_ctloutput,
 	.pr_init =		icmp6_init,
 	.pr_fasttimo =		icmp6_fasttimo,
+	.pr_slowtimo =		icmp6_slowtimo,
 	.pr_usrreqs =		&rip6_usrreqs
 },
 {

Modified: head/sys/netinet6/in6_var.h
==============================================================================
--- head/sys/netinet6/in6_var.h	Wed Apr 29 18:41:08 2009	(r191671)
+++ head/sys/netinet6/in6_var.h	Wed Apr 29 19:19:13 2009	(r191672)
@@ -64,6 +64,12 @@
 #ifndef _NETINET6_IN6_VAR_H_
 #define _NETINET6_IN6_VAR_H_
 
+#include <sys/tree.h>
+
+#ifdef _KERNEL
+#include <sys/libkern.h>
+#endif
+
 /*
  * Interface address, Internet version.  One of these structures
  * is allocated for each interface with an Internet address.
@@ -89,12 +95,15 @@ struct in6_addrlifetime {
 struct nd_ifinfo;
 struct scope6_id;
 struct lltable;
+struct mld_ifinfo;
+
 struct in6_ifextra {
 	struct in6_ifstat *in6_ifstat;
 	struct icmp6_ifstat *icmp6_ifstat;
 	struct nd_ifinfo *nd_ifinfo;
 	struct scope6_id *scope6_id;
 	struct lltable *lltable;
+	struct mld_ifinfo *mld_ifinfo;
 };
 
 #define	LLTABLE6(ifp)	(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable)
@@ -489,9 +498,6 @@ do {								\
 
 extern struct in6_addr zeroin6_addr;
 extern u_char inet6ctlerrmap[];
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_IP6MADDR);
-#endif /* MALLOC_DECLARE */
 
 /*
  * Macro for finding the internet address structure (in6_ifaddr) corresponding
@@ -514,94 +520,243 @@ do {									\
 #endif /* _KERNEL */
 
 /*
- * Multi-cast membership entry.  One for each group/ifp that a PCB
- * belongs to.
+ * IPv6 multicast MLD-layer source entry.
+ */
+struct ip6_msource {
+	RB_ENTRY(ip6_msource)	im6s_link;	/* RB tree links */
+	struct in6_addr		im6s_addr;
+	struct im6s_st {
+		uint16_t	ex;		/* # of exclusive members */
+		uint16_t	in;		/* # of inclusive members */
+	}			im6s_st[2];	/* state at t0, t1 */
+	uint8_t			im6s_stp;	/* pending query */
+};
+RB_HEAD(ip6_msource_tree, ip6_msource);
+
+/*
+ * IPv6 multicast PCB-layer source entry.
+ *
+ * NOTE: overlapping use of struct ip6_msource fields at start.
+ */
+struct in6_msource {
+	RB_ENTRY(ip6_msource)	im6s_link;	/* Common field */
+	struct in6_addr		im6s_addr;	/* Common field */
+	uint8_t			im6sl_st[2];	/* state before/at commit */
+};
+
+#ifdef _KERNEL
+/*
+ * IPv6 source tree comparison function.
+ *
+ * An ordered predicate is necessary; bcmp() is not documented to return
+ * an indication of order, memcmp() is, and is an ISO C99 requirement.
+ */
+static __inline int
+ip6_msource_cmp(const struct ip6_msource *a, const struct ip6_msource *b)
+{
+
+	return (memcmp(&a->im6s_addr, &b->im6s_addr, sizeof(struct in6_addr)));
+}
+RB_PROTOTYPE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp);
+#endif /* _KERNEL */
+
+/*
+ * IPv6 multicast PCB-layer group filter descriptor.
+ */
+struct in6_mfilter {
+	struct ip6_msource_tree	im6f_sources; /* source list for (S,G) */
+	u_long			im6f_nsrc;    /* # of source entries */
+	uint8_t			im6f_st[2];   /* state before/at commit */
+};
+
+/*
+ * Legacy KAME IPv6 multicast membership descriptor.
  */
 struct in6_multi_mship {
-	struct	in6_multi *i6mm_maddr;	/* Multicast address pointer */
-	LIST_ENTRY(in6_multi_mship) i6mm_chain;  /* multicast options chain */
+	struct	in6_multi *i6mm_maddr;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list