svn commit: r192923 - in head: share/man/man4 sys/netinet6 usr.sbin/ifmcstat

Bruce M Simpson bms at FreeBSD.org
Wed May 27 18:57:14 UTC 2009


Author: bms
Date: Wed May 27 18:57:13 2009
New Revision: 192923
URL: http://svn.freebsd.org/changeset/base/192923

Log:
  Merge final round of MLD changes from p4:
   ip6_input.c, in6.h:
   * Add netinet6-specific mbuf flag M_RTALERT_MLD, shadowing M_PROTO6.
    * Always set this flag if HBH Router Alert option is present for MLD,
      even when not forwarding.
  
   icmp6.c:
   * In icmp6_input(), spell m->m_pkthdr.rcvif as ifp to be consistent.
   * Use scope ID for verifying input. Do not apply SSM filters here, as there is no inpcb.
    * Check for M_RTALERT_MLD when validating MLD traffic, as we can't see
      IPv6 hop options outside of ip6_input().
  
   in6_mcast.c:
   * Use KAME scope/zone ID in in6_multi.
     * Update net.inet6.ip6.mcast.filters implementation to use scope IDs
       for comparisons.
   * Fix scope ID treatment in multicast socket option processing.
     Scope IDs passed in from userland will be ignored as other less
     ambiguous APIs exist for specifying the link.
   * Tighten userland input checks in IPv6 SSM delta and full-state ops.
     * Source filter embedded scope IDs need to be revisited, for now
       just clear them and ignore them on input.
   * Adapt KAME behaviour of looking up the scope ID in the default zone
     for multicast leaves, when the interface is ambiguous.
  
   mld6.c:
   * Tighten origin checks on MLD traffic as per RFC3810 Section 6.2:
    * ip6_src MAY be the unspecified address for MLDv1 reports.
    * ip6_src MAY have link-local address scope for MLDv1 reports,
      MLDv1 queries, and MLDv2 queries.
    * Perform address field validation *before* accepting queries.
   * Use KAME scope/zone ID in query/report processing.
     * Break const correctness for mld_v1_input_report(), mld_v1_input_query()
       as we temporarily modify the input mbuf chain.
     * Clear the scope ID before handoff to userland MLD daemon.
   * Fix MLDv1 old querier present timer processing.
     With the protocol defaults, hosts should revert to MLDv2 after 260s.
   * Add net.inet6.mld.v1enable sysctl, default to on.
  
   ifmcstat.c:
   * Use sysctl by default; -K requests kvm(3) if so compiled.
  
   mld.4:
   * Connect man page to build.
  
  Tested using PCS.

Modified:
  head/share/man/man4/Makefile
  head/share/man/man4/multicast.4
  head/sys/netinet6/icmp6.c
  head/sys/netinet6/in6.h
  head/sys/netinet6/in6_mcast.c
  head/sys/netinet6/ip6_input.c
  head/sys/netinet6/mld6.c
  head/usr.sbin/ifmcstat/ifmcstat.8
  head/usr.sbin/ifmcstat/ifmcstat.c

Modified: head/share/man/man4/Makefile
==============================================================================
--- head/share/man/man4/Makefile	Wed May 27 18:54:31 2009	(r192922)
+++ head/share/man/man4/Makefile	Wed May 27 18:57:13 2009	(r192923)
@@ -191,6 +191,7 @@ MAN=	aac.4 \
 	meteor.4 \
 	mfi.4 \
 	miibus.4 \
+	mld.4 \
 	mlx.4 \
 	mly.4 \
 	mmc.4 \

Modified: head/share/man/man4/multicast.4
==============================================================================
--- head/share/man/man4/multicast.4	Wed May 27 18:54:31 2009	(r192922)
+++ head/share/man/man4/multicast.4	Wed May 27 18:57:13 2009	(r192923)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd February 13, 2009
+.Dd May 27, 2009
 .Dt MULTICAST 4
 .Os
 .\"
@@ -962,6 +962,7 @@ after the previous upcall.
 .Xr intro 4 ,
 .Xr ip 4 ,
 .Xr ip6 4 ,
+.Xr mld 4 ,
 .Xr pim 4
 .\"
 .Sh HISTORY
@@ -1002,6 +1003,8 @@ monitoring were implemented by
 in collaboration with
 .An Chris Brown
 (NextHop).
+The IGMPv3 and MLDv2 multicast support was implemented by
+.An Bruce Simpson .
 .Pp
 This manual page was written by
 .An Pavlin Radoslavov

Modified: head/sys/netinet6/icmp6.c
==============================================================================
--- head/sys/netinet6/icmp6.c	Wed May 27 18:54:31 2009	(r192922)
+++ head/sys/netinet6/icmp6.c	Wed May 27 18:57:13 2009	(r192923)
@@ -403,6 +403,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 	INIT_VNET_INET6(curvnet);
 	INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
 	struct mbuf *m = *mp, *n;
+	struct ifnet *ifp;
 	struct ip6_hdr *ip6, *nip6;
 	struct icmp6_hdr *icmp6, *nicmp6;
 	int off = *offp;
@@ -410,6 +411,8 @@ icmp6_input(struct mbuf **mp, int *offp,
 	int code, sum, noff;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
+	ifp = m->m_pkthdr.rcvif;
+
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE);
 	/* m might change if M_LOOP.  So, call mtod after this */
@@ -431,10 +434,8 @@ icmp6_input(struct mbuf **mp, int *offp,
 	 * Note: SSM filters are not applied for ICMPv6 traffic.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
-		struct ifnet *ifp;
-		struct in6_multi *inm;
+		struct in6_multi	*inm;
 
-		ifp = m->m_pkthdr.rcvif;
 		inm = in6m_lookup(ifp, &ip6->ip6_dst);
 		if (inm == NULL) {
 			IP6STAT_INC(ip6s_notmember);
@@ -483,19 +484,19 @@ icmp6_input(struct mbuf **mp, int *offp,
 	}
 
 	ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]);
-	icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg);
+	icmp6_ifstat_inc(ifp, ifs6_in_msg);
 	if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
+		icmp6_ifstat_inc(ifp, ifs6_in_error);
 
 	switch (icmp6->icmp6_type) {
 	case ICMP6_DST_UNREACH:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach);
+		icmp6_ifstat_inc(ifp, ifs6_in_dstunreach);
 		switch (code) {
 		case ICMP6_DST_UNREACH_NOROUTE:
 			code = PRC_UNREACH_NET;
 			break;
 		case ICMP6_DST_UNREACH_ADMIN:
-			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib);
+			icmp6_ifstat_inc(ifp, ifs6_in_adminprohib);
 			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
 			break;
 		case ICMP6_DST_UNREACH_ADDR:
@@ -515,7 +516,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		break;
 
 	case ICMP6_PACKET_TOO_BIG:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig);
+		icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig);
 
 		/* validation is made in icmp6_mtudisc_update */
 
@@ -529,7 +530,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		break;
 
 	case ICMP6_TIME_EXCEEDED:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed);
+		icmp6_ifstat_inc(ifp, ifs6_in_timeexceed);
 		switch (code) {
 		case ICMP6_TIME_EXCEED_TRANSIT:
 			code = PRC_TIMXCEED_INTRANS;
@@ -544,7 +545,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		break;
 
 	case ICMP6_PARAM_PROB:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob);
+		icmp6_ifstat_inc(ifp, ifs6_in_paramprob);
 		switch (code) {
 		case ICMP6_PARAMPROB_NEXTHEADER:
 			code = PRC_UNREACH_PROTOCOL;
@@ -560,7 +561,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		break;
 
 	case ICMP6_ECHO_REQUEST:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
+		icmp6_ifstat_inc(ifp, ifs6_in_echo);
 		if (code != 0)
 			goto badcode;
 		if ((n = m_copy(m, 0, M_COPYALL)) == NULL) {
@@ -623,7 +624,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		break;
 
 	case ICMP6_ECHO_REPLY:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply);
+		icmp6_ifstat_inc(ifp, ifs6_in_echoreply);
 		if (code != 0)
 			goto badcode;
 		break;
@@ -633,11 +634,15 @@ icmp6_input(struct mbuf **mp, int *offp,
 	case MLD_LISTENER_DONE:
 	case MLDV2_LISTENER_REPORT:
 		/*
-		 * Drop MLD traffic which is not link-local.
+		 * Drop MLD traffic which is not link-local, has a hop limit
+		 * other than 1, or which does not have the
+		 * IPv6 HBH Router Alert option.
+		 * As IPv6 HBH options are stripped in ip6_input() we must
+		 * check an mbuf header flag.
 		 * XXX Should we also sanity check that these messages
 		 * were directed to a link-local multicast prefix?
 		 */
-		if (ip6->ip6_hlim != 1)
+		if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0)
 			goto freeit;
 		if (mld_input(m, off, icmp6len) != 0)
 			return (IPPROTO_DONE);
@@ -748,7 +753,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		break;
 
 	case ND_ROUTER_SOLICIT:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit);
+		icmp6_ifstat_inc(ifp, ifs6_in_routersolicit);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_router_solicit))
@@ -764,7 +769,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		break;
 
 	case ND_ROUTER_ADVERT:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert);
+		icmp6_ifstat_inc(ifp, ifs6_in_routeradvert);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_router_advert))
@@ -780,7 +785,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		break;
 
 	case ND_NEIGHBOR_SOLICIT:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit);
+		icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_neighbor_solicit))
@@ -796,7 +801,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		break;
 
 	case ND_NEIGHBOR_ADVERT:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert);
+		icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_neighbor_advert))
@@ -812,7 +817,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		break;
 
 	case ND_REDIRECT:
-		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect);
+		icmp6_ifstat_inc(ifp, ifs6_in_redirect);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_redirect))
@@ -840,7 +845,7 @@ icmp6_input(struct mbuf **mp, int *offp,
 		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
 		    icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
-		    m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0));
+		    ifp ? ifp->if_index : 0));
 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
 			/* ICMPv6 error: MUST deliver it by spec... */
 			code = PRC_NCMDS;

Modified: head/sys/netinet6/in6.h
==============================================================================
--- head/sys/netinet6/in6.h	Wed May 27 18:54:31 2009	(r192922)
+++ head/sys/netinet6/in6.h	Wed May 27 18:57:13 2009	(r192923)
@@ -619,6 +619,7 @@ struct ip6_mtuinfo {
 #define	M_DECRYPTED	M_PROTO3
 #define	M_LOOP		M_PROTO4
 #define	M_AUTHIPDGM	M_PROTO5
+#define	M_RTALERT_MLD	M_PROTO6
 
 #ifdef _KERNEL
 struct cmsghdr;

Modified: head/sys/netinet6/in6_mcast.c
==============================================================================
--- head/sys/netinet6/in6_mcast.c	Wed May 27 18:54:31 2009	(r192922)
+++ head/sys/netinet6/in6_mcast.c	Wed May 27 18:57:13 2009	(r192923)
@@ -305,6 +305,10 @@ im6o_match_group(const struct ip6_moptio
  * Find an IPv6 multicast source entry for this imo which matches
  * the given group index for this socket, and source address.
  *
+ * XXX TODO: The scope ID, if present in src, is stripped before
+ * any comparison. We SHOULD enforce scope/zone checks where the source
+ * filter entry has a link scope.
+ *
  * NOTE: This does not check if the entry is in-mode, merely if
  * it exists, which may not be the desired behaviour.
  */
@@ -328,6 +332,7 @@ im6o_match_source(const struct ip6_mopti
 
 	psa = (const sockunion_t *)src;
 	find.im6s_addr = psa->sin6.sin6_addr;
+	in6_clearscope(&find.im6s_addr);		/* XXX */
 	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
 
 	return ((struct in6_msource *)ims);
@@ -1159,6 +1164,20 @@ in6_mc_join_locked(struct ifnet *ifp, co
 	char			 ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
+#ifdef INVARIANTS
+	/*
+	 * Sanity: Check scope zone ID was set for ifp, if and
+	 * only if group is scoped to an interface.
+	 */
+	KASSERT(IN6_IS_ADDR_MULTICAST(mcaddr),
+	    ("%s: not a multicast address", __func__));
+	if (IN6_IS_ADDR_MC_LINKLOCAL(mcaddr) ||
+	    IN6_IS_ADDR_MC_INTFACELOCAL(mcaddr)) {
+		KASSERT(mcaddr->s6_addr16[1] != 0,
+		    ("%s: scope zone ID not set", __func__));
+	}
+#endif
+
 	IN6_MULTI_LOCK_ASSERT();
 
 	CTR4(KTR_MLD, "%s: join %s on %p(%s))", __func__,
@@ -1360,6 +1379,8 @@ in6p_block_unblock_source(struct inpcb *
 	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
+	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
+
 	/*
 	 * Check if we are actually a member of this group.
 	 */
@@ -1566,19 +1587,26 @@ in6p_get_source_filters(struct inpcb *in
 	if (error)
 		return (error);
 
-	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
+	if (msfr.msfr_group.ss_family != AF_INET6 ||
+	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
+		return (EINVAL);
+
+	gsa = (sockunion_t *)&msfr.msfr_group;
+	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
+	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
+		return (EADDRNOTAVAIL);
 	ifp = ifnet_byindex(msfr.msfr_ifindex);
 	if (ifp == NULL)
-		return (EINVAL);
+		return (EADDRNOTAVAIL);
+	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 
 	INP_WLOCK(inp);
 
 	/*
 	 * Lookup group on the socket.
 	 */
-	gsa = (sockunion_t *)&msfr.msfr_group;
 	idx = im6o_match_group(imo, ifp, &gsa->sa);
 	if (idx == -1 || imo->im6o_mfilters == NULL) {
 		INP_WUNLOCK(inp);
@@ -1803,6 +1831,12 @@ in6p_join_group(struct inpcb *inp, struc
 	ssa = (sockunion_t *)&gsr.gsr_source;
 	ssa->ss.ss_family = AF_UNSPEC;
 
+	/*
+	 * Chew everything into struct group_source_req.
+	 * Overwrite the port field if present, as the sockaddr
+	 * being copied in may be matched with a binary comparison.
+	 * Ignore passed-in scope ID.
+	 */
 	switch (sopt->sopt_name) {
 	case IPV6_JOIN_GROUP: {
 		struct ipv6_mreq mreq;
@@ -1846,16 +1880,20 @@ in6p_join_group(struct inpcb *inp, struc
 		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 
-		/*
-		 * Overwrite the port field if present, as the sockaddr
-		 * being copied in may be matched with a binary comparison.
-		 */
-		gsa->sin6.sin6_port = 0;
 		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
 			if (ssa->sin6.sin6_family != AF_INET6 ||
 			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
 				return (EINVAL);
+			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
+				return (EINVAL);
+			/*
+			 * TODO: Validate embedded scope ID in source
+			 * list entry against passed-in ifp, if and only
+			 * if source list filter entry is iface or node local.
+			 */
+			in6_clearscope(&ssa->sin6.sin6_addr);
 			ssa->sin6.sin6_port = 0;
+			ssa->sin6.sin6_scope_id = 0;
 		}
 
 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
@@ -1870,34 +1908,22 @@ in6p_join_group(struct inpcb *inp, struc
 		break;
 	}
 
-#ifdef notyet
-	/*
-	 * FIXME: Check for unspecified address (all groups).
-	 * Do we have a normative reference for this 'feature'?
-	 *
-	 * We use the unspecified address to specify to accept
-	 * all multicast addresses. Only super user is allowed
-	 * to do this.
-	 * XXX-BZ might need a better PRIV_NETINET_x for this
-	 */
-	if (IN6_IS_ADDR_UNSPECIFIED(&gsa->sin6.sin6_addr)) {
-		error = priv_check(curthread, PRIV_NETINET_MROUTE);
-		if (error)
-		break;
-	} else
-#endif
 	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
 		return (EADDRNOTAVAIL);
 
-#ifdef notyet
+	gsa->sin6.sin6_port = 0;
+	gsa->sin6.sin6_scope_id = 0;
+
 	/*
-	 * FIXME: Set interface scope in group address.
+	 * Always set the scope zone ID on memberships created from userland.
+	 * Use the passed-in ifp to do this.
+	 * XXX The in6_setscope() return value is meaningless.
+	 * XXX SCOPE6_LOCK() is taken by in6_setscope().
 	 */
-	(void)in6_setscope(&gsa->sin6.sin_addr, ifp, NULL);
-#endif
+	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 
 	/*
 	 * MCAST_JOIN_SOURCE on an exclusive membership is an error.
@@ -2031,6 +2057,8 @@ static int
 in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
 {
 	INIT_VNET_NET(curvnet);
+	INIT_VNET_INET6(curvnet);
+	struct ipv6_mreq		 mreq;
 	struct group_source_req		 gsr;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
@@ -2038,6 +2066,7 @@ in6p_leave_group(struct inpcb *inp, stru
 	struct ip6_moptions		*imo;
 	struct in6_msource		*ims;
 	struct in6_multi		*inm;
+	uint32_t			 ifindex;
 	size_t				 idx;
 	int				 error, is_final;
 #ifdef KTR
@@ -2045,6 +2074,7 @@ in6p_leave_group(struct inpcb *inp, stru
 #endif
 
 	ifp = NULL;
+	ifindex = 0;
 	error = 0;
 	is_final = 1;
 
@@ -2054,39 +2084,26 @@ in6p_leave_group(struct inpcb *inp, stru
 	ssa = (sockunion_t *)&gsr.gsr_source;
 	ssa->ss.ss_family = AF_UNSPEC;
 
+	/*
+	 * Chew everything passed in up into a struct group_source_req
+	 * as that is easier to process.
+	 * Note: Any embedded scope ID in the multicast group passed
+	 * in by userland is ignored, the interface index is the recommended
+	 * mechanism to specify an interface; see below.
+	 */
 	switch (sopt->sopt_name) {
-	case IPV6_LEAVE_GROUP: {
-		struct ipv6_mreq mreq;
-
+	case IPV6_LEAVE_GROUP:
 		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
 		    sizeof(struct ipv6_mreq));
 		if (error)
 			return (error);
-
 		gsa->sin6.sin6_family = AF_INET6;
 		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
 		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
-
-		if (mreq.ipv6mr_interface == 0) {
-#ifdef notyet
-			/*
-			 * FIXME: Resolve scope ambiguity when interface
-			 * index is unspecified.
-			 */
-			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
-#else
-			return (EADDRNOTAVAIL);
-#endif
-		} else {
-			if (mreq.ipv6mr_interface < 0 ||
-			    V_if_index < mreq.ipv6mr_interface)
-				return (EADDRNOTAVAIL);
-			ifp = ifnet_byindex(mreq.ipv6mr_interface);
-		}
-
-		CTR3(KTR_MLD, "%s: ipv6mr_interface = %d, ifp = %p",
-		    __func__, mreq.ipv6mr_interface, ifp);
-	} break;
+		gsa->sin6.sin6_port = 0;
+		gsa->sin6.sin6_scope_id = 0;
+		ifindex = mreq.ipv6mr_interface;
+		break;
 
 	case MCAST_LEAVE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
@@ -2105,17 +2122,22 @@ in6p_leave_group(struct inpcb *inp, stru
 		if (gsa->sin6.sin6_family != AF_INET6 ||
 		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
-
 		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
 			if (ssa->sin6.sin6_family != AF_INET6 ||
 			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
 				return (EINVAL);
+			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
+				return (EINVAL);
+			/*
+			 * TODO: Validate embedded scope ID in source
+			 * list entry against passed-in ifp, if and only
+			 * if source list filter entry is iface or node local.
+			 */
+			in6_clearscope(&ssa->sin6.sin6_addr);
 		}
-
-		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
-			return (EADDRNOTAVAIL);
-
-		ifp = ifnet_byindex(gsr.gsr_interface);
+		gsa->sin6.sin6_port = 0;
+		gsa->sin6.sin6_scope_id = 0;
+		ifindex = gsr.gsr_interface;
 		break;
 
 	default:
@@ -2128,14 +2150,39 @@ in6p_leave_group(struct inpcb *inp, stru
 	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
-#ifdef notyet
 	/*
-	 * FIXME: Need to embed ifp's scope ID in the address
-	 * handed down to MLD.
-	 * See KAME IPV6_LEAVE_GROUP implementation.
+	 * Validate interface index if provided. If no interface index
+	 * was provided separately, attempt to look the membership up
+	 * from the default scope as a last resort to disambiguate
+	 * the membership we are being asked to leave.
+	 * XXX SCOPE6 lock potentially taken here.
 	 */
-	(void)in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL);
-#endif
+	if (ifindex != 0) {
+		if (ifindex < 0 || V_if_index < ifindex)
+			return (EADDRNOTAVAIL);
+		ifp = ifnet_byindex(ifindex);
+		if (ifp == NULL)
+			return (EADDRNOTAVAIL);
+		(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
+	} else {
+		error = sa6_embedscope(&gsa->sin6, V_ip6_use_defzone);
+		if (error)
+			return (EADDRNOTAVAIL);
+		/*
+		 * XXX For now, stomp on zone ID for the corner case.
+		 * This is not the 'KAME way', but we need to see the ifp
+		 * directly until such time as this implementation is
+		 * refactored, assuming the scope IDs are the way to go.
+		 */
+		ifindex = ntohs(gsa->sin6.sin6_addr.s6_addr16[1]);
+		KASSERT(ifindex != 0, ("%s: bad zone ID", __func__));
+		ifp = ifnet_byindex(ifindex);
+		if (ifp == NULL)
+			return (EADDRNOTAVAIL);
+	}
+
+	CTR2(KTR_MLD, "%s: ifp = %p", __func__, ifp);
+	KASSERT(ifp != NULL, ("%s: ifp did not resolve", __func__));
 
 	/*
 	 * Find the membership in the membership array.
@@ -2312,10 +2359,10 @@ in6p_set_source_filters(struct inpcb *in
 
 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
 		return (EADDRNOTAVAIL);
-
 	ifp = ifnet_byindex(msfr.msfr_ifindex);
 	if (ifp == NULL)
 		return (EADDRNOTAVAIL);
+	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 
 	/*
 	 * Take the INP write lock.
@@ -2393,6 +2440,16 @@ in6p_set_source_filters(struct inpcb *in
 				error = EINVAL;
 				break;
 			}
+			if (IN6_IS_ADDR_MULTICAST(&psin->sin6_addr)) {
+				error = EINVAL;
+				break;
+			}
+			/*
+			 * TODO: Validate embedded scope ID in source
+			 * list entry against passed-in ifp, if and only
+			 * if source list filter entry is iface or node local.
+			 */
+			in6_clearscope(&psin->sin6_addr);
 			error = im6f_get_source(imf, psin, &lims);
 			if (error)
 				break;
@@ -2560,7 +2617,7 @@ static int
 sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS)
 {
 	INIT_VNET_NET(curvnet);
-	struct in6_addr			*pgina;
+	struct in6_addr			 mcaddr;
 	struct in6_addr			 src;
 	struct ifnet			*ifp;
 	struct ifmultiaddr		*ifma;
@@ -2591,10 +2648,10 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_
 		return (ENOENT);
 	}
 
-	pgina = (struct in6_addr *)&name[1];
-	if (!IN6_IS_ADDR_MULTICAST(pgina)) {
+	memcpy(&mcaddr, &name[1], sizeof(struct in6_addr));
+	if (!IN6_IS_ADDR_MULTICAST(&mcaddr)) {
 		CTR2(KTR_MLD, "%s: group %s is not multicast",
-		    __func__, ip6_sprintf(ip6tbuf, pgina));
+		    __func__, ip6_sprintf(ip6tbuf, &mcaddr));
 		return (EINVAL);
 	}
 
@@ -2604,6 +2661,10 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_
 		    __func__, ifindex);
 		return (ENOENT);
 	}
+	/*
+	 * Internal MLD lookups require that scope/zone ID is set.
+	 */
+	(void)in6_setscope(&mcaddr, ifp, NULL);
 
 	retval = sysctl_wire_old_buffer(req,
 	    sizeof(uint32_t) + (in6_mcast_maxgrpsrc * sizeof(struct in6_addr)));
@@ -2618,7 +2679,7 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_
 		    ifma->ifma_protospec == NULL)
 			continue;
 		inm = (struct in6_multi *)ifma->ifma_protospec;
-		if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, pgina))
+		if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr))
 			continue;
 		fmode = inm->in6m_st[1].iss_fmode;
 		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));

Modified: head/sys/netinet6/ip6_input.c
==============================================================================
--- head/sys/netinet6/ip6_input.c	Wed May 27 18:54:31 2009	(r192922)
+++ head/sys/netinet6/ip6_input.c	Wed May 27 18:57:13 2009	(r192923)
@@ -773,10 +773,11 @@ passin:
 		 * case we should pass the packet to the multicast routing
 		 * daemon.
 		 */
-		if (rtalert != ~0 && V_ip6_forwarding) {
+		if (rtalert != ~0) {
 			switch (rtalert) {
 			case IP6OPT_RTALERT_MLD:
-				ours = 1;
+				if (V_ip6_forwarding)
+					ours = 1;
 				break;
 			default:
 				/*
@@ -820,6 +821,9 @@ passin:
 		 * The packet is returned (relatively) intact; if
 		 * ip6_mforward() returns a non-zero value, the packet
 		 * must be discarded, else it may be accepted below.
+		 *
+		 * XXX TODO: Check hlim and multicast scope here to avoid
+		 * unnecessarily calling into ip6_mforward().
 		 */
 		if (ip6_mforward &&
 		    ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
@@ -882,6 +886,14 @@ passin:
 		if (ip6_ipsec_input(m, nxt))
 			goto bad;
 #endif /* IPSEC */
+
+		/*
+		 * Use mbuf flags to propagate Router Alert option to
+		 * ICMPv6 layer, as hop-by-hop options have been stripped.
+		 */
+		if (nxt == IPPROTO_ICMPV6 && rtalert != ~0)
+			m->m_flags |= M_RTALERT_MLD;
+
 		nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
 	}
 	goto out;

Modified: head/sys/netinet6/mld6.c
==============================================================================
--- head/sys/netinet6/mld6.c	Wed May 27 18:54:31 2009	(r192922)
+++ head/sys/netinet6/mld6.c	Wed May 27 18:57:13 2009	(r192923)
@@ -122,9 +122,9 @@ static void	mld_slowtimo_vnet(void);
 static void	mld_sysinit(void);
 static void	mld_sysuninit(void);
 static int	mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
-		    const struct mld_hdr *);
+		    /*const*/ struct mld_hdr *);
 static int	mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
-		    const struct mld_hdr *);
+		    /*const*/ struct mld_hdr *);
 static void	mld_v1_process_group_timer(struct in6_multi *, const int);
 static void	mld_v1_process_querier_timers(struct mld_ifinfo *);
 static int	mld_v1_transmit_report(struct in6_multi *, const int);
@@ -239,6 +239,11 @@ SYSCTL_V_PROC(V_NET, vnet_inet6, _net_in
 SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE,
     sysctl_mld_ifinfo, "Per-interface MLDv2 state");
 
+static int	mld_v1enable = 1;
+SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW,
+    &mld_v1enable, 0, "Enable fallback to MLDv1");
+TUNABLE_INT("net.inet6.mld.v1enable", &mld_v1enable);
+
 /*
  * Packed Router Alert option structure declaration.
  */
@@ -615,36 +620,97 @@ mli_delete_locked(const struct ifnet *if
 /*
  * Process a received MLDv1 general or address-specific query.
  * Assumes that the query header has been pulled up to sizeof(mld_hdr).
+ *
+ * NOTE: Can't be fully const correct as we temporarily embed scope ID in
+ * mld_addr. This is OK as we own the mbuf chain.
  */
 static int
 mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
-    const struct mld_hdr *mld)
+    /*const*/ struct mld_hdr *mld)
 {
 	struct ifmultiaddr	*ifma;
 	struct mld_ifinfo	*mli;
 	struct in6_multi	*inm;
+	int			 is_general_query;
 	uint16_t		 timer;
 #ifdef KTR
 	char			 ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
+	is_general_query = 0;
+
+	if (!mld_v1enable) {
+		CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)",
+		    ip6_sprintf(ip6tbuf, &mld->mld_addr),
+		    ifp, ifp->if_xname);
+		return (0);
+	}
+
+	/*
+	 * RFC3810 Section 6.2: MLD queries must originate from
+	 * a router's link-local address.
+	 */
+	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
+		CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
+		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
+		    ifp, ifp->if_xname);
+		return (0);
+	}
+
+	/*
+	 * Do address field validation upfront before we accept
+	 * the query.
+	 */
+	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
+		/*
+		 * MLDv1 General Query.
+		 * If this was not sent to the all-nodes group, ignore it.
+		 */
+		struct in6_addr		 dst;
+
+		dst = ip6->ip6_dst;
+		in6_clearscope(&dst);
+		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
+			return (EINVAL);
+		is_general_query = 1;
+	} else {
+		/*
+		 * Embed scope ID of receiving interface in MLD query for
+		 * lookup whilst we don't hold other locks.
+		 */
+		in6_setscope(&mld->mld_addr, ifp, NULL);
+	}
+
 	IN6_MULTI_LOCK();
 	MLD_LOCK();
 	IF_ADDR_LOCK(ifp);
 
-	mli = MLD_IFINFO(ifp);
-	KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
-
 	/*
 	 * Switch to MLDv1 host compatibility mode.
 	 */
+	mli = MLD_IFINFO(ifp);
+	KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
 	mld_set_version(mli, MLD_VERSION_1);
 
-	timer = ntohs(mld->mld_maxdelay) * PR_FASTHZ / MLD_TIMER_SCALE;
+	timer = (ntohs(mld->mld_maxdelay) * PR_FASTHZ) / MLD_TIMER_SCALE;
 	if (timer == 0)
 		timer = 1;
 
-	if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
+	if (is_general_query) {
+		/*
+		 * For each reporting group joined on this
+		 * interface, kick the report timer.
+		 */
+		CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
+		    ifp, ifp->if_xname);
+		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+			if (ifma->ifma_addr->sa_family != AF_INET6 ||
+			    ifma->ifma_protospec == NULL)
+				continue;
+			inm = (struct in6_multi *)ifma->ifma_protospec;
+			mld_v1_update_group(inm, timer);
+		}
+	} else {
 		/*
 		 * MLDv1 Group-Specific Query.
 		 * If this is a group-specific MLDv1 query, we need only
@@ -657,32 +723,8 @@ mld_v1_input_query(struct ifnet *ifp, co
 			    ifp, ifp->if_xname);
 			mld_v1_update_group(inm, timer);
 		}
-	} else {
-		/*
-		 * MLDv1 General Query.
-		 * If this was not sent to the all-nodes group, ignore it.
-		 */
-		struct in6_addr dst;
-
-		dst = ip6->ip6_dst;
-		in6_clearscope(&dst);
-		if (IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) {
-			/*
-			 * For each reporting group joined on this
-			 * interface, kick the report timer.
-			 */
-			CTR2(KTR_MLD,
-			    "process v1 general query on ifp %p(%s)",
-			    ifp, ifp->if_xname);
-
-			TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
-				if (ifma->ifma_addr->sa_family != AF_INET6 ||
-				    ifma->ifma_protospec == NULL)
-					continue;
-				inm = (struct in6_multi *)ifma->ifma_protospec;
-				mld_v1_update_group(inm, timer);
-			}
-		}
+		/* XXX Clear embedded scope ID as userland won't expect it. */
+		in6_clearscope(&mld->mld_addr);
 	}
 
 	IF_ADDR_UNLOCK(ifp);
@@ -769,18 +811,38 @@ mld_v2_input_query(struct ifnet *ifp, co
 	struct mldv2_query	*mld;
 	struct in6_multi	*inm;
 	uint32_t		 maxdelay, nsrc, qqi;
+	int			 is_general_query;
 	uint16_t		 timer;
 	uint8_t			 qrv;
+#ifdef KTR
+	char			 ip6tbuf[INET6_ADDRSTRLEN];
+#endif
+
+	is_general_query = 0;
+
+	/*
+	 * RFC3810 Section 6.2: MLD queries must originate from
+	 * a router's link-local address.
+	 */
+	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
+		CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
+		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
+		    ifp, ifp->if_xname);
+		return (0);
+	}
 
-	CTR2(KTR_MLD, "process v2 query on ifp %p(%s)", ifp, ifp->if_xname);
+	CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, ifp->if_xname);
 
 	mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
 
 	maxdelay = ntohs(mld->mld_maxdelay);	/* in 1/10ths of a second */
 	if (maxdelay >= 32678) {
-		maxdelay = (MLD_MRC_MANT(mld->mld_maxdelay) | 0x1000) <<
-			   (MLD_MRC_EXP(mld->mld_maxdelay) + 3);
+		maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
+			   (MLD_MRC_EXP(maxdelay) + 3);
 	}
+	timer = (maxdelay * PR_FASTHZ) / MLD_TIMER_SCALE;
+	if (timer == 0)
+		timer = 1;
 
 	qrv = MLD_QRV(mld->mld_misc);
 	if (qrv < 2) {
@@ -795,10 +857,6 @@ mld_v2_input_query(struct ifnet *ifp, co
 		     (MLD_QQIC_EXP(mld->mld_qqi) + 3);
 	}
 
-	timer = maxdelay * PR_FASTHZ / MLD_TIMER_SCALE;
-	if (timer == 0)
-		timer = 1;
-
 	nsrc = ntohs(mld->mld_numsrc);
 	if (nsrc > MLD_MAX_GS_SOURCES)
 		return (EMSGSIZE);
@@ -806,6 +864,33 @@ mld_v2_input_query(struct ifnet *ifp, co
 	    (nsrc * sizeof(struct in6_addr)))
 		return (EMSGSIZE);
 
+	/*
+	 * Do further input validation upfront to avoid resetting timers
+	 * should we need to discard this query.
+	 */
+	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
+		/*
+		 * General Queries SHOULD be directed to ff02::1.
+		 * A general query with a source list has undefined
+		 * behaviour; discard it.
+		 */
+		struct in6_addr		 dst;
+
+		dst = ip6->ip6_dst;
+		in6_clearscope(&dst);
+		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) ||
+		    nsrc > 0)
+			return (EINVAL);
+		is_general_query = 1;
+	} else {
+		/*
+		 * Embed scope ID of receiving interface in MLD query for
+		 * lookup whilst we don't hold other locks (due to KAME
+		 * locking lameness). We own this mbuf chain just now.
+		 */
+		in6_setscope(&mld->mld_addr, ifp, NULL);
+	}
+
 	IN6_MULTI_LOCK();
 	MLD_LOCK();
 	IF_ADDR_LOCK(ifp);
@@ -813,8 +898,15 @@ mld_v2_input_query(struct ifnet *ifp, co
 	mli = MLD_IFINFO(ifp);
 	KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
 
-	mld_set_version(mli, MLD_VERSION_2);
+	/*
+	 * Discard the v2 query if we're in Compatibility Mode.
+	 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
+	 * until the Old Version Querier Present timer expires.
+	 */
+	if (mli->mli_version != MLD_VERSION_2)
+		goto out_locked;
 
+	mld_set_version(mli, MLD_VERSION_2);
 	mli->mli_rv = qrv;
 	mli->mli_qi = qqi;
 	mli->mli_qri = maxdelay;
@@ -822,39 +914,20 @@ mld_v2_input_query(struct ifnet *ifp, co
 	CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi,
 	    maxdelay);
 
-	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
+	if (is_general_query) {
 		/*
 		 * MLDv2 General Query.
 		 *
 		 * Schedule a current-state report on this ifp for
 		 * all groups, possibly containing source lists.
 		 *
-		 * Strip scope ID embedded by ip6_input(). We do not need
-		 * to do this for the MLD payload.
-		 */
-		struct in6_addr dst;
-
-		dst = ip6->ip6_dst;
-		in6_clearscope(&dst);
-		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) ||
-		    nsrc > 0) {
-			/*
-			 * General Queries SHOULD be directed to ff02::1.
-			 * A general query with a source list has undefined
-			 * behaviour; discard it.
-			 */
-			goto out_locked;
-		}
-
-		CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
-		    ifp, ifp->if_xname);
-
-		/*
 		 * If there is a pending General Query response
 		 * scheduled earlier than the selected delay, do
 		 * not schedule any other reports.
 		 * Otherwise, reset the interface timer.
 		 */
+		CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
+		    ifp, ifp->if_xname);
 		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
 			mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
 			V_interface_timers_running6 = 1;
@@ -890,6 +963,9 @@ mld_v2_input_query(struct ifnet *ifp, co
 		 */
 		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer)
 			mld_v2_process_group_query(inm, mli, timer, m, off);
+
+		/* XXX Clear embedded scope ID as userland won't expect it. */
+		in6_clearscope(&mld->mld_addr);
 	}
 
 out_locked:
@@ -1017,27 +1093,57 @@ mld_v2_process_group_query(struct in6_mu
 /*
  * Process a received MLDv1 host membership report.
  * Assumes mld points to mld_hdr in pulled up mbuf chain.
+ *
+ * NOTE: Can't be fully const correct as we temporarily embed scope ID in
+ * mld_addr. This is OK as we own the mbuf chain.
  */
 static int
 mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
-    const struct mld_hdr *mld)
+    /*const*/ struct mld_hdr *mld)
 {
+	struct in6_addr		 src, dst;
 	struct in6_ifaddr	*ia;
 	struct in6_multi	*inm;
-	struct in6_addr		 src, dst;
 #ifdef KTR
 	char			 ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
+	if (!mld_v1enable) {
+		CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)",
+		    ip6_sprintf(ip6tbuf, &mld->mld_addr),
+		    ifp, ifp->if_xname);
+		return (0);
+	}
+
 	if (ifp->if_flags & IFF_LOOPBACK)
 		return (0);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list