svn commit: r290471 - head/sys/netinet6

Adrian Chadd adrian at FreeBSD.org
Fri Nov 6 23:07:46 UTC 2015


Author: adrian
Date: Fri Nov  6 23:07:43 2015
New Revision: 290471
URL: https://svnweb.freebsd.org/changeset/base/290471

Log:
  [netinet6]: Create a new IPv6 netisr which expects the frames to have been verified.
  
  This is required for fragments and encapsulated data (e.g., tunneling) to be redistributed
  to the RSS bucket based on the eventual IPv6 header and protocol (TCP, UDP, etc.) header.
  
  * Add an mbuf tag with the state of IPv6 options parsing before the frame is queued
    into the direct dispatch handler;
  * Continue processing and complete the frame reception in the correct RSS bucket /
    netisr context.
  
  Testing results are in the phabricator review.
  
  Differential Revision:	https://reviews.freebsd.org/D3563
  Submitted by:	Tiwei Bie <btw at mail.ustc.edu.cn>

Modified:
  head/sys/netinet6/frag6.c
  head/sys/netinet6/in6.h
  head/sys/netinet6/in6_rss.c
  head/sys/netinet6/ip6_input.c
  head/sys/netinet6/ip6_var.h

Modified: head/sys/netinet6/frag6.c
==============================================================================
--- head/sys/netinet6/frag6.c	Fri Nov  6 22:24:41 2015	(r290470)
+++ head/sys/netinet6/frag6.c	Fri Nov  6 23:07:43 2015	(r290471)
@@ -32,6 +32,8 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_rss.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
@@ -46,6 +48,7 @@ __FBSDID("$FreeBSD$");
 
 #include <net/if.h>
 #include <net/if_var.h>
+#include <net/netisr.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
@@ -159,6 +162,11 @@ frag6_input(struct mbuf **mp, int *offp,
 	int fragoff, frgpartlen;	/* must be larger than u_int16_t */
 	struct ifnet *dstifp;
 	u_int8_t ecn, ecn0;
+#ifdef RSS
+	struct m_tag *mtag;
+	struct ip6_direct_ctx *ip6dc;
+#endif
+
 #if 0
 	char ip6buf[INET6_ADDRSTRLEN];
 #endif
@@ -577,9 +585,31 @@ insert:
 		m->m_pkthdr.len = plen;
 	}
 
+#ifdef RSS
+	mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc),
+	    M_NOWAIT);
+	if (mtag == NULL)
+		goto dropfrag;
+
+	ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
+	ip6dc->ip6dc_nxt = nxt;
+	ip6dc->ip6dc_off = offset;
+
+	m_tag_prepend(m, mtag);
+#endif
+
+	IP6Q_UNLOCK();
 	IP6STAT_INC(ip6s_reassembled);
 	in6_ifstat_inc(dstifp, ifs6_reass_ok);
 
+#ifdef RSS
+	/*
+	 * Queue/dispatch for reprocessing.
+	 */
+	netisr_dispatch(NETISR_IPV6_DIRECT, m);
+	return IPPROTO_DONE;
+#endif
+
 	/*
 	 * Tell launch routine the next header
 	 */
@@ -587,7 +617,6 @@ insert:
 	*mp = m;
 	*offp = offset;
 
-	IP6Q_UNLOCK();
 	return nxt;
 
  dropfrag:

Modified: head/sys/netinet6/in6.h
==============================================================================
--- head/sys/netinet6/in6.h	Fri Nov  6 22:24:41 2015	(r290470)
+++ head/sys/netinet6/in6.h	Fri Nov  6 23:07:43 2015	(r290471)
@@ -382,6 +382,11 @@ struct route_in6 {
 };
 #endif
 
+#ifdef _KERNEL
+#define MTAG_ABI_IPV6		1444287380	/* IPv6 ABI */
+#define IPV6_TAG_DIRECT		0		/* direct-dispatch IPv6 */
+#endif /* _KERNEL */
+
 /*
  * Options for use with [gs]etsockopt at the IPV6 level.
  * First word of comment is data type; bool is stored in int.

Modified: head/sys/netinet6/in6_rss.c
==============================================================================
--- head/sys/netinet6/in6_rss.c	Fri Nov  6 22:24:41 2015	(r290470)
+++ head/sys/netinet6/in6_rss.c	Fri Nov  6 23:07:43 2015	(r290471)
@@ -172,6 +172,7 @@ rss_mbuf_software_hash_v6(const struct m
     uint32_t *hashtype)
 {
 	const struct ip6_hdr *ip6;
+	const struct ip6_frag *ip6f;
 	const struct tcphdr *th;
 	const struct udphdr *uh;
 	uint32_t flowtype;
@@ -222,6 +223,26 @@ rss_mbuf_software_hash_v6(const struct m
 	}
 
 	/*
+	 * Ignore the fragment header if this is an "atomic" fragment
+	 * (offset and m bit set to 0)
+	 */
+	if (proto == IPPROTO_FRAGMENT) {
+		if (m->m_len < off + sizeof(struct ip6_frag)) {
+			RSS_DEBUG("short fragment frame?\n");
+			return (-1);
+		}
+		ip6f = (const struct ip6_frag *)((c_caddr_t)ip6 + off);
+		if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
+			off = ip6_lasthdr(m, off, proto, &nxt);
+			if (off < 0) {
+				RSS_DEBUG("invalid extension header\n");
+				return (-1);
+			}
+			proto = nxt;
+		}
+	}
+
+	/*
 	 * If the mbuf flowid/flowtype matches the packet type,
 	 * and we don't support the 4-tuple version of the given protocol,
 	 * then signal to the owner that it can trust the flowid/flowtype

Modified: head/sys/netinet6/ip6_input.c
==============================================================================
--- head/sys/netinet6/ip6_input.c	Fri Nov  6 22:24:41 2015	(r290470)
+++ head/sys/netinet6/ip6_input.c	Fri Nov  6 23:07:43 2015	(r290471)
@@ -144,6 +144,17 @@ static struct netisr_handler ip6_nh = {
 #endif
 };
 
+#ifdef RSS
+static struct netisr_handler ip6_direct_nh = {
+	.nh_name = "ip6_direct",
+	.nh_handler = ip6_direct_input,
+	.nh_proto = NETISR_IPV6_DIRECT,
+	.nh_m2cpuid = rss_soft_m2cpuid_v6,
+	.nh_policy = NETISR_POLICY_CPU,
+	.nh_dispatch = NETISR_DISPATCH_HYBRID,
+};
+#endif
+
 VNET_DECLARE(struct callout, in6_tmpaddrtimer_ch);
 #define	V_in6_tmpaddrtimer_ch		VNET(in6_tmpaddrtimer_ch)
 
@@ -222,6 +233,9 @@ ip6_init(void)
 		}
 
 	netisr_register(&ip6_nh);
+#ifdef RSS
+	netisr_register(&ip6_direct_nh);
+#endif
 }
 
 /*
@@ -403,6 +417,66 @@ out:
 	return (1);
 }
 
+#ifdef RSS
+/*
+ * IPv6 direct input routine.
+ *
+ * This is called when reinjecting completed fragments where
+ * all of the previous checking and book-keeping has been done.
+ */
+void
+ip6_direct_input(struct mbuf *m)
+{
+	int off, nxt;
+	int nest;
+	struct m_tag *mtag;
+	struct ip6_direct_ctx *ip6dc;
+
+	mtag = m_tag_locate(m, MTAG_ABI_IPV6, IPV6_TAG_DIRECT, NULL);
+	KASSERT(mtag != NULL, ("Reinjected packet w/o direct ctx tag!"));
+
+	ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
+	nxt = ip6dc->ip6dc_nxt;
+	off = ip6dc->ip6dc_off;
+
+	nest = 0;
+
+	m_tag_delete(m, mtag);
+
+	while (nxt != IPPROTO_DONE) {
+		if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
+			IP6STAT_INC(ip6s_toomanyhdr);
+			goto bad;
+		}
+
+		/*
+		 * protection against faulty packet - there should be
+		 * more sanity checks in header chain processing.
+		 */
+		if (m->m_pkthdr.len < off) {
+			IP6STAT_INC(ip6s_tooshort);
+			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
+			goto bad;
+		}
+
+#ifdef IPSEC
+		/*
+		 * enforce IPsec policy checking if we are seeing last header.
+		 * note that we do not visit this with protocols with pcb layer
+		 * code - like udp/tcp/raw ip.
+		 */
+		if (ip6_ipsec_input(m, nxt))
+			goto bad;
+#endif /* IPSEC */
+
+		nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
+	}
+	return;
+bad:
+	m_freem(m);
+}
+#endif
+
 void
 ip6_input(struct mbuf *m)
 {
@@ -713,6 +787,13 @@ passin:
 		nxt = ip6->ip6_nxt;
 
 	/*
+	 * Use mbuf flags to propagate Router Alert option to
+	 * ICMPv6 layer, as hop-by-hop options have been stripped.
+	 */
+	if (rtalert != ~0)
+		m->m_flags |= M_RTALERT_MLD;
+
+	/*
 	 * Check that the amount of data in the buffers
 	 * is as at least much as the IPv6 header would have us expect.
 	 * Trim mbufs if longer than we expect.
@@ -809,13 +890,6 @@ passin:
 			goto bad;
 #endif /* IPSEC */
 
-		/*
-		 * Use mbuf flags to propagate Router Alert option to
-		 * ICMPv6 layer, as hop-by-hop options have been stripped.
-		 */
-		if (nxt == IPPROTO_ICMPV6 && rtalert != ~0)
-			m->m_flags |= M_RTALERT_MLD;
-
 		nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
 	}
 	return;

Modified: head/sys/netinet6/ip6_var.h
==============================================================================
--- head/sys/netinet6/ip6_var.h	Fri Nov  6 22:24:41 2015	(r290470)
+++ head/sys/netinet6/ip6_var.h	Fri Nov  6 23:07:43 2015	(r290471)
@@ -99,6 +99,14 @@ struct	ip6asfrag {
 #define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m))
 
 /*
+ * IP6 reinjecting structure.
+ */
+struct ip6_direct_ctx {
+	uint32_t	ip6dc_nxt;	/* next header to process */
+	uint32_t	ip6dc_off;	/* offset to next header */
+};
+
+/*
  * Structure attached to inpcb.in6p_moptions and
  * passed to ip6_output when IPv6 multicast options are in use.
  * This structure is lazy-allocated.
@@ -353,6 +361,7 @@ int	ip6proto_register(short);
 int	ip6proto_unregister(short);
 
 void	ip6_input(struct mbuf *);
+void	ip6_direct_input(struct mbuf *);
 void	ip6_freepcbopts(struct ip6_pktopts *);
 
 int	ip6_unknown_opt(u_int8_t *, struct mbuf *, int);


More information about the svn-src-all mailing list