svn commit: r358572 - head/sys/netinet6

Bjoern A. Zeeb bz at FreeBSD.org
Tue Mar 3 11:32:48 UTC 2020


Author: bz
Date: Tue Mar  3 11:32:47 2020
New Revision: 358572
URL: https://svnweb.freebsd.org/changeset/base/358572

Log:
  ip6_output: use new routing KPI when not passed a cached route
  
  Implement the equivalent of r347375 (IPv4) for the IPv6 output path.
  In IPv6 we get passed a cached route (and inp) by udp6_output()
  depending on whether we acquired a write lock on the INP.
  In case we neither bind nor connect a first UDP packet would come in
  with a cached route (wlocked) and all further packets would not.
  In case we bind and do not connect we never write-lock the inp.
  
  When we do not pass in a cached route, rather than providing the
  storage for a route locally and pass it over the old lookup code
  and down the stack, use the new route lookup KPI and acquire all
  details we need to send the packet.
  
  Compared to the IPv4 code the IPv6 code has a couple of possible
  complications: given an option with a routing hdr/caching route there,
  and path mtu (ro_pmtu) case which now equally has to deal with the
  possibility of having a route which is NULL passed in, and the
  fwd_tag in case a firewall changes the next hop (something to
  factor out in the future).
  
  Sponsored by:	Netflix
  Reviewed by:	glebius
  MFC after:	2 weeks
  Differential Revision:	https://reviews.freebsd.org/D23886

Modified:
  head/sys/netinet6/ip6_output.c

Modified: head/sys/netinet6/ip6_output.c
==============================================================================
--- head/sys/netinet6/ip6_output.c	Tue Mar  3 10:02:58 2020	(r358571)
+++ head/sys/netinet6/ip6_output.c	Tue Mar  3 11:32:47 2020	(r358572)
@@ -424,12 +424,12 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
 	struct ifnet *ifp, *origifp;
 	struct mbuf *m = m0;
 	struct mbuf *mprev;
-	int tlen, len;
-	struct route_in6 ip6route;
-	struct rtentry *rt = NULL;
-	struct sockaddr_in6 *dst, src_sa, dst_sa;
+	struct route_in6 *ro_pmtu;
+	struct rtentry *rt;
+	struct sockaddr_in6 *dst, sin6, src_sa, dst_sa;
 	struct in6_addr odst;
 	u_char *nexthdrp;
+	int tlen, len;
 	int error = 0;
 	struct in6_ifaddr *ia = NULL;
 	u_long mtu;
@@ -438,7 +438,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
 	struct ip6_exthdrs exthdrs;
 	struct in6_addr src0, dst0;
 	u_int32_t zone;
-	struct route_in6 *ro_pmtu = NULL;
 	bool hdrsplit;
 	int sw_csum, tso;
 	int needfiblookup;
@@ -631,15 +630,15 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
 	IP6STAT_INC(ip6s_localout);
 
 	/* Route packet. */
-	if (ro == NULL) {
-		ro = &ip6route;
-		bzero((caddr_t)ro, sizeof(*ro));
-	}
 	ro_pmtu = ro;
 	if (opt && opt->ip6po_rthdr)
 		ro = &opt->ip6po_route;
-	dst = (struct sockaddr_in6 *)&ro->ro_dst;
+	if (ro != NULL)
+		dst = (struct sockaddr_in6 *)&ro->ro_dst;
+	else
+		dst = &sin6;
 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
+
 again:
 	/*
 	 * If specified, try to fill in the traffic class field.
@@ -666,55 +665,127 @@ again:
 		else
 			ip6->ip6_hlim = V_ip6_defmcasthlim;
 	}
+
+	if (ro == NULL || ro->ro_rt == NULL) {
+		bzero(dst, sizeof(*dst));
+		dst->sin6_family = AF_INET6;
+		dst->sin6_len = sizeof(*dst);
+		dst->sin6_addr = ip6->ip6_dst;
+	} 
 	/*
-	 * Validate route against routing table additions;
-	 * a better/more specific route might have been added.
+	 * Validate route against routing table changes.
 	 * Make sure that the address family is set in route.
 	 */
-	if (inp) {
-		ro->ro_dst.sin6_family = AF_INET6;
-		RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum);
-	}
-	if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) &&
-	    ro->ro_dst.sin6_family == AF_INET6 &&
-	    IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) {
-		rt = ro->ro_rt;
-		ifp = ro->ro_rt->rt_ifp;
+	rt = NULL;
+	ifp = NULL;
+	mtu = 0;
+	if (ro != NULL) {
+		if (ro->ro_rt != NULL && inp != NULL) {
+			ro->ro_dst.sin6_family = AF_INET6; /* XXX KASSERT? */
+			RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie,
+			    fibnum);
+		}
+		if (ro->ro_rt != NULL && fwd_tag == NULL &&
+		    ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+		    ro->ro_rt->rt_ifp == NULL ||
+		    !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) ||
+		    ro->ro_dst.sin6_family != AF_INET6 ||
+		    !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)))
+			RO_INVALIDATE_CACHE(ro);
+
+		if (ro->ro_rt != NULL && fwd_tag == NULL &&
+		    (ro->ro_rt->rt_flags & RTF_UP) &&
+		    ro->ro_dst.sin6_family == AF_INET6 &&
+		    IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) {
+			rt = ro->ro_rt;
+			ifp = ro->ro_rt->rt_ifp;
+		} else {
+			if (ro->ro_lle)
+				LLE_FREE(ro->ro_lle);	/* zeros ro_lle */
+			ro->ro_lle = NULL;
+			if (fwd_tag == NULL) {
+				bzero(&dst_sa, sizeof(dst_sa));
+				dst_sa.sin6_family = AF_INET6;
+				dst_sa.sin6_len = sizeof(dst_sa);
+				dst_sa.sin6_addr = ip6->ip6_dst;
+			}
+			error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp,
+			    &rt, fibnum);
+			if (error != 0) {
+				IP6STAT_INC(ip6s_noroute);
+				if (ifp != NULL)
+					in6_ifstat_inc(ifp, ifs6_out_discard);
+				goto bad;
+			}
+			if (ifp != NULL)
+			    mtu = ifp->if_mtu;
+		}
+		if (rt == NULL) {
+			/*
+			 * If in6_selectroute() does not return a route entry
+			 * dst may not have been updated.
+			 */
+			*dst = dst_sa;	/* XXX */
+		} else {
+			if (rt->rt_flags & RTF_HOST)
+			    mtu = rt->rt_mtu;
+			ia = (struct in6_ifaddr *)(rt->rt_ifa);
+			counter_u64_add(rt->rt_pksent, 1);
+		}
 	} else {
-		if (ro->ro_lle)
-			LLE_FREE(ro->ro_lle);	/* zeros ro_lle */
-		ro->ro_lle = NULL;
+		struct nhop6_extended nh6;
+		struct in6_addr kdst;
+		uint32_t scopeid;
+
 		if (fwd_tag == NULL) {
 			bzero(&dst_sa, sizeof(dst_sa));
 			dst_sa.sin6_family = AF_INET6;
 			dst_sa.sin6_len = sizeof(dst_sa);
 			dst_sa.sin6_addr = ip6->ip6_dst;
 		}
-		error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp,
-		    &rt, fibnum);
+
+		if (IN6_IS_ADDR_MULTICAST(&dst_sa.sin6_addr) &&
+		    im6o != NULL &&
+		    (ifp = im6o->im6o_multicast_ifp) != NULL) {
+			/* We do not need a route lookup. */
+			*dst = dst_sa;	/* XXX */
+			goto nonh6lookup;
+		}
+
+		in6_splitscope(&dst_sa.sin6_addr, &kdst, &scopeid);
+
+		if (IN6_IS_ADDR_MC_LINKLOCAL(&dst_sa.sin6_addr) ||
+		    IN6_IS_ADDR_MC_NODELOCAL(&dst_sa.sin6_addr)) {
+			if (scopeid > 0) {
+				ifp = in6_getlinkifnet(scopeid);
+				*dst = dst_sa;	/* XXX */
+				goto nonh6lookup;
+			}
+		}
+
+		error = fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0,
+		    &nh6);
 		if (error != 0) {
-			if (ifp != NULL)
-				in6_ifstat_inc(ifp, ifs6_out_discard);
+			IP6STAT_INC(ip6s_noroute);
+			/* No ifp in6_ifstat_inc(ifp, ifs6_out_discard); */
+			error = EHOSTUNREACH;;
 			goto bad;
 		}
+
+		ifp = nh6.nh_ifp;
+		mtu = nh6.nh_mtu;
+		dst->sin6_addr = nh6.nh_addr;
+		ia = nh6.nh_ia;
+		fib6_free_nh_ext(fibnum, &nh6);
+nonh6lookup:
+		;
 	}
-	if (rt == NULL) {
-		/*
-		 * If in6_selectroute() does not return a route entry
-		 * dst may not have been updated.
-		 */
-		*dst = dst_sa;	/* XXX */
-	}
 
 	/* Then rt (for unicast) and ifp must be non-NULL valid values. */
 	if ((flags & IPV6_FORWARDING) == 0) {
 		/* XXX: the FORWARDING flag can be set for mrouting. */
 		in6_ifstat_inc(ifp, ifs6_out_request);
 	}
-	if (rt != NULL) {
-		ia = (struct in6_ifaddr *)(rt->rt_ifa);
-		counter_u64_add(rt->rt_pksent, 1);
-	}
 
 	/* Setup data structures for scope ID checks. */
 	src0 = ip6->ip6_src;
@@ -869,6 +940,9 @@ again:
 	if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst,
 		    &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0)
 		goto bad;
+	KASSERT(mtu > 0, ("%s:%d: mtu %ld, ro_pmtu %p ro %p ifp %p "
+	    "alwaysfrag %d fibnum %u\n", __func__, __LINE__, mtu, ro_pmtu, ro,
+	    ifp, alwaysfrag, fibnum));
 
 	/*
 	 * The caller of this function may specify to use the minimum MTU
@@ -972,7 +1046,8 @@ again:
 			error = netisr_queue(NETISR_IPV6, m);
 			goto done;
 		} else {
-			RO_INVALIDATE_CACHE(ro);
+			if (ro != NULL)
+				RO_INVALIDATE_CACHE(ro);
 			needfiblookup = 1; /* Redo the routing table lookup. */
 		}
 	}
@@ -980,7 +1055,8 @@ again:
 	if (fibnum != M_GETFIB(m)) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		fibnum = M_GETFIB(m);
-		RO_INVALIDATE_CACHE(ro);
+		if (ro != NULL)
+			RO_INVALIDATE_CACHE(ro);
 		needfiblookup = 1;
 	}
 	if (needfiblookup)
@@ -1005,7 +1081,10 @@ again:
 	/* Or forward to some other address? */
 	if ((m->m_flags & M_IP6_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
-		dst = (struct sockaddr_in6 *)&ro->ro_dst;
+		if (ro != NULL)
+			dst = (struct sockaddr_in6 *)&ro->ro_dst;
+		else
+			dst = &sin6;
 		bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6));
 		m->m_flags |= M_SKIP_FIREWALL;
 		m->m_flags &= ~M_IP6_NEXTHOP;
@@ -1188,8 +1267,6 @@ sendorfree:
 		IP6STAT_INC(ip6s_fragmented);
 
 done:
-	if (ro == &ip6route)
-		RO_RTFREE(ro);
 	return (error);
 
 freehdrs:
@@ -1405,22 +1482,25 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup,
 	struct nhop6_basic nh6;
 	struct in6_addr kdst;
 	uint32_t scopeid;
-	struct sockaddr_in6 *sa6_dst;
+	struct sockaddr_in6 *sa6_dst, sin6;
 	u_long mtu;
 
 	mtu = 0;
-	if (do_lookup) {
+	if (ro_pmtu == NULL || do_lookup) {
 
 		/*
 		 * Here ro_pmtu has final destination address, while
 		 * ro might represent immediate destination.
 		 * Use ro_pmtu destination since mtu might differ.
 		 */
-		sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
-		if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))
-			ro_pmtu->ro_mtu = 0;
+		if (ro_pmtu != NULL) {
+			sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
+			if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))
+				ro_pmtu->ro_mtu = 0;
+		} else
+			sa6_dst = &sin6;
 
-		if (ro_pmtu->ro_mtu == 0) {
+		if (ro_pmtu == NULL || ro_pmtu->ro_mtu == 0) {
 			bzero(sa6_dst, sizeof(*sa6_dst));
 			sa6_dst->sin6_family = AF_INET6;
 			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
@@ -1428,14 +1508,16 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup,
 
 			in6_splitscope(dst, &kdst, &scopeid);
 			if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0,
-			    &nh6) == 0)
-				ro_pmtu->ro_mtu = nh6.nh_mtu;
-		}
-
-		mtu = ro_pmtu->ro_mtu;
+			    &nh6) == 0) {
+				mtu = nh6.nh_mtu;
+				if (ro_pmtu != NULL)
+					ro_pmtu->ro_mtu = mtu;
+			}
+		} else
+			mtu = ro_pmtu->ro_mtu;
 	}
 
-	if (ro_pmtu->ro_rt)
+	if (ro_pmtu != NULL && ro_pmtu->ro_rt != NULL)
 		mtu = ro_pmtu->ro_rt->rt_mtu;
 
 	return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto));


More information about the svn-src-head mailing list