svn commit: r347375 - in head/sys: net netinet

Gleb Smirnoff glebius at FreeBSD.org
Wed May 8 23:39:26 UTC 2019


Author: glebius
Date: Wed May  8 23:39:24 2019
New Revision: 347375
URL: https://svnweb.freebsd.org/changeset/base/347375

Log:
  The existence of PCB route caching doesn't allow us to use the new fast
  route lookup KPI in ip_output() the way it is already used in ip_forward().
  However, when there is no PCB provided, we can use the fast KPI, gaining
  a performance advantage.
  
  A typical case where ip_output() is called without a PCB pointer is a
  sendto(2) on an unconnected UDP socket. In practice, DNS servers do
  this.
  
  Reviewed by:	melifaro
  Differential Revision:	https://reviews.freebsd.org/D19804

Modified:
  head/sys/net/route.h
  head/sys/net/route_var.h
  head/sys/netinet/in_fib.c
  head/sys/netinet/in_fib.h
  head/sys/netinet/ip_output.c

Modified: head/sys/net/route.h
==============================================================================
--- head/sys/net/route.h	Wed May  8 23:24:47 2019	(r347374)
+++ head/sys/net/route.h	Wed May  8 23:39:24 2019	(r347375)
@@ -210,6 +210,7 @@ struct rtentry {
 #define	NHF_DEFAULT		0x0080	/* Default route */
 #define	NHF_BROADCAST		0x0100	/* RTF_BROADCAST */
 #define	NHF_GATEWAY		0x0200	/* RTF_GATEWAY */
+#define	NHF_HOST		0x0400	/* RTF_HOST */
 
 /* Nexthop request flags */
 #define	NHR_IFAIF		0x01	/* Return ifa_ifp interface */

Modified: head/sys/net/route_var.h
==============================================================================
--- head/sys/net/route_var.h	Wed May  8 23:24:47 2019	(r347374)
+++ head/sys/net/route_var.h	Wed May  8 23:39:24 2019	(r347375)
@@ -67,6 +67,7 @@ fib_rte_to_nh_flags(int rt_flags)
 	uint16_t res;
 
 	res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0;
+	res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0;
 	res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0;
 	res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0;
 	res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0;

Modified: head/sys/netinet/in_fib.c
==============================================================================
--- head/sys/netinet/in_fib.c	Wed May  8 23:24:47 2019	(r347374)
+++ head/sys/netinet/in_fib.c	Wed May  8 23:39:24 2019	(r347375)
@@ -96,7 +96,6 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in
     uint32_t flags, struct nhop4_extended *pnh4)
 {
 	struct sockaddr_in *gw;
-	struct in_ifaddr *ia;
 
 	if ((flags & NHR_IFAIF) != 0)
 		pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
@@ -113,10 +112,8 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in
 	gw = (struct sockaddr_in *)rt_key(rte);
 	if (gw->sin_addr.s_addr == 0)
 		pnh4->nh_flags |= NHF_DEFAULT;
-	/* XXX: Set RTF_BROADCAST if GW address is broadcast */
-
-	ia = ifatoia(rte->rt_ifa);
-	pnh4->nh_src = IA_SIN(ia)->sin_addr;
+	pnh4->nh_ia = ifatoia(rte->rt_ifa);
+	pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr;
 }
 
 /*

Modified: head/sys/netinet/in_fib.h
==============================================================================
--- head/sys/netinet/in_fib.h	Wed May  8 23:24:47 2019	(r347374)
+++ head/sys/netinet/in_fib.h	Wed May  8 23:39:24 2019	(r347375)
@@ -43,12 +43,13 @@ struct nhop4_basic {
 /* Extended nexthop info used for control protocols */
 struct nhop4_extended {
 	struct ifnet	*nh_ifp;	/* Logical egress interface */
+	struct in_ifaddr *nh_ia;	/* Associated address */
 	uint16_t	nh_mtu;		/* nexthop mtu */
 	uint16_t	nh_flags;	/* nhop flags */
 	uint8_t		spare[4];
 	struct in_addr	nh_addr;	/* GW/DST IPv4 address */
 	struct in_addr	nh_src;		/* default source IPv4 address */
-	uint64_t	spare2[2];
+	uint64_t	spare2;
 };
 
 int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,

Modified: head/sys/netinet/ip_output.c
==============================================================================
--- head/sys/netinet/ip_output.c	Wed May  8 23:24:47 2019	(r347374)
+++ head/sys/netinet/ip_output.c	Wed May  8 23:39:24 2019	(r347375)
@@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$");
 #include <net/vnet.h>
 
 #include <netinet/in.h>
+#include <netinet/in_fib.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
@@ -227,13 +228,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
 	int hlen = sizeof (struct ip);
 	int mtu;
 	int error = 0;
-	struct sockaddr_in *dst;
+	struct sockaddr_in *dst, sin;
 	const struct sockaddr_in *gw;
 	struct in_ifaddr *ia;
+	struct in_addr src;
 	int isbroadcast;
 	uint16_t ip_len, ip_off;
-	struct route iproute;
-	struct rtentry *rte;	/* cache for ro->ro_rt */
 	uint32_t fibnum;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	int no_route_but_check_spd = 0;
@@ -252,11 +252,6 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
 #endif
 	}
 
-	if (ro == NULL) {
-		ro = &iproute;
-		bzero(ro, sizeof (*ro));
-	}
-
 	if (opt) {
 		int len = 0;
 		m = ip_insertoptions(m, opt, &len);
@@ -281,26 +276,28 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
 	/*
 	 * dst/gw handling:
 	 *
-	 * dst can be rewritten but always points to &ro->ro_dst.
 	 * gw is readonly but can point either to dst OR rt_gateway,
 	 * therefore we need restore gw if we're redoing lookup.
 	 */
-	gw = dst = (struct sockaddr_in *)&ro->ro_dst;
 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
-	rte = ro->ro_rt;
-	if (rte == NULL) {
+	if (ro != NULL)
+		dst = (struct sockaddr_in *)&ro->ro_dst;
+	else
+		dst = &sin;
+	if (ro == NULL || ro->ro_rt == NULL) {
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 	}
+	gw = dst;
 	NET_EPOCH_ENTER(et);
 again:
 	/*
 	 * Validate route against routing table additions;
 	 * a better/more specific route might have been added.
 	 */
-	if (inp)
+	if (inp != NULL && ro != NULL && ro->ro_rt != NULL)
 		RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
 	/*
 	 * If there is a cached route,
@@ -310,15 +307,12 @@ again:
 	 * cache with IPv6.
 	 * Also check whether routing cache needs invalidation.
 	 */
-	rte = ro->ro_rt;
-	if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
-		    rte->rt_ifp == NULL ||
-		    !RT_LINK_IS_UP(rte->rt_ifp) ||
-			  dst->sin_family != AF_INET ||
-			  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+	if (ro != NULL && ro->ro_rt != NULL &&
+	    ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+	    ro->ro_rt->rt_ifp == NULL || !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) ||
+	    dst->sin_family != AF_INET ||
+	    dst->sin_addr.s_addr != ip->ip_dst.s_addr))
 		RO_INVALIDATE_CACHE(ro);
-		rte = NULL;
-	}
 	ia = NULL;
 	/*
 	 * If routing to interface only, short circuit routing lookup.
@@ -338,8 +332,10 @@ again:
 		ip->ip_dst.s_addr = INADDR_BROADCAST;
 		dst->sin_addr = ip->ip_dst;
 		ifp = ia->ia_ifp;
+		mtu = ifp->if_mtu;
 		ip->ip_ttl = 1;
 		isbroadcast = 1;
+		src = IA_SIN(ia)->sin_addr;
 	} else if (flags & IP_ROUTETOIF) {
 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
 						    M_GETFIB(m)))) == NULL &&
@@ -350,9 +346,11 @@ again:
 			goto bad;
 		}
 		ifp = ia->ia_ifp;
+		mtu = ifp->if_mtu;
 		ip->ip_ttl = 1;
 		isbroadcast = ifp->if_flags & IFF_BROADCAST ?
 		    in_ifaddr_broadcast(dst->sin_addr, ia) : 0;
+		src = IA_SIN(ia)->sin_addr;
 	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 	    imo != NULL && imo->imo_multicast_ifp != NULL) {
 		/*
@@ -360,15 +358,17 @@ again:
 		 * packets if the interface is specified.
 		 */
 		ifp = imo->imo_multicast_ifp;
+		mtu = ifp->if_mtu;
 		IFP_TO_IA(ifp, ia, &in_ifa_tracker);
 		isbroadcast = 0;	/* fool gcc */
-	} else {
-		/*
-		 * We want to do any cloning requested by the link layer,
-		 * as this is probably required in all cases for correct
-		 * operation (as it is for ARP).
-		 */
-		if (rte == NULL) {
+		src = IA_SIN(ia)->sin_addr;
+	} else if (ro != NULL) {
+		if (ro->ro_rt == NULL) {
+			/*
+			 * We want to do any cloning requested by the link
+			 * layer, as this is probably required in all cases
+			 * for correct operation (as it is for ARP).
+			 */
 #ifdef RADIX_MPATH
 			rtalloc_mpath_fib(ro,
 			    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
@@ -376,12 +376,47 @@ again:
 #else
 			in_rtalloc_ign(ro, 0, fibnum);
 #endif
-			rte = ro->ro_rt;
+			if (ro->ro_rt == NULL ||
+			    (ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+			    ro->ro_rt->rt_ifp == NULL ||
+			    !RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) {
+#if defined(IPSEC) || defined(IPSEC_SUPPORT)
+				/*
+				 * There is no route for this packet, but it is
+				 * possible that a matching SPD entry exists.
+				 */
+				no_route_but_check_spd = 1;
+				mtu = 0; /* Silence GCC warning. */
+				goto sendit;
+#endif
+				IPSTAT_INC(ips_noroute);
+				error = EHOSTUNREACH;
+				goto bad;
+			}
 		}
-		if (rte == NULL ||
-		    (rte->rt_flags & RTF_UP) == 0 ||
-		    rte->rt_ifp == NULL ||
-		    !RT_LINK_IS_UP(rte->rt_ifp)) {
+		ia = ifatoia(ro->ro_rt->rt_ifa);
+		ifp = ro->ro_rt->rt_ifp;
+		counter_u64_add(ro->ro_rt->rt_pksent, 1);
+		rt_update_ro_flags(ro);
+		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
+			gw = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+		if (ro->ro_rt->rt_flags & RTF_HOST)
+			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
+		else if (ifp->if_flags & IFF_BROADCAST)
+			isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
+		else
+			isbroadcast = 0;
+		if (ro->ro_rt->rt_flags & RTF_HOST)
+			mtu = ro->ro_rt->rt_mtu;
+		else
+			mtu = ifp->if_mtu;
+		src = IA_SIN(ia)->sin_addr;
+	} else {
+		struct nhop4_extended nh;
+
+		bzero(&nh, sizeof(nh));
+		if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh) !=
+		    0) {
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 			/*
 			 * There is no route for this packet, but it is
@@ -395,31 +430,29 @@ again:
 			error = EHOSTUNREACH;
 			goto bad;
 		}
-		ia = ifatoia(rte->rt_ifa);
-		ifp = rte->rt_ifp;
-		counter_u64_add(rte->rt_pksent, 1);
-		rt_update_ro_flags(ro);
-		if (rte->rt_flags & RTF_GATEWAY)
-			gw = (struct sockaddr_in *)rte->rt_gateway;
-		if (rte->rt_flags & RTF_HOST)
-			isbroadcast = (rte->rt_flags & RTF_BROADCAST);
-		else if (ifp->if_flags & IFF_BROADCAST)
-			isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
-		else
-			isbroadcast = 0;
+		ifp = nh.nh_ifp;
+		mtu = nh.nh_mtu;
+		/*
+		 * We are rewriting here dst to be gw actually, contradicting
+		 * comment at the beginning of the function. However, in this
+		 * case we are always dealing with on stack dst.
+		 * In case if pfil(9) sends us back to beginning of the
+		 * function, the dst would be rewritten by ip_output_pfil().
+		 */
+		MPASS(dst == &sin);
+		dst->sin_addr = nh.nh_addr;
+		ia = nh.nh_ia;
+		src = nh.nh_src;
+		isbroadcast = (((nh.nh_flags & (NHF_HOST | NHF_BROADCAST)) ==
+		    (NHF_HOST | NHF_BROADCAST)) ||
+		    ((ifp->if_flags & IFF_BROADCAST) &&
+		    in_ifaddr_broadcast(dst->sin_addr, ia)));
 	}
 
-	/*
-	 * Calculate MTU.  If we have a route that is up, use that,
-	 * otherwise use the interface's MTU.
-	 */
-	if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
-		mtu = rte->rt_mtu;
-	else
-		mtu = ifp->if_mtu;
 	/* Catch a possible divide by zero later. */
-	KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
-	    __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
+	KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (rt_flags=0x%08x) ifp=%p",
+	    __func__, mtu, ro,
+	    (ro != NULL && ro->ro_rt != NULL) ? ro->ro_rt->rt_flags : 0, ifp));
 
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		m->m_flags |= M_MCAST;
@@ -455,11 +488,8 @@ again:
 		 * If source address not specified yet, use address
 		 * of outgoing interface.
 		 */
-		if (ip->ip_src.s_addr == INADDR_ANY) {
-			/* Interface may have no addresses. */
-			if (ia != NULL)
-				ip->ip_src = IA_SIN(ia)->sin_addr;
-		}
+		if (ip->ip_src.s_addr == INADDR_ANY)
+			ip->ip_src = src;
 
 		if ((imo == NULL && in_mcast_loop) ||
 		    (imo && imo->imo_multicast_loop)) {
@@ -522,12 +552,8 @@ again:
 	 * If the source address is not specified yet, use the address
 	 * of the outoing interface.
 	 */
-	if (ip->ip_src.s_addr == INADDR_ANY) {
-		/* Interface may have no addresses. */
-		if (ia != NULL) {
-			ip->ip_src = IA_SIN(ia)->sin_addr;
-		}
-	}
+	if (ip->ip_src.s_addr == INADDR_ANY)
+		ip->ip_src = src;
 
 	/*
 	 * Look for broadcast address and
@@ -587,9 +613,10 @@ sendit:
 
 		case -1: /* Need to try again */
 			/* Reset everything for a new round */
-			RO_RTFREE(ro);
-			ro->ro_prepend = NULL;
-			rte = NULL;
+			if (ro != NULL) {
+				RO_RTFREE(ro);
+				ro->ro_prepend = NULL;
+			}
 			gw = dst;
 			ip = mtod(m, struct ip *);
 			goto again;
@@ -733,15 +760,6 @@ sendit:
 		IPSTAT_INC(ips_fragmented);
 
 done:
-	if (ro == &iproute)
-		RO_RTFREE(ro);
-	else if (rte == NULL)
-		/*
-		 * If the caller supplied a route but somehow the reference
-		 * to it has been released need to prevent the caller
-		 * calling RTFREE on it again.
-		 */
-		ro->ro_rt = NULL;
 	NET_EPOCH_EXIT(et);
 	return (error);
  bad:


More information about the svn-src-head mailing list