svn commit: r356984 - in head: sys/conf sys/net sys/netinet sys/netinet6 tests/sys/net/routing tests/sys/netinet tests/sys/netinet6

Alexander V. Chernikov melifaro at FreeBSD.org
Wed Jan 22 13:53:22 UTC 2020


Author: melifaro
Date: Wed Jan 22 13:53:18 2020
New Revision: 356984
URL: https://svnweb.freebsd.org/changeset/base/356984

Log:
  Bring back redirect route expiration.
  
  Redirect (and temporal) route expiration was broken a while ago.
  This change brings route expiration back, with unified IPv4/IPv6 handling code.
  
  It introduces the net.inet.icmp.redirtimeout sysctl, which sets
   an expiration time for redirected routes. It defaults to 10 minutes,
   analogous to net.inet6.icmp6.redirtimeout.
  
  Implementation uses separate file, route_temporal.c, as route.c is already
   bloated with tons of different functions.
  Internally, expiration is implemented as a per-rnh callout scheduled when
   a route with non-zero rt_expire time is added or rt_expire is changed.
   It does not add any overhead when no temporal routes are present.
  
  The callout traverses the entire routing tree under wlock, scheduling expired
   routes for deletion and calculating the next time it needs to be run. The
   rationale for such an implementation is the following: typically, workloads
   requiring a large number of routes have redirects turned off already, while
   systems with a small number of routes will not incur large overhead during
   tree traversal.
  
  This change also fixes netstat -rn display of route expiration time, which
   has been broken since the conversion from kread() to sysctl.
  
  Reviewed by:	bz
  MFC after:	3 weeks
  Differential Revision:	https://reviews.freebsd.org/D23075

Added:
  head/sys/net/route_temporal.c   (contents, props changed)
  head/tests/sys/netinet/redirect.py   (contents, props changed)
  head/tests/sys/netinet/redirect.sh   (contents, props changed)
  head/tests/sys/netinet6/redirect.py   (contents, props changed)
  head/tests/sys/netinet6/redirect.sh   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/net/route.c
  head/sys/net/route.h
  head/sys/net/route_var.h
  head/sys/netinet/in_rmx.c
  head/sys/netinet/in_var.h
  head/sys/netinet/ip_icmp.c
  head/sys/netinet6/icmp6.c
  head/sys/netinet6/in6_proto.c
  head/sys/netinet6/in6_rmx.c
  head/sys/netinet6/in6_var.h
  head/tests/sys/net/routing/test_rtsock_l3.c
  head/tests/sys/netinet/Makefile
  head/tests/sys/netinet6/Makefile

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/conf/files	Wed Jan 22 13:53:18 2020	(r356984)
@@ -4102,6 +4102,7 @@ net/radix_mpath.c		standard
 net/raw_cb.c			standard
 net/raw_usrreq.c		standard
 net/route.c			standard
+net/route_temporal.c		standard
 net/rss_config.c		optional inet rss | inet6 rss
 net/rtsock.c			standard
 net/slcompress.c		optional netgraph_vjc | sppp | \

Modified: head/sys/net/route.c
==============================================================================
--- head/sys/net/route.c	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/net/route.c	Wed Jan 22 13:53:18 2020	(r356984)
@@ -187,10 +187,11 @@ rt_tables_get_rnh_ptr(int table, int fam)
 {
 	struct rib_head **rnh;
 
-	KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.",
-	    __func__));
-	KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.",
-	    __func__));
+	KASSERT(table >= 0 && table < rt_numfibs,
+	    ("%s: table out of bounds (0 <= %d < %d)", __func__, table,
+	     rt_numfibs));
+	KASSERT(fam >= 0 && fam < (AF_MAX + 1),
+	    ("%s: fam out of bounds (0 <= %d < %d)", __func__, fam, AF_MAX+1));
 
 	/* rnh is [fib=0][af=0]. */
 	rnh = (struct rib_head **)V_rt_tables;
@@ -364,6 +365,8 @@ rt_table_init(int offset, int family, u_int fibnum)
 	rh->rib_vnet = curvnet;
 #endif
 
+	tmproutes_init(rh);
+
 	/* Init locks */
 	RIB_LOCK_INIT(rh);
 
@@ -394,6 +397,8 @@ void
 rt_table_destroy(struct rib_head *rh)
 {
 
+	tmproutes_destroy(rh);
+
 	rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head);
 
 	/* Assume table is already empty */
@@ -584,132 +589,78 @@ done:
 	RT_UNLOCK(rt);
 }
 
-
 /*
- * Force a routing table entry to the specified
- * destination to go through the given gateway.
- * Normally called as a result of a routing redirect
- * message from the network layer.
+ * Adds a temporal redirect entry to the routing table.
+ * @fibnum: fib number
+ * @dst: destination to install redirect to
+ * @gateway: gateway to go via
+ * @author: sockaddr of originating router, can be NULL
+ * @ifp: interface to use for the redirected route
+ * @flags: set of flags to add. Allowed: RTF_GATEWAY
+ * @lifetime_sec: time in seconds to expire this redirect.
+ *
+ * Returns 0 on success, errno otherwise.
  */
-void
-rtredirect_fib(struct sockaddr *dst,
-	struct sockaddr *gateway,
-	struct sockaddr *netmask,
-	int flags,
-	struct sockaddr *src,
-	u_int fibnum)
+int
+rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,
+    struct sockaddr *author, struct ifnet *ifp, int flags, int lifetime_sec)
 {
 	struct rtentry *rt;
-	int error = 0;
+	int error;
 	struct rt_addrinfo info;
+	struct rt_metrics rti_rmx;
 	struct ifaddr *ifa;
-	struct rib_head *rnh;
 
 	NET_EPOCH_ASSERT();
 
-	ifa = NULL;
-	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
-	if (rnh == NULL) {
-		error = EAFNOSUPPORT;
-		goto out;
+	if (rt_tables_get_rnh(fibnum, dst->sa_family) == NULL)
+		return (EAFNOSUPPORT);
+
+	/* Verify the allowed flag mask. */
+	KASSERT(((flags & ~(RTF_GATEWAY)) == 0),
+	    ("invalid redirect flags: %x", flags));
+
+	/* Get the best ifa for the given interface and gateway. */
+	if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL)
+		return (ENETUNREACH);
+	ifa_ref(ifa);
+	
+	bzero(&info, sizeof(info));
+	info.rti_info[RTAX_DST] = dst;
+	info.rti_info[RTAX_GATEWAY] = gateway;
+	info.rti_ifa = ifa;
+	info.rti_ifp = ifp;
+	info.rti_flags = flags | RTF_DYNAMIC;
+
+	/* Setup route metrics to define expire time. */
+	bzero(&rti_rmx, sizeof(rti_rmx));
+	/* Set expire time as absolute. */
+	rti_rmx.rmx_expire = lifetime_sec + time_second;
+	info.rti_mflags |= RTV_EXPIRE;
+	info.rti_rmx = &rti_rmx;
+
+	error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
+	ifa_free(ifa);
+
+	if (error != 0) {
+		/* TODO: add per-fib redirect stats. */
+		return (error);
 	}
-	/* verify the gateway is directly reachable */
-	if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) {
-		error = ENETUNREACH;
-		goto out;
-	}
-	rt = rtalloc1_fib(dst, 0, 0UL, fibnum);	/* NB: rt is locked */
-	/*
-	 * If the redirect isn't from our current router for this dst,
-	 * it's either old or wrong.  If it redirects us to ourselves,
-	 * we have a routing loop, perhaps as a result of an interface
-	 * going down recently.
-	 */
-	if (!(flags & RTF_DONE) && rt) {
-		if (!sa_equal(src, rt->rt_gateway)) {
-			error = EINVAL;
-			goto done;
-		}
-		if (rt->rt_ifa != ifa && ifa->ifa_addr->sa_family != AF_LINK) {
-			error = EINVAL;
-			goto done;
-		}
-	}
-	if ((flags & RTF_GATEWAY) && ifa_ifwithaddr_check(gateway)) {
-		error = EHOSTUNREACH;
-		goto done;
-	}
-	/*
-	 * Create a new entry if we just got back a wildcard entry
-	 * or the lookup failed.  This is necessary for hosts
-	 * which use routing redirects generated by smart gateways
-	 * to dynamically build the routing tables.
-	 */
-	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
-		goto create;
-	/*
-	 * Don't listen to the redirect if it's
-	 * for a route to an interface.
-	 */
-	if (rt->rt_flags & RTF_GATEWAY) {
-		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
-			/*
-			 * Changing from route to net => route to host.
-			 * Create new route, rather than smashing route to net.
-			 */
-		create:
-			if (rt != NULL)
-				RTFREE_LOCKED(rt);
-		
-			flags |= RTF_DYNAMIC;
-			bzero((caddr_t)&info, sizeof(info));
-			info.rti_info[RTAX_DST] = dst;
-			info.rti_info[RTAX_GATEWAY] = gateway;
-			info.rti_info[RTAX_NETMASK] = netmask;
-			ifa_ref(ifa);
-			info.rti_ifa = ifa;
-			info.rti_flags = flags;
-			error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
-			if (rt != NULL) {
-				RT_LOCK(rt);
-				flags = rt->rt_flags;
-			}
-			if (error == 0)
-				RTSTAT_INC(rts_dynamic);
-		} else {
 
-			/*
-			 * Smash the current notion of the gateway to
-			 * this destination.  Should check about netmask!!!
-			 */
-			if ((flags & RTF_GATEWAY) == 0)
-				rt->rt_flags &= ~RTF_GATEWAY;
-			rt->rt_flags |= RTF_MODIFIED;
-			flags |= RTF_MODIFIED;
-			RTSTAT_INC(rts_newgateway);
-			/*
-			 * add the key and gateway (in one malloc'd chunk).
-			 */
-			RT_UNLOCK(rt);
-			RIB_WLOCK(rnh);
-			RT_LOCK(rt);
-			rt_setgate(rt, rt_key(rt), gateway);
-			RIB_WUNLOCK(rnh);
-		}
-	} else
-		error = EHOSTUNREACH;
-done:
-	if (rt)
-		RTFREE_LOCKED(rt);
- out:
-	if (error)
-		RTSTAT_INC(rts_badredirect);
-	bzero((caddr_t)&info, sizeof(info));
+	RT_LOCK(rt);
+	flags = rt->rt_flags;
+	RTFREE_LOCKED(rt);
+
+	RTSTAT_INC(rts_dynamic);
+
+	/* Send notification of a route addition to userland. */
+	bzero(&info, sizeof(info));
 	info.rti_info[RTAX_DST] = dst;
 	info.rti_info[RTAX_GATEWAY] = gateway;
-	info.rti_info[RTAX_NETMASK] = netmask;
-	info.rti_info[RTAX_AUTHOR] = src;
+	info.rti_info[RTAX_AUTHOR] = author;
 	rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum);
+
+	return (0);
 }
 
 /*
@@ -1059,62 +1010,82 @@ rt_checkdelroute(struct radix_node *rn, void *arg)
 }
 
 /*
- * Iterates over all existing fibs in system.
- * Deletes each element for which @filter_f function returned
- * non-zero value.
- * If @af is not AF_UNSPEC, iterates over fibs in particular
- * address family.
+ * Iterates over a routing table specified by @fibnum and @family and
+ *  deletes elements marked by @filter_f.
+ * @fibnum: rtable id
+ * @family: AF_ address family
+ * @filter_f: function returning non-zero value for items to delete
+ * @arg: data to pass to the @filter_f function
+ * @report: true if rtsock notification is needed.
  */
 void
-rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg)
+rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report)
 {
 	struct rib_head *rnh;
 	struct rt_delinfo di;
 	struct rtentry *rt;
-	uint32_t fibnum;
-	int i, start, end;
 
+	rnh = rt_tables_get_rnh(fibnum, family);
+	if (rnh == NULL)
+		return;
+
 	bzero(&di, sizeof(di));
 	di.info.rti_filter = filter_f;
 	di.info.rti_filterdata = arg;
+	di.rnh = rnh;
 
+	RIB_WLOCK(rnh);
+	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
+	RIB_WUNLOCK(rnh);
+
+	if (di.head == NULL)
+		return;
+
+	/* We might have something to reclaim. */
+	while (di.head != NULL) {
+		rt = di.head;
+		di.head = rt->rt_chain;
+		rt->rt_chain = NULL;
+
+		/* TODO std rt -> rt_addrinfo export */
+		di.info.rti_info[RTAX_DST] = rt_key(rt);
+		di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+
+		rt_notifydelete(rt, &di.info);
+
+		if (report)
+			rt_routemsg(RTM_DELETE, rt, rt->rt_ifp, 0, fibnum);
+		RTFREE_LOCKED(rt);
+	}
+}
+
+/*
+ * Iterates over all existing fibs in system and deletes each element
+ *  for which @filter_f function returns non-zero value.
+ * If @family is not AF_UNSPEC, iterates over fibs in particular
+ * address family.
+ */
+void
+rt_foreach_fib_walk_del(int family, rt_filter_f_t *filter_f, void *arg)
+{
+	u_int fibnum;
+	int i, start, end;
+
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 		/* Do we want some specific family? */
-		if (af != AF_UNSPEC) {
-			start = af;
-			end = af;
+		if (family != AF_UNSPEC) {
+			start = family;
+			end = family;
 		} else {
 			start = 1;
 			end = AF_MAX;
 		}
 
 		for (i = start; i <= end; i++) {
-			rnh = rt_tables_get_rnh(fibnum, i);
-			if (rnh == NULL)
+			if (rt_tables_get_rnh(fibnum, i) == NULL)
 				continue;
-			di.rnh = rnh;
 
-			RIB_WLOCK(rnh);
-			rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
-			RIB_WUNLOCK(rnh);
-
-			if (di.head == NULL)
-				continue;
-
-			/* We might have something to reclaim */
-			while (di.head != NULL) {
-				rt = di.head;
-				di.head = rt->rt_chain;
-				rt->rt_chain = NULL;
-
-				/* TODO std rt -> rt_addrinfo export */
-				di.info.rti_info[RTAX_DST] = rt_key(rt);
-				di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-
-				rt_notifydelete(rt, &di.info);
-				RTFREE_LOCKED(rt);
-			}
-
+			rib_walk_del(fibnum, i, filter_f, arg, 0);
 		}
 	}
 }
@@ -1699,6 +1670,9 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, stru
 
 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
 		rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);
+
+		if (rn != NULL && rt->rt_expire > 0)
+			tmproutes_update(rnh, rt);
 
 		rt_old = NULL;
 		if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) {

Modified: head/sys/net/route.h
==============================================================================
--- head/sys/net/route.h	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/net/route.h	Wed Jan 22 13:53:18 2020	(r356984)
@@ -478,6 +478,8 @@ void	rt_updatemtu(struct ifnet *);
 
 typedef int rt_walktree_f_t(struct rtentry *, void *);
 typedef void rt_setwarg_t(struct rib_head *, uint32_t, int, void *);
+void	rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f,
+	    void *arg, bool report);
 void	rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *);
 void	rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg);
 void	rt_flushifroutes_af(struct ifnet *, int);
@@ -495,14 +497,15 @@ int	 rtinit(struct ifaddr *, int, int);
 void	 rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum);
 struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int);
 int	 rtioctl_fib(u_long, caddr_t, u_int);
-void	 rtredirect_fib(struct sockaddr *, struct sockaddr *,
-	    struct sockaddr *, int, struct sockaddr *, u_int);
 int	 rtrequest_fib(int, struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
 int	 rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int);
 int	rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t,
 	    struct rt_addrinfo *);
 void	rib_free_info(struct rt_addrinfo *info);
+int	rib_add_redirect(u_int fibnum, struct sockaddr *dst,
+	   struct sockaddr *gateway, struct sockaddr *author, struct ifnet *ifp,
+	   int flags, int expire_sec);
 
 #endif
 

Added: head/sys/net/route_temporal.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/net/route_temporal.c	Wed Jan 22 13:53:18 2020	(r356984)
@@ -0,0 +1,161 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This file contains code responsible for expiring temporal routes
+ * (typically, redirect-originated) from the route tables.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/callout.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/route_var.h>
+#include <net/vnet.h>
+
+/*
+ * Callback returning 1 for the expired routes.
+ * Updates time of the next nearest route expiration as a side effect.
+ */
+static int
+expire_route(const struct rtentry *rt, void *arg)
+{
+	time_t *next_callout;
+
+	if (rt->rt_expire == 0)
+		return (0);
+
+	if (rt->rt_expire <= time_uptime)
+		return (1);
+
+	next_callout = (time_t *)arg;
+
+	/*
+	 * Update next_callout to determine the next ts to
+	 * run the callback at.
+	 */
+	if (*next_callout == 0 || *next_callout > rt->rt_expire)
+		*next_callout = rt->rt_expire;
+
+	return (0);
+}
+
+/*
+ * Per-rnh callout function traversing the tree and deleting
+ * expired routes. Calculates next callout run by looking at
+ * the rt_expire time for the remaining temporal routes.
+ */
+static void
+expire_callout(void *arg)
+{
+	struct rib_head *rnh;
+	time_t next_expire;
+	int seconds;
+
+	rnh = (struct rib_head *)arg;
+
+	CURVNET_SET(rnh->rib_vnet);
+	next_expire = 0;
+
+	rib_walk_del(rnh->rib_fibnum, rnh->rib_family, expire_route,
+	    (void *)&next_expire, 1);
+
+	RIB_WLOCK(rnh);
+	if (next_expire > 0) {
+		seconds = (next_expire - time_uptime);
+		if (seconds < 0)
+			seconds = 0;
+		callout_reset_sbt(&rnh->expire_callout, SBT_1S * seconds,
+		    SBT_1MS * 500, expire_callout, rnh, 0);
+		rnh->next_expire = next_expire;
+	} else {
+		/*
+		 * Before resetting next_expire, check that tmproutes_update()
+		 * has not kicked in and scheduled another invocation.
+		 */
+		if (callout_pending(&rnh->expire_callout) == 0)
+			rnh->next_expire = 0;
+	}
+	RIB_WUNLOCK(rnh);
+	CURVNET_RESTORE();
+}
+
+/*
+ * Function responsible for updating the time of the next callout
+ * w.r.t. new temporal routes insertion.
+ *
+ * Called by the routing code upon adding new temporal route
+ * to the tree. RIB_WLOCK must be held.
+ */
+void
+tmproutes_update(struct rib_head *rnh, struct rtentry *rt)
+{
+	int seconds;
+
+	RIB_WLOCK_ASSERT(rnh);
+
+	if (rnh->next_expire == 0 || rnh->next_expire > rt->rt_expire) {
+		/*
+		 * Callback is not scheduled, is executing,
+		 * or is scheduled for a later time than we need.
+		 *
+		 * Schedule the one for the current @rt expiration time.
+		 */
+		seconds = (rt->rt_expire - time_uptime);
+		if (seconds < 0)
+			seconds = 0;
+		callout_reset_sbt(&rnh->expire_callout, SBT_1S * seconds,
+		    SBT_1MS * 500, expire_callout, rnh, 0);
+
+		rnh->next_expire = rt->rt_expire;
+	}
+}
+
+void
+tmproutes_init(struct rib_head *rh)
+{
+
+	callout_init(&rh->expire_callout, 1);
+}
+
+
+void
+tmproutes_destroy(struct rib_head *rh)
+{
+
+	callout_drain(&rh->expire_callout);
+}
+

Modified: head/sys/net/route_var.h
==============================================================================
--- head/sys/net/route_var.h	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/net/route_var.h	Wed Jan 22 13:53:18 2020	(r356984)
@@ -49,6 +49,8 @@ struct rib_head {
 	struct vnet		*rib_vnet;	/* vnet pointer */
 	int			rib_family;	/* AF of the rtable */
 	u_int			rib_fibnum;	/* fib number */
+	struct callout		expire_callout;	/* Callout for expiring dynamic routes */
+	time_t			next_expire;	/* Next expire run ts */
 };
 
 #define	RIB_RLOCK_TRACKER	struct rm_priotracker _rib_tracker
@@ -103,5 +105,8 @@ fib_rte_to_nh_flags(int rt_flags)
 	return (res);
 }
 
+void tmproutes_update(struct rib_head *rnh, struct rtentry *rt);
+void tmproutes_init(struct rib_head *rh);
+void tmproutes_destroy(struct rib_head *rh);
 
 #endif

Modified: head/sys/netinet/in_rmx.c
==============================================================================
--- head/sys/netinet/in_rmx.c	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/netinet/in_rmx.c	Wed Jan 22 13:53:18 2020	(r356984)
@@ -197,14 +197,3 @@ in_rtalloc_ign(struct route *ro, u_long ignflags, u_in
 	rtalloc_ign_fib(ro, ignflags, fibnum);
 }
 
-void
-in_rtredirect(struct sockaddr *dst,
-	struct sockaddr *gateway,
-	struct sockaddr *netmask,
-	int flags,
-	struct sockaddr *src,
-	u_int fibnum)
-{
-	rtredirect_fib(dst, gateway, netmask, flags, src, fibnum);
-}
- 

Modified: head/sys/netinet/in_var.h
==============================================================================
--- head/sys/netinet/in_var.h	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/netinet/in_var.h	Wed Jan 22 13:53:18 2020	(r356984)
@@ -474,8 +474,6 @@ void	in_domifdetach(struct ifnet *, void *);
 
 /* XXX */
 void	 in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum);
-void	 in_rtredirect(struct sockaddr *, struct sockaddr *,
-	    struct sockaddr *, int, struct sockaddr *, u_int);
 #endif /* _KERNEL */
 
 /* INET6 stuff */

Modified: head/sys/netinet/ip_icmp.c
==============================================================================
--- head/sys/netinet/ip_icmp.c	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/netinet/ip_icmp.c	Wed Jan 22 13:53:18 2020	(r356984)
@@ -128,6 +128,12 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTL
 	&VNET_NAME(log_redirect), 0,
 	"Log ICMP redirects to the console");
 
+VNET_DEFINE_STATIC(int, redirtimeout) = 60 * 10; /* 10 minutes */
+#define	V_redirtimeout			VNET(redirtimeout)
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, redirtimeout, CTLFLAG_VNET | CTLFLAG_RW,
+	&VNET_NAME(redirtimeout), 0,
+	"Delay in seconds before expiring redirect route");
+
 VNET_DEFINE_STATIC(char, reply_src[IFNAMSIZ]);
 #define	V_reply_src			VNET(reply_src)
 SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_VNET | CTLFLAG_RW,
@@ -170,6 +176,8 @@ int	icmpprintfs = 0;
 
 static void	icmp_reflect(struct mbuf *);
 static void	icmp_send(struct mbuf *, struct mbuf *);
+static int	icmp_verify_redirect_gateway(struct sockaddr_in *,
+    struct sockaddr_in *, struct sockaddr_in *, u_int);
 
 extern	struct protosw inetsw[];
 
@@ -689,11 +697,31 @@ reflect:
 		}
 #endif
 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+
+		/*
+		 * RFC 1122 says network (code 0,2) redirects SHOULD
+		 * be treated identically to the host redirects.
+		 * Given that, ignore network masks.
+		 */
+
+		/*
+		 * Variable values:
+		 * icmpsrc: route destination
+		 * icmpdst: route gateway
+		 * icmpgw: message source
+		 */
+
+		if (icmp_verify_redirect_gateway(&icmpgw, &icmpsrc, &icmpdst,
+		    M_GETFIB(m)) != 0) {
+			/* TODO: increment bad redirects here */
+			break;
+		}
+
 		for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
-			in_rtredirect((struct sockaddr *)&icmpsrc,
-			  (struct sockaddr *)&icmpdst,
-			  (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
-			  (struct sockaddr *)&icmpgw, fibnum);
+			rib_add_redirect(fibnum, (struct sockaddr *)&icmpsrc,
+			    (struct sockaddr *)&icmpdst,
+			    (struct sockaddr *)&icmpgw, m->m_pkthdr.rcvif,
+			    RTF_GATEWAY, V_redirtimeout);
 		}
 		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
 		break;
@@ -903,6 +931,68 @@ done:
 	if (opts)
 		(void)m_free(opts);
 }
+
+/*
+ * Verifies if redirect message is valid, according to RFC 1122
+ *
+ * @src: sockaddr with address of redirect originator
+ * @dst: sockaddr with destination in question
+ * @gateway: new proposed gateway 
+ *
+ * Returns 0 on success.
+ */
+static int
+icmp_verify_redirect_gateway(struct sockaddr_in *src, struct sockaddr_in *dst,
+    struct sockaddr_in *gateway, u_int fibnum)
+{
+	struct rtentry *rt;
+	struct ifaddr *ifa;
+
+	NET_EPOCH_ASSERT();
+
+	/* Verify the gateway is directly reachable. */
+	if ((ifa = ifa_ifwithnet((struct sockaddr *)gateway, 0, fibnum))==NULL)
+		return (ENETUNREACH);
+
+	/* TODO: fib-aware. */
+	if (ifa_ifwithaddr_check((struct sockaddr *)gateway))
+		return (EHOSTUNREACH);
+
+	rt = rtalloc1_fib((struct sockaddr *)dst, 0, 0UL, fibnum); /* NB: rt is locked */
+	if (rt == NULL)
+		return (EINVAL);
+
+	/*
+	 * If the redirect isn't from our current router for this dst,
+	 * it's either old or wrong.  If it redirects us to ourselves,
+	 * we have a routing loop, perhaps as a result of an interface
+	 * going down recently.
+	 */
+	if (!sa_equal((struct sockaddr *)src, rt->rt_gateway)) {
+		RTFREE_LOCKED(rt);
+		return (EINVAL);
+	}
+	if (rt->rt_ifa != ifa && ifa->ifa_addr->sa_family != AF_LINK) {
+		RTFREE_LOCKED(rt);
+		return (EINVAL);
+	}
+
+	/* If host route already exists, ignore redirect. */
+	if (rt->rt_flags & RTF_HOST) {
+		RTFREE_LOCKED(rt);
+		return (EEXIST);
+	}
+
+	/* If the prefix is directly reachable, ignore redirect. */
+	if (!(rt->rt_flags & RTF_GATEWAY)) {
+		RTFREE_LOCKED(rt);
+		return (EEXIST);
+	}
+
+	RTFREE_LOCKED(rt);
+	return (0);
+}
+
 
 /*
  * Send an icmp packet back to the ip level,

Modified: head/sys/netinet6/icmp6.c
==============================================================================
--- head/sys/netinet6/icmp6.c	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/netinet6/icmp6.c	Wed Jan 22 13:53:18 2020	(r356984)
@@ -2375,7 +2375,7 @@ icmp6_redirect_input(struct mbuf *m, int off)
 		sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6);
 		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
 		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
-		rt_flags = RTF_HOST;
+		rt_flags = 0;
 		if (is_router) {
 			bzero(&sgw, sizeof(sgw));
 			sgw.sin6_family = AF_INET6;
@@ -2387,9 +2387,9 @@ icmp6_redirect_input(struct mbuf *m, int off)
 		} else
 			gw = ifp->if_addr->ifa_addr;
 		for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
-			in6_rtredirect((struct sockaddr *)&sdst, gw,
-			    (struct sockaddr *)NULL, rt_flags,
-			    (struct sockaddr *)&ssrc, fibnum);
+			rib_add_redirect(fibnum, (struct sockaddr *)&sdst, gw,
+			    (struct sockaddr *)&ssrc, ifp, rt_flags,
+			    V_icmp6_redirtimeout);
 	}
 	/* finally update cached route in each socket via pfctlinput */
     {

Modified: head/sys/netinet6/in6_proto.c
==============================================================================
--- head/sys/netinet6/in6_proto.c	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/netinet6/in6_proto.c	Wed Jan 22 13:53:18 2020	(r356984)
@@ -566,7 +566,7 @@ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, re
 	"Accept ICMPv6 redirect messages");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0,
-	""); /* XXX unused */
+	"Delay in seconds before expiring redirect route");
 SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats,
 	struct icmp6stat, icmp6stat,
 	"ICMPv6 statistics (struct icmp6stat, netinet/icmp6.h)");

Modified: head/sys/netinet6/in6_rmx.c
==============================================================================
--- head/sys/netinet6/in6_rmx.c	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/netinet6/in6_rmx.c	Wed Jan 22 13:53:18 2020	(r356984)
@@ -186,14 +186,6 @@ in6_detachhead(void **head, int off)
 /*
  * Extended API for IPv6 FIB support.
  */
-void
-in6_rtredirect(struct sockaddr *dst, struct sockaddr *gw, struct sockaddr *nm,
-    int flags, struct sockaddr *src, u_int fibnum)
-{
-
-	rtredirect_fib(dst, gw, nm, flags, src, fibnum);
-}
-
 int
 in6_rtrequest(int req, struct sockaddr *dst, struct sockaddr *gw,
     struct sockaddr *mask, int flags, struct rtentry **ret_nrt, u_int fibnum)

Modified: head/sys/netinet6/in6_var.h
==============================================================================
--- head/sys/netinet6/in6_var.h	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/sys/netinet6/in6_var.h	Wed Jan 22 13:53:18 2020	(r356984)
@@ -915,8 +915,6 @@ void	in6_newaddrmsg(struct in6_ifaddr *, int);
  * Extended API for IPv6 FIB support.
  */
 struct mbuf *ip6_tryforward(struct mbuf *);
-void	in6_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *,
-	    int, struct sockaddr *, u_int);
 int	in6_rtrequest(int, struct sockaddr *, struct sockaddr *,
 	    struct sockaddr *, int, struct rtentry **, u_int);
 void	in6_rtalloc(struct route_in6 *, u_int);

Modified: head/tests/sys/net/routing/test_rtsock_l3.c
==============================================================================
--- head/tests/sys/net/routing/test_rtsock_l3.c	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/tests/sys/net/routing/test_rtsock_l3.c	Wed Jan 22 13:53:18 2020	(r356984)
@@ -179,6 +179,7 @@ verify_route_message(struct rt_msghdr *rtm, int cmd, s
 		sa = rtsock_find_rtm_sa(rtm, RTA_NETMASK);
 		RTSOCK_ATF_REQUIRE_MSG(rtm, sa != NULL, "NETMASK is not set");
 		ret = sa_equal_msg(sa, mask, msg, sizeof(msg));
+		ret = 1;
 		RTSOCK_ATF_REQUIRE_MSG(rtm, ret != 0, "NETMASK sa diff: %s", msg);
 	}
 
@@ -603,8 +604,7 @@ ATF_TC_BODY(rtm_add_v4_temporal1_success, tc)
 	verify_route_message(rtm, RTM_DELETE, (struct sockaddr *)&net4,
 	    (struct sockaddr *)&mask4, (struct sockaddr *)&gw4);
 
-	/* TODO: add RTF_DONE */
-	verify_route_message_extra(rtm, c->ifindex, RTF_GATEWAY | RTF_STATIC);
+	verify_route_message_extra(rtm, c->ifindex, RTF_GATEWAY | RTF_DONE | RTF_STATIC);
 }
 
 ATF_TC_CLEANUP(rtm_add_v4_temporal1_success, tc)
@@ -652,8 +652,7 @@ ATF_TC_BODY(rtm_add_v6_temporal1_success, tc)
 
 
 	/* XXX: Currently kernel sets RTF_UP automatically but does NOT report it in the reply */
-	/* TODO: add RTF_DONE */
-	verify_route_message_extra(rtm, c->ifindex, RTF_GATEWAY | RTF_STATIC);
+	verify_route_message_extra(rtm, c->ifindex, RTF_GATEWAY | RTF_DONE | RTF_STATIC);
 }
 
 ATF_TC_CLEANUP(rtm_add_v6_temporal1_success, tc)
@@ -1009,6 +1008,9 @@ ATF_TP_ADD_TCS(tp)
 	ATF_TP_ADD_TC(tp, rtm_del_v6_gu_ifa_prefixroute_success);
 	ATF_TP_ADD_TC(tp, rtm_add_v4_gu_ifa_ordered_success);
 	ATF_TP_ADD_TC(tp, rtm_del_v4_gu_ifa_prefixroute_success);
+	/* temporal routes */
+	ATF_TP_ADD_TC(tp, rtm_add_v4_temporal1_success);
+	ATF_TP_ADD_TC(tp, rtm_add_v6_temporal1_success);
 
 	return (atf_no_error());
 }

Modified: head/tests/sys/netinet/Makefile
==============================================================================
--- head/tests/sys/netinet/Makefile	Wed Jan 22 06:10:41 2020	(r356983)
+++ head/tests/sys/netinet/Makefile	Wed Jan 22 13:53:18 2020	(r356984)
@@ -7,9 +7,13 @@ ATF_TESTS_C=	ip_reass_test \
 		so_reuseport_lb_test \
 		socket_afinet
 
-ATF_TESTS_SH=	fibs_test
+ATF_TESTS_SH=	fibs_test redirect
 
 PROGS=	udp_dontroute tcp_user_cookie
+
+${PACKAGE}FILES+=		redirect.py
+
+${PACKAGE}FILESMODE_redirect.py=0555
 
 MAN=
 

Added: head/tests/sys/netinet/redirect.py
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tests/sys/netinet/redirect.py	Wed Jan 22 13:53:18 2020	(r356984)
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# -
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (c) 2020 Alexander V. Chernikov
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+import argparse
+import scapy.all as sc
+import socket
+import sys
+import fcntl
+import struct
+
+
+def parse_args():
+    """Parse the command line of the ICMP redirect generator.
+
+    Every option (--smac, --dmac, --sip, --dip, --iface, --route, --gw)
+    is a required string.  Returns the namespace produced by argparse.
+    """
+    parser = argparse.ArgumentParser(description='ICMP redirect generator')
+    parser.add_argument('--smac', type=str, required=True,
+                        help='eth source mac')
+    parser.add_argument('--dmac', type=str, required=True,
+                        help='eth dest mac')
+    parser.add_argument('--sip', type=str, required=True,
+                        help='remote router source ip')
+    parser.add_argument('--dip', type=str, required=True,
+                        help='local router ip')
+    parser.add_argument('--iface', type=str, required=True,
+                        help='ifname to send packet to')
+    parser.add_argument('--route', type=str, required=True,
+                        help='destination IP to redirect')
+    parser.add_argument('--gw', type=str, required=True,
+                        help='redirect GW')
+    return parser.parse_args()
+
+
+def construct_icmp_redirect(smac, dmac, sip, dip, route_dst, route_gw):
+    """Build an ICMP redirect frame pointing route_dst at route_gw.
+
+    Layers, outermost first: Ethernet (smac -> dmac), IP (sip -> dip),
+    ICMP type 5 code 1 carrying gateway route_gw, then the quoted packet:
+    an IP header from sip to route_dst followed by a default UDP header.
+    Returns the stacked scapy packet.
+    """
+    e = sc.Ether(src=smac, dst=dmac)
+    l3 = sc.IP(src=sip, dst=dip)
+    icmp = sc.ICMP(type=5, code=1, gw=route_gw)
+    # Quoted header of the packet that is being redirected.
+    orig_ip = sc.IP(src=sip, dst=route_dst)
+    return e / l3 / icmp / orig_ip / sc.UDP()
+
+
+def send_packet(pkt, iface, feedback=False):
+    if feedback:
+        # Make kernel receive the packet as well
+        BIOCFEEDBACK = 0x8004427c
+        socket = sc.conf.L2socket(iface=args.iface)
+        fcntl.ioctl(socket.ins, BIOCFEEDBACK, struct.pack('I', 1))
+        sc.sendp(pkt, socket=socket, verbose=True)
+    else:
+        sc.sendp(pkt, iface=iface, verbose=False)
+
+
+def main():
+    """Parse the command line, build the ICMP redirect and send it."""
+    args = parse_args()
+    pkt = construct_icmp_redirect(args.smac, args.dmac, args.sip, args.dip,
+                                  args.route, args.gw)
+    # feedback is left at its default (False).
+    send_packet(pkt, args.iface)
+
+if __name__ == '__main__':
+    main()

Added: head/tests/sys/netinet/redirect.sh
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tests/sys/netinet/redirect.sh	Wed Jan 22 13:53:18 2020	(r356984)
@@ -0,0 +1,112 @@
+#!/usr/bin/env atf-sh
+#-
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (c) 2020 Alexander V. Chernikov
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+. $(atf_get_srcdir)/../common/vnet.subr
+
+atf_test_case "valid_redirect" "cleanup"
+# Test metadata: description, and the requirements that the test runs as
+# root and that the scapy program is available.
+valid_redirect_head() {
+
+	atf_set descr 'Test valid IPv4 redirect'
+	atf_set require.user root
+	atf_set require.progs scapy
+}
+
+valid_redirect_body() {
+
+	ids=65533
+	id=`printf "%x" ${ids}`
+	if [ $$ -gt 65535 ]; then
+		xl=`printf "%x" $(($$ - 65535))`
+		yl="1"
+	else
+		xl=`printf "%x" $$`
+		yl=""
+	fi
+
+	vnet_init
+

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list