git: 40503b792f7e - main - routing: populate fibs with interface routes after growing net.fibs.

From: Alexander V. Chernikov <melifaro_at_FreeBSD.org>
Date: Thu, 11 Aug 2022 12:51:16 UTC
The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=40503b792f7ecb9fcb349b2519036f85fa62fb4c

commit 40503b792f7ecb9fcb349b2519036f85fa62fb4c
Author:     Alexander V. Chernikov <melifaro@FreeBSD.org>
AuthorDate: 2022-08-07 20:26:19 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2022-08-11 12:48:08 +0000

    routing: populate fibs with interface routes after growing net.fibs.
    
    Currently it is possible to extend the number of fibs at runtime, but this
     functionality is of limited use when net.add_addr_allfibs is
     non-zero, as the routing tables are created empty.
    
    This change automatically populates newly-created fibs with the kernel-originated
     interface routes (filtered by the RTF_PINNED flag) if net.add_addr_allfibs
     is set.
    
    ```
    -> sysctl net.add_addr_allfibs=1
    net.add_addr_allfibs: 0 -> 1
    -> sysctl net.fibs
    net.fibs: 2
    -> sysctl net.fibs=3
    net.fibs: 2 -> 3
    
    BEFORE:
    -> setfib 2 netstat -rn
    Routing tables (fib: 2)
    
    AFTER:
    -> setfib 2 netstat -rn
    Routing tables (fib: 2)
    
    Internet:
    Destination        Gateway            Flags     Netif Expire
    10.0.0.0/24        link#1             U        vtnet0
    10.0.0.5           link#1             UHS         lo0
    127.0.0.1          link#2             UH          lo0
    
    Internet6:
    Destination                       Gateway                       Flags     Netif Expire
    ::1                               link#2                        UHS         lo0
    2a01:4f9:3a:fa00::/64             link#1                        U        vtnet0
    2a01:4f9:3a:fa00:5054:ff:fe15:4a3b link#1                       UHS         lo0
    fe80::%vtnet0/64                  link#1                        U        vtnet0
    fe80::5054:ff:fe15:4a3b%vtnet0    link#1                        UHS         lo0
    fe80::%lo0/64                     link#2                        U           lo0
    fe80::1%lo0                       link#2                        UHS         lo0
    ```
    
    Differential Revision: https://reviews.freebsd.org/D36075
    MFC after:      1 month
---
 sys/net/route/nhop.h          |  1 +
 sys/net/route/nhop_ctl.c      | 16 +++++++++--
 sys/net/route/route_ctl.c     | 65 +++++++++++++++++++++++++++++++++++++++++++
 sys/net/route/route_ifaddrs.c | 45 ++++++++++++++++++++++++++++++
 sys/net/route/route_tables.c  | 16 +++++++++++
 sys/net/route/route_var.h     |  9 +++++-
 6 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/sys/net/route/nhop.h b/sys/net/route/nhop.h
index bd3c3825ed86..ee4f79d2bb47 100644
--- a/sys/net/route/nhop.h
+++ b/sys/net/route/nhop.h
@@ -206,6 +206,7 @@ struct nhop_object *nhop_select_func(struct nhop_object *nh, uint32_t flowid);
 int nhop_get_upper_family(const struct nhop_object *nh);
 int nhop_get_neigh_family(const struct nhop_object *nh);
 uint32_t nhop_get_fibnum(const struct nhop_object *nh);
+void nhop_set_fibnum(struct nhop_object *nh, uint32_t fibnum);
 uint32_t nhop_get_expire(const struct nhop_object *nh);
 void nhop_set_expire(struct nhop_object *nh, uint32_t expire);
 
diff --git a/sys/net/route/nhop_ctl.c b/sys/net/route/nhop_ctl.c
index 824bf12a903d..46a5c7befd65 100644
--- a/sys/net/route/nhop_ctl.c
+++ b/sys/net/route/nhop_ctl.c
@@ -312,14 +312,20 @@ nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
  */
 struct nhop_object *
 nhop_get_nhop(struct nhop_object *nh, int *perror)
+{
+	struct rib_head *rnh = nhop_get_rh(nh);
+
+	return (nhop_get_nhop_internal(rnh, nh, perror));
+}
+
+struct nhop_object *
+nhop_get_nhop_internal(struct rib_head *rnh, struct nhop_object *nh, int *perror)
 {
 	struct nhop_priv *tmp_priv;
 	int error;
 
 	nh->nh_aifp = get_aifp(nh);
 
-	struct rib_head *rnh = nhop_get_rh(nh);
-
 	/* Give the protocols chance to augment nexthop properties */
 	error = rnh->rnh_augment_nh(rnh->rib_fibnum, nh);
 	if (error != 0) {
@@ -893,6 +899,12 @@ nhop_get_fibnum(const struct nhop_object *nh)
 	return (nh->nh_priv->nh_fibnum);
 }
 
+void
+nhop_set_fibnum(struct nhop_object *nh, uint32_t fibnum)
+{
+	nh->nh_priv->nh_fibnum = fibnum;
+}
+
 uint32_t
 nhop_get_expire(const struct nhop_object *nh)
 {
diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c
index e2e48302a8c2..35c817cb93c5 100644
--- a/sys/net/route/route_ctl.c
+++ b/sys/net/route/route_ctl.c
@@ -596,6 +596,71 @@ rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
 	return (0);
 }
 
+/*
+ * Tries to copy route @rt from one rtable to the rtable specified by @rh_dst.
+ * @rt: route to copy.
+ * @rnd_src: nhop and weight. Multipath routes are not supported.
+ * @rh_dst: target rtable.
+ * @rc: operation result storage.
+ *
+ * Returns 0 on success or a non-zero error code on failure.
+ */
+int
+rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
+    struct rib_head *rh_dst, struct rib_cmd_info *rc)
+{
+	struct nhop_object *nh_src = rnd_src->rnd_nhop;
+	int error;
+
+	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
+		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
+		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
+		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
+		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
+#endif
+	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
+	if (nh == NULL) {
+		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
+		return (ENOMEM);
+	}
+	nhop_copy(nh, rnd_src->rnd_nhop);
+	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
+	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
+	if (error != 0) {
+		FIB_RH_LOG(LOG_INFO, rh_dst,
+		    "unable to finalize new nexthop: error %d", error);
+		return (ENOMEM);
+	}
+
+	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
+	if (rt_new == NULL) {
+		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
+		nhop_free(nh);
+		return (ENOMEM);
+	}
+
+	struct route_nhop_data rnd = {
+		.rnd_nhop = nh,
+		.rnd_weight = rnd_src->rnd_weight
+	};
+	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
+	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);
+
+	if (error != 0) {
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+		char buf[NHOP_PRINT_BUFSIZE];
+		rt_print_buf(rt_new, buf, sizeof(buf));
+		FIB_RH_LOG(LOG_DEBUG, rh_dst, "Unable to add route %s: error %d", buf, error);
+#endif
+		nhop_free(nh);
+		rt_free_immediate(rt_new);
+	}
+	return (error);
+}
+
 /*
  * Adds route defined by @info into the kernel table specified by @fibnum and
  * sa_family in @info->rti_info[RTAX_DST].
diff --git a/sys/net/route/route_ifaddrs.c b/sys/net/route/route_ifaddrs.c
index 15ee13201059..a456ffa28696 100644
--- a/sys/net/route/route_ifaddrs.c
+++ b/sys/net/route/route_ifaddrs.c
@@ -195,4 +195,49 @@ ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
 	return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
 }
 
+static bool
+match_kernel_route(const struct rtentry *rt, struct nhop_object *nh)
+{
+	if (!NH_IS_NHGRP(nh) && (nhop_get_rtflags(nh) & RTF_PINNED) &&
+	    nh->nh_aifp->if_fib == nhop_get_fibnum(nh))
+		return (true);
+	return (false);
+}
+
+static int
+pick_kernel_route(struct rtentry *rt, void *arg)
+{
+	struct nhop_object *nh = rt->rt_nhop;
+	struct rib_head *rh_dst = (struct rib_head *)arg;
+
+	if (match_kernel_route(rt, nh)) {
+		struct rib_cmd_info rc = {};
+		struct route_nhop_data rnd = {
+			.rnd_nhop = nh,
+			.rnd_weight = rt->rt_weight,
+		};
+		rib_copy_route(rt, &rnd, rh_dst, &rc);
+	}
+	return (0);
+}
+
+/*
+ * Tries to copy kernel routes matching pattern from @rh_src to @rh_dst.
+ *
+ * Note: as this function acquires locks for both @rh_src and @rh_dst,
+ *  it needs to be called under RTABLES_LOCK() to avoid deadlocking
+ * with multiple ribs.
+ */
+void
+rib_copy_kernel_routes(struct rib_head *rh_src, struct rib_head *rh_dst)
+{
+	struct epoch_tracker et;
+
+	if (V_rt_add_addr_allfibs == 0)
+		return;
+
+	NET_EPOCH_ENTER(et);
+	rib_walk_ext_internal(rh_src, false, pick_kernel_route, NULL, rh_dst);
+	NET_EPOCH_EXIT(et);
+}
 
diff --git a/sys/net/route/route_tables.c b/sys/net/route/route_tables.c
index 54b467f9801f..f5fe4b5d2a81 100644
--- a/sys/net/route/route_tables.c
+++ b/sys/net/route/route_tables.c
@@ -162,6 +162,20 @@ sys_setfib(struct thread *td, struct setfib_args *uap)
 	return (error);
 }
 
+/*
+ * If required, copy interface routes from existing tables to the
+ * newly-created routing table.
+ */
+static void
+populate_kernel_routes(struct rib_head **new_rt_tables, struct rib_head *rh)
+{
+	for (int i = 0; i < V_rt_numfibs; i++) {
+		struct rib_head *rh_src = new_rt_tables[i * (AF_MAX + 1) + rh->rib_family];
+		if ((rh_src != NULL) && (rh_src != rh))
+			rib_copy_kernel_routes(rh_src, rh);
+	}
+}
+
 /*
  * Grows up the number of routing tables in the current fib.
  * Function creates new index array for all rtables and allocates
@@ -214,6 +228,8 @@ grow_rtables(uint32_t num_tables)
 			if (rh == NULL)
 				log(LOG_ERR, "unable to create routing table for %d.%d\n",
 				    dom->dom_family, i);
+			else
+				populate_kernel_routes(new_rt_tables, rh);
 			*prnh = rh;
 		}
 	}
diff --git a/sys/net/route/route_var.h b/sys/net/route/route_var.h
index 2cc5ffb7b17d..2f21d959387b 100644
--- a/sys/net/route/route_var.h
+++ b/sys/net/route/route_var.h
@@ -78,7 +78,7 @@ struct rib_head {
 };
 
 #define	RIB_RLOCK_TRACKER	struct rm_priotracker _rib_tracker
-#define	RIB_LOCK_INIT(rh)	rm_init(&(rh)->rib_lock, "rib head lock")
+#define	RIB_LOCK_INIT(rh)	rm_init_flags(&(rh)->rib_lock, "rib head lock", RM_DUPOK)
 #define	RIB_LOCK_DESTROY(rh)	rm_destroy(&(rh)->rib_lock)
 #define	RIB_RLOCK(rh)		rm_rlock(&(rh)->rib_lock, &_rib_tracker)
 #define	RIB_RUNLOCK(rh)		rm_runlock(&(rh)->rib_lock, &_rib_tracker)
@@ -221,6 +221,8 @@ struct rtentry *lookup_prefix(struct rib_head *rnh,
     const struct rt_addrinfo *info, struct route_nhop_data *rnd);
 struct rtentry *lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
     struct route_nhop_data *rnd);
+int rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
+    struct rib_head *rh_dst, struct rib_cmd_info *rc);
 
 bool nhop_can_multipath(const struct nhop_object *nh);
 bool match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw);
@@ -239,6 +241,9 @@ struct rtentry *rt_alloc(struct rib_head *rnh, const struct sockaddr *dst,
 void rib_init_subscriptions(struct rib_head *rnh);
 void rib_destroy_subscriptions(struct rib_head *rnh);
 
+/* route_ifaddrs.c */
+void rib_copy_kernel_routes(struct rib_head *rh_src, struct rib_head *rh_dst);
+
 /* Nexhops */
 void nhops_init(void);
 int nhops_init_rib(struct rib_head *rh);
@@ -247,6 +252,8 @@ void nhop_ref_object(struct nhop_object *nh);
 int nhop_try_ref_object(struct nhop_object *nh);
 void nhop_ref_any(struct nhop_object *nh);
 void nhop_free_any(struct nhop_object *nh);
+struct nhop_object *nhop_get_nhop_internal(struct rib_head *rnh,
+    struct nhop_object *nh, int *perror);
 
 
 int nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info,