git: 0d60e88b41fe - main - routing: refactor control cmds #1

From: Alexander V. Chernikov <melifaro_at_FreeBSD.org>
Date: Wed, 10 Aug 2022 18:20:29 UTC
The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=0d60e88b41fe1c090f9b28ea133d0787827f84fc

commit 0d60e88b41fe1c090f9b28ea133d0787827f84fc
Author:     Alexander V. Chernikov <melifaro@FreeBSD.org>
AuthorDate: 2022-08-02 12:44:20 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2022-08-10 18:20:20 +0000

    routing: refactor control cmds #1
    
    This and the follow-up routing-related changes target to remove or
     reduce `struct rt_addrinfo` usage and use recently-landed nhop(9)
     KPI instead.
    Traditionally `rt_addrinfo` structure has been used to propagate all necessary
    information between the protocol/rtsock and a routing layer. Many
    functions inside routing subsystem uses it internally. However, using
    this structure became somewhat complicated, as there are too many ways
    of specifying a single state and verifying data consistency is hard.
    For example, arerouting flgs consistent with mask/gateway sockaddr pointers?
    Is mask really a host mask? Are sockaddr "valid" (e.g. properly zeroed, masked,
    have proper length)? Are they mutable? Is the suggested interface specified
     by the interface index embedded into the sockadd_dl gateway, or passed
     as RTAX_IFP parameter, or directly provided by rti_ifp or it needs to
     be derived from the ifa?
    These (and other similar) questions have to be considered every time when
     a function has `rt_addrinfo` pointer as an argument.
    
    The new approach is to bring more control back to the protocols and
    construct the desired routing objects themselves - in the end, it's the
    protocol/subsystem who knows the desired outcome.
    
    This specific diff changes the following:
    * add explicit basic low-level radix operations:
     add_route() (renamed from add_route_nhop())
     delete_route() (factored from change_route_nhop())
     change_route() (renamed from change_route_nhop)
    * remove "info" parameter from change_route_conditional() as a part
     of reducing rt_addrinfo usage in the internal KPIs
    * add lookup_prefix_rt() wrapper for doing re-lookups after
     RIB lock/unlock
    
    Differential Revision: https://reviews.freebsd.org/D36070
    MFC after:      2 weeks
---
 sys/net/route/mpath_ctl.c |   7 +-
 sys/net/route/route_ctl.c | 220 ++++++++++++++++++++++------------------------
 sys/net/route/route_var.h |   8 +-
 3 files changed, 113 insertions(+), 122 deletions(-)

diff --git a/sys/net/route/mpath_ctl.c b/sys/net/route/mpath_ctl.c
index db3c757e818d..b3f716875fa9 100644
--- a/sys/net/route/mpath_ctl.c
+++ b/sys/net/route/mpath_ctl.c
@@ -117,13 +117,12 @@ add_route_mpath(struct rib_head *rnh, struct rt_addrinfo *info,
 			 * Refresh @rnd_orig data and retry.
 			 */
 			RIB_RLOCK(rnh);
-			lookup_prefix(rnh, info, rnd_orig);
+			lookup_prefix_rt(rnh, rt, rnd_orig);
 			RIB_RUNLOCK(rnh);
 			continue;
 		}
 
-		error = change_route_conditional(rnh, rt, info, rnd_orig,
-		    &rnd_new, rc);
+		error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
 		if (error != EAGAIN)
 			break;
 		RTSTAT_INC(rts_add_retry);
@@ -188,7 +187,7 @@ del_route_mpath(struct rib_head *rh, struct rt_addrinfo *info,
 			nhop_free_any(rnd.rnd_nhop);
 			return (ESRCH);
 		}
-		error = change_route_nhop(rh, rt, &rnd, rc);
+		error = change_route(rh, rt, &rnd, rc);
 	}
 	return (error);
 }
diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c
index 394260e1421c..133f9d34d854 100644
--- a/sys/net/route/route_ctl.c
+++ b/sys/net/route/route_ctl.c
@@ -77,15 +77,16 @@ struct rib_subscription {
 	struct epoch_context			epoch_ctx;
 };
 
-static int add_route(struct rib_head *rnh, struct rt_addrinfo *info,
+static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc);
-static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
-    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
-static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
+static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
     struct rib_cmd_info *rc);
-static int change_route(struct rib_head *rnh, struct rt_addrinfo *info,
-    struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc);
 
+static int add_route(struct rib_head *rnh, struct rtentry *rt,
+    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
+static int delete_route(struct rib_head *rnh, struct rtentry *rt,
+    struct rib_cmd_info *rc);
 static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc);
 
@@ -519,8 +520,7 @@ lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
 
 	RIB_LOCK_ASSERT(rnh);
 
-	rt = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst),
-	    __DECONST(void *, netmask), &rnh->head);
+	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
 	if (rt != NULL) {
 		rnd->rnd_nhop = rt->rt_nhop;
 		rnd->rnd_weight = rt->rt_weight;
@@ -532,6 +532,13 @@ lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
 	return (rt);
 }
 
+struct rtentry *
+lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
+    struct route_nhop_data *rnd)
+{
+	return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
+}
+
 /*
  * Runs exact prefix match based on dst/netmask from @info.
  * Assumes RIB lock is held.
@@ -583,7 +590,7 @@ rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
 	bzero(rc, sizeof(struct rib_cmd_info));
 	rc->rc_cmd = RTM_ADD;
 
-	error = add_route(rnh, info, rc);
+	error = add_route_byinfo(rnh, info, rc);
 	if (error == 0)
 		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
 
@@ -701,7 +708,7 @@ create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info,
 }
 
 static int
-add_route(struct rib_head *rnh, struct rt_addrinfo *info,
+add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc)
 {
 	struct nhop_object *nh_orig;
@@ -719,7 +726,7 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
 	nh = rt->rt_nhop;
 
 	RIB_WLOCK(rnh);
-	error = add_route_nhop(rnh, rt, &rnd_add, rc);
+	error = add_route(rnh, rt, &rnd_add, rc);
 	if (error == 0) {
 		RIB_WUNLOCK(rnh);
 		return (0);
@@ -740,7 +747,7 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
 	/* Check if new route has higher preference */
 	if (can_override_nhop(info, nh_orig) > 0) {
 		/* Update nexthop to the new route */
-		change_route_nhop(rnh, rt_orig, &rnd_add, rc);
+		change_route(rnh, rt_orig, &rnd_add, rc);
 		RIB_WUNLOCK(rnh);
 		uma_zfree(V_rtzone, rt);
 		nhop_free(nh_orig);
@@ -808,10 +815,32 @@ rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc
 		rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask);
 		info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst;
 	}
-	error = del_route(rnh, info, rc);
+
+	RIB_WLOCK(rnh);
+	error = rt_unlinkrte(rnh, info, rc);
+	RIB_WUNLOCK(rnh);
+
 	info->rti_info[RTAX_DST] = dst_orig;
 
-	return (error);
+	if (error != 0)
+		return (error);
+
+	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
+
+	if (rc->rc_cmd == RTM_DELETE)
+		rtfree(rc->rc_rt);
+#ifdef ROUTE_MPATH
+	else {
+		/*
+		 * Deleting 1 path may result in RTM_CHANGE to
+		 * a different mpath group/nhop.
+		 * Free old mpath group.
+		 */
+		nhop_free_any(rc->rc_nh_old);
+	}
+#endif
+
+	return (0);
 }
 
 /*
@@ -827,7 +856,6 @@ rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info
 {
 	struct rtentry *rt;
 	struct nhop_object *nh;
-	struct radix_node *rn;
 	struct route_nhop_data rnd;
 	int error;
 
@@ -850,65 +878,7 @@ rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info
 	if (can_override_nhop(info, nh) < 0)
 		return (EADDRINUSE);
 
-	/*
-	 * Remove the item from the tree and return it.
-	 * Complain if it is not there and do no more processing.
-	 */
-	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
-	if (rn == NULL)
-		return (ESRCH);
-
-	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
-		panic ("rtrequest delete");
-
-	rt = RNTORT(rn);
-	rt->rte_flags &= ~RTF_UP;
-
-	/* Finalize notification */
-	rib_bump_gen(rnh);
-	rnh->rnh_prefixes--;
-
-	rc->rc_cmd = RTM_DELETE;
-	rc->rc_rt = rt;
-	rc->rc_nh_old = rt->rt_nhop;
-	rc->rc_nh_weight = rt->rt_weight;
-	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
-
-	return (0);
-}
-
-static int
-del_route(struct rib_head *rnh, struct rt_addrinfo *info,
-    struct rib_cmd_info *rc)
-{
-	int error;
-
-	RIB_WLOCK(rnh);
-	error = rt_unlinkrte(rnh, info, rc);
-	RIB_WUNLOCK(rnh);
-	if (error != 0)
-		return (error);
-
-	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
-
-	/*
-	 * If the caller wants it, then it can have it,
-	 * the entry will be deleted after the end of the current epoch.
-	 */
-	if (rc->rc_cmd == RTM_DELETE)
-		rtfree(rc->rc_rt);
-#ifdef ROUTE_MPATH
-	else {
-		/*
-		 * Deleting 1 path may result in RTM_CHANGE to
-		 * a different mpath group/nhop.
-		 * Free old mpath group.
-		 */
-		nhop_free_any(rc->rc_nh_old);
-	}
-#endif
-
-	return (0);
+	return (delete_route(rnh, rt, rc));
 }
 
 int
@@ -965,7 +935,7 @@ rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
 	RIB_RUNLOCK(rnh);
 
 	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
-		error = change_route(rnh, info, &rnd_orig, rc);
+		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
 		if (error != EAGAIN)
 			break;
 	}
@@ -1005,8 +975,9 @@ change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
 
 #ifdef ROUTE_MPATH
 static int
-change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info,
-    struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
+change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
+    struct rib_cmd_info *rc)
 {
 	int error = 0, found_idx = 0;
 	struct nhop_object *nh_orig = NULL, *nh_new;
@@ -1049,15 +1020,16 @@ change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info,
 	if (error != 0)
 		return (error);
 
-	error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);
+	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
 
 	return (error);
 }
 #endif
 
 static int
-change_route(struct rib_head *rnh, struct rt_addrinfo *info,
-    struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
+change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
+    struct rib_cmd_info *rc)
 {
 	int error = 0;
 	struct nhop_object *nh_orig;
@@ -1069,14 +1041,14 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
 
 #ifdef ROUTE_MPATH
 	if (NH_IS_NHGRP(nh_orig))
-		return (change_mpath_route(rnh, info, rnd_orig, rc));
+		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
 #endif
 
 	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
 	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
 	if (error != 0)
 		return (error);
-	error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);
+	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
 
 	return (error);
 }
@@ -1086,11 +1058,10 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
  * Returns 0 on success and stores operation results in @rc.
  */
 static int
-add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
+add_route(struct rib_head *rnh, struct rtentry *rt,
     struct route_nhop_data *rnd, struct rib_cmd_info *rc)
 {
 	struct radix_node *rn;
-	int error = 0;
 
 	RIB_WLOCK_ASSERT(rnh);
 
@@ -1113,12 +1084,42 @@ add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
 		rc->rc_nh_weight = rnd->rnd_weight;
 
 		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
-	} else {
-		/* Existing route or memory allocation failure */
-		error = EEXIST;
+		return (0);
 	}
 
-	return (error);
+	/* Existing route or memory allocation failure. */
+	return (EEXIST);
+}
+
+/*
+ * Unconditionally deletes @rt from @rnh.
+ */
+static int
+delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
+{
+	RIB_WLOCK_ASSERT(rnh);
+
+	/* Route deletion requested. */
+	struct radix_node *rn;
+
+	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
+	if (rn == NULL)
+		return (ESRCH);
+	rt = RNTORT(rn);
+	rt->rte_flags &= ~RTF_UP;
+
+	rib_bump_gen(rnh);
+	rnh->rnh_prefixes--;
+
+	rc->rc_cmd = RTM_DELETE;
+	rc->rc_rt = rt;
+	rc->rc_nh_old = rt->rt_nhop;
+	rc->rc_nh_new = NULL;
+	rc->rc_nh_weight = rt->rt_weight;
+
+	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
+
+	return (0);
 }
 
 /*
@@ -1126,7 +1127,7 @@ add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
  * Returns 0 on success.
  */
 int
-change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
+change_route(struct rib_head *rnh, struct rtentry *rt,
     struct route_nhop_data *rnd, struct rib_cmd_info *rc)
 {
 	struct nhop_object *nh_orig;
@@ -1135,29 +1136,18 @@ change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
 
 	nh_orig = rt->rt_nhop;
 
-	if (rnd->rnd_nhop != NULL) {
-		/* Changing nexthop & weight to a new one */
-		rt->rt_nhop = rnd->rnd_nhop;
-		rt->rt_weight = rnd->rnd_weight;
-		if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
-			tmproutes_update(rnh, rt, rnd->rnd_nhop);
-	} else {
-		/* Route deletion requested. */
-		struct radix_node *rn;
-
-		rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
-		if (rn == NULL)
-			return (ESRCH);
-		rt = RNTORT(rn);
-		rt->rte_flags &= ~RTF_UP;
-	}
+	if (rnd->rnd_nhop == NULL)
+		return (delete_route(rnh, rt, rc));
+
+	/* Changing nexthop & weight to a new one */
+	rt->rt_nhop = rnd->rnd_nhop;
+	rt->rt_weight = rnd->rnd_weight;
+	if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
+		tmproutes_update(rnh, rt, rnd->rnd_nhop);
 
 	/* Finalize notification */
 	rib_bump_gen(rnh);
-	if (rnd->rnd_nhop == NULL)
-		rnh->rnh_prefixes--;
-
-	rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE;
+	rc->rc_cmd = RTM_CHANGE;
 	rc->rc_rt = rt;
 	rc->rc_nh_old = nh_orig;
 	rc->rc_nh_new = rnd->rnd_nhop;
@@ -1175,8 +1165,8 @@ change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
  */
 int
 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
-    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
-    struct route_nhop_data *rnd_new, struct rib_cmd_info *rc)
+    struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
+    struct rib_cmd_info *rc)
 {
 	struct rtentry *rt_new;
 	int error = 0;
@@ -1192,12 +1182,12 @@ change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
 #endif
 	RIB_WLOCK(rnh);
 
-	rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
-	    info->rti_info[RTAX_NETMASK], &rnh->head);
+	struct route_nhop_data rnd;
+	rt_new = lookup_prefix_rt(rnh, rt, &rnd);
 
 	if (rt_new == NULL) {
 		if (rnd_orig->rnd_nhop == NULL)
-			error = add_route_nhop(rnh, rt, rnd_new, rc);
+			error = add_route(rnh, rt, rnd_new, rc);
 		else {
 			/*
 			 * Prefix does not exist, which was not our assumption.
@@ -1214,7 +1204,7 @@ change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
 			 * Nhop/mpath group hasn't changed. Flip
 			 * to the new precalculated one and return
 			 */
-			error = change_route_nhop(rnh, rt_new, rnd_new, rc);
+			error = change_route(rnh, rt_new, rnd_new, rc);
 		} else {
 			/* Update and retry */
 			rnd_orig->rnd_nhop = rt_new->rt_nhop;
diff --git a/sys/net/route/route_var.h b/sys/net/route/route_var.h
index 403e432ea836..12bf1b4093cb 100644
--- a/sys/net/route/route_var.h
+++ b/sys/net/route/route_var.h
@@ -212,13 +212,15 @@ void tmproutes_destroy(struct rib_head *rh);
 
 /* route_ctl.c */
 struct route_nhop_data;
-int change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
+int change_route(struct rib_head *rnh, struct rtentry *rt,
     struct route_nhop_data *rnd, struct rib_cmd_info *rc);
 int change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
-    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
-    struct route_nhop_data *nhd_new, struct rib_cmd_info *rc);
+    struct route_nhop_data *nhd_orig, struct route_nhop_data *nhd_new,
+    struct rib_cmd_info *rc);
 struct rtentry *lookup_prefix(struct rib_head *rnh,
     const struct rt_addrinfo *info, struct route_nhop_data *rnd);
+struct rtentry *lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
+    struct route_nhop_data *rnd);
 
 bool nhop_can_multipath(const struct nhop_object *nh);
 bool match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw);