git: 596417283722 - main - Simplify ifa/ifp refcounting in the routing stack.

Alexander V. Chernikov melifaro at FreeBSD.org
Mon Feb 22 23:38:12 UTC 2021


The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=596417283722ee62ed17aed1c875ad90c01cbb0e

commit 596417283722ee62ed17aed1c875ad90c01cbb0e
Author:     Alexander V. Chernikov <melifaro at FreeBSD.org>
AuthorDate: 2021-02-22 21:42:27 +0000
Commit:     Alexander V. Chernikov <melifaro at FreeBSD.org>
CommitDate: 2021-02-22 23:37:59 +0000

    Simplify ifa/ifp refcounting in the routing stack.
    
    The routing stack control depends on quite a tree of functions to
     determine the proper attributes of a route such as a source address (ifa)
     or transmit ifp of a route.
    
    When actually inserting a route, the stack needs to ensure that ifa and ifp
     points to the entities that are still valid.
    Validity means slightly more than just pointer validity - stack need guarantee
     that the provided objects are not scheduled for deletion.
    
    Currently, callers either ignore it (most ifp parts, historically) or try to
     use refcounting (ifa parts). Even in case of ifa refcounting it's not always
     implemented in fully-safe manner. For example, some codepaths inside
     rt_getifa_fib() are referencing ifa while not holding any locks, resulting in
     possibility of referencing scheduled-for-deletion ifa.
    
    Instead of trying to fix all of the callers by enforcing proper refcounting,
     switch to a different model.
    As the rib_action() already requires epoch, do not require any stability guarantees
     other than the epoch-provided one.
    Use newly-added conditional versions of the refcounting functions
     (ifa_try_ref(), if_try_ref()) and fail if any of these fails.
    
    Reviewed by:    donner
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D28837
---
 sys/net/route.c               | 14 +++--------
 sys/net/route/nhop_ctl.c      | 58 ++++++++++++++++++++++++++-----------------
 sys/net/route/route_ctl.c     | 17 ++-----------
 sys/net/route/route_ifaddrs.c | 12 ++-------
 4 files changed, 42 insertions(+), 59 deletions(-)

diff --git a/sys/net/route.c b/sys/net/route.c
index a68e46c37861..d2b405fafcbf 100644
--- a/sys/net/route.c
+++ b/sys/net/route.c
@@ -207,7 +207,6 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,
 	/* Get the best ifa for the given interface and gateway. */
 	if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL)
 		return (ENETUNREACH);
-	ifa_ref(ifa);
 
 	bzero(&info, sizeof(info));
 	info.rti_info[RTAX_DST] = dst;
@@ -224,7 +223,6 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,
 	info.rti_rmx = &rti_rmx;
 
 	error = rib_action(fibnum, RTM_ADD, &info, &rc);
-	ifa_free(ifa);
 
 	if (error != 0) {
 		/* TODO: add per-fib redirect stats. */
@@ -518,8 +516,7 @@ rt_flushifroutes(struct ifnet *ifp)
 }
 
 /*
- * Look up rt_addrinfo for a specific fib.  Note that if rti_ifa is defined,
- * it will be referenced so the caller must free it.
+ * Look up rt_addrinfo for a specific fib.
  *
  * Assume basic consistency checks are executed by callers:
  * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well.
@@ -528,8 +525,7 @@ int
 rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
 {
 	const struct sockaddr *dst, *gateway, *ifpaddr, *ifaaddr;
-	struct epoch_tracker et;
-	int needref, error, flags;
+	int error, flags;
 
 	dst = info->rti_info[RTAX_DST];
 	gateway = info->rti_info[RTAX_GATEWAY];
@@ -542,8 +538,6 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
 	 * when protocol address is ambiguous.
 	 */
 	error = 0;
-	needref = (info->rti_ifa == NULL);
-	NET_EPOCH_ENTER(et);
 
 	/* If we have interface specified by the ifindex in the address, use it */
 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
@@ -598,13 +592,11 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
 			info->rti_ifa = ifa_ifwithroute(flags, sa, sa,
 							fibnum);
 	}
-	if (needref && info->rti_ifa != NULL) {
+	if (info->rti_ifa != NULL) {
 		if (info->rti_ifp == NULL)
 			info->rti_ifp = info->rti_ifa->ifa_ifp;
-		ifa_ref(info->rti_ifa);
 	} else
 		error = ENETUNREACH;
-	NET_EPOCH_EXIT(et);
 	return (error);
 }
 
diff --git a/sys/net/route/nhop_ctl.c b/sys/net/route/nhop_ctl.c
index 7de553799fab..92b43871d604 100644
--- a/sys/net/route/nhop_ctl.c
+++ b/sys/net/route/nhop_ctl.c
@@ -84,7 +84,7 @@ static int get_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
     struct nhop_priv **pnh_priv);
 static int finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
     struct nhop_priv *nh_priv);
-static struct ifnet *get_aifp(const struct nhop_object *nh, int reference);
+static struct ifnet *get_aifp(const struct nhop_object *nh);
 static void fill_sdl_from_ifp(struct sockaddr_dl_short *sdl, const struct ifnet *ifp);
 
 static void destroy_nhop_epoch(epoch_context_t ctx);
@@ -120,12 +120,10 @@ nhops_init(void)
  * this interface ifp instead of loopback. This is needed to support
  * link-local IPv6 loopback communications.
  *
- * If @reference is non-zero, found ifp is referenced.
- *
  * Returns found ifp.
  */
 static struct ifnet *
-get_aifp(const struct nhop_object *nh, int reference)
+get_aifp(const struct nhop_object *nh)
 {
 	struct ifnet *aifp = NULL;
 
@@ -138,21 +136,15 @@ get_aifp(const struct nhop_object *nh, int reference)
 	 */
 	if ((nh->nh_ifp->if_flags & IFF_LOOPBACK) &&
 			nh->gw_sa.sa_family == AF_LINK) {
-		if (reference)
-			aifp = ifnet_byindex_ref(nh->gwl_sa.sdl_index);
-		else
-			aifp = ifnet_byindex(nh->gwl_sa.sdl_index);
+		aifp = ifnet_byindex(nh->gwl_sa.sdl_index);
 		if (aifp == NULL) {
 			DPRINTF("unable to get aifp for %s index %d",
 				if_name(nh->nh_ifp), nh->gwl_sa.sdl_index);
 		}
 	}
 
-	if (aifp == NULL) {
+	if (aifp == NULL)
 		aifp = nh->nh_ifp;
-		if (reference)
-			if_ref(aifp);
-	}
 
 	return (aifp);
 }
@@ -297,7 +289,7 @@ fill_nhop_from_info(struct nhop_priv *nh_priv, struct rt_addrinfo *info)
 	nh->nh_ifp = info->rti_ifa->ifa_ifp;
 	nh->nh_ifa = info->rti_ifa;
 	/* depends on the gateway */
-	nh->nh_aifp = get_aifp(nh, 0);
+	nh->nh_aifp = get_aifp(nh);
 
 	/*
 	 * Note some of the remaining data is set by the
@@ -438,7 +430,7 @@ alter_nhop_from_info(struct nhop_object *nh, struct rt_addrinfo *info)
 		nh->nh_ifa = info->rti_ifa;
 	if (info->rti_ifp != NULL)
 		nh->nh_ifp = info->rti_ifp;
-	nh->nh_aifp = get_aifp(nh, 0);
+	nh->nh_aifp = get_aifp(nh);
 
 	return (0);
 }
@@ -512,6 +504,26 @@ alloc_nhop_structure()
 	return (nh_priv);
 }
 
+static bool
+reference_nhop_deps(struct nhop_object *nh)
+{
+	if (!ifa_try_ref(nh->nh_ifa))
+		return (false);
+	nh->nh_aifp = get_aifp(nh);
+	if (!if_try_ref(nh->nh_aifp)) {
+		ifa_free(nh->nh_ifa);
+		return (false);
+	}
+	DPRINTF("AIFP: %p nh_ifp %p", nh->nh_aifp, nh->nh_ifp);
+	if (!if_try_ref(nh->nh_ifp)) {
+		ifa_free(nh->nh_ifa);
+		if_rele(nh->nh_aifp);
+		return (false);
+	}
+
+	return (true);
+}
+
 /*
  * Alocates/references the remaining bits of nexthop data and links
  *  it to the hash table.
@@ -522,9 +534,7 @@ static int
 finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
     struct nhop_priv *nh_priv)
 {
-	struct nhop_object *nh;
-
-	nh = nh_priv->nh;
+	struct nhop_object *nh = nh_priv->nh;
 
 	/* Allocate per-cpu packet counter */
 	nh->nh_pksent = counter_u64_alloc(M_NOWAIT);
@@ -535,15 +545,17 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
 		return (ENOMEM);
 	}
 
+	if (!reference_nhop_deps(nh)) {
+		counter_u64_free(nh->nh_pksent);
+		uma_zfree(nhops_zone, nh);
+		RTSTAT_INC(rts_nh_alloc_failure);
+		DPRINTF("nh_alloc_finalize failed - reference failure");
+		return (EAGAIN);
+	}
+
 	/* Save vnet to ease destruction */
 	nh_priv->nh_vnet = curvnet;
 
-	/* Reference external objects and calculate (referenced) ifa */
-	if_ref(nh->nh_ifp);
-	ifa_ref(nh->nh_ifa);
-	nh->nh_aifp = get_aifp(nh, 1);
-	DPRINTF("AIFP: %p nh_ifp %p", nh->nh_aifp, nh->nh_ifp);
-
 	refcount_init(&nh_priv->nh_refcnt, 1);
 
 	/* Please see nhop_free() comments on the initial value */
diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c
index 9aedfb9d5855..e07a94295b89 100644
--- a/sys/net/route/route_ctl.c
+++ b/sys/net/route/route_ctl.c
@@ -589,12 +589,9 @@ create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info,
 		error = rt_getifa_fib(info, rnh->rib_fibnum);
 		if (error)
 			return (error);
-	} else {
-		ifa_ref(info->rti_ifa);
 	}
 
 	error = nhop_create_from_info(rnh, info, &nh);
-	ifa_free(info->rti_ifa);
 	if (error != 0)
 		return (error);
 
@@ -912,7 +909,6 @@ static int
 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
     struct nhop_object *nh_orig, struct nhop_object **nh_new)
 {
-	int free_ifa = 0;
 	int error;
 
 	/*
@@ -926,24 +922,15 @@ change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
 	    (info->rti_info[RTAX_IFA] != NULL &&
 	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
 		error = rt_getifa_fib(info, rnh->rib_fibnum);
-		if (info->rti_ifa != NULL)
-			free_ifa = 1;
 
 		if (error != 0) {
-			if (free_ifa) {
-				ifa_free(info->rti_ifa);
-				info->rti_ifa = NULL;
-			}
-
+			info->rti_ifa = NULL;
 			return (error);
 		}
 	}
 
 	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
-	if (free_ifa) {
-		ifa_free(info->rti_ifa);
-		info->rti_ifa = NULL;
-	}
+	info->rti_ifa = NULL;
 
 	return (error);
 }
diff --git a/sys/net/route/route_ifaddrs.c b/sys/net/route/route_ifaddrs.c
index 6e264327d66d..e6d97f3c74f0 100644
--- a/sys/net/route/route_ifaddrs.c
+++ b/sys/net/route/route_ifaddrs.c
@@ -143,7 +143,6 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
 	struct rt_addrinfo info;
 	struct sockaddr_dl null_sdl;
 	struct ifnet *ifp;
-	struct ifaddr *rti_ifa = NULL;
 
 	ifp = ifa->ifa_ifp;
 
@@ -153,12 +152,8 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
 		info.rti_ifp = V_loif;
 	if (cmd == RTM_ADD) {
 		/* explicitly specify (loopback) ifa */
-		if (info.rti_ifp != NULL) {
-			rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp);
-			if (rti_ifa != NULL)
-				ifa_ref(rti_ifa);
-			info.rti_ifa = rti_ifa;
-		}
+		if (info.rti_ifp != NULL)
+			info.rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp);
 	}
 	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
 	info.rti_info[RTAX_DST] = ia;
@@ -168,9 +163,6 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
 	error = rib_action(ifp->if_fib, cmd, &info, &rc);
 	NET_EPOCH_EXIT(et);
 
-	if (rti_ifa != NULL)
-		ifa_free(rti_ifa);
-
 	if (error == 0 ||
 	    (cmd == RTM_ADD && error == EEXIST) ||
 	    (cmd == RTM_DELETE && (error == ENOENT || error == ESRCH)))


More information about the dev-commits-src-main mailing list