svn commit: r364940 - head/sys/net/route

Alexander V. Chernikov melifaro at FreeBSD.org
Fri Aug 28 21:59:11 UTC 2020


Author: melifaro
Date: Fri Aug 28 21:59:10 2020
New Revision: 364940
URL: https://svnweb.freebsd.org/changeset/base/364940

Log:
  Further split nhop creation and rtable operations.
  
  As nexthops are immutable, some operations such as route attribute changes
   require nexthop fetching, forking, modification and route switching.
  These operations are not atomic, so they may need to be retried multiple
   times in presence of multiple speakers changing the same route.
  
  This change introduces "synchronisation" primitive: route_update_conditional(),
   simplifying logic for route changes and upcoming multipath operations.
  
  Differential Revision:	https://reviews.freebsd.org/D26216

Modified:
  head/sys/net/route/route_ctl.c
  head/sys/net/route/route_var.h

Modified: head/sys/net/route/route_ctl.c
==============================================================================
--- head/sys/net/route/route_ctl.c	Fri Aug 28 20:37:57 2020	(r364939)
+++ head/sys/net/route/route_ctl.c	Fri Aug 28 21:59:10 2020	(r364940)
@@ -78,9 +78,15 @@ struct rib_subscription {
 
 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc);
+static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info, struct route_nhop_data *rnd,
+    struct rib_cmd_info *rc);
 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc);
-static int change_route(struct rib_head *, struct rt_addrinfo *,
+static int change_route(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc);
+static int change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info, struct route_nhop_data *rnd,
     struct rib_cmd_info *rc);
 static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
     struct rib_cmd_info *rc);
@@ -202,14 +208,18 @@ rib_add_route(uint32_t fibnum, struct rt_addrinfo *inf
 	return (add_route(rnh, info, rc));
 }
 
+/*
+ * Creates rtentry and nexthop based on @info data.
+ * Return 0 and fills in rtentry into @prt on success,
+ * return errno otherwise.
+ */
 static int
-add_route(struct rib_head *rnh, struct rt_addrinfo *info,
-    struct rib_cmd_info *rc)
+create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rtentry **prt)
 {
 	struct sockaddr *dst, *ndst, *gateway, *netmask;
-	struct rtentry *rt, *rt_old;
+	struct rtentry *rt;
 	struct nhop_object *nh;
-	struct radix_node *rn;
 	struct ifaddr *ifa;
 	int error, flags;
 
@@ -276,8 +286,29 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *in
 	rt->rt_weight = 1;
 
 	rt_setmetrics(info, rt);
-	rt_old = NULL;
 
+	*prt = rt;
+	return (0);
+}
+
+static int
+add_route(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rib_cmd_info *rc)
+{
+	struct sockaddr *ndst, *netmask;
+	struct route_nhop_data rnd;
+	struct nhop_object *nh;
+	struct rtentry *rt;
+	int error;
+
+	error = create_rtentry(rnh, info, &rt);
+	if (error != 0)
+		return (error);
+
+	rnd.rnd_nhop = rt->rt_nhop;
+	rnd.rnd_weight = rt->rt_weight;
+	nh = rt->rt_nhop;
+
 	RIB_WLOCK(rnh);
 #ifdef RADIX_MPATH
 	/* do not permit exactly the same dst/mask/gw pair */
@@ -290,76 +321,42 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *in
 		return (EEXIST);
 	}
 #endif
+	error = add_route_nhop(rnh, rt, info, &rnd, rc);
+	if (error == 0) {
+		rt = NULL;
+		nh = NULL;
+	} else if ((error == EEXIST) && ((info->rti_flags & RTF_PINNED) != 0)) {
+		struct rtentry *rt_orig;
+		struct nhop_object *nh_orig;
+		struct radix_node *rn;
 
-	rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);
-
-	if (rn != NULL) {
-		/* Most common usecase */
-		if (rt->rt_expire > 0)
-			tmproutes_update(rnh, rt);
-
-		/* Finalize notification */
-		rnh->rnh_gen++;
-
-		rc->rc_rt = rt;
-		rc->rc_nh_new = nh;
-		rc->rc_nh_weight = rt->rt_weight;
-
-		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
-	} else if ((info->rti_flags & RTF_PINNED) != 0) {
-
-		/*
-		 * Force removal and re-try addition
-		 * TODO: better multipath&pinned support
-		 */
-		struct sockaddr *info_dst = info->rti_info[RTAX_DST];
-		info->rti_info[RTAX_DST] = ndst;
-		/* Do not delete existing PINNED(interface) routes */
-		info->rti_flags &= ~RTF_PINNED;
-		rt_old = rt_unlinkrte(rnh, info, &error);
-		info->rti_flags |= RTF_PINNED;
-		info->rti_info[RTAX_DST] = info_dst;
-		if (rt_old != NULL) {
-			rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head,
-			    rt->rt_nodes);
-
-			/* Finalize notification */
-			rnh->rnh_gen++;
-
-			if (rn != NULL) {
-				rc->rc_cmd = RTM_CHANGE;
-				rc->rc_rt = rt;
-				rc->rc_nh_old = rt_old->rt_nhop;
-				rc->rc_nh_new = nh;
-				rc->rc_nh_weight = rt->rt_weight;
-			} else {
-				rc->rc_cmd = RTM_DELETE;
-				rc->rc_rt = rt_old;
-				rc->rc_nh_old = rt_old->rt_nhop;
-				rc->rc_nh_weight = rt_old->rt_weight;
+		ndst = (struct sockaddr *)rt_key(rt);
+		netmask = info->rti_info[RTAX_NETMASK];
+		rn = rnh->rnh_lookup(ndst, netmask, &rnh->head);
+		rt_orig = (struct rtentry *)rn;
+		if (rt_orig != NULL) {
+			nh_orig = rt_orig->rt_nhop;
+			if ((nhop_get_rtflags(nh_orig) & RTF_PINNED) == 0) {
+				/* Current nexhop is not PINNED, can update */
+				error = change_route_nhop(rnh, rt_orig,
+				    info, &rnd, rc);
+				if (error == 0)
+					nh = NULL;
 			}
-			rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
-		}
+		} else
+			error = ENOBUFS;
 	}
 	RIB_WUNLOCK(rnh);
 
-	if ((rn != NULL) || (rt_old != NULL))
+	if (error == 0)
 		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
 
-	if (rt_old != NULL)
-		rtfree(rt_old);
-
-	/*
-	 * If it still failed to go into the tree,
-	 * then un-make it (this should be a function)
-	 */
-	if (rn == NULL) {
+	if (nh != NULL)
 		nhop_free(nh);
+	if (rt != NULL)
 		uma_zfree(V_rtzone, rt);
-		return (EEXIST);
-	}
 
-	return (0);
+	return (error);
 }
 
 
@@ -508,7 +505,11 @@ int
 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
     struct rib_cmd_info *rc)
 {
+	RIB_RLOCK_TRACKER;
+	struct route_nhop_data rnd_orig;
 	struct rib_head *rnh;
+	struct rtentry *rt;
+	int error;
 
 	NET_EPOCH_ASSERT();
 
@@ -519,18 +520,18 @@ rib_change_route(uint32_t fibnum, struct rt_addrinfo *
 	bzero(rc, sizeof(struct rib_cmd_info));
 	rc->rc_cmd = RTM_CHANGE;
 
-	return (change_route(rnh, info, rc));
-}
+	/* Check if updated gateway exists */
+	if ((info->rti_flags & RTF_GATEWAY) &&
+	    (info->rti_info[RTAX_GATEWAY] == NULL))
+		return (EINVAL);
 
-static int
-change_route_one(struct rib_head *rnh, struct rt_addrinfo *info,
-    struct rib_cmd_info *rc)
-{
-	RIB_RLOCK_TRACKER;
-	struct rtentry *rt = NULL;
-	int error = 0;
-	int free_ifa = 0;
-	struct nhop_object *nh, *nh_orig;
+	/*
+	 * route change is done in multiple steps, with dropping and
+	 * reacquiring lock. In the situations with multiple processes
+	 * changes the same route in can lead to the case when route
+	 * is changed between the steps. Address it by retrying the operation
+	 * multiple times before failing.
+	 */
 
 	RIB_RLOCK(rnh);
 	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
@@ -554,12 +555,33 @@ change_route_one(struct rib_head *rnh, struct rt_addri
 		}
 	}
 #endif
-	nh_orig = rt->rt_nhop;
+	rnd_orig.rnd_nhop = rt->rt_nhop;
+	rnd_orig.rnd_weight = rt->rt_weight;
 
 	RIB_RUNLOCK(rnh);
 
-	rt = NULL;
+	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
+		error = change_route(rnh, info, &rnd_orig, rc);
+		if (error != EAGAIN)
+			break;
+	}
+
+	return (error);
+}
+
+static int
+change_route(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
+{
+	int error = 0;
+	int free_ifa = 0;
+	struct nhop_object *nh, *nh_orig;
+	struct route_nhop_data rnd_new;
+
 	nh = NULL;
+	nh_orig = rnd_orig->rnd_nhop;
+	if (nh_orig == NULL)
+		return (ESRCH);
 
 	/*
 	 * New gateway could require new ifaddr, ifp;
@@ -593,71 +615,168 @@ change_route_one(struct rib_head *rnh, struct rt_addri
 	if (error != 0)
 		return (error);
 
-	RIB_WLOCK(rnh);
+	rnd_new.rnd_nhop = nh;
+	if (info->rti_mflags & RTV_WEIGHT)
+		rnd_new.rnd_weight = info->rti_rmx->rmx_weight;
+	else
+		rnd_new.rnd_weight = rnd_orig->rnd_weight;
 
-	/* Lookup rtentry once again and check if nexthop is still the same */
-	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
-	    info->rti_info[RTAX_NETMASK], &rnh->head);
+	error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);
 
-	if (rt == NULL) {
-		RIB_WUNLOCK(rnh);
-		nhop_free(nh);
-		return (ESRCH);
-	}
+	return (error);
+}
 
-	if (rt->rt_nhop != nh_orig) {
-		RIB_WUNLOCK(rnh);
-		nhop_free(nh);
-		return (EAGAIN);
+/*
+ * Insert @rt with nhop data from @rnd_new to @rnh.
+ * Returns 0 on success.
+ */
+static int
+add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info, struct route_nhop_data *rnd,
+    struct rib_cmd_info *rc)
+{
+	struct sockaddr *ndst, *netmask;
+	struct radix_node *rn;
+	int error = 0;
+
+	RIB_WLOCK_ASSERT(rnh);
+
+	ndst = (struct sockaddr *)rt_key(rt);
+	netmask = info->rti_info[RTAX_NETMASK];
+
+	rt->rt_nhop = rnd->rnd_nhop;
+	rt->rt_weight = rnd->rnd_weight;
+	rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);
+
+	if (rn != NULL) {
+		if (rt->rt_expire > 0)
+			tmproutes_update(rnh, rt);
+
+		/* Finalize notification */
+		rnh->rnh_gen++;
+
+		rc->rc_cmd = RTM_ADD;
+		rc->rc_rt = rt;
+		rc->rc_nh_old = NULL;
+		rc->rc_nh_new = rnd->rnd_nhop;
+		rc->rc_nh_weight = rnd->rnd_weight;
+
+		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
+	} else {
+		/* Existing route or memory allocation failure */
+		error = EEXIST;
 	}
 
-	/* Proceed with the update */
+	return (error);
+}
 
-	/* Provide notification to the protocols.*/
-	rt->rt_nhop = nh;
-	rt_setmetrics(info, rt);
+/*
+ * Switch @rt nhop/weigh to the ones specified in @rnd.
+ *  Conditionally set rt_expire if set in @info.
+ * Returns 0 on success.
+ */
+static int
+change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info, struct route_nhop_data *rnd,
+    struct rib_cmd_info *rc)
+{
+	struct nhop_object *nh_orig;
 
+	RIB_WLOCK_ASSERT(rnh);
+
+	nh_orig = rt->rt_nhop;
+
+	if (rnd->rnd_nhop != NULL) {
+		/* Changing expiration & nexthop & weight to a new one */
+		rt_setmetrics(info, rt);
+		rt->rt_nhop = rnd->rnd_nhop;
+		rt->rt_weight = rnd->rnd_weight;
+		if (rt->rt_expire > 0)
+			tmproutes_update(rnh, rt);
+	} else {
+		/* Route deletion requested. */
+		struct sockaddr *ndst, *netmask;
+		struct radix_node *rn;
+
+		ndst = (struct sockaddr *)rt_key(rt);
+		netmask = info->rti_info[RTAX_NETMASK];
+		rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head);
+		if (rn == NULL)
+			return (ESRCH);
+	}
+
 	/* Finalize notification */
 	rnh->rnh_gen++;
 
+	rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE;
 	rc->rc_rt = rt;
 	rc->rc_nh_old = nh_orig;
-	rc->rc_nh_new = rt->rt_nhop;
-	rc->rc_nh_weight = rt->rt_weight;
+	rc->rc_nh_new = rnd->rnd_nhop;
+	rc->rc_nh_weight = rnd->rnd_weight;
 
 	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
 
-	RIB_WUNLOCK(rnh);
-
-	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
-
-	nhop_free(nh_orig);
-
 	return (0);
 }
 
-static int
-change_route(struct rib_head *rnh, struct rt_addrinfo *info,
-    struct rib_cmd_info *rc)
+/*
+ * Conditionally update route nhop/weight IFF data in @nhd_orig is
+ *  consistent with the current route data.
+ * Nexthop in @nhd_new is consumed.
+ */
+int
+change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
+    struct route_nhop_data *rnd_new, struct rib_cmd_info *rc)
 {
-	int error;
+	struct rtentry *rt_new;
+	int error = 0;
 
-	/* Check if updated gateway exists */
-	if ((info->rti_flags & RTF_GATEWAY) &&
-	    (info->rti_info[RTAX_GATEWAY] == NULL))
-		return (EINVAL);
+	RIB_WLOCK(rnh);
 
-	/*
-	 * route change is done in multiple steps, with dropping and
-	 * reacquiring lock. In the situations with multiple processes
-	 * changes the same route in can lead to the case when route
-	 * is changed between the steps. Address it by retrying the operation
-	 * multiple times before failing.
-	 */
-	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
-		error = change_route_one(rnh, info, rc);
-		if (error != EAGAIN)
-			break;
+	rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
+	    info->rti_info[RTAX_NETMASK], &rnh->head);
+
+	if (rt_new == NULL) {
+		if (rnd_orig->rnd_nhop == NULL)
+			error = add_route_nhop(rnh, rt, info, rnd_new, rc);
+		else {
+			/*
+			 * Prefix does not exist, which was not our assumption.
+			 * Update @rnd_orig with the new data and return
+			 */
+			rnd_orig->rnd_nhop = NULL;
+			rnd_orig->rnd_weight = 0;
+			error = EAGAIN;
+		}
+	} else {
+		/* Prefix exists, try to update */
+		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
+
+			/*
+			 * Nhop/mpath group hasn't changed. Flip
+			 * to the new precalculated one and return
+			 */
+			error = change_route_nhop(rnh, rt_new, info, rnd_new, rc);
+		} else {
+			/* Update and retry */
+			rnd_orig->rnd_nhop = rt_new->rt_nhop;
+			rnd_orig->rnd_weight = rt_new->rt_weight;
+			error = EAGAIN;
+		}
+	}
+
+	RIB_WUNLOCK(rnh);
+
+	if (error == 0) {
+		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
+
+		if (rnd_orig->rnd_nhop != NULL)
+			nhop_free_any(rnd_orig->rnd_nhop);
+
+	} else {
+		if (rnd_new->rnd_nhop != NULL)
+			nhop_free_any(rnd_new->rnd_nhop);
 	}
 
 	return (error);

Modified: head/sys/net/route/route_var.h
==============================================================================
--- head/sys/net/route/route_var.h	Fri Aug 28 20:37:57 2020	(r364939)
+++ head/sys/net/route/route_var.h	Fri Aug 28 21:59:10 2020	(r364940)
@@ -226,6 +226,14 @@ void tmproutes_init(struct rib_head *rh);
 void tmproutes_destroy(struct rib_head *rh);
 
 /* route_ctl.c */
+struct route_nhop_data {
+	struct nhop_object	*rnd_nhop;
+	uint32_t		rnd_weight;
+};
+int change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
+    struct route_nhop_data *nhd_new, struct rib_cmd_info *rc);
+
 void vnet_rtzone_init(void);
 void vnet_rtzone_destroy(void);
 


More information about the svn-src-all mailing list