git: 83add84c00ed - stable/13 - [fib algo] Delay algo init at fib growth to allow reliable use of the rib KPI.

Alexander V. Chernikov melifaro at FreeBSD.org
Tue May 4 21:43:08 UTC 2021


The branch stable/13 has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=83add84c00edfe0e690568a2bb23fcaf3ccfbb20

commit 83add84c00edfe0e690568a2bb23fcaf3ccfbb20
Author:     Alexander V. Chernikov <melifaro at FreeBSD.org>
AuthorDate: 2021-04-25 10:22:45 +0000
Commit:     Alexander V. Chernikov <melifaro at FreeBSD.org>
CommitDate: 2021-05-04 21:30:35 +0000

    [fib algo] Delay algo init at fib growth to allow reliable use of the rib KPI.
    
    Currently, most of the rib(9) KPI does not use rnh pointers, using
     fibnum and family parameters to determine the rib pointer instead.
    This works well except for the case when we initialize new rib pointers
     during fib growth.
    In that case, there is no mapping between fib/family and the new rib,
     as an entirely new rib pointer array is populated.
    
    Address this by delaying fib algo initialization till after switching
     to the new pointer array and updating the number of fibs.
    Set datapath pointer to the dummy function, so the potential callers
     won't crash the kernel in the brief moment when the rib exists, but
     no fib algo is attached.
    
    This change avoids the need to create duplicates of existing rib functions
     with altered signatures.
    
    Differential Revision: https://reviews.freebsd.org/D29969
    MFC after:      1 week
    
    (cherry picked from commit 8a0d57baecf70aa7f49b0b1468f6264c75d8e6df)
---
 sys/net/route/fib_algo.c     | 89 ++++++++++++++++++++++++++++++++------------
 sys/net/route/route_tables.c | 17 +++++----
 sys/net/route/route_var.h    |  3 +-
 3 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/sys/net/route/fib_algo.c b/sys/net/route/fib_algo.c
index 9fdc80001986..837ea7c2b43b 100644
--- a/sys/net/route/fib_algo.c
+++ b/sys/net/route/fib_algo.c
@@ -1538,6 +1538,12 @@ SYSCTL_PROC(_net_route_algo_inet6, OID_AUTO, algo,
     set_algo_inet6_sysctl_handler, "A", "Set IPv6 lookup algo");
 #endif
 
+static struct nhop_object *
+dummy_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
+{
+	return (NULL);
+}
+
 static void
 destroy_fdh_epoch(epoch_context_t ctx)
 {
@@ -1556,8 +1562,15 @@ alloc_fib_dp_array(uint32_t num_tables, bool waitok)
 	sz = sizeof(struct fib_dp_header);
 	sz += sizeof(struct fib_dp) * num_tables;
 	fdh = malloc(sz, M_RTABLE, (waitok ? M_WAITOK : M_NOWAIT) | M_ZERO);
-	if (fdh != NULL)
+	if (fdh != NULL) {
 		fdh->fdh_num_tables = num_tables;
+		/*
+		 * Set dummy lookup function ptr always returning NULL, so
+		 * we can delay algo init.
+		 */
+		for (uint32_t i = 0; i < num_tables; i++)
+			fdh->fdh_idx[i].f = dummy_lookup;
+	}
 	return (fdh);
 }
 
@@ -1933,19 +1946,18 @@ fib_check_best_algo(struct rib_head *rh, struct fib_lookup_module *orig_flm)
  * Called when new route table is created.
  * Selects, allocates and attaches fib algo for the table.
  */
-int
-fib_select_algo_initial(struct rib_head *rh)
+static bool
+fib_select_algo_initial(struct rib_head *rh, struct fib_dp *dp)
 {
 	struct fib_lookup_module *flm;
 	struct fib_data *fd = NULL;
 	enum flm_op_result result;
 	struct epoch_tracker et;
-	int error = 0;
 
 	flm = fib_check_best_algo(rh, NULL);
 	if (flm == NULL) {
 		RH_PRINTF(LOG_CRIT, rh, "no algo selected");
-		return (ENOENT);
+		return (false);
 	}
 	RH_PRINTF(LOG_INFO, rh, "selected algo %s", flm->flm_name);
 
@@ -1956,29 +1968,58 @@ fib_select_algo_initial(struct rib_head *rh)
 	NET_EPOCH_EXIT(et);
 
 	RH_PRINTF(LOG_DEBUG, rh, "result=%d fd=%p", result, fd);
-	if (result == FLM_SUCCESS) {
-
-		/*
-		 * Attach datapath directly to avoid multiple reallocations
-		 * during fib growth
-		 */
-		struct fib_dp_header *fdp;
-		struct fib_dp **pdp;
-
-		pdp = get_family_dp_ptr(rh->rib_family);
-		if (pdp != NULL) {
-			fdp = get_fib_dp_header(*pdp);
-			fdp->fdh_idx[fd->fd_fibnum] = fd->fd_dp;
-			FD_PRINTF(LOG_INFO, fd, "datapath attached");
-		}
-	} else {
-		error = EINVAL;
+	if (result == FLM_SUCCESS)
+		*dp = fd->fd_dp;
+	else
 		RH_PRINTF(LOG_CRIT, rh, "unable to setup algo %s", flm->flm_name);
-	}
 
 	fib_unref_algo(flm);
 
-	return (error);
+	return (result == FLM_SUCCESS);
+}
+
+/*
+ * Sets up fib algo instances for the non-initialized RIBs in the @family.
+ * Allocates temporary datapath index to amortize datapath index updates
+ * with large @num_tables.
+ */
+void
+fib_setup_family(int family, uint32_t num_tables)
+{
+	struct fib_dp_header *new_fdh = alloc_fib_dp_array(num_tables, false);
+	if (new_fdh == NULL) {
+		ALGO_PRINTF(LOG_CRIT, "Unable to setup framework for %s", print_family(family));
+		return;
+	}
+
+	for (int i = 0; i < num_tables; i++) {
+		struct rib_head *rh = rt_tables_get_rnh(i, family);
+		if (rh->rib_algo_init)
+			continue;
+		if (!fib_select_algo_initial(rh, &new_fdh->fdh_idx[i]))
+			continue;
+
+		rh->rib_algo_init = true;
+	}
+
+	FIB_MOD_LOCK();
+	struct fib_dp **pdp = get_family_dp_ptr(family);
+	struct fib_dp_header *old_fdh = get_fib_dp_header(*pdp);
+
+	/* Update the items not touched by the new init, from the old data pointer */
+	for (int i = 0; i < num_tables; i++) {
+		if (new_fdh->fdh_idx[i].f == dummy_lookup)
+			new_fdh->fdh_idx[i] = old_fdh->fdh_idx[i];
+	}
+
+	/* Ensure all index writes have completed */
+	atomic_thread_fence_rel();
+	/* Set new datapath pointer */
+	*pdp = &new_fdh->fdh_idx[0];
+
+	FIB_MOD_UNLOCK();
+
+	fib_epoch_call(destroy_fdh_epoch, &old_fdh->fdh_epoch_ctx);
 }
 
 /*
diff --git a/sys/net/route/route_tables.c b/sys/net/route/route_tables.c
index e3743d55404d..c9a9d6a915eb 100644
--- a/sys/net/route/route_tables.c
+++ b/sys/net/route/route_tables.c
@@ -214,13 +214,6 @@ grow_rtables(uint32_t num_tables)
 			if (rh == NULL)
 				log(LOG_ERR, "unable to create routing table for %d.%d\n",
 				    dom->dom_family, i);
-#ifdef FIB_ALGO
-			if (fib_select_algo_initial(rh) != 0) {
-				log(LOG_ERR, "unable to select algo for table %d.%d\n",
-				    dom->dom_family, i);
-				// TODO: detach table
-			}
-#endif
 			*prnh = rh;
 		}
 	}
@@ -238,9 +231,17 @@ grow_rtables(uint32_t num_tables)
 	atomic_thread_fence_rel();
 	epoch_wait_preempt(net_epoch_preempt);
 
-	/* Finally, set number of fibs to a new value */
+	/* Set number of fibs to a new value */
 	V_rt_numfibs = num_tables;
 
+#ifdef FIB_ALGO
+	/* Attach fib algo to the new rtables */
+	for (dom = domains; dom; dom = dom->dom_next) {
+		if (dom->dom_rtattach != NULL)
+			fib_setup_family(dom->dom_family, num_tables);
+	}
+#endif
+
 	if (old_rt_tables != NULL)
 		free(old_rt_tables, M_RTABLE);
 }
diff --git a/sys/net/route/route_var.h b/sys/net/route/route_var.h
index f12931476fd3..ad397104209b 100644
--- a/sys/net/route/route_var.h
+++ b/sys/net/route/route_var.h
@@ -76,6 +76,7 @@ struct rib_head {
 #endif
 	uint32_t		rib_dying:1;	/* rib is detaching */
 	uint32_t		rib_algo_fixed:1;/* fixed algorithm */
+	uint32_t		rib_algo_init:1;/* algo init done */
 	struct nh_control	*nh_control;	/* nexthop subsystem data */
 	CK_STAILQ_HEAD(, rib_subscription)	rnh_subscribers;/* notification subscribers */
 };
@@ -328,7 +329,7 @@ int rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum);
 
 /* lookup_framework.c */
 void fib_grow_rtables(uint32_t new_num_tables);
-int fib_select_algo_initial(struct rib_head *rh);
+void fib_setup_family(int family, uint32_t num_tables);
 void fib_destroy_rib(struct rib_head *rh);
 void vnet_fib_init(void);
 void vnet_fib_destroy(void);


More information about the dev-commits-src-all mailing list