git: c0256b31efcc - main - routing: Add support for metric

From: Pouria Mousavizadeh Tehrani <pouria_at_FreeBSD.org>
Date: Wed, 20 May 2026 20:58:46 UTC
The branch main has been updated by pouria:

URL: https://cgit.FreeBSD.org/src/commit/?id=c0256b31efcccb6964822b5aadb183e8a6d45507

commit c0256b31efcccb6964822b5aadb183e8a6d45507
Author:     Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org>
AuthorDate: 2026-05-02 20:04:26 +0000
Commit:     Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org>
CommitDate: 2026-05-20 20:55:00 +0000

    routing: Add support for metric
    
    In our routing stack implementation, the metric is an attribute
    of the nexthop, not of the route itself.
    Store the metric in nhop_priv, which is the control-plane data of
    a nexthop, filter the nexthops by metric, and populate the mpath
    slots in a nexthop group with only the lowest-metric nexthops
    for use in the forwarding path.
    
    `cmp_priv()` compares nhops based on priv hash.
    Add metric comparison logic to it: a key nexthop with a non-zero
    metric matches only nexthops with the same metric, while a key
    whose metric is zero (wildcard) matches regardless of metric.
    Also, add support for metric via rtsock by introducing rmx_metric.
    
    Finally, remove the upper 8-bit reservation of weight for
    administrative distance.
    
    Reviewed by:    adrian
    Discussed with: markj
    Relnotes:       yes
    Differential Revision: https://reviews.freebsd.org/D56322
---
 sys/net/route.h               |  7 +++--
 sys/net/route/nhgrp_ctl.c     | 61 +++++++++++++++++++++++++++++++------------
 sys/net/route/nhop.h          |  2 ++
 sys/net/route/nhop_ctl.c      | 39 ++++++++++++++++++++++++---
 sys/net/route/nhop_var.h      |  3 ++-
 sys/net/route/route_ctl.c     |  5 +---
 sys/net/route/route_helpers.c |  1 +
 sys/net/rtsock.c              |  1 +
 8 files changed, 92 insertions(+), 27 deletions(-)

diff --git a/sys/net/route.h b/sys/net/route.h
index 34df3297d6d4..d8cc12a39c61 100644
--- a/sys/net/route.h
+++ b/sys/net/route.h
@@ -90,7 +90,8 @@ struct rt_metrics {
 	u_long	rmx_pksent;	/* packets sent using this route */
 	u_long	rmx_weight;	/* route weight */
 	u_long	rmx_nhidx;	/* route nexhop index */
-	u_long	rmx_filler[2];	/* will be used for T/TCP later */
+	u_long	rmx_metric;	/* route metric */
+	u_long	rmx_filler[1];
 };
 
 /*
@@ -103,7 +104,8 @@ struct rt_metrics {
 
 /* default route weight */
 #define	RT_DEFAULT_WEIGHT	1
-#define	RT_MAX_WEIGHT		16777215	/* 3 bytes */
+#define	RT_DEFAULT_METRIC	1
+#define	RT_WILDCARD_METRIC	0
 
 /*
  * Keep a generation count of routing table, incremented on route addition,
@@ -300,6 +302,7 @@ struct rt_msghdr {
 #define RTV_RTT		0x40	/* init or lock _rtt */
 #define RTV_RTTVAR	0x80	/* init or lock _rttvar */
 #define RTV_WEIGHT	0x100	/* init or lock _weight */
+#define RTV_METRIC	0x200	/* init or lock _metric */
 
 #ifndef NETLINK_COMPAT
 
diff --git a/sys/net/route/nhgrp_ctl.c b/sys/net/route/nhgrp_ctl.c
index 7230e02195ee..d0f954888997 100644
--- a/sys/net/route/nhgrp_ctl.c
+++ b/sys/net/route/nhgrp_ctl.c
@@ -135,6 +135,7 @@ sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items)
  * comparable.
  * Assumes @wn is sorted by weight ascending and each weight is > 0.
  * Returns number of slots or 0 if precise calculation failed.
+ * Only calculate for nexthops with specified metric and ignore the rest.
  *
  * Some examples:
  * note: (i, X) pair means (nhop=i, weight=X):
@@ -144,17 +145,26 @@ sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items)
  */
 static uint32_t
 calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items,
-    uint64_t *ptotal)
+    uint32_t metric, uint64_t *ptotal)
 {
-	uint32_t i, last, xmin;
+	uint32_t i, x, last, xmin = 0;
 	uint64_t total = 0;
 
 	// Get sorted array of weights in .storage field
 	sort_weightened_nhops_weights(wn, num_items);
 
+	/* Seed xmin with the weight of the first nexthop matching @metric */
+	for (x = 0; x < num_items; x++) {
+		if (nhop_get_metric(wn[x].nh) == metric) {
+			xmin = wn[x].storage;
+			break;
+		}
+	}
 	last = 0;
-	xmin = wn[0].storage;
-	for (i = 0; i < num_items; i++) {
+	for (i = x; i < num_items; i++) {
+		if (nhop_get_metric(wn[i].nh) != metric)
+			continue;
+
 		total += wn[i].storage;
 		if ((wn[i].storage != last) &&
 		    ((wn[i].storage - last < xmin) || xmin == 0)) {
@@ -176,7 +186,8 @@ calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items,
 
 /*
  * Calculate minimum number of slots required to fit the existing
- * set of weights while maintaining weight coefficients.
+ * set of weights while maintaining weight coefficients
+ * after filtering by metric.
  *
  * Assume @wn is sorted by weight ascending and each weight is > 0.
  *
@@ -184,12 +195,13 @@ calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items,
  *  RIB_MAX_MPATH_WIDTH in case of any failure.
  */
 static uint32_t
-calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items)
+calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items,
+    uint32_t metric)
 {
 	uint32_t v;
 	uint64_t total;
 
-	v = calc_min_mpath_slots_fast(wn, num_items, &total);
+	v = calc_min_mpath_slots_fast(wn, num_items, metric, &total);
 	if (total == 0)
 		return (0);
 	if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH))
@@ -224,6 +236,9 @@ get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops)
 /*
  * Compile actual list of nexthops to be used by datapath from
  *  the nexthop group @dst.
+ * Only nexthops whose metric equals @metric are compiled into the
+ * datapath array; all others are skipped. The caller is expected to
+ * pass the lowest metric found among the nexthops in @x.
  *
  * For example, compiling control plane list of 2 nexthops
  *  [(200, A), (100, B)] would result in the datapath array
@@ -231,7 +246,7 @@ get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops)
  */
 static void
 compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
-    uint32_t num_slots)
+    uint32_t num_slots, uint32_t metric)
 {
 	struct nhgrp_object *dst;
 	int i, slot_idx, remaining_slots;
@@ -239,14 +254,20 @@ compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
 
 	slot_idx  = 0;
 	dst = dst_priv->nhg;
-	/* Calculate sum of all weights */
+	/* Calculate sum of all weights with lowest metric */
 	remaining_sum = 0;
-	for (i = 0; i < dst_priv->nhg_nh_count; i++)
-		remaining_sum += x[i].weight;
+	for (i = 0; i < dst_priv->nhg_nh_count; i++) {
+		if (nhop_get_metric(x[i].nh) == metric)
+			remaining_sum += x[i].weight;
+	}
+
 	remaining_slots = num_slots;
-	FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d",
-	    remaining_sum, remaining_slots);
+	FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d, lowest_metric: %u",
+	    remaining_sum, remaining_slots, metric);
 	for (i = 0; i < dst_priv->nhg_nh_count; i++) {
+		if (nhop_get_metric(x[i].nh) != metric)
+			continue;
+
 		/* Calculate number of slots for the current nexthop */
 		if (remaining_sum > 0) {
 			nh_weight = (uint64_t)x[i].weight;
@@ -275,13 +296,13 @@ compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
  * Returns group with refcount=1 or NULL.
  */
 static struct nhgrp_priv *
-alloc_nhgrp(struct weightened_nhop *wn, int num_nhops)
+alloc_nhgrp(struct weightened_nhop *wn, int num_nhops, uint32_t min_metric)
 {
 	uint32_t nhgrp_size;
 	struct nhgrp_object *nhg;
 	struct nhgrp_priv *nhg_priv;
 
-	nhgrp_size = calc_min_mpath_slots(wn, num_nhops);
+	nhgrp_size = calc_min_mpath_slots(wn, num_nhops, min_metric);
 	if (nhgrp_size == 0) {
 		/* Zero weights, abort */
 		return (NULL);
@@ -314,7 +335,7 @@ alloc_nhgrp(struct weightened_nhop *wn, int num_nhops)
 	FIB_NH_LOG(LOG_DEBUG, wn[0].nh, "num_nhops: %d, compiled_nhop: %u",
 	    num_nhops, nhgrp_size);
 
-	compile_nhgrp(nhg_priv, wn, nhg->nhg_size);
+	compile_nhgrp(nhg_priv, wn, nhg->nhg_size, min_metric);
 
 	return (nhg_priv);
 }
@@ -464,6 +485,8 @@ nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nho
 	struct nhgrp_priv *nhg_priv;
 	struct nh_control *ctl;
 
+	MPASS((num_nhops != 0));
+
 	if (rh == NULL) {
 		*perror = E2BIG;
 		return (NULL);
@@ -487,6 +510,7 @@ nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nho
 	/* Sort nexthops & check there are no duplicates */
 	sort_weightened_nhops(wn, num_nhops);
 	uint32_t last_id = 0;
+	uint32_t min_metric = nhop_get_metric(wn[0].nh);
 	for (int i = 0; i < num_nhops; i++) {
 		if (wn[i].nh->nh_priv->nh_control != ctl) {
 			*perror = EINVAL;
@@ -497,9 +521,12 @@ nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nho
 			return (NULL);
 		}
 		last_id = wn[i].nh->nh_priv->nh_idx;
+
+		if (nhop_get_metric(wn[i].nh) < min_metric)
+			min_metric = nhop_get_metric(wn[i].nh);
 	}
 
-	if ((nhg_priv = alloc_nhgrp(wn, num_nhops)) == NULL) {
+	if ((nhg_priv = alloc_nhgrp(wn, num_nhops, min_metric)) == NULL) {
 		*perror = ENOMEM;
 		return (NULL);
 	}
diff --git a/sys/net/route/nhop.h b/sys/net/route/nhop.h
index 1e0dba158f04..6c62ae2f2f5f 100644
--- a/sys/net/route/nhop.h
+++ b/sys/net/route/nhop.h
@@ -219,6 +219,8 @@ uint32_t nhop_get_fibnum(const struct nhop_object *nh);
 void nhop_set_fibnum(struct nhop_object *nh, uint32_t fibnum);
 uint32_t nhop_get_expire(const struct nhop_object *nh);
 void nhop_set_expire(struct nhop_object *nh, uint32_t expire);
+uint32_t nhop_get_metric(const struct nhop_object *nh);
+void nhop_set_metric(struct nhop_object *nh, uint32_t metric);
 struct rib_head *nhop_get_rh(const struct nhop_object *nh);
 
 struct nhgrp_object;
diff --git a/sys/net/route/nhop_ctl.c b/sys/net/route/nhop_ctl.c
index 596ec9e25d1a..9ef5bbc74a92 100644
--- a/sys/net/route/nhop_ctl.c
+++ b/sys/net/route/nhop_ctl.c
@@ -149,13 +149,17 @@ get_aifp(const struct nhop_object *nh)
 }
 
 int
-cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two)
+cmp_priv(const struct nhop_priv *key, const struct nhop_priv *search)
 {
 
-	if (memcmp(_one->nh, _two->nh, NHOP_END_CMP) != 0)
+	if (memcmp(key->nh, search->nh, NHOP_END_CMP) != 0)
 		return (0);
 
-	if (memcmp(_one, _two, NH_PRIV_END_CMP) != 0)
+	if (memcmp(key, search, NH_PRIV_END_CMP) != 0)
+		return (0);
+
+	if (key->nh_metric != RT_WILDCARD_METRIC &&
+	    key->nh_metric != search->nh_metric)
 		return (0);
 
 	return (1);
@@ -171,6 +175,19 @@ set_nhop_mtu_from_info(struct nhop_object *nh, const struct rt_addrinfo *info)
 		nhop_set_mtu(nh, info->rti_rmx->rmx_mtu, true);
 }
 
+static void
+set_nhop_metric_from_info(struct nhop_object *nh, const struct rt_addrinfo *info)
+{
+	uint32_t metric;
+
+	if (info->rti_mflags & RTV_METRIC)
+		metric = info->rti_rmx->rmx_metric;
+	else
+		metric = RT_DEFAULT_METRIC;
+
+	nhop_set_metric(nh, metric);
+}
+
 /*
  * Fills in shorted link-level sockadd version suitable to be stored inside the
  *  nexthop gateway buffer.
@@ -288,6 +305,7 @@ nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
 	nhop_set_rtflags(nh, info->rti_flags);
 
 	set_nhop_mtu_from_info(nh, info);
+	set_nhop_metric_from_info(nh, info);
 	nhop_set_src(nh, info->rti_ifa);
 
 	/*
@@ -1044,6 +1062,21 @@ nhop_set_origin(struct nhop_object *nh, uint8_t origin)
 	nh->nh_priv->nh_origin = origin;
 }
 
+uint32_t
+nhop_get_metric(const struct nhop_object *nh)
+{
+	return (nh->nh_priv->nh_metric);
+}
+
+void
+nhop_set_metric(struct nhop_object *nh, uint32_t metric)
+{
+	if (metric != RT_WILDCARD_METRIC)
+		nh->nh_priv->nh_metric = metric;
+	else
+		nh->nh_priv->nh_metric = RT_DEFAULT_METRIC;
+}
+
 void
 nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu)
 {
diff --git a/sys/net/route/nhop_var.h b/sys/net/route/nhop_var.h
index b8d7732551d3..0a07f4f3f901 100644
--- a/sys/net/route/nhop_var.h
+++ b/sys/net/route/nhop_var.h
@@ -81,6 +81,7 @@ struct nhop_priv {
 	/* nhop lookup comparison end */
 	uint32_t		nh_idx;		/* nexthop index */
 	uint32_t		nh_fibnum;	/* nexthop fib */
+	uint32_t		nh_metric;	/* nexthop metric */
 	void			*cb_func;	/* function handling additional rewrite caps */
 	u_int			nh_refcnt;	/* number of references, refcount(9)  */
 	u_int			nh_linked;	/* refcount(9), == 2 if linked to the list */
@@ -106,6 +107,6 @@ int link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
 struct nhop_priv *unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
 
 /* nhop_ctl.c */
-int cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two);
+int cmp_priv(const struct nhop_priv *key, const struct nhop_priv *search);
 
 #endif
diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c
index 7612c2bdfb58..cfa3b52b63a6 100644
--- a/sys/net/route/route_ctl.c
+++ b/sys/net/route/route_ctl.c
@@ -196,10 +196,7 @@ get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
 		weight = info->rti_rmx->rmx_weight;
 	else
 		weight = default_weight;
-	/* Keep upper 1 byte for adm distance purposes */
-	if (weight > RT_MAX_WEIGHT)
-		weight = RT_MAX_WEIGHT;
-	else if (weight == 0)
+	if (weight == 0)
 		weight = default_weight;
 
 	return (weight);
diff --git a/sys/net/route/route_helpers.c b/sys/net/route/route_helpers.c
index 303ff018e9e0..f09a8bbbd3e5 100644
--- a/sys/net/route/route_helpers.c
+++ b/sys/net/route/route_helpers.c
@@ -458,6 +458,7 @@ rib_add_default_route(uint32_t fibnum, int family, struct ifnet *ifp,
 	nhop_set_transmit_ifp(nh, ifp);
 	nhop_set_src(nh, ifa);
 	nhop_set_pxtype_flag(nh, NHF_DEFAULT);
+	nhop_set_metric(nh, RT_DEFAULT_METRIC);
 	rnd.rnd_nhop = nhop_get_nhop(nh, &error);
 
 	if (error == 0)
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index 562cf6d426c9..47da83b5561b 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -1328,6 +1328,7 @@ rt_getmetrics(const struct rtentry *rt, const struct nhop_object *nh,
 	bzero(out, sizeof(*out));
 	out->rmx_mtu = nh->nh_mtu;
 	out->rmx_weight = rt->rt_weight;
+	out->rmx_metric = nhop_get_metric(nh);
 	out->rmx_nhidx = nhop_get_idx(nh);
 	/* Kernel -> userland timebase conversion. */
 	out->rmx_expire = nhop_get_expire(nh) ?