misc/143703: Patch: ECMP Phase 1 fixes for FreeBSD 7.2

Balaji G balajig81 at gmail.com
Tue Feb 9 17:50:01 UTC 2010


>Number:         143703
>Category:       misc
>Synopsis:       Patch: ECMP Phase 1 fixes for FreeBSD 7.2
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          update
>Submitter-Id:   current-users
>Arrival-Date:   Tue Feb 09 17:50:01 UTC 2010
>Closed-Date:
>Last-Modified:
>Originator:     Balaji G
>Release:        FreeBSD 7.2
>Organization:
Home
>Environment:
FreeBSD  7.2-RELEASE FreeBSD 7.2-RELEASE #7: Sun Feb  7 13:19:58 IST 2010     root@:/usr/obj/usr/home/balaji/7.2.0/sys/MYKERNEL  i386

>Description:
This patch contains the ECMP Phase 1 fixes backported to the 7.2 release. I am working on bringing in the remaining changes too. This patch installs ECMP routes in the RIB.
>How-To-Repeat:
Create two static routes with the same destination but different gateways.
>Fix:


Patch attached with submission follows:

diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix.c net/radix.c
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix.c	2010-02-04 22:40:29.000000000 +0530
+++ net/radix.c	2010-02-09 21:10:19.731903885 +0530
@@ -48,6 +48,8 @@
 #include <net/radix.h>
 #endif
 
+#include <net/radix_mpath.h>
+
 static int	rn_walktree_from(struct radix_node_head *h, void *a, void *m,
 		    walktree_f_t *f, void *w);
 static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
@@ -630,6 +632,21 @@
 	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
 	if (keyduplicated) {
 		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
+
+			/* permit multipath, if enabled for the family */
+			if (rn_mpath_capable(head) && netmask == tt->rn_mask) {
+				/*
+				 * go down to the end of multipaths, so that
+				 * new entry goes into the end of rn_dupedkey
+				 * chain.
+				 */
+				do {
+					t = tt;
+					tt = tt->rn_dupedkey;
+				} while (tt && t->rn_mask == tt->rn_mask);
+				break;
+			}
+
 			if (tt->rn_mask == netmask)
 				return (0);
 			if (netmask == 0 ||
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix.h net/radix.h
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix.h	2010-02-05 08:38:18.000000000 +0530
+++ net/radix.h	2010-02-09 21:17:10.373883032 +0530
@@ -130,9 +130,9 @@
 	void	(*rnh_close)	/* do something when the last ref drops */
 		(struct radix_node *rn, struct radix_node_head *head);
 	struct	radix_node rnh_nodes[3];	/* empty tree for common case */
-	/* ECMP Changes Begin */
+	
 	int	rnh_multipath;			/* multipath capable ? */ 
-	/* ECMP Changes  End */
+	
 #ifdef _KERNEL
 	struct	mtx rnh_mtx;			/* locks entire radix tree */
 #endif
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix_mpath.c net/radix_mpath.c
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix_mpath.c	2010-02-05 08:30:52.000000000 +0530
+++ net/radix_mpath.c	2010-02-09 21:12:14.681638068 +0530
@@ -54,7 +54,7 @@
 /*
  * give some jitter to hash, to avoid synchronization between routers
  */
-static uint32_t hashjitter;
+static uint32_t hashjitter; 
 
 int
 rn_mpath_capable(struct radix_node_head *rnh)
@@ -258,7 +258,6 @@
 	return 0;
 }
 
-#if 0
 void
 rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
 {
@@ -317,10 +316,9 @@
 	} 
 	RT_UNLOCK(ro->ro_rt);
 }
-#endif
 
 extern int	in6_inithead(void **head, int off);
-extern int	in_inithead(void **head, int off);
+extern int	in_inithead(void **head, int off);
 
 #ifdef INET
 int
@@ -352,5 +350,5 @@
 	} else
 		return 0;
 }
-
 #endif
+
Only in net: radix_mpath.h
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.c net/route.c
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.c	2010-02-04 22:40:29.000000000 +0530
+++ net/route.c	2010-02-09 21:16:21.754622762 +0530
@@ -886,6 +886,111 @@
 	return (rtrequest1_fib(req, info, ret_nrt, 0));
 }
 
+static int
+rn_mpath_update(int req, struct rt_addrinfo *info,
+    struct radix_node_head *rnh, struct rtentry **ret_nrt)
+{
+	/*
+	 * Multipath delete helper: look up dst and require the caller's
+	 * RTAX_GATEWAY to match an entry, else return ESRCH.
+	 */
+	struct rtentry *rt, *rto = NULL;
+	register struct radix_node *rn;
+	int error = 0;
+
+	rn = rnh->rnh_matchaddr(dst, rnh);
+	if (rn == NULL)
+		return (ESRCH);
+	rto = rt = RNTORT(rn);
+	rt = rt_mpath_matchgate(rt, gateway);
+	if (rt == NULL)
+		return (ESRCH);
+	/*
+	 * the gateway match is the first entry in the chain
+	 */
+	if (rto == rt) {
+		rn = rn_mpath_next((struct radix_node *)rt);
+		/*
+		 * there is another entry; mark it RTF_UP (now active)
+		 */
+		if (rn) {
+			rto = RNTORT(rn);
+			RT_LOCK(rto);
+			rto->rt_flags |= RTF_UP;
+			RT_UNLOCK(rto);
+		} else if (rt->rt_flags & RTF_GATEWAY) {
+			/*
+			 * For gateway routes, we need to
+			 * make sure that we are deleting
+			 * the correct gateway.
+			 * rt_mpath_matchgate() does not
+			 * check the case when there is only
+			 * one route in the chain.
+			 */
+			if (gateway &&
+			    (rt->rt_gateway->sa_len != gateway->sa_len ||
+				memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
+				error = ESRCH;
+			else {
+				/*
+				 * remove from tree before returning it
+				 * to the caller
+				 */
+				rn = rnh->rnh_deladdr(dst, netmask, rnh);
+				KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
+				goto gwdelete;
+			}
+			
+		}
+		/*
+		 * ENOENT tells the caller to use the normal delete code for
+		 * the first entry. NOTE(review): overwrites ESRCH set above.
+		 */
+		if (req != RTM_DELETE) 
+			goto nondelete;
+
+		error = ENOENT;
+		goto done;
+	}
+		
+	/*
+	 * the matched entry is the 2nd or later in the chain
+	 */
+	if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
+		panic ("rtrequest1: rt_mpath_deldup");
+gwdelete:
+	RT_LOCK(rt);
+	RT_ADDREF(rt);
+	if (req == RTM_DELETE) {
+		rt->rt_flags &= ~RTF_UP;
+		/*
+		 * One more rtentry floating around that is not
+		 * linked to the routing table. rttrash will be decremented
+		 * when RTFREE(rt) is eventually called.
+		 */
+		rttrash++;
+	}
+	
+nondelete:
+	if (req != RTM_DELETE)
+		panic("unrecognized request %d", req);
+	
+
+	/*
+	 * If the caller wants it, then it can have it,
+	 * but it's up to it to free the rtentry as we won't be
+	 * doing it.
+	 */
+	if (ret_nrt) {
+		*ret_nrt = rt;
+		RT_UNLOCK(rt);
+	} else
+		RTFREE_LOCKED(rt);
+done:
+	return (error);
+}
+
+
 int
 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 				u_int fibnum)
@@ -923,6 +1028,17 @@
 	}
 	switch (req) {
 	case RTM_DELETE:
+
+		if (rn_mpath_capable(rnh)) {
+			error = rn_mpath_update(req, info, rnh, ret_nrt);
+			/*
+			 * "bad" holds true for the success case
+			 * as well
+			 */
+			if (error != ENOENT)
+				goto bad;
+		}
+
 		/*
 		 * Remove the item from the tree and return it.
 		 * Complain if it is not there and do no more processing.
@@ -1046,6 +1162,18 @@
 		rt->rt_ifa = ifa;
 		rt->rt_ifp = ifa->ifa_ifp;
 
+		/* do not permit exactly the same dst/mask/gw pair */
+		if (rn_mpath_capable(rnh) &&
+			rt_mpath_conflict(rnh, rt, netmask)) {
+			if (rt->rt_ifa) {
+				IFAFREE(rt->rt_ifa);
+			}
+			Free(rt_key(rt));
+			RT_LOCK_DESTROY(rt);
+			uma_zfree(rtzone, rt);
+			senderr(EEXIST);
+		}
+
 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
 		rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
 		if (rn == NULL) {
@@ -1456,6 +1584,27 @@
 				/* this table doesn't exist but others might */
 				continue;
 			RADIX_NODE_HEAD_LOCK(rnh);
+			if (rn_mpath_capable(rnh)) {
+
+				rn = rnh->rnh_matchaddr(dst, rnh);
+				if (rn == NULL) 
+					error = ESRCH;
+				else {
+					rt = RNTORT(rn);
+					/*
+					 * for interface route the
+					 * rt->rt_gateway is sockaddr_intf
+					 * for cloning ARP entries, so
+					 * rt_mpath_matchgate must use the
+					 * interface address
+					 */
+					rt = rt_mpath_matchgate(rt,
+					    ifa->ifa_addr);
+					if (!rt) 
+						error = ESRCH;
+				}
+			}
+			else
 			rn = rnh->rnh_lookup(dst, netmask, rnh);
 			error = (rn == NULL ||
 			    (rn->rn_flags & RNF_ROOT) ||
@@ -1482,6 +1631,20 @@
 			 * notify any listening routing agents of the change
 			 */
 			RT_LOCK(rt);
+			/*
+			 * in case address alias finds the first address
+			 * e.g. ifconfig bge0 192.103.54.246/24
+			 * e.g. ifconfig bge0 192.103.54.247/24
+			 * the address set in the route is 192.103.54.246
+			 * so we need to replace it with 192.103.54.247
+			 */
+			if (memcmp(rt->rt_ifa->ifa_addr,
+			    ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
+				IFAFREE(rt->rt_ifa);
+				IFAREF(ifa);
+				rt->rt_ifp = ifa->ifa_ifp;
+				rt->rt_ifa = ifa;
+			}
 			rt_newaddrmsg(cmd, ifa, error, rt);
 			if (cmd == RTM_DELETE) {
 				/*
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.h net/route.h
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.h	2010-02-04 22:40:29.000000000 +0530
+++ net/route.h	2010-02-09 21:18:10.257871360 +0530
@@ -58,6 +58,7 @@
 	u_long	rmx_mtu;	/* MTU for this path */
 	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
 	u_long	rmx_pksent;	/* packets sent using this route */
+	u_long  rmx_weight; 
 };
 
 struct rt_metrics {
@@ -101,6 +102,9 @@
 #ifndef RNF_NORMAL
 #include <net/radix.h>
 #endif
+
+#include <net/radix_mpath.h>
+
 struct rtentry {
 	struct	radix_node rt_nodes[2];	/* tree glue, and other values */
 	/*
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/rtsock.c net/rtsock.c
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/rtsock.c	2010-02-04 22:40:29.000000000 +0530
+++ net/rtsock.c	2010-02-09 21:18:37.274871425 +0530
@@ -536,6 +536,24 @@
 			RADIX_NODE_HEAD_UNLOCK(rnh);
 			senderr(ESRCH);
 		}
+
+		/*
+		 * for RTM_CHANGE/LOCK, if we got multipath routes,
+		 * we require users to specify a matching RTAX_GATEWAY.
+		 *
+		 * for RTM_GET, gate is optional even with multipath.
+		 * if gate == NULL the first match is returned.
+		 * (no need to call rt_mpath_matchgate if gate == NULL)
+		 */
+		if (rn_mpath_capable(rnh) &&
+		    (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
+			rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
+			if (!rt) {
+				RADIX_NODE_HEAD_UNLOCK(rnh);
+				senderr(ESRCH);
+			}
+		}
+
 		RT_LOCK(rt);
 		RT_ADDREF(rt);
 		RADIX_NODE_HEAD_UNLOCK(rnh);


>Release-Note:
>Audit-Trail:
>Unformatted:


More information about the freebsd-bugs mailing list