kern/173477: mpath bugfixes
Ingo Flaschberger
if at FreeBSD.org
Thu Nov 8 16:50:01 UTC 2012
>Number: 173477
>Category: kern
>Synopsis: mpath bugfixes
>Confidential: no
>Severity: non-critical
>Priority: low
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Thu Nov 08 16:50:01 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator: Ingo Flaschberger
>Release: 9.1 Stable
>Organization:
crossip communications gmbh
>Environment:
9.1-PRERELEASE
>Description:
Several mpath bugfixes:
*) if mpath is enabled, the interface loopback route cannot be deleted
(regression introduced in SVN rev 226241)
*) route selection crashes when 3 mpath routes are installed and then deleted:
1: route to gw1 weight 3
2: route to gw2 weight 2
3: interface route metric 1
and routes 2 and 1 are deleted in that order (an already freed rm_leaf is returned)
*) added correct mpath selection on interface-routes (in_lltable_rtcheck)
*) added mpath to fastforward
*) do correct equal cost mpath route selection based on weight (rtalloc_mpath_fib_flags)
>How-To-Repeat:
Mpath test-script:
em0: the interface must be up
em3: the interface must be up, and a pingable host with address 10.11.11.1/24 must be reachable on it
The routing table has to be the same before and after running the script.
#!/bin/sh
#
# Multipath (RADIX_MPATH) regression script for kern/173477.
#
# Adds and removes an interface alias on em3 and two weighted gateway
# routes for 10.11.11.0/24 in several different orders.  After every
# step it pings 10.11.11.1 once and prints "testN ok" or "testN failed"
# depending on whether the observed reachability matches the expectation
# for that step.
#
# Prerequisites: em0 and em3 are up, and a pingable host 10.11.11.1/24
# is attached to em3.  The routing table should be identical before and
# after the run.

TARGET=10.11.11.1

# Wait until the operator presses Enter.
# The prompt is printed separately: read's operand is a variable name,
# so passing the prompt text to read makes it fail instead of waiting.
pause() {
	printf 'Press [Enter] key '
	read -r _reply
}

# expect_ping NAME WANT
# Send one echo request to $TARGET with a one second timeout and print
# "NAME ok" if the outcome matches WANT ("up" = reply expected,
# "down" = no reply expected), otherwise print "NAME failed".
expect_ping() {
	if ping -t 1 -c 1 "$TARGET" > /dev/null; then
		_reach=up
	else
		_reach=down
	fi
	if [ "$_reach" = "$2" ]; then
		echo "$1 ok"
	else
		echo "$1 failed"
	fi
}

ifconfig em0 192.168.2.100/24
pause

# Pass 1: interface route first, then both gateways; remove the
# gateways first and the interface route last.
ifconfig em3 alias 10.11.11.175/24 > /dev/null
expect_ping test1 up
pause
route add 10.11.11.0/24 192.168.2.1 -weight 2 > /dev/null
expect_ping test2 up
pause
route add 10.11.11.0/24 192.168.2.3 -weight 3 > /dev/null
expect_ping test3 up
pause
route delete 10.11.11.0/24 192.168.2.1 > /dev/null
expect_ping test4 up
pause
route delete 10.11.11.0/24 192.168.2.3 > /dev/null
expect_ping test5 up
pause
ifconfig em3 -alias 10.11.11.175 > /dev/null
expect_ping test6 down
pause

# Pass 2: gateway, interface route, gateway; remove the interface
# route first and the gateways afterwards.
route add 10.11.11.0/24 192.168.2.1 -weight 2 > /dev/null
expect_ping test7 down
pause
ifconfig em3 alias 10.11.11.175/24 > /dev/null
expect_ping test8 up
pause
route add 10.11.11.0/24 192.168.2.3 -weight 3 > /dev/null
expect_ping test9 up
pause
ifconfig em3 -alias 10.11.11.175 > /dev/null
expect_ping test10 down
pause
route delete 10.11.11.0/24 192.168.2.1 > /dev/null
expect_ping test11 down
pause
route delete 10.11.11.0/24 192.168.2.3 > /dev/null
expect_ping test12 down
pause

# Pass 3: both gateways first, interface route last; remove the
# gateways in reverse order and the interface route at the end.
route add 10.11.11.0/24 192.168.2.1 -weight 2 > /dev/null
expect_ping test13 down
pause
route add 10.11.11.0/24 192.168.2.3 -weight 3 > /dev/null
expect_ping test14 down
pause
ifconfig em3 alias 10.11.11.175/24 > /dev/null
expect_ping test15 up
pause
route delete 10.11.11.0/24 192.168.2.3 > /dev/null
expect_ping test16 up
pause
route delete 10.11.11.0/24 192.168.2.1 > /dev/null
expect_ping test17 up
pause
ifconfig em3 -alias 10.11.11.175 > /dev/null
expect_ping test18 down
>Fix:
Patch attached with submission follows:
diff -u -r sys_org/contrib/ipfilter/netinet/ip_pool.c /router/usr/src/sys/contrib/ipfilter/netinet/ip_pool.c
--- sys_org/contrib/ipfilter/netinet/ip_pool.c 2012-11-08 15:15:22.000000000 +0100
+++ /router/usr/src/sys/contrib/ipfilter/netinet/ip_pool.c 2012-10-29 16:19:05.000000000 +0100
@@ -620,7 +620,7 @@
RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
ipo->ipo_head->rnh_deladdr(&ipe->ipn_addr, &ipe->ipn_mask,
- ipo->ipo_head);
+ ipo->ipo_head, NULL);
RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head);
ip_pool_node_deref(ipe);
@@ -751,7 +751,7 @@
RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
while ((n = ipo->ipo_list) != NULL) {
ipo->ipo_head->rnh_deladdr(&n->ipn_addr, &n->ipn_mask,
- ipo->ipo_head);
+ ipo->ipo_head, NULL);
*n->ipn_pnext = n->ipn_next;
if (n->ipn_next)
@@ -963,7 +963,7 @@
struct radix_node_head *rnh = p;
struct radix_node *d;
- d = rnh->rnh_deladdr(n->rn_key, NULL, rnh);
+ d = rnh->rnh_deladdr(n->rn_key, NULL, rnh, NULL);
if (d != NULL) {
FreeS(d, max_keylen + 2 * sizeof (*d));
}
diff -u -r sys_org/kern/vfs_export.c /router/usr/src/sys/kern/vfs_export.c
--- sys_org/kern/vfs_export.c 2012-11-08 15:15:13.000000000 +0100
+++ /router/usr/src/sys/kern/vfs_export.c 2012-10-29 16:16:33.000000000 +0100
@@ -228,7 +228,7 @@
struct radix_node_head *rnh = (struct radix_node_head *) w;
struct ucred *cred;
- (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
+ (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh, NULL);
cred = ((struct netcred *)rn)->netc_anon;
if (cred != NULL)
crfree(cred);
diff -u -r sys_org/net/if.c /router/usr/src/sys/net/if.c
--- sys_org/net/if.c 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/if.c 2012-10-30 00:34:40.000000000 +0100
@@ -70,6 +70,7 @@
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/radix.h>
+#include "opt_mpath.h"
#include <net/route.h>
#include <net/vnet.h>
@@ -1485,6 +1486,9 @@
{
int error = 0;
struct rt_addrinfo info;
+#ifdef RADIX_MPATH
+ struct ifaddr *new_ifa;
+#else
struct sockaddr_dl null_sdl;
bzero(&null_sdl, sizeof(null_sdl));
@@ -1492,14 +1496,25 @@
null_sdl.sdl_family = AF_LINK;
null_sdl.sdl_type = ifa->ifa_ifp->if_type;
null_sdl.sdl_index = ifa->ifa_ifp->if_index;
+#endif
bzero(&info, sizeof(info));
info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
info.rti_info[RTAX_DST] = ia;
+#ifdef RADIX_MPATH
+ info.rti_ifp = V_loif;
+
+ /* link_rtrequest modifies ifa - do this also */
+ new_ifa = ifaof_ifpforaddr( ia, V_loif);
+
+ /* rt_mpath_matchgate matches ifa_addr and not gateway */
+ info.rti_info[RTAX_GATEWAY] = new_ifa->ifa_addr;
+#else
info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
+#endif
error = rtrequest1_fib(RTM_DELETE, &info, NULL, 0);
if (error != 0)
- log(LOG_INFO, "ifa_del_loopback_route: deletion failed\n");
+ log(LOG_INFO, "ifa_del_loopback_route: deletion failed err: %d\n", error);
return (error);
}
diff -u -r sys_org/net/radix.c /router/usr/src/sys/net/radix.c
--- sys_org/net/radix.c 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix.c 2012-11-08 15:20:04.000000000 +0100
@@ -312,7 +312,7 @@
* lot of confusion.
*/
if (t->rn_flags & RNF_ROOT)
- t = t->rn_dupedkey;
+ t = t->rn_dupedkey;
return t;
on1:
test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
@@ -723,12 +723,20 @@
x = t->rn_right;
/* Promote general routes from below */
if (x->rn_bit < 0) {
- for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
- if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
- *mp = m = rn_new_radix_mask(x, 0);
- if (m)
- mp = &m->rm_mklist;
- }
+ struct radix_node *xx = NULL;
+ for (mp = &t->rn_mklist; x; xx = x, x = x->rn_dupedkey) {
+ if (xx && xx->rn_mklist && xx->rn_mask == x->rn_mask &&
+ x->rn_mklist == 0) {
+ /* multipath route, bump refcount on first mklist */
+ x->rn_mklist = xx->rn_mklist;
+ x->rn_mklist->rm_refs++;
+ }
+ if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
+ *mp = m = rn_new_radix_mask(x, 0);
+ if (m)
+ mp = &m->rm_mklist;
+ }
+ }
} else if (x->rn_mklist) {
/*
* Skip over masks whose index is > that of new node
@@ -760,11 +768,30 @@
break;
if (m->rm_flags & RNF_NORMAL) {
mmask = m->rm_leaf->rn_mask;
- if (tt->rn_flags & RNF_NORMAL) {
-#if !defined(RADIX_MPATH)
+ if (keyduplicated) {
+ if (m->rm_leaf->rn_parent == tt)
+ /* new route is better */
+ m->rm_leaf = tt;
+#ifdef DIAGNOSTIC
+ else {
+ for (t = m->rm_leaf; t;
+ t = t->rn_dupedkey)
+ if (t == tt)
+ break;
+ if (t == NULL) {
+ log(LOG_ERR, "Non-unique "
+ "normal route on dupedkey, "
+ "mask not entered\n");
+ return tt;
+ }
+ }
+#endif
+ m->rm_refs++;
+ tt->rn_mklist = m;
+ return tt;
+ } else if (tt->rn_flags & RNF_NORMAL) {
log(LOG_ERR,
"Non-unique normal route, mask not entered\n");
-#endif
return tt;
}
} else
@@ -783,9 +810,10 @@
}
struct radix_node *
-rn_delete(v_arg, netmask_arg, head)
+rn_delete(v_arg, netmask_arg, head, rn)
void *v_arg, *netmask_arg;
struct radix_node_head *head;
+ struct radix_node *rn;
{
register struct radix_node *t, *p, *x, *tt;
struct radix_mask *m, *saved_m, **mp;
@@ -815,18 +843,41 @@
if ((tt = tt->rn_dupedkey) == 0)
return (0);
}
+#ifdef RADIX_MPATH
+ if (rn) {
+ while (tt != rn)
+ if ((tt = tt->rn_dupedkey) == 0)
+ return (0);
+ }
+#endif
if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
goto on1;
if (tt->rn_flags & RNF_NORMAL) {
- if (m->rm_leaf != tt || m->rm_refs > 0) {
- log(LOG_ERR, "rn_delete: inconsistent annotation\n");
- return 0; /* dangling ref could cause disaster */
- }
+ if (m->rm_leaf != tt && m->rm_refs == 0) {
+ log(LOG_ERR, "rn_delete: inconsistent normal "
+ "annotation\n");
+ return (0);
+ }
+ if (m->rm_leaf != tt) {
+ if (--m->rm_refs >= 0)
+ goto on1;
+ }
+ /* tt is currently the head of the possible multipath chain */
+ if (m->rm_refs > 0) {
+ if (tt->rn_dupedkey == NULL ||
+ tt->rn_dupedkey->rn_mklist != m) {
+ log(LOG_ERR, "rn_delete: inconsistent "
+ "dupedkey list\n");
+ return (0);
+ }
+ m->rm_leaf = tt->rn_dupedkey;
+ --m->rm_refs;
+ goto on1;
+ }
+ /* else tt is last and only route */
} else {
- if (m->rm_mask != tt->rn_mask) {
- log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+ if (m->rm_mask != tt->rn_mask)
goto on1;
- }
if (--m->rm_refs >= 0)
goto on1;
}
@@ -875,15 +926,10 @@
else
t->rn_right = x;
} else {
- /* find node in front of tt on the chain */
- for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
- p = p->rn_dupedkey;
- if (p) {
- p->rn_dupedkey = tt->rn_dupedkey;
- if (tt->rn_dupedkey) /* parent */
- tt->rn_dupedkey->rn_parent = p;
- /* parent */
- } else log(LOG_ERR, "rn_delete: couldn't find us\n");
+ x = saved_tt;
+ t->rn_dupedkey = tt->rn_dupedkey;
+ if (tt->rn_dupedkey)
+ tt->rn_dupedkey->rn_parent = t;
}
t = tt + 1;
if (t->rn_flags & RNF_ACTIVE) {
@@ -931,8 +977,16 @@
if (m == x->rn_mklist) {
struct radix_mask *mm = m->rm_mklist;
x->rn_mklist = 0;
- if (--(m->rm_refs) < 0)
+ if (--(m->rm_refs) < 0) {
MKFree(m);
+ } else if (m->rm_flags & RNF_NORMAL) {
+ /*
+ * don't progress because this
+ * a multipath route. Next
+ * route will use the same m.
+ */
+ mm = m;
+ }
m = mm;
}
if (m)
@@ -1107,7 +1161,7 @@
rn = rn->rn_left;
next = rn;
/* Process leaves */
- while ((rn = base)) {
+ while ((rn = base) != NULL) {
base = rn->rn_dupedkey;
if (!(rn->rn_flags & RNF_ROOT)
&& (error = (*f)(rn, w)))
diff -u -r sys_org/net/radix.h /router/usr/src/sys/net/radix.h
--- sys_org/net/radix.h 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix.h 2012-10-29 16:15:23.000000000 +0100
@@ -116,7 +116,8 @@
(void *v, void *mask,
struct radix_node_head *head, struct radix_node nodes[]);
struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */
- (void *v, void *mask, struct radix_node_head *head);
+ (void *v, void *mask, struct radix_node_head *head,
+ struct radix_node *rn);
struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
(void *v, void *mask, struct radix_node_head *head);
struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */
@@ -169,7 +170,8 @@
*rn_addmask(void *, int, int),
*rn_addroute (void *, void *, struct radix_node_head *,
struct radix_node [2]),
- *rn_delete(void *, void *, struct radix_node_head *),
+ *rn_delete(void *, void *, struct radix_node_head *,
+ struct radix_node *),
*rn_lookup (void *v_arg, void *m_arg,
struct radix_node_head *head),
*rn_match(void *, struct radix_node_head *);
diff -u -r sys_org/net/radix_mpath.c /router/usr/src/sys/net/radix_mpath.c
--- sys_org/net/radix_mpath.c 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix_mpath.c 2012-10-30 01:33:18.000000000 +0100
@@ -77,20 +77,6 @@
return NULL;
}
-uint32_t
-rn_mpath_count(struct radix_node *rn)
-{
- uint32_t i = 0;
- struct rtentry *rt;
-
- while (rn != NULL) {
- rt = (struct rtentry *)rn;
- i += rt->rt_rmx.rmx_weight;
- rn = rn_mpath_next(rn);
- }
- return (i);
-}
-
struct rtentry *
rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
{
@@ -122,33 +108,6 @@
return (struct rtentry *)rn;
}
-/*
- * go through the chain and unlink "rt" from the list
- * the caller will free "rt"
- */
-int
-rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt)
-{
- struct radix_node *t, *tt;
-
- if (!headrt || !rt)
- return (0);
- t = (struct radix_node *)headrt;
- tt = rn_mpath_next(t);
- while (tt) {
- if (tt == (struct radix_node *)rt) {
- t->rn_dupedkey = tt->rn_dupedkey;
- tt->rn_dupedkey = NULL;
- tt->rn_flags &= ~RNF_ACTIVE;
- tt[1].rn_flags &= ~RNF_ACTIVE;
- return (1);
- }
- t = tt;
- tt = rn_mpath_next((struct radix_node *)t);
- }
- return (0);
-}
-
/*
* check if we have the same key/mask/gateway on the table already.
*/
@@ -256,12 +215,21 @@
}
void
-rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
+rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) {
+ rtalloc_mpath_fib_flags( ro, hash, fibnum, 0);
+}
+
+/*
+ * flag RTF_GATEWAY returns only interface routes,
+ * only one interface-route is possible
+ */
+void
+rtalloc_mpath_fib_flags(struct route *ro, uint32_t hash, u_int fibnum, int flags)
{
struct radix_node *rn0, *rn;
- u_int32_t n;
+ u_int32_t n = 0;
struct rtentry *rt;
- int64_t weight;
+ int64_t lowest_weight;
/*
* XXX we don't attempt to lookup cached route again; what should
@@ -269,29 +237,52 @@
*/
if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
&& RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
- return;
+ return;
ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);
/* if the route does not exist or it is not multipath, don't care */
if (ro->ro_rt == NULL)
return;
if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
+ if (flags & RTF_GATEWAY)
+ return;
RT_UNLOCK(ro->ro_rt);
return;
}
/* beyond here, we use rn as the master copy */
rn0 = rn = (struct radix_node *)ro->ro_rt;
- n = rn_mpath_count(rn0);
- /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
+ /* find count of lowest weight route */
+ for (rt = ro->ro_rt, lowest_weight = 9223372036854775807; rn != NULL;){
+ if( rt->rt_flags & RTF_UP) {
+ if ((flags & RTF_GATEWAY) &&
+ (!(rt->rt_flags & RTF_GATEWAY)) &&
+ (!(rt->rt_flags & RTF_HOST)) )
+ goto end; /* only 1 interface route possible! */
+ if( lowest_weight > rt->rt_rmx.rmx_weight) {
+ lowest_weight = rt->rt_rmx.rmx_weight;
+ n = 1;
+ } else if( lowest_weight == rt->rt_rmx.rmx_weight)
+ n++;
+ }
+ if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
+ break;
+ rn = rn->rn_dupedkey;
+ rt = (struct rtentry *)rn;
+ }
+ /* select now one of the lowest weight routes */
+ /* gw selection by Modulo-N Hash (RFC2991) */
hash += hashjitter;
hash %= n;
- for (weight = abs((int32_t)hash), rt = ro->ro_rt;
- weight >= rt->rt_rmx.rmx_weight && rn;
- weight -= rt->rt_rmx.rmx_weight) {
-
- /* stay within the multipath routes */
+ for ( rt = ro->ro_rt, rn = rn0, n = 0; rn != NULL; ) {
+ if( rt->rt_flags & RTF_UP) {
+ if ( rt->rt_rmx.rmx_weight == lowest_weight) {
+ if (n == hash)
+ break;
+ n++;
+ }
+ }
if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
break;
rn = rn->rn_dupedkey;
@@ -300,19 +291,22 @@
/* XXX try filling rt_gwroute and avoid unreachable gw */
/* gw selection has failed - there must be only zero weight routes */
- if (!rn) {
+ if (!rn || (flags & RTF_GATEWAY)) {
RT_UNLOCK(ro->ro_rt);
ro->ro_rt = NULL;
return;
}
+
+end:
if (ro->ro_rt != rt) {
RTFREE_LOCKED(ro->ro_rt);
ro->ro_rt = (struct rtentry *)rn;
RT_LOCK(ro->ro_rt);
RT_ADDREF(ro->ro_rt);
- }
- RT_UNLOCK(ro->ro_rt);
+ }
+ if (!(flags & RTF_GATEWAY))
+ RT_UNLOCK(ro->ro_rt);
}
extern int in6_inithead(void **head, int off);
diff -u -r sys_org/net/radix_mpath.h /router/usr/src/sys/net/radix_mpath.h
--- sys_org/net/radix_mpath.h 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix_mpath.h 2012-10-30 01:33:28.000000000 +0100
@@ -46,12 +46,12 @@
struct sockaddr;
int rn_mpath_capable(struct radix_node_head *);
struct radix_node *rn_mpath_next(struct radix_node *);
-u_int32_t rn_mpath_count(struct radix_node *);
struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *);
int rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
struct sockaddr *);
void rtalloc_mpath_fib(struct route *, u_int32_t, u_int);
#define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0)
+void rtalloc_mpath_fib_flags(struct route *, u_int32_t, u_int, int);
struct radix_node *rn_mpath_lookup(void *, void *,
struct radix_node_head *);
int rt_mpath_deldup(struct rtentry *, struct rtentry *);
diff -u -r sys_org/net/route.c /router/usr/src/sys/net/route.c
--- sys_org/net/route.c 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/route.c 2012-11-08 15:24:13.000000000 +0100
@@ -904,7 +904,7 @@
* Remove the item from the tree; it should be there,
* but when callers invoke us blindly it may not (sigh).
*/
- rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
+ rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh, NULL);
if (rn == NULL) {
error = ESRCH;
goto bad;
@@ -942,112 +942,6 @@
return (error);
}
-#ifdef RADIX_MPATH
-static int
-rn_mpath_update(int req, struct rt_addrinfo *info,
- struct radix_node_head *rnh, struct rtentry **ret_nrt)
-{
- /*
- * if we got multipath routes, we require users to specify
- * a matching RTAX_GATEWAY.
- */
- struct rtentry *rt, *rto = NULL;
- register struct radix_node *rn;
- int error = 0;
-
- rn = rnh->rnh_matchaddr(dst, rnh);
- if (rn == NULL)
- return (ESRCH);
- rto = rt = RNTORT(rn);
- rt = rt_mpath_matchgate(rt, gateway);
- if (rt == NULL)
- return (ESRCH);
- /*
- * this is the first entry in the chain
- */
- if (rto == rt) {
- rn = rn_mpath_next((struct radix_node *)rt);
- /*
- * there is another entry, now it's active
- */
- if (rn) {
- rto = RNTORT(rn);
- RT_LOCK(rto);
- rto->rt_flags |= RTF_UP;
- RT_UNLOCK(rto);
- } else if (rt->rt_flags & RTF_GATEWAY) {
- /*
- * For gateway routes, we need to
- * make sure that we we are deleting
- * the correct gateway.
- * rt_mpath_matchgate() does not
- * check the case when there is only
- * one route in the chain.
- */
- if (gateway &&
- (rt->rt_gateway->sa_len != gateway->sa_len ||
- memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
- error = ESRCH;
- else {
- /*
- * remove from tree before returning it
- * to the caller
- */
- rn = rnh->rnh_deladdr(dst, netmask, rnh);
- KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
- goto gwdelete;
- }
-
- }
- /*
- * use the normal delete code to remove
- * the first entry
- */
- if (req != RTM_DELETE)
- goto nondelete;
-
- error = ENOENT;
- goto done;
- }
-
- /*
- * if the entry is 2nd and on up
- */
- if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
- panic ("rtrequest1: rt_mpath_deldup");
-gwdelete:
- RT_LOCK(rt);
- RT_ADDREF(rt);
- if (req == RTM_DELETE) {
- rt->rt_flags &= ~RTF_UP;
- /*
- * One more rtentry floating around that is not
- * linked to the routing table. rttrash will be decremented
- * when RTFREE(rt) is eventually called.
- */
- V_rttrash++;
- }
-
-nondelete:
- if (req != RTM_DELETE)
- panic("unrecognized request %d", req);
-
-
- /*
- * If the caller wants it, then it can have it,
- * but it's up to it to free the rtentry as we won't be
- * doing it.
- */
- if (ret_nrt) {
- *ret_nrt = rt;
- RT_UNLOCK(rt);
- } else
- RTFREE_LOCKED(rt);
-done:
- return (error);
-}
-#endif
-
int
rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
u_int fibnum)
@@ -1100,23 +994,26 @@
rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
dst = (struct sockaddr *)&mdst;
}
+ if ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL)
+ senderr(ESRCH);
+ rt = RNTORT(rn);
#ifdef RADIX_MPATH
+ /*
+ * if we got multipath routes, we require users to specify
+ * a matching RTAX_GATEWAY.
+ */
if (rn_mpath_capable(rnh)) {
- error = rn_mpath_update(req, info, rnh, ret_nrt);
- /*
- * "bad" holds true for the success case
- * as well
- */
- if (error != ENOENT)
- goto bad;
- error = 0;
+ rt = rt_mpath_matchgate( rt, gateway);
+ rn = (struct radix_node *)rt;
+ if (!rt)
+ senderr(ESRCH);
}
#endif
/*
* Remove the item from the tree and return it.
* Complain if it is not there and do no more processing.
*/
- rn = rnh->rnh_deladdr(dst, netmask, rnh);
+ rn = rnh->rnh_deladdr(dst, netmask, rnh, rn);
if (rn == NULL)
senderr(ESRCH);
if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
@@ -1212,7 +1109,7 @@
rt->rt_ifa = ifa;
rt->rt_ifp = ifa->ifa_ifp;
rt->rt_rmx.rmx_weight = 1;
-
+
#ifdef RADIX_MPATH
/* do not permit exactly the same dst/mask/gw pair */
if (rn_mpath_capable(rnh) &&
@@ -1373,7 +1270,7 @@
*/
if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
caddr_t new;
-
+
R_Malloc(new, caddr_t, dlen + glen);
if (new == NULL)
return ENOBUFS;
@@ -1506,9 +1403,8 @@
RADIX_NODE_HEAD_LOCK(rnh);
#ifdef RADIX_MPATH
if (rn_mpath_capable(rnh)) {
-
- rn = rnh->rnh_matchaddr(dst, rnh);
- if (rn == NULL)
+ rn = rnh->rnh_lookup(dst, netmask, rnh);
+ if (rn == NULL)
error = ESRCH;
else {
rt = RNTORT(rn);
@@ -1523,6 +1419,7 @@
ifa->ifa_addr);
if (!rt)
error = ESRCH;
+ rn = (struct radix_node *)rt;
}
}
else
diff -u -r sys_org/netatalk/at_rmx.c /router/usr/src/sys/netatalk/at_rmx.c
--- sys_org/netatalk/at_rmx.c 2012-11-08 15:15:09.000000000 +0100
+++ /router/usr/src/sys/netatalk/at_rmx.c 2012-10-29 16:20:11.000000000 +0100
@@ -91,10 +91,10 @@
}
static struct radix_node *
-at_delroute(void *v_arg, void *netmask_arg, struct radix_node_head *head)
+at_delroute(void *v_arg, void *netmask_arg, struct radix_node_head *head, struct radix_node *rn)
{
- return (rn_delete(v_arg, netmask_arg, head));
+ return (rn_delete(v_arg, netmask_arg, head, rn));
}
/*
diff -u -r sys_org/netinet/in.c /router/usr/src/sys/netinet/in.c
--- sys_org/netinet/in.c 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/netinet/in.c 2012-10-30 03:09:39.000000000 +0100
@@ -1397,13 +1397,23 @@
in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
{
struct rtentry *rt;
+#ifdef RADIX_MPATH
+ struct route ro;
+#endif
KASSERT(l3addr->sa_family == AF_INET,
("sin_family %d", l3addr->sa_family));
+#ifdef RADIX_MPATH
+ /* ensure to select a interface route */
+ bzero( &ro, sizeof(ro));
+ bcopy( __DECONST(struct sockaddr *, l3addr), &ro.ro_dst, sizeof(struct sockaddr));
+ rtalloc_mpath_fib_flags( (struct route *)&ro, 0, RT_DEFAULT_FIB, RTF_GATEWAY);
+ rt = ro.ro_rt;
+#else
/* XXX rtalloc1 should take a const param */
rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
-
+#endif
if (rt == NULL)
return (EINVAL);
diff -u -r sys_org/netinet/ip_fastfwd.c /router/usr/src/sys/netinet/ip_fastfwd.c
--- sys_org/netinet/ip_fastfwd.c 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/netinet/ip_fastfwd.c 2012-11-08 15:32:49.000000000 +0100
@@ -78,6 +78,7 @@
#include "opt_ipfw.h"
#include "opt_ipstealth.h"
+#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -113,7 +115,11 @@
&VNET_NAME(ipfastforward_active), 0, "Enable fast IP forwarding");
static struct sockaddr_in *
+#ifdef RADIX_MPATH
+ip_findroute(struct route *ro, uint32_t hash, struct in_addr dest, struct mbuf *m)
+#else
ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
+#endif
{
struct sockaddr_in *dst;
struct rtentry *rt;
@@ -126,7 +132,11 @@
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr.s_addr = dest.s_addr;
+#ifdef RADIX_MPATH
+ rtalloc_mpath_fib(ro, hash, M_GETFIB(m));
+#else
in_rtalloc_ign(ro, 0, M_GETFIB(m));
+#endif
/*
* Route there and interface still up?
@@ -420,7 +440,12 @@
/*
* Find route to destination.
*/
+#ifdef RADIX_MPATH
+ if ((dst = ip_findroute(&ro, ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
+ dest, m)) == NULL)
+#else
if ((dst = ip_findroute(&ro, dest, m)) == NULL)
+#endif
return NULL; /* icmp unreach already sent */
ifp = ro.ro_rt->rt_ifp;
@@ -491,7 +516,13 @@
}
#endif /* IPFIREWALL_FORWARD */
RTFREE(ro.ro_rt);
+#ifdef RADIX_MPATH
+ if ((dst = ip_findroute(&ro,
+ ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), dest, m))
+ == NULL)
+#else
if ((dst = ip_findroute(&ro, dest, m)) == NULL)
+#endif
return NULL; /* icmp unreach already sent */
ifp = ro.ro_rt->rt_ifp;
}
diff -u -r sys_org/netinet/ipfw/ip_fw_table.c /router/usr/src/sys/netinet/ipfw/ip_fw_table.c
--- sys_org/netinet/ipfw/ip_fw_table.c 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/netinet/ipfw/ip_fw_table.c 2012-10-29 16:07:26.000000000 +0100
@@ -379,7 +379,7 @@
return (EINVAL);
}
- ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh);
+ ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh, NULL);
IPFW_WUNLOCK(ch);
if (ent == NULL)
@@ -396,7 +396,7 @@
struct table_entry *ent;
ent = (struct table_entry *)
- rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
+ rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh, NULL);
if (ent != NULL)
free(ent, M_IPFW_TBL);
return (0);
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-bugs
mailing list