svn commit: r185747 - in head/sys: contrib/pf/net kern net netinet6

Kip Macy kmacy at FreeBSD.org
Sun Dec 7 13:15:44 PST 2008


Author: kmacy
Date: Sun Dec  7 21:15:43 2008
New Revision: 185747
URL: http://svn.freebsd.org/changeset/base/185747

Log:
   - convert radix node head lock from mutex to rwlock
   - make radix node head lock not recursive
   - fix LOR in rtexpunge
   - fix LOR in rtredirect
  
  Reviewed by:	sam

Modified:
  head/sys/contrib/pf/net/pf_table.c
  head/sys/kern/subr_witness.c
  head/sys/kern/vfs_export.c
  head/sys/net/radix.c
  head/sys/net/radix.h
  head/sys/net/route.c
  head/sys/net/route.h
  head/sys/net/rtsock.c
  head/sys/netinet6/in6_rmx.c
  head/sys/netinet6/nd6_rtr.c

Modified: head/sys/contrib/pf/net/pf_table.c
==============================================================================
--- head/sys/contrib/pf/net/pf_table.c	Sun Dec  7 19:42:20 2008	(r185746)
+++ head/sys/contrib/pf/net/pf_table.c	Sun Dec  7 21:15:43 2008	(r185747)
@@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/socket.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
 #ifdef __FreeBSD__
 #include <sys/malloc.h>
 #endif

Modified: head/sys/kern/subr_witness.c
==============================================================================
--- head/sys/kern/subr_witness.c	Sun Dec  7 19:42:20 2008	(r185746)
+++ head/sys/kern/subr_witness.c	Sun Dec  7 21:15:43 2008	(r185747)
@@ -505,7 +505,7 @@ static struct witness_order_list_entry o
 	 * Routing
 	 */
 	{ "so_rcv", &lock_class_mtx_sleep },
-	{ "radix node head", &lock_class_mtx_sleep },
+	{ "radix node head", &lock_class_rw },
 	{ "rtentry", &lock_class_mtx_sleep },
 	{ "ifaddr", &lock_class_mtx_sleep },
 	{ NULL, NULL },

Modified: head/sys/kern/vfs_export.c
==============================================================================
--- head/sys/kern/vfs_export.c	Sun Dec  7 19:42:20 2008	(r185746)
+++ head/sys/kern/vfs_export.c	Sun Dec  7 21:15:43 2008	(r185747)
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
+#include <sys/rwlock.h>
 #include <sys/refcount.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
@@ -425,10 +426,10 @@ vfs_export_lookup(struct mount *mp, stru
 			saddr = nam;
 			rnh = nep->ne_rtable[saddr->sa_family];
 			if (rnh != NULL) {
-				RADIX_NODE_HEAD_LOCK(rnh);
+				RADIX_NODE_HEAD_RLOCK(rnh);
 				np = (struct netcred *)
 				    (*rnh->rnh_matchaddr)(saddr, rnh);
-				RADIX_NODE_HEAD_UNLOCK(rnh);
+				RADIX_NODE_HEAD_RUNLOCK(rnh);
 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
 					np = NULL;
 			}

Modified: head/sys/net/radix.c
==============================================================================
--- head/sys/net/radix.c	Sun Dec  7 19:42:20 2008	(r185746)
+++ head/sys/net/radix.c	Sun Dec  7 21:15:43 2008	(r185747)
@@ -38,6 +38,7 @@
 #ifdef	_KERNEL
 #include <sys/lock.h>
 #include <sys/mutex.h>
+#include <sys/rwlock.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/domain.h>

Modified: head/sys/net/radix.h
==============================================================================
--- head/sys/net/radix.h	Sun Dec  7 19:42:20 2008	(r185746)
+++ head/sys/net/radix.h	Sun Dec  7 21:15:43 2008	(r185747)
@@ -36,6 +36,7 @@
 #ifdef _KERNEL
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
+#include <sys/_rwlock.h>
 #endif
 
 #ifdef MALLOC_DECLARE
@@ -132,7 +133,7 @@ struct radix_node_head {
 	struct	radix_node rnh_nodes[3];	/* empty tree for common case */
 	int	rnh_multipath;			/* multipath capable ? */
 #ifdef _KERNEL
-	struct	mtx rnh_mtx;			/* locks entire radix tree */
+	struct	rwlock rnh_lock;		/* locks entire radix tree */
 #endif
 };
 
@@ -146,11 +147,17 @@ struct radix_node_head {
 #define Free(p) free((caddr_t)p, M_RTABLE);
 
 #define	RADIX_NODE_HEAD_LOCK_INIT(rnh)	\
-    mtx_init(&(rnh)->rnh_mtx, "radix node head", NULL, MTX_DEF | MTX_RECURSE)
-#define	RADIX_NODE_HEAD_LOCK(rnh)	mtx_lock(&(rnh)->rnh_mtx)
-#define	RADIX_NODE_HEAD_UNLOCK(rnh)	mtx_unlock(&(rnh)->rnh_mtx)
-#define	RADIX_NODE_HEAD_DESTROY(rnh)	mtx_destroy(&(rnh)->rnh_mtx)
-#define	RADIX_NODE_HEAD_LOCK_ASSERT(rnh) mtx_assert(&(rnh)->rnh_mtx, MA_OWNED)
+    rw_init_flags(&(rnh)->rnh_lock, "radix node head", 0)
+#define	RADIX_NODE_HEAD_LOCK(rnh)	rw_wlock(&(rnh)->rnh_lock)
+#define	RADIX_NODE_HEAD_UNLOCK(rnh)	rw_wunlock(&(rnh)->rnh_lock)
+#define	RADIX_NODE_HEAD_RLOCK(rnh)	rw_rlock(&(rnh)->rnh_lock)
+#define	RADIX_NODE_HEAD_RUNLOCK(rnh)	rw_runlock(&(rnh)->rnh_lock)
+#define	RADIX_NODE_HEAD_LOCK_TRY_UPGRADE(rnh)	rw_try_upgrade(&(rnh)->rnh_lock)
+
+
+#define	RADIX_NODE_HEAD_DESTROY(rnh)	rw_destroy(&(rnh)->rnh_lock)
+#define	RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_LOCKED)
+#define	RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED)
 #endif /* _KERNEL */
 
 void	 rn_init(void);

Modified: head/sys/net/route.c
==============================================================================
--- head/sys/net/route.c	Sun Dec  7 19:42:20 2008	(r185746)
+++ head/sys/net/route.c	Sun Dec  7 21:15:43 2008	(r185747)
@@ -45,6 +45,7 @@
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
+#include <sys/syslog.h>
 #include <sys/sysproto.h>
 #include <sys/proc.h>
 #include <sys/domain.h>
@@ -269,7 +270,8 @@ rtalloc1_fib(struct sockaddr *dst, int r
 	struct rtentry *newrt;
 	struct rt_addrinfo info;
 	u_long nflags;
-	int err = 0, msgtype = RTM_MISS;
+	int needresolve = 0, err = 0, msgtype = RTM_MISS;
+	int needlock;
 
 	KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
 	if (dst->sa_family != AF_INET)	/* Only INET supports > 1 fib now */
@@ -283,59 +285,92 @@ rtalloc1_fib(struct sockaddr *dst, int r
 		V_rtstat.rts_unreach++;
 		goto miss2;
 	}
-	RADIX_NODE_HEAD_LOCK(rnh);
-	if ((rn = rnh->rnh_matchaddr(dst, rnh)) &&
-	    (rn->rn_flags & RNF_ROOT) == 0) {
-		/*
-		 * If we find it and it's not the root node, then
-		 * get a reference on the rtentry associated.
-		 */
+	needlock = !(ignflags & RTF_RNH_LOCKED);
+retry:
+	if (needlock)
+		RADIX_NODE_HEAD_RLOCK(rnh);
+#ifdef INVARIANTS	
+	else
+		RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
+#endif
+	rn = rnh->rnh_matchaddr(dst, rnh);
+	if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
+
 		newrt = rt = RNTORT(rn);
 		nflags = rt->rt_flags & ~ignflags;
 		if (report && (nflags & RTF_CLONING)) {
-			/*
-			 * We are apparently adding (report = 0 in delete).
-			 * If it requires that it be cloned, do so.
-			 * (This implies it wasn't a HOST route.)
-			 */
-			err = rtrequest_fib(RTM_RESOLVE, dst, NULL,
-					      NULL, 0, &newrt, fibnum);
-			if (err) {
+			if (needlock && !RADIX_NODE_HEAD_LOCK_TRY_UPGRADE(rnh)) {
+				RADIX_NODE_HEAD_RUNLOCK(rnh);
+				RADIX_NODE_HEAD_LOCK(rnh);
 				/*
-				 * If the cloning didn't succeed, maybe
-				 * what we have will do. Return that.
+				 * lookup again to make sure it wasn't changed
 				 */
-				newrt = rt;		/* existing route */
-				RT_LOCK(newrt);
-				RT_ADDREF(newrt);
-				goto miss;
-			}
-			KASSERT(newrt, ("no route and no error"));
-			RT_LOCK(newrt);
-			if (newrt->rt_flags & RTF_XRESOLVE) {
-				/*
-				 * If the new route specifies it be
-				 * externally resolved, then go do that.
-				 */
-				msgtype = RTM_RESOLVE;
-				goto miss;
-			}
-			/* Inform listeners of the new route. */
-			bzero(&info, sizeof(info));
-			info.rti_info[RTAX_DST] = rt_key(newrt);
-			info.rti_info[RTAX_NETMASK] = rt_mask(newrt);
-			info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway;
-			if (newrt->rt_ifp != NULL) {
-				info.rti_info[RTAX_IFP] =
-				    newrt->rt_ifp->if_addr->ifa_addr;
-				info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr;
+				rn = rnh->rnh_matchaddr(dst, rnh);
+				if (!(rn && ((rn->rn_flags & RNF_ROOT) == 0))) {
+					RADIX_NODE_HEAD_UNLOCK(rnh);
+					needresolve = 0;
+					log(LOG_INFO, "retrying route lookup ...\n");
+					goto retry;
+				}
 			}
-			rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0);
+			needresolve = 1;
 		} else {
 			RT_LOCK(newrt);
 			RT_ADDREF(newrt);
+			if (needlock)
+				RADIX_NODE_HEAD_RUNLOCK(rnh);
+			goto done;
 		}
-		RADIX_NODE_HEAD_UNLOCK(rnh);
+	}
+	/*
+	 * if needresolve is set then we have the exclusive lock
+	 *  and we need to keep it held for the benefit of rtrequest_fib
+	 */
+	if (!needresolve && needlock)
+		RADIX_NODE_HEAD_RUNLOCK(rnh);
+	
+	if (needresolve) {
+		RADIX_NODE_HEAD_WLOCK_ASSERT(rnh);
+		/*
+		 * We are apparently adding (report = 0 in delete).
+		 * If it requires that it be cloned, do so.
+		 * (This implies it wasn't a HOST route.)
+		 */
+		err = rtrequest_fib(RTM_RESOLVE, dst, NULL,
+		    NULL, RTF_RNH_LOCKED, &newrt, fibnum);
+		if (err) {
+			/*
+			 * If the cloning didn't succeed, maybe
+			 * what we have will do. Return that.
+			 */
+			newrt = rt;		/* existing route */
+			RT_LOCK(newrt);
+			RT_ADDREF(newrt);
+			goto miss;
+		}
+		KASSERT(newrt, ("no route and no error"));
+		RT_LOCK(newrt);
+		if (newrt->rt_flags & RTF_XRESOLVE) {
+			/*
+			 * If the new route specifies it be
+			 * externally resolved, then go do that.
+			 */
+			msgtype = RTM_RESOLVE;
+			goto miss;
+		}
+		/* Inform listeners of the new route. */
+		bzero(&info, sizeof(info));
+		info.rti_info[RTAX_DST] = rt_key(newrt);
+		info.rti_info[RTAX_NETMASK] = rt_mask(newrt);
+		info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway;
+		if (newrt->rt_ifp != NULL) {
+			info.rti_info[RTAX_IFP] =
+			    newrt->rt_ifp->if_addr->ifa_addr;
+			info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr;
+		}
+		rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0);
+		if (needlock)
+			RADIX_NODE_HEAD_UNLOCK(rnh);
 	} else {
 		/*
 		 * Either we hit the root or couldn't find any match,
@@ -344,7 +379,8 @@ rtalloc1_fib(struct sockaddr *dst, int r
 		 */
 		V_rtstat.rts_unreach++;
 	miss:
-		RADIX_NODE_HEAD_UNLOCK(rnh);
+		if (needlock && needresolve)
+			RADIX_NODE_HEAD_UNLOCK(rnh);
 	miss2:	if (report) {
 			/*
 			 * If required, report the failure to the supervising
@@ -356,6 +392,7 @@ rtalloc1_fib(struct sockaddr *dst, int r
 			rt_missmsg(msgtype, &info, 0, err);
 		}
 	}
+done:
 	if (newrt)
 		RT_LOCK_ASSERT(newrt);
 	return (newrt);
@@ -475,6 +512,8 @@ rtredirect_fib(struct sockaddr *dst,
 	short *stat = NULL;
 	struct rt_addrinfo info;
 	struct ifaddr *ifa;
+	struct radix_node_head *rnh =
+	    V_rt_tables[rt->rt_fibnum][dst->sa_family];
 
 	/* verify the gateway is directly reachable */
 	if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
@@ -524,14 +563,17 @@ rtredirect_fib(struct sockaddr *dst,
 			info.rti_info[RTAX_NETMASK] = netmask;
 			info.rti_ifa = ifa;
 			info.rti_flags = flags;
+			if (rt0 != NULL)
+				RT_UNLOCK(rt0);	/* drop lock to avoid LOR with RNH */
 			error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
 			if (rt != NULL) {
 				RT_LOCK(rt);
-				EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
+				if (rt0 != NULL)
+					EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
 				flags = rt->rt_flags;
 			}
-			if (rt0)
-				RTFREE_LOCKED(rt0);
+			if (rt0 != NULL)
+				RTFREE(rt0);
 			
 			stat = &V_rtstat.rts_dynamic;
 		} else {
@@ -547,8 +589,12 @@ rtredirect_fib(struct sockaddr *dst,
 			/*
 			 * add the key and gateway (in one malloc'd chunk).
 			 */
+			RT_UNLOCK(rt);
+			RADIX_NODE_HEAD_LOCK(rnh);
+			RT_LOCK(rt);
 			rt_setgate(rt, rt_key(rt), gateway);
-			gwrt = rtalloc1(gateway, 1, 0);
+			gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED);
+			RADIX_NODE_HEAD_UNLOCK(rnh);
 			EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
 			RTFREE_LOCKED(gwrt);
 		}
@@ -782,7 +828,9 @@ rtexpunge(struct rtentry *rt)
 	struct ifaddr *ifa;
 	int error = 0;
 
+	rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
 	RT_LOCK_ASSERT(rt);
+	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
 #if 0
 	/*
 	 * We cannot assume anything about the reference count
@@ -798,8 +846,6 @@ rtexpunge(struct rtentry *rt)
 	if (rnh == NULL)
 		return (EAFNOSUPPORT);
 
-	RADIX_NODE_HEAD_LOCK(rnh);
-
 	/*
 	 * Remove the item from the tree; it should be there,
 	 * but when callers invoke us blindly it may not (sigh).
@@ -854,7 +900,6 @@ rtexpunge(struct rtentry *rt)
 	 */
 	V_rttrash++;
 bad:
-	RADIX_NODE_HEAD_UNLOCK(rnh);
 	return (error);
 }
 
@@ -869,7 +914,7 @@ rtrequest1_fib(int req, struct rt_addrin
 				u_int fibnum)
 {
 	INIT_VNET_NET(curvnet);
-	int error = 0;
+	int error = 0, needlock = 0;
 	register struct rtentry *rt;
 	register struct radix_node *rn;
 	register struct radix_node_head *rnh;
@@ -886,7 +931,10 @@ rtrequest1_fib(int req, struct rt_addrin
 	rnh = V_rt_tables[fibnum][dst->sa_family];
 	if (rnh == NULL)
 		return (EAFNOSUPPORT);
-	RADIX_NODE_HEAD_LOCK(rnh);
+	needlock = ((flags & RTF_RNH_LOCKED) == 0);
+	flags &= ~RTF_RNH_LOCKED;
+	if (needlock)
+		RADIX_NODE_HEAD_LOCK(rnh);
 	/*
 	 * If we are adding a host route then we don't want to put
 	 * a netmask in the tree, nor do we want to clone it.
@@ -1200,7 +1248,8 @@ deldone:
 		error = EOPNOTSUPP;
 	}
 bad:
-	RADIX_NODE_HEAD_UNLOCK(rnh);
+	if (needlock)
+		RADIX_NODE_HEAD_UNLOCK(rnh);
 	return (error);
 #undef senderr
 }
@@ -1307,7 +1356,8 @@ rt_setgate(struct rtentry *rt, struct so
 
 again:
 	RT_LOCK_ASSERT(rt);
-
+	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
+	
 	/*
 	 * A host route with the destination equal to the gateway
 	 * will interfere with keeping LLINFO in the routing
@@ -1333,7 +1383,7 @@ again:
 		struct rtentry *gwrt;
 
 		RT_UNLOCK(rt);		/* XXX workaround LOR */
-		gwrt = rtalloc1_fib(gate, 1, 0, rt->rt_fibnum);
+		gwrt = rtalloc1_fib(gate, 1, RTF_RNH_LOCKED, rt->rt_fibnum);
 		if (gwrt == rt) {
 			RT_REMREF(rt);
 			return (EADDRINUSE); /* failure */
@@ -1404,12 +1454,8 @@ again:
 
 		arg.rnh = rnh;
 		arg.rt0 = rt;
-		RT_UNLOCK(rt);		/* XXX workaround LOR */
-		RADIX_NODE_HEAD_LOCK(rnh);
-		RT_LOCK(rt);
 		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
 				       rt_fixchange, &arg);
-		RADIX_NODE_HEAD_UNLOCK(rnh);
 	}
 
 	return 0;

Modified: head/sys/net/route.h
==============================================================================
--- head/sys/net/route.h	Sun Dec  7 19:42:20 2008	(r185746)
+++ head/sys/net/route.h	Sun Dec  7 21:15:43 2008	(r185747)
@@ -196,6 +196,7 @@ struct ortentry {
 #define	RTF_BROADCAST	0x400000	/* route represents a bcast address */
 #define	RTF_MULTICAST	0x800000	/* route represents a mcast address */
 					/* 0x1000000 and up unassigned */
+#define	RTF_RNH_LOCKED	 0x40000000	/* radix node head locked by caller */
 
 /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
 #define RTF_FMASK	\

Modified: head/sys/net/rtsock.c
==============================================================================
--- head/sys/net/rtsock.c	Sun Dec  7 19:42:20 2008	(r185746)
+++ head/sys/net/rtsock.c	Sun Dec  7 21:15:43 2008	(r185747)
@@ -43,6 +43,7 @@
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
+#include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
@@ -554,11 +555,11 @@ route_output(struct mbuf *m, struct sock
 		rnh = V_rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family];
 		if (rnh == NULL)
 			senderr(EAFNOSUPPORT);
-		RADIX_NODE_HEAD_LOCK(rnh);
+		RADIX_NODE_HEAD_RLOCK(rnh);
 		rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
 			info.rti_info[RTAX_NETMASK], rnh);
 		if (rt == NULL) {	/* XXX looks bogus */
-			RADIX_NODE_HEAD_UNLOCK(rnh);
+			RADIX_NODE_HEAD_RUNLOCK(rnh);
 			senderr(ESRCH);
 		}
 #ifdef RADIX_MPATH
@@ -574,14 +575,14 @@ route_output(struct mbuf *m, struct sock
 		    (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
 			rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
 			if (!rt) {
-				RADIX_NODE_HEAD_UNLOCK(rnh);
+				RADIX_NODE_HEAD_RUNLOCK(rnh);
 				senderr(ESRCH);
 			}
 		}
 #endif
 		RT_LOCK(rt);
 		RT_ADDREF(rt);
-		RADIX_NODE_HEAD_UNLOCK(rnh);
+		RADIX_NODE_HEAD_RUNLOCK(rnh);
 
 		/* 
 		 * Fix for PR: 82974

Modified: head/sys/netinet6/in6_rmx.c
==============================================================================
--- head/sys/netinet6/in6_rmx.c	Sun Dec  7 19:42:20 2008	(r185746)
+++ head/sys/netinet6/in6_rmx.c	Sun Dec  7 21:15:43 2008	(r185747)
@@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/mbuf.h>
+#include <sys/rwlock.h>
 #include <sys/syslog.h>
 #include <sys/callout.h>
 #include <sys/vimage.h>

Modified: head/sys/netinet6/nd6_rtr.c
==============================================================================
--- head/sys/netinet6/nd6_rtr.c	Sun Dec  7 19:42:20 2008	(r185746)
+++ head/sys/netinet6/nd6_rtr.c	Sun Dec  7 21:15:43 2008	(r185747)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/errno.h>
+#include <sys/rwlock.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
 #include <sys/vimage.h>


More information about the svn-src-all mailing list