panic in rt_check_fib()
Julian Elischer
julian at elischer.org
Sun Sep 14 07:24:27 UTC 2008
Julian Elischer wrote:
> To recap on this, I rewrote this function a couple of weeks ago because I
> couldn't keep track of what was going on, and I thought it might
> have some bad edge cases. A couple of days later Giorgos contacted me
> saying that he had a fairly reproducible situation
> where this was triggered and it appeared to be an edge case in
> this function that allowed it to try to lock the same lock twice.
>
> I immediately thought "ah-hah!" I may have a solution to this,
> and gave him a copy of my new function and indeed it DOES fix that
> panic. However, after deleting and recreating interfaces a few hundred
> times without crashing in rt_check_fib() it then fails somewhere else
> (actually it leaks some resources and eventually networking stops).
>
> I'm not convinced that is a problem with the new or old rt_check() but
> it did stop me from just committing the new code.
>
> In rereading the way the function (did and still does) work, it
> occurred to me that there was a large flaw in the way it worked.
>
> It dropped the lock on one route while it went off and did something
> else that might block. On returning it blindly re-grabbed that lock,
> completely ignoring the fact that the route might not even be valid any
> more (or any of several other things that may have changed while
> it was away, maybe sleeping).
>
> the code Giorgos is referring to is a patch I suggested to him to
> fix this oversight and not the one that I originally tested and
> had suggested to fix the edge case.
>
> I do however ask that some other people look at this patch!
>
> Julian
>
here's the version of the patch I ended up with...
-------------- next part --------------
Index: route.c
===================================================================
--- route.c (revision 183012)
+++ route.c (working copy)
@@ -1676,18 +1676,39 @@
* *lrt0 points to the cached route to the final destination;
* *lrt is not meaningful;
* fibnum is the index to the correct network fib for this packet
+ * XXX maybe we should/could get it from (*lrt0)->rt_fibnum
+ * instead of bringing it in here? I guess it depends on the
+ * call graph of how we get here.
+ * (*lrt0 has no ref held on it by us so REMREF is not needed.
+ * Refs only account for major structural references and not usages,
+ * which is actually a bit of a problem.)
*
* === Operation ===
* If the route is marked down try to find a new route. If the route
* to the gateway is gone, try to setup a new route. Otherwise,
* if the route is marked for packets to be rejected, enforce that.
+ * Note that rtalloc returns an rtentry with an extra REF that we may
+ * need to lose.
*
* === On return ===
* *dst is unchanged;
* *lrt0 points to the (possibly new) route to the final destination
- * *lrt points to the route to the next hop
+ * *lrt points to the route to the next hop [LOCKED]
*
* Their values are meaningful ONLY if no error is returned.
+ *
+ * To follow this you have to remember that:
+ * RT_REMREF reduces the reference count by 1 but doesn't check it for 0 (!)
+ * RTFREE_LOCKED includes an RT_REMREF (or an rtfree if refs == 1)
+ * and an RT_UNLOCK
+ * RTFREE does an RT_LOCK and an RTFREE_LOCKED
+ * The gwroute pointer counts as a reference on the rtentry to which it points.
+ * so when we add it we use the ref that rtalloc gives us and when we lose it
+ * we need to remove the reference.
+ * RT_TEMP_UNLOCK does an RT_ADDREF before freeing the lock, and
+ * RT_RELOCK locks it (it can't have gone away due to the ref) and
+ * drops the ref, possibly freeing it (unlocking in the process) if the
+ * ref goes to 0. (and zeroing the pointer if we do).
*/
int
rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
@@ -1701,58 +1722,82 @@
{
struct rtentry *rt;
struct rtentry *rt0;
- int error;
KASSERT(*lrt0 != NULL, ("rt_check"));
- rt = rt0 = *lrt0;
+ rt0 = *lrt0;
+ rt = NULL;
/* NB: the locking here is tortuous... */
- RT_LOCK(rt);
- if ((rt->rt_flags & RTF_UP) == 0) {
- RT_UNLOCK(rt);
- rt = rtalloc1_fib(dst, 1, 0UL, fibnum);
- if (rt != NULL) {
- RT_REMREF(rt);
- /* XXX what about if change? */
- } else
+ RT_LOCK(rt0);
+retry:
+ if (rt0 && (rt0->rt_flags & RTF_UP) == 0) {
+ /* Current rt0 is useless, try get a replacement. */
+ RT_UNLOCK(rt0);
+ rt0 = NULL;
+ }
+ if (rt0 == NULL) {
+ rt0 = rtalloc1_fib(dst, 1, 0UL, fibnum);
+ if (rt0 == NULL) {
return (EHOSTUNREACH);
- rt0 = rt;
+ }
+ RT_REMREF(rt0); /* don't need the reference. */
}
- /* XXX BSD/OS checks dst->sa_family != AF_NS */
- if (rt->rt_flags & RTF_GATEWAY) {
- if (rt->rt_gwroute == NULL)
- goto lookup;
- rt = rt->rt_gwroute;
- RT_LOCK(rt); /* NB: gwroute */
- if ((rt->rt_flags & RTF_UP) == 0) {
- RTFREE_LOCKED(rt); /* unlock gwroute */
- rt = rt0;
- rt0->rt_gwroute = NULL;
- lookup:
- RT_UNLOCK(rt0);
-/* XXX MRT link level looked up in table 0 */
- rt = rtalloc1_fib(rt->rt_gateway, 1, 0UL, 0);
- if (rt == rt0) {
- RT_REMREF(rt0);
- RT_UNLOCK(rt0);
+
+ if (rt0->rt_flags & RTF_GATEWAY) {
+ if ((rt = rt0->rt_gwroute) != NULL) {
+ RT_LOCK(rt); /* NB: gwroute */
+ if ((rt->rt_flags & RTF_UP) == 0) {
+ /* gw route is dud. ignore/lose it */
+ RTFREE_LOCKED(rt); /* unref (&unlock) gwroute */
+ rt = rt0->rt_gwroute = NULL;
+ }
+ }
+
+ if (rt == NULL) { /* NOT AN ELSE CLAUSE */
+ RT_TEMP_UNLOCK(rt0); /* MUST return to undo this */
+ rt = rtalloc1_fib(rt0->rt_gateway, 1, 0UL, fibnum);
+ if ((rt == rt0) || (rt == NULL)) {
+ /* the best we can do is not good enough */
+ if (rt) {
+ RT_REMREF(rt); /* assumes ref > 0 */
+ RT_UNLOCK(rt);
+ }
+ RTFREE(rt0); /* lock, unref, (unlock) */
return (ENETUNREACH);
}
- RT_LOCK(rt0);
- if (rt0->rt_gwroute != NULL)
- RTFREE(rt0->rt_gwroute);
- rt0->rt_gwroute = rt;
- if (rt == NULL) {
- RT_UNLOCK(rt0);
- return (EHOSTUNREACH);
+ /*
+ * Relock it and lose the added reference.
+ * All sorts of things could have happened while we
+ * had no lock on it, so check for them.
+ */
+ RT_RELOCK(rt0);
+ if (rt0 == NULL || ((rt0->rt_flags & RTF_UP) == 0))
+ /* Ru-roh.. what we had is no longer any good */
+ goto retry;
+ /*
+ * While we were away, someone replaced the gateway.
+ * Since a reference count is involved we can't just
+ * overwrite it.
+ */
+ if (rt0->rt_gwroute) {
+ if (rt0->rt_gwroute != rt) {
+ RTFREE_LOCKED(rt);
+ goto retry;
+ }
+ } else {
+ rt0->rt_gwroute = rt;
}
}
+ RT_LOCK_ASSERT(rt);
RT_UNLOCK(rt0);
+ } else {
+ /* think of rt as having the lock from now on.. */
+ rt = rt0;
}
/* XXX why are we inspecting rmx_expire? */
- error = (rt->rt_flags & RTF_REJECT) &&
- (rt->rt_rmx.rmx_expire == 0 ||
- time_uptime < rt->rt_rmx.rmx_expire);
- if (error) {
+ if ((rt->rt_flags & RTF_REJECT) &&
+ (rt->rt_rmx.rmx_expire == 0 ||
+ time_uptime < rt->rt_rmx.rmx_expire)) {
RT_UNLOCK(rt);
return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
}
Index: route.h
===================================================================
--- route.h (revision 183012)
+++ route.h (working copy)
@@ -316,21 +316,37 @@
} while (0)
#define RTFREE_LOCKED(_rt) do { \
- if ((_rt)->rt_refcnt <= 1) \
- rtfree(_rt); \
- else { \
- RT_REMREF(_rt); \
- RT_UNLOCK(_rt); \
- } \
- /* guard against invalid refs */ \
- _rt = 0; \
- } while (0)
+ if ((_rt)->rt_refcnt <= 1) \
+ rtfree(_rt); \
+ else { \
+ RT_REMREF(_rt); \
+ RT_UNLOCK(_rt); \
+ } \
+ /* guard against invalid refs */ \
+ _rt = 0; \
+} while (0)
#define RTFREE(_rt) do { \
- RT_LOCK(_rt); \
- RTFREE_LOCKED(_rt); \
- } while (0)
+ RT_LOCK(_rt); \
+ RTFREE_LOCKED(_rt); \
+} while (0)
+#define RT_TEMP_UNLOCK(_rt) do { \
+ RT_ADDREF(_rt); \
+ RT_UNLOCK(_rt); \
+} while (0)
+
+#define RT_RELOCK(_rt) do { \
+ RT_LOCK(_rt); \
+ if ((_rt)->rt_refcnt <= 1) { \
+ rtfree(_rt); \
+ _rt = 0; /* signal that it went away */ \
+ } else { \
+ RT_REMREF(_rt); \
+ /* note that _rt is still valid */ \
+ } \
+} while (0)
+
extern struct radix_node_head *rt_tables[][AF_MAX+1];
struct ifmultiaddr;
More information about the freebsd-current
mailing list