kern/185043: Kernel panic: Sleeping thread (tid …, pid …) owns a non-sleepable lock from netinet/in_multi.c

Bentkofsky, Michael MBentkofsky at verisign.com
Sat Dec 21 03:27:23 UTC 2013


I have just submitted PR kern/185043 and wanted to follow up on freebsd-net with a patch that fixes the obvious error paths that leave in_multi_lock held when they should have released it.

Index: sys/netinet/in_mcast.c
===================================================================
--- sys/netinet/in_mcast.c (revision 259264)
+++ sys/netinet/in_mcast.c (working copy)
@@ -1492,7 +1492,7 @@
    error = inm_merge(inm, imf);
    if (error) {
          CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
-          goto out_imf_rollback;
+          goto out_in_multi_locked;
    }
     CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -1500,6 +1500,8 @@
    if (error)
          CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+out_in_multi_locked:
+
    IN_MULTI_UNLOCK();
 out_imf_rollback:
@@ -2168,8 +2170,12 @@
    if (is_new) {
          error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
              &inm);
-          if (error)
+          if (error) {
+                        CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed",
+                            __func__);
+                        IN_MULTI_UNLOCK();
               goto out_imo_free;
+                }
          imo->imo_membership[idx] = inm;
    } else {
          CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
@@ -2177,20 +2183,21 @@
          if (error) {
               CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
                   __func__);
-               goto out_imf_rollback;
+               goto out_in_multi_locked;
          }
          CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
          error = igmp_change_state(inm);
          if (error) {
               CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
                   __func__);
-               goto out_imf_rollback;
+               goto out_in_multi_locked;
          }
    }
+out_in_multi_locked:
+
    IN_MULTI_UNLOCK();
-out_imf_rollback:
    INP_WLOCK_ASSERT(inp);
    if (error) {
          imf_rollback(imf);
@@ -2394,7 +2401,7 @@
          if (error) {
               CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
                   __func__);
-               goto out_imf_rollback;
+               goto out_in_multi_locked;
          }
           CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -2405,9 +2412,10 @@
          }
    }
+out_in_multi_locked:
+
    IN_MULTI_UNLOCK();
-out_imf_rollback:
    if (error)
          imf_rollback(imf);
    else
@@ -2641,7 +2649,7 @@
    error = inm_merge(inm, imf);
    if (error) {
          CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
-          goto out_imf_rollback;
+          goto out_in_multi_locked;
    }
     CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -2649,6 +2657,8 @@
    if (error)
          CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+out_in_multi_locked:
+
    IN_MULTI_UNLOCK();
 out_imf_rollback:

===== End of patch =====

The PR also cites a follow-up problem that this patch does not address: the same technique for causing the problem (subscribing to a multicast address using quagga’s ospfd and then running /etc/rc.d/netif restart) can trigger a race condition leading to this kernel assertion: "if_freemulti: protospec not NULL". The PR describes the particular execution path, along with comments suggesting this might be a known problem. Since I have triggered this case, I am interested in patching it as well, but I need more time to consider a more thorough solution. Input is appreciated, of course.

Hopefully the patch above is obvious enough to be accepted.

“This message (including any attachments) is intended only for the use of the individual or entity to which it is addressed, and may contain information that is non-public, proprietary, privileged, confidential and exempt from disclosure under applicable law or may be constituted as attorney work product. If you are not the intended recipient, you are hereby notified that any use, dissemination, distribution, or copying of this communication is strictly prohibited. If you have received this message in error, notify sender immediately and delete this message immediately.”


More information about the freebsd-net mailing list