5.4-STABLE panic in propagate_priority() and a tentative patch

Garry Belka garry at NetworkPhysics.COM
Fri Aug 5 21:36:18 GMT 2005


Hi,

We saw several panics of the same kind on different systems running
5.4-STABLE. The panic was in propagate_priority() and was ultimately
traced to the vget() call in pfs_vncache_alloc(). vget() is called under
the global pfs mutex. When vget() sleeps,
propagate_priority() in a different thread comes across a sleeping
thread that owns a blocked mutex, and that causes a panic.

A tentative patch for 5.4-STABLE is below. In addition to a fix for
the panic, it includes
changes to switch to the LIST_*() macros instead of directly manipulating
queue pointers.

I'd be most interested to hear the opinions of people experienced with VFS
on whether this patch is suitable, or what problems they see with it.

In order to apply it to 6.0, I think it might be sufficient to
uncomment the XXX-- comments, but I haven't checked that: the patch also
includes some 6.0 fixes from Isilon backported to 5.4, and some of those
depend on other 6.0 VFS changes and will fail on 5.4, so they are
partially commented out to make it work on 5.4.

Best,
Garry

--- pseudofs_internal.h 23 Dec 2004 00:36:09 -0000      1.1
+++ pseudofs_internal.h 5 Aug 2005 20:50:42 -0000       1.2
@@ -43,8 +43,9 @@
         struct pfs_node *pvd_pn;
         pid_t            pvd_pid;
         struct vnode    *pvd_vnode;
-       struct pfs_vdata*pvd_prev, *pvd_next;
+        LIST_ENTRY(pfs_vdata)  pvd_link;
  };
+

  /*
   * Vnode cache


--- pseudofs_vncache.c  23 Dec 2004 00:36:09 -0000      1.1
+++ pseudofs_vncache.c  5 Aug 2005 20:50:42 -0000       1.2
@@ -38,6 +38,7 @@
  #include <sys/proc.h>
  #include <sys/sysctl.h>
  #include <sys/vnode.h>
+#include <sys/queue.h>

  #include <fs/pseudofs/pseudofs.h>
  #include <fs/pseudofs/pseudofs_internal.h>
@@ -45,7 +46,8 @@
  static MALLOC_DEFINE(M_PFSVNCACHE, "pfs_vncache", "pseudofs vnode cache");

  static struct mtx pfs_vncache_mutex;
-static struct pfs_vdata *pfs_vncache;
+static LIST_HEAD(, pfs_vdata) pfs_vncache_list =
+    LIST_HEAD_INITIALIZER(&pfs_vncache_list);
  static eventhandler_tag pfs_exit_tag;
  static void pfs_exit(void *arg, struct proc *p);

@@ -106,6 +108,7 @@
                   struct pfs_node *pn, pid_t pid)
  {
         struct pfs_vdata *pvd;
+        struct vnode *vnp;
         int error;

         /*
@@ -113,10 +116,10 @@
          * XXX linear search is not very efficient.
          */
         mtx_lock(&pfs_vncache_mutex);
-       for (pvd = pfs_vncache; pvd; pvd = pvd->pvd_next) {
+        LIST_FOREACH(pvd, &pfs_vncache_list, pvd_link) {
                 if (pvd->pvd_pn == pn && pvd->pvd_pid == pid &&
                     pvd->pvd_vnode->v_mount == mp) {
-                       if (vget(pvd->pvd_vnode, 0, curthread) == 0) {
+                       if (vget(pvd->pvd_vnode, LK_NOWAIT, curthread) 
== 0) {
                                 ++pfs_vncache_hits;
                                 *vpp = pvd->pvd_vnode;
                                 mtx_unlock(&pfs_vncache_mutex);
@@ -127,6 +130,20 @@
                                 return (0);
                         }
                         /* XXX if this can happen, we're in trouble */
+                        /* the vnode is being cleaned.
+                         * need to wait until it's gone
+                         */
+                       vnp = pvd->pvd_vnode;
+                        vhold(vnp);
+                       mtx_unlock(&pfs_vncache_mutex);
+                        /*XXX-- VOP_LOCK(vnp, LK_EXCLUSIVE, curthread); */
+                        if (vget(vnp, 0, curthread) == 0) {
+                                /* XXX shouldn't happen.  */
+                                vrele(vnp);
+                        }
+                        /*XXX-- VOP_UNLOCK(vnp, 0, curthread); */
+                        vdrop(vnp);
+                       mtx_lock(&pfs_vncache_mutex);
                         break;
                 }
         }
@@ -135,8 +152,6 @@

         /* nope, get a new one */
         MALLOC(pvd, struct pfs_vdata *, sizeof *pvd, M_PFSVNCACHE, 
M_WAITOK);
-       if (++pfs_vncache_entries > pfs_vncache_maxentries)
-               pfs_vncache_maxentries = pfs_vncache_entries;
         error = getnewvnode("pseudofs", mp, pfs_vnodeop_p, vpp);
         if (error) {
                 FREE(pvd, M_PFSVNCACHE);
@@ -176,12 +191,13 @@
         if ((pn->pn_flags & PFS_PROCDEP) != 0)
                 (*vpp)->v_vflag |= VV_PROCDEP;
         pvd->pvd_vnode = *vpp;
+
         mtx_lock(&pfs_vncache_mutex);
-       pvd->pvd_prev = NULL;
-       pvd->pvd_next = pfs_vncache;
-       if (pvd->pvd_next)
-               pvd->pvd_next->pvd_prev = pvd;
-       pfs_vncache = pvd;
+
+        LIST_INSERT_HEAD(&pfs_vncache_list, pvd, pvd_link);
+       if (++pfs_vncache_entries > pfs_vncache_maxentries)
+               pfs_vncache_maxentries = pfs_vncache_entries;
+
         mtx_unlock(&pfs_vncache_mutex);
          (*vpp)->v_vnlock->lk_flags |= LK_CANRECURSE;
         vn_lock(*vpp, LK_RETRY | LK_EXCLUSIVE, curthread);
@@ -199,15 +215,10 @@
         mtx_lock(&pfs_vncache_mutex);
         pvd = (struct pfs_vdata *)vp->v_data;
         KASSERT(pvd != NULL, ("pfs_vncache_free(): no vnode data\n"));
-       if (pvd->pvd_next)
-               pvd->pvd_next->pvd_prev = pvd->pvd_prev;
-       if (pvd->pvd_prev)
-               pvd->pvd_prev->pvd_next = pvd->pvd_next;
-       else
-               pfs_vncache = pvd->pvd_next;
+        LIST_REMOVE(pvd, pvd_link);
+       --pfs_vncache_entries;
         mtx_unlock(&pfs_vncache_mutex);

-       --pfs_vncache_entries;
         FREE(pvd, M_PFSVNCACHE);
         vp->v_data = NULL;
         return (0);
@@ -222,6 +233,8 @@
         struct pfs_vdata *pvd;
         struct vnode *vnp;

+        if (LIST_EMPTY(&pfs_vncache_list))
+            return;
         mtx_lock(&Giant);
         /*
          * This is extremely inefficient due to the fact that vgone() not
@@ -237,16 +250,18 @@
          * this particular case would be a BST sorted by PID.
          */
         mtx_lock(&pfs_vncache_mutex);
-       pvd = pfs_vncache;
-       while (pvd != NULL) {
+    restart:
+        LIST_FOREACH(pvd, &pfs_vncache_list, pvd_link) {
                 if (pvd->pvd_pid == p->p_pid) {
                         vnp = pvd->pvd_vnode;
+                        vhold(vnp);
                         mtx_unlock(&pfs_vncache_mutex);
+                        /*XXX-- VOP_LOCK(vnp, LK_EXCLUSIVE, curthread); */
                         vgone(vnp);
+                        /*XXX-- VOP_UNLOCK(vnp, 0, curthread); */
+                        vdrop(vnp);
                         mtx_lock(&pfs_vncache_mutex);
-                       pvd = pfs_vncache;
-               } else {
-                       pvd = pvd->pvd_next;
+                        goto restart;
                 }
         }
         mtx_unlock(&pfs_vncache_mutex);
@@ -267,16 +282,19 @@
         pn->pn_flags |= PFS_DISABLED;
         /* XXX see comment above nearly identical code in pfs_exit() */
         mtx_lock(&pfs_vncache_mutex);
-       pvd = pfs_vncache;
-       while (pvd != NULL) {
+    restart:
+        LIST_FOREACH(pvd, &pfs_vncache_list, pvd_link) {
                 if (pvd->pvd_pn == pn) {
                         vnp = pvd->pvd_vnode;
+                        vhold(vnp);
                         mtx_unlock(&pfs_vncache_mutex);
+                        /*XXX-- VOP_LOCK(vnp, LK_EXCLUSIVE, curthread); */
                         vgone(vnp);
+                        /*XXX-- VOP_UNLOCK(vnp, 0, curthread); */
+                        vdrop(vnp);
+
                         mtx_lock(&pfs_vncache_mutex);
-                       pvd = pfs_vncache;
-               } else {
-                       pvd = pvd->pvd_next;
+                        goto restart;
                 }
         }
         mtx_unlock(&pfs_vncache_mutex);




More information about the freebsd-stable mailing list