Reproduceable freeze with quotas enabled

Robert Watson rwatson at FreeBSD.org
Wed Nov 9 13:55:11 PST 2005


On Tue, 8 Nov 2005, Attila Nagy wrote:

> Any chance to investigate it further? It's a really annoying bug, which 
> makes quota support a little bit useless.

Attached is a patch that corrects at least one or two deadlock scenarios 
in UNIX domain socket garbage collecting (unpgc_task.diff).  I've also 
attached a patch for kern_descriptor.c that fixes a bug there in the 
passing of file descriptors referencing files over UNIX domain sockets 
(closef_threadnull.diff).  I've committed the latter to 7.x, with the 
intent to merge to 6.x in a week or so.  The larger UNIX domain socket 
garbage collection patch I need to think about some more, but I'm quite 
interested in whether it fixes the deadlock you're seeing (or for that 
matter, makes it worse somehow).

Thanks,

Robert N M Watson
-------------- next part --------------
Index: uipc_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.158
diff -u -r1.158 uipc_usrreq.c
--- uipc_usrreq.c	31 Oct 2005 15:41:25 -0000	1.158
+++ uipc_usrreq.c	9 Nov 2005 21:49:53 -0000
@@ -59,6 +59,7 @@
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
+#include <sys/taskqueue.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/vnode.h>
@@ -112,6 +113,13 @@
 #define	UNP_LOCK_ASSERT()	mtx_assert(&unp_mtx, MA_OWNED)
 #define	UNP_UNLOCK_ASSERT()	mtx_assert(&unp_mtx, MA_NOTOWNED)
 
+/*
+ * Garbage collection of cyclic file descriptor/socket references occurs
+ * asynchronously in a taskqueue context.  See unp_gc() for a full
+ * description.
+ */
+static struct task	unp_gc_task;
+
 static int     unp_attach(struct socket *);
 static void    unp_detach(struct unpcb *);
 static int     unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
@@ -120,7 +128,7 @@
 static void    unp_disconnect(struct unpcb *);
 static void    unp_shutdown(struct unpcb *);
 static void    unp_drop(struct unpcb *, int);
-static void    unp_gc(void);
+static void    unp_gc(__unused void *, int);
 static void    unp_scan(struct mbuf *, void (*)(struct file *));
 static void    unp_mark(struct file *);
 static void    unp_discard(struct file *);
@@ -795,19 +803,9 @@
 	}
 	soisdisconnected(unp->unp_socket);
 	unp->unp_socket->so_pcb = NULL;
-	if (unp_rights) {
-		/*
-		 * Normally the receive buffer is flushed later,
-		 * in sofree, but if our receive buffer holds references
-		 * to descriptors that are now garbage, we will dispose
-		 * of those descriptor references after the garbage collector
-		 * gets them (resulting in a "panic: closef: count < 0").
-		 */
-		sorflush(unp->unp_socket);
-		unp_gc();	/* Will unlock UNP. */
-	} else
-		UNP_UNLOCK();
-	UNP_UNLOCK_ASSERT();
+	if (unp_rights)
+		taskqueue_enqueue(taskqueue_thread, &unp_gc_task);
+	UNP_UNLOCK();
 	if (unp->unp_addr != NULL)
 		FREE(unp->unp_addr, M_SONAME);
 	uma_zfree(unp_zone, unp);
@@ -1395,7 +1393,7 @@
 	uma_zone_set_max(unp_zone, nmbclusters);
 	LIST_INIT(&unp_dhead);
 	LIST_INIT(&unp_shead);
-
+	TASK_INIT(&unp_gc_task, 0, unp_gc, NULL);
 	UNP_LOCK_INIT();
 }
 
@@ -1581,14 +1579,20 @@
 }
 
 /*
- * unp_defer is thread-local during garbage collection, and does not require
- * explicit synchronization.  unp_gcing prevents other threads from entering
- * garbage collection, and perhaps should be an sx lock instead.
+ * unp_defer indicates whether additional work has been defered for a future
+ * pass through unp_gc().  It is thread local and does not require explicit
+ * synchronization.
  */
-static int	unp_defer, unp_gcing;
+static int	unp_defer;
+
+static int unp_taskcount;
+SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "");
+
+static int unp_recycled;
+SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "");
 
 static void
-unp_gc(void)
+unp_gc(__unused void *arg, int pending)
 {
 	struct file *fp, *nextfp;
 	struct socket *so;
@@ -1597,15 +1601,8 @@
 	int nfiles_snap;
 	int nfiles_slack = 20;
 
-	UNP_LOCK_ASSERT();
-
-	if (unp_gcing) {
-		UNP_UNLOCK();
-		return;
-	}
-	unp_gcing = 1;
+	unp_taskcount++;
 	unp_defer = 0;
-	UNP_UNLOCK();
 	/*
 	 * before going through all this, set all FDs to
 	 * be NOT defered and NOT externally accessible
@@ -1700,6 +1697,9 @@
 	} while (unp_defer);
 	sx_sunlock(&filelist_lock);
 	/*
+	 * XXXRW: The following comments need updating for a post-SMPng and
+	 * deferred unp_gc() world, but are still generally accurate.
+	 *
 	 * We grab an extra reference to each of the file table entries
 	 * that are not otherwise accessible and then free the rights
 	 * that are stored in messages on them.
@@ -1788,12 +1788,11 @@
 			FILE_UNLOCK(tfp);
 		}
 	}
-	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
+	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 		closef(*fpp, (struct thread *) NULL);
+		unp_recycled++;
+	}
 	free(extra_ref, M_TEMP);
-	unp_gcing = 0;
-
-	UNP_UNLOCK_ASSERT();
 }
 
 void
-------------- next part --------------
Index: kern_descrip.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.283
retrieving revision 1.284
diff -u -r1.283 -r1.284
--- kern_descrip.c	1 Nov 2005 17:13:05 -0000	1.283
+++ kern_descrip.c	9 Nov 2005 20:54:25 -0000	1.284
@@ -1880,9 +1880,13 @@
 	 * a flag in the unlock to free ONLY locks obeying POSIX
 	 * semantics, and not to free BSD-style file locks.
 	 * If the descriptor was in a message, POSIX-style locks
-	 * aren't passed with the descriptor.
+	 * aren't passed with the descripto, and the thread pointer
+	 * will be NULL.  Callers should be careful only to pass a
+	 * NULL thread pointer when there really is no owning
+	 * context that might have locks, or the locks will be
+	 * leaked.
 	 */
-	if (fp->f_type == DTYPE_VNODE) {
+	if (fp->f_type == DTYPE_VNODE && td != NULL) {
 		int vfslocked;
 
 		vp = fp->f_vnode;


More information about the freebsd-amd64 mailing list