svn commit: r201879 - in head: . share/man/man9 sys/conf sys/kern sys/sys

Attilio Rao attilio at FreeBSD.org
Sat Jan 9 01:46:39 UTC 2010


Author: attilio
Date: Sat Jan  9 01:46:38 2010
New Revision: 201879
URL: http://svn.freebsd.org/changeset/base/201879

Log:
  Introduce the new kernel thread called "deadlock resolver".
  While the name is pretentious, a good explanation of its goals is
  given in this 17-month-old presentation e-mail:
  http://lists.freebsd.org/pipermail/freebsd-arch/2008-August/008452.html
  
  In order to implement it, the sq_type field in sleepqueues is now
  mandatory rather than compiled only with the INVARIANTS option.
  Additionally, a new sleepqueue function, sleepq_type(), is added,
  returning the type of the sleepqueue linked to a wchan.
  Three new sysctls are added in order to configure the thread:
  debug.deadlkres.slptime_threshold
  debug.deadlkres.blktime_threshold
  debug.deadlkres.sleepfreq
  
  representing the thresholds for sleep and block time that will lead to
  a deadlock being reported (when exceeded), while sleepfreq represents
  the number of seconds between 2 consecutive runs of the thread.
  In order to enable the deadlock resolver thread, recompile your kernel
  with the option DEADLKRES.
  
  Reviewed by:	jeff
  Tested by:	pho, Giovanni Trematerra
  Sponsored by:	Nokia Incorporated, Sandvine Incorporated
  MFC after:	2 weeks

Modified:
  head/UPDATING
  head/share/man/man9/sleepqueue.9
  head/sys/conf/NOTES
  head/sys/conf/options
  head/sys/kern/kern_clock.c
  head/sys/kern/subr_sleepqueue.c
  head/sys/kern/subr_turnstile.c
  head/sys/sys/proc.h
  head/sys/sys/sleepqueue.h

Modified: head/UPDATING
==============================================================================
--- head/UPDATING	Sat Jan  9 01:20:01 2010	(r201878)
+++ head/UPDATING	Sat Jan  9 01:46:38 2010	(r201879)
@@ -22,6 +22,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.
 	machines to maximize performance.  (To disable malloc debugging, run
 	ln -s aj /etc/malloc.conf.)
 
+20100108:
+	Introduce the kernel thread "deadlock resolver" (which can be enabled
+	via the DEADLKRES option, see NOTES for more details) and the
+	sleepq_type() function for sleepqueues.
+
 20091202:
 	The rc.firewall and rc.firewall6 were unified, and
 	rc.firewall6 and rc.d/ip6fw were removed.

Modified: head/share/man/man9/sleepqueue.9
==============================================================================
--- head/share/man/man9/sleepqueue.9	Sat Jan  9 01:20:01 2010	(r201878)
+++ head/share/man/man9/sleepqueue.9	Sat Jan  9 01:46:38 2010	(r201879)
@@ -23,7 +23,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd December 12, 2009
+.Dd January 8, 2010
 .Dt SLEEPQUEUE 9
 .Os
 .Sh NAME
@@ -44,6 +44,7 @@
 .Nm sleepq_sleepcnt ,
 .Nm sleepq_timedwait ,
 .Nm sleepq_timedwait_sig ,
+.Nm sleepq_type ,
 .Nm sleepq_wait ,
 .Nm sleepq_wait_sig
 .Nd manage the queues of sleeping threads
@@ -84,6 +85,8 @@
 .Fn sleepq_timedwait "void *wchan"
 .Ft int
 .Fn sleepq_timedwait_sig "void *wchan" "int signal_caught"
+.Ft int
+.Fn sleepq_type "void *wchan"
 .Ft void
 .Fn sleepq_wait "void *wchan"
 .Ft int
@@ -366,6 +369,12 @@ given a
 .Fa wchan .
 .Pp
 The
+.Fn sleepq_type
+function returns the type of
+.Fa wchan
+associated to a sleepqueue.
+.Pp
+The
 .Fn sleepq_abort ,
 .Fn sleepq_broadcast ,
 and

Modified: head/sys/conf/NOTES
==============================================================================
--- head/sys/conf/NOTES	Sat Jan  9 01:20:01 2010	(r201878)
+++ head/sys/conf/NOTES	Sat Jan  9 01:46:38 2010	(r201879)
@@ -2531,6 +2531,11 @@ options 	BOOTP_BLOCKSIZE=8192 # Override
 options 	SW_WATCHDOG
 
 #
+# Add the software deadlock resolver thread.
+#
+options		DEADLKRES
+
+#
 # Disable swapping of stack pages.  This option removes all
 # code which actually performs swapping, so it's not possible to turn
 # it back on at run-time.

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options	Sat Jan  9 01:20:01 2010	(r201878)
+++ head/sys/conf/options	Sat Jan  9 01:46:38 2010	(r201879)
@@ -72,6 +72,7 @@ COMPAT_FREEBSD6	opt_compat.h
 COMPAT_FREEBSD7	opt_compat.h
 COMPILING_LINT	opt_global.h
 CY_PCI_FASTINTR
+DEADLKRES	opt_watchdog.h
 DIRECTIO
 FULL_PREEMPTION	opt_sched.h
 IPI_PREEMPTION	opt_sched.h

Modified: head/sys/kern/kern_clock.c
==============================================================================
--- head/sys/kern/kern_clock.c	Sat Jan  9 01:20:01 2010	(r201878)
+++ head/sys/kern/kern_clock.c	Sat Jan  9 01:46:38 2010	(r201879)
@@ -48,14 +48,16 @@ __FBSDID("$FreeBSD$");
 #include <sys/callout.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
-#include <sys/lock.h>
+#include <sys/kthread.h>
 #include <sys/ktr.h>
+#include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
+#include <sys/sleepqueue.h>
 #include <sys/smp.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
@@ -159,6 +161,124 @@ sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS
 SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
     0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
 
+#ifdef DEADLKRES
+static int slptime_threshold = 1800;
+static int blktime_threshold = 900;
+static int sleepfreq = 3;
+
+static void
+deadlkres(void)
+{
+	struct proc *p;
+	struct thread *td;
+	void *wchan;
+	int blkticks, slpticks, slptype, tryl, tticks;
+
+	tryl = 0;
+	for (;;) {
+		blkticks = blktime_threshold * hz;
+		slpticks = slptime_threshold * hz;
+
+		/*
+		 * Avoid to sleep on the sx_lock in order to avoid a possible
+		 * priority inversion problem leading to starvation.
+		 * If the lock can't be held after 100 tries, panic.
+		 */
+		if (!sx_try_slock(&allproc_lock)) {
+			if (tryl > 100)
+		panic("%s: possible deadlock detected on allproc_lock\n",
+				    __func__);
+			tryl++;
+			pause("allproc_lock deadlkres", sleepfreq * hz);
+			continue;
+		}
+		tryl = 0;
+		FOREACH_PROC_IN_SYSTEM(p) {
+			PROC_LOCK(p);
+			FOREACH_THREAD_IN_PROC(p, td) {
+				thread_lock(td);
+				if (TD_ON_LOCK(td)) {
+
+					/*
+					 * The thread should be blocked on a
+					 * turnstile, simply check if the
+					 * turnstile channel is in good state.
+					 */
+					MPASS(td->td_blocked != NULL);
+					tticks = ticks - td->td_blktick;
+					thread_unlock(td);
+					if (tticks > blkticks) {
+
+						/*
+						 * Accordingly with provided
+						 * thresholds, this thread is
+						 * stuck for too long on a
+						 * turnstile.
+						 */
+						PROC_UNLOCK(p);
+						sx_sunlock(&allproc_lock);
+	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
+						    __func__, td, tticks);
+					}
+				} else if (TD_IS_SLEEPING(td)) {
+
+					/*
+					 * Check if the thread is sleeping on a
+					 * lock, otherwise skip the check.
+					 * Drop the thread lock in order to
+					 * avoid a LOR with the sleepqueue
+					 * spinlock.
+					 */
+					wchan = td->td_wchan;
+					tticks = ticks - td->td_slptick;
+					thread_unlock(td);
+					slptype = sleepq_type(wchan);
+					if ((slptype == SLEEPQ_SX ||
+					    slptype == SLEEPQ_LK) &&
+					    tticks > slpticks) {
+
+						/*
+						 * Accordingly with provided
+						 * thresholds, this thread is
+						 * stuck for too long on a
+						 * sleepqueue.
+						 */
+						PROC_UNLOCK(p);
+						sx_sunlock(&allproc_lock);
+	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
+						    __func__, td, tticks);
+					}
+				} else
+					thread_unlock(td);
+			}
+			PROC_UNLOCK(p);
+		}
+		sx_sunlock(&allproc_lock);
+
+		/* Sleep for sleepfreq seconds. */
+		pause("deadlkres", sleepfreq * hz);
+	}
+}
+
+static struct kthread_desc deadlkres_kd = {
+	"deadlkres",
+	deadlkres,
+	(struct thread **)NULL
+};
+
+SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);
+
+SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
+    &slptime_threshold, 0,
+    "Number of seconds within is valid to sleep on a sleepqueue");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
+    &blktime_threshold, 0,
+    "Number of seconds within is valid to block on a turnstile");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
+    "Number of seconds between any deadlock resolver thread run");
+#endif	/* DEADLKRES */
+
 void
 read_cpu_time(long *cp_time)
 {

Modified: head/sys/kern/subr_sleepqueue.c
==============================================================================
--- head/sys/kern/subr_sleepqueue.c	Sat Jan  9 01:20:01 2010	(r201878)
+++ head/sys/kern/subr_sleepqueue.c	Sat Jan  9 01:46:38 2010	(r201879)
@@ -122,8 +122,8 @@ struct sleepqueue {
 	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
 	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
 	void	*sq_wchan;			/* (c) Wait channel. */
-#ifdef INVARIANTS
 	int	sq_type;			/* (c) Queue type. */
+#ifdef INVARIANTS
 	struct lock_object *sq_lock;		/* (c) Associated lock. */
 #endif
 };
@@ -317,7 +317,6 @@ sleepq_add(void *wchan, struct lock_obje
 		    ("thread's sleep queue has a non-empty free list"));
 		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
 		sq->sq_lock = lock;
-		sq->sq_type = flags & SLEEPQ_TYPE;
 #endif
 #ifdef SLEEPQUEUE_PROFILING
 		sc->sc_depth++;
@@ -330,6 +329,7 @@ sleepq_add(void *wchan, struct lock_obje
 		sq = td->td_sleepqueue;
 		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
 		sq->sq_wchan = wchan;
+		sq->sq_type = flags & SLEEPQ_TYPE;
 	} else {
 		MPASS(wchan == sq->sq_wchan);
 		MPASS(lock == sq->sq_lock);
@@ -669,6 +669,28 @@ sleepq_timedwait_sig(void *wchan, int pr
 }
 
 /*
+ * Returns the type of sleepqueue given a waitchannel.
+ */
+int
+sleepq_type(void *wchan)
+{
+	struct sleepqueue *sq;
+	int type;
+
+	MPASS(wchan != NULL);
+
+	sleepq_lock(wchan);
+	sq = sleepq_lookup(wchan);
+	if (sq == NULL) {
+		sleepq_release(wchan);
+		return (-1);
+	}
+	type = sq->sq_type;
+	sleepq_release(wchan);
+	return (type);
+}
+
+/*
  * Removes a thread from a sleep queue and makes it
  * runnable.
  */
@@ -1176,8 +1198,8 @@ DB_SHOW_COMMAND(sleepq, db_show_sleepque
 	return;
 found:
 	db_printf("Wait channel: %p\n", sq->sq_wchan);
-#ifdef INVARIANTS
 	db_printf("Queue type: %d\n", sq->sq_type);
+#ifdef INVARIANTS
 	if (sq->sq_lock) {
 		lock = sq->sq_lock;
 		db_printf("Associated Interlock: %p - (%s) %s\n", lock,

Modified: head/sys/kern/subr_turnstile.c
==============================================================================
--- head/sys/kern/subr_turnstile.c	Sat Jan  9 01:20:01 2010	(r201878)
+++ head/sys/kern/subr_turnstile.c	Sat Jan  9 01:46:38 2010	(r201879)
@@ -733,6 +733,7 @@ turnstile_wait(struct turnstile *ts, str
 	td->td_tsqueue = queue;
 	td->td_blocked = ts;
 	td->td_lockname = lock->lo_name;
+	td->td_blktick = ticks;
 	TD_SET_LOCK(td);
 	mtx_unlock_spin(&tc->tc_lock);
 	propagate_priority(td);
@@ -925,6 +926,7 @@ turnstile_unpend(struct turnstile *ts, i
 		MPASS(TD_CAN_RUN(td));
 		td->td_blocked = NULL;
 		td->td_lockname = NULL;
+		td->td_blktick = 0;
 #ifdef INVARIANTS
 		td->td_tsqueue = 0xff;
 #endif

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h	Sat Jan  9 01:20:01 2010	(r201878)
+++ head/sys/sys/proc.h	Sat Jan  9 01:46:38 2010	(r201879)
@@ -218,6 +218,7 @@ struct thread {
 	struct ucred	*td_ucred;	/* (k) Reference to credentials. */
 	u_int		td_estcpu;	/* (t) estimated cpu utilization */
 	int		td_slptick;	/* (t) Time at sleep. */
+	int		td_blktick;	/* (t) Time spent blocked. */
 	struct rusage	td_ru;		/* (t) rusage information */
 	uint64_t	td_incruntime;	/* (t) Cpu ticks to transfer to proc. */
 	uint64_t	td_runtime;	/* (t) How many cpu ticks we've run. */

Modified: head/sys/sys/sleepqueue.h
==============================================================================
--- head/sys/sys/sleepqueue.h	Sat Jan  9 01:20:01 2010	(r201878)
+++ head/sys/sys/sleepqueue.h	Sat Jan  9 01:46:38 2010	(r201879)
@@ -112,6 +112,7 @@ void	sleepq_set_timeout(void *wchan, int
 u_int	sleepq_sleepcnt(void *wchan, int queue);
 int	sleepq_timedwait(void *wchan, int pri);
 int	sleepq_timedwait_sig(void *wchan, int pri);
+int	sleepq_type(void *wchan);
 void	sleepq_wait(void *wchan, int pri);
 int	sleepq_wait_sig(void *wchan, int pri);
 


More information about the svn-src-all mailing list