deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable
locks)
Kostik Belousov
kostikbel at gmail.com
Sat Jun 20 16:15:50 UTC 2009
On Fri, Jun 19, 2009 at 06:23:28PM +0200, Jilles Tjoelker wrote:
> I have been having trouble with deadlocks with NFS mounts for a while,
> and I have found at least one way it can deadlock. It seems an issue
> with the sleep/lock system.
>
> NFS sleeps while holding a lockmgr lock, waiting for a reply from the
> server. When the mount is set intr, this is an interruptible sleep, so
> that interrupting signals can abort the sleep. However, this also means
> that SIGSTOP etc will suspend the thread without waking it up first, so
> it will be suspended with a lock held.
>
> If it holds the wrong locks, it is possible that the shell will not be
> able to run, and the process cannot be continued in the normal manner.
>
> Due to some other things I do not understand, it is then possible that
> the process cannot be continued at all (SIGCONT seems ignored), but in
> simple cases SIGCONT works, and things go back to normal.
>
> In any case, this situation is undesirable, as even 'umount -f' doesn't
> work while the thread is suspended.
>
> Of course, this reasoning applies to any code that goes to sleep
> interruptibly while holding a lock (sx or lockmgr). Is this supposed to
> be possible (likely useful)? If so, a third type of sleep would be
> needed that is interrupted by signals but not suspended? If not,
> something should check that it doesn't happen and NFS intr mounts may
> need to check for signals periodically or find a way to avoid sleeping
> with locks held.
>
> The td_locks field is only accessible for the current thread, so it
> cannot be used to check if suspending is safe.
>
> Also, making SIGSTOP and the like interrupt/restart syscalls is not
> acceptable unless you find some way to do it such that userland won't
> notice. For example, a read of 10 megabytes from a regular file with
> that much available must not return less then 10 megabytes.
Note that NFS does check for the signals during i/o, so you may get
short reads anyway.
I do think that the right solution both there and with SINGLE_NO_EXIT
case for thread_single is to stop at the usermode boundary instead of
suspending a thread in the interruptible sleep state.
I set error code returned from interrupted msleep() to ERESTART,
that seems to be the right thing, at least to restart the i/o that
transferred no data upon receiving SIGSTOP.
My current patch is below. It contains some not strictly related
changes, e.g. for wakeup().
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 5c1d553..28f4f4f 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2310,18 +2310,22 @@ static void
sig_suspend_threads(struct thread *td, struct proc *p, int sending)
{
struct thread *td2;
+ int wakeup_swapper;
PROC_LOCK_ASSERT(p, MA_OWNED);
PROC_SLOCK_ASSERT(p, MA_OWNED);
+ wakeup_swapper = 0;
FOREACH_THREAD_IN_PROC(p, td2) {
thread_lock(td2);
td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) &&
- (td2->td_flags & TDF_SINTR) &&
- !TD_IS_SUSPENDED(td2)) {
- thread_suspend_one(td2);
- } else {
+ (td2->td_flags & TDF_SINTR)) {
+ if (TD_IS_SUSPENDED(td2))
+ wakeup_swapper |= thread_unsuspend_one(td2);
+ if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR))
+ wakeup_swapper |= sleepq_abort(td2, ERESTART);
+ } else if (!TD_IS_SUSPENDED(td2)) {
if (sending || td != td2)
td2->td_flags |= TDF_ASTPENDING;
#ifdef SMP
@@ -2331,6 +2335,8 @@ sig_suspend_threads(struct thread *td, struct proc *p, int sending)
}
thread_unlock(td2);
}
+ if (wakeup_swapper)
+ kick_proc0();
}
int
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index b91c1a5..d27d027 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -344,11 +344,16 @@ wakeup(void *ident)
{
int wakeup_swapper;
+ repeat:
sleepq_lock(ident);
wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0);
sleepq_release(ident);
- if (wakeup_swapper)
- kick_proc0();
+ if (wakeup_swapper) {
+ if (ident == &proc0)
+ goto repeat;
+ else
+ kick_proc0();
+ }
}
/*
@@ -361,11 +366,16 @@ wakeup_one(void *ident)
{
int wakeup_swapper;
+ repeat:
sleepq_lock(ident);
wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0);
sleepq_release(ident);
- if (wakeup_swapper)
- kick_proc0();
+ if (wakeup_swapper) {
+ if (ident == &proc0)
+ goto repeat;
+ else
+ kick_proc0();
+ }
}
static void
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index bb8779b..800a1d1 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -504,6 +504,22 @@ thread_unlink(struct thread *td)
/* Must NOT clear links to proc! */
}
+static int
+recalc_remaining(struct proc *p, int mode)
+{
+ int remaining;
+
+ if (mode == SINGLE_EXIT)
+ remaining = p->p_numthreads;
+ else if (mode == SINGLE_BOUNDARY)
+ remaining = p->p_numthreads - p->p_boundary_count;
+ else if (mode == SINGLE_NO_EXIT)
+ remaining = p->p_numthreads - p->p_suspcount;
+ else
+ panic("recalc_remaining: wrong mode %d", mode);
+ return (remaining);
+}
+
/*
* Enforce single-threading.
*
@@ -551,12 +567,7 @@ thread_single(int mode)
p->p_flag |= P_STOPPED_SINGLE;
PROC_SLOCK(p);
p->p_singlethread = td;
- if (mode == SINGLE_EXIT)
- remaining = p->p_numthreads;
- else if (mode == SINGLE_BOUNDARY)
- remaining = p->p_numthreads - p->p_boundary_count;
- else
- remaining = p->p_numthreads - p->p_suspcount;
+ remaining = recalc_remaining(p, mode);
while (remaining != 1) {
if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
goto stopme;
@@ -587,18 +598,17 @@ thread_single(int mode)
wakeup_swapper |=
sleepq_abort(td2, ERESTART);
break;
+ case SINGLE_NO_EXIT:
+ if (TD_IS_SUSPENDED(td2) &&
+ !(td2->td_flags & TDF_BOUNDARY))
+ wakeup_swapper |=
+ thread_unsuspend_one(td2);
+ if (TD_ON_SLEEPQ(td2) &&
+ (td2->td_flags & TDF_SINTR))
+ wakeup_swapper |=
+ sleepq_abort(td2, ERESTART);
+ break;
default:
- if (TD_IS_SUSPENDED(td2)) {
- thread_unlock(td2);
- continue;
- }
- /*
- * maybe other inhibited states too?
- */
- if ((td2->td_flags & TDF_SINTR) &&
- (td2->td_inhibitors &
- (TDI_SLEEPING | TDI_SWAPPED)))
- thread_suspend_one(td2);
break;
}
}
@@ -611,12 +621,7 @@ thread_single(int mode)
}
if (wakeup_swapper)
kick_proc0();
- if (mode == SINGLE_EXIT)
- remaining = p->p_numthreads;
- else if (mode == SINGLE_BOUNDARY)
- remaining = p->p_numthreads - p->p_boundary_count;
- else
- remaining = p->p_numthreads - p->p_suspcount;
+ remaining = recalc_remaining(p, mode);
/*
* Maybe we suspended some threads.. was it enough?
@@ -630,12 +635,7 @@ stopme:
* In the mean time we suspend as well.
*/
thread_suspend_switch(td);
- if (mode == SINGLE_EXIT)
- remaining = p->p_numthreads;
- else if (mode == SINGLE_BOUNDARY)
- remaining = p->p_numthreads - p->p_boundary_count;
- else
- remaining = p->p_numthreads - p->p_suspcount;
+ remaining = recalc_remaining(p, mode);
}
if (mode == SINGLE_EXIT) {
/*
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-arch/attachments/20090620/345bdaba/attachment.pgp
More information about the freebsd-arch
mailing list