Signals and an exiting thread
Kostik Belousov
kostikbel at gmail.com
Mon Oct 5 19:21:51 UTC 2009
On Fri, Oct 02, 2009 at 10:12:13PM +0200, Jilles Tjoelker wrote:
> On Thu, Oct 01, 2009 at 03:07:30PM +0300, Kostik Belousov wrote:
> > On Wed, Sep 30, 2009 at 11:02:19AM -0700, Justin Teller wrote:
> > > We're trying to control one process from another process through
> > > signals (I know, revolutionary ;-), and we've found that a signal
> > > occasionally gets lost. The process we're signaling is
> > > multi-threaded. It looks like the signal is lost when the kernel
> > > decides to post the signal to a thread that is in the process of dying
> > > (calling pthread_exit, etc).
>
> > > Is this expected behavior that we should just handle, or is it a race
> > > in the kernel that should be/will be/already is fixed?
>
> > > It may be that a fix is already in current, and I just haven't found
> > > it in my searches through the source code (I'm working off of source
> > > code for an older 8.0 image). If it is fixed, I'd appreciate a
> > > pointer to the code that fixes it.
>
> > When thread enters the kernel last time to be killed, it is very much
> > bad idea to allow it to return to usermode to handle directed signal.
> > And, there would always be window between entering the kernel and
> > marking the thread as exiting.
>
> > Moving the thread-directed signals back to the process queue is hard
> > and there is no way to distinguish which signals were sent to process
> > or to the thread.
>
> > Possibly, we could clear the thread signal mask while still in user mode.
> > I think it would still leave a very narrow window when a process
> > signal could be directed to the dying thread and not be delivered to
> > usermode; this happens when signal is generated while sigsetmask already
> > entered the kernel, but did not changed the mask yet. This is worked
> > around by rechecking the pending signals after setting the block mask
> > and releasing it if needed.
>
> SIGKILL cannot be masked. Is it possible that a kill(SIGKILL) is
> assigned to a dying thread and lost?
>
> (SIGSTOP cannot be masked either, but its processing is done by the
> sending thread, so it should be safe.)
>
> I suppose that race can also occur in other uses of pthread_sigmask().
> If a thread masks a signal for a while, and that signal is assigned to
> that thread just as it is executing pthread_sigmask(), it will only be
> processed when that thread unblocks or accepts it, even though other
> threads may have the signal unmasked or be in a sigwait() for it.
> Signals sent after pthread_sigmask() has changed the signal mask are
> likely processed sooner because they will be assigned to a different
> thread or left in the process queue.
>
> POSIX seems to say that signals generated for the process should remain
> queued for the process and should only be assigned to a thread at time
> of delivery.
>
> This could be implemented by leaving signals in the process queue or by
> tracking for each signal in the thread queue whether it was directed at
> the thread and moving the process signals back at sigmask/thr_exit.
> Either way I am not sure of all the consequences at this time.
I agree that postponing assignment of the thread for signal delivery
till the actual delivery occurs is the proper fix. I tried to cheat
in my previous patch. Below is an experimental change that did very
minimal testing.
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 0386fc4..abd9714 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -581,20 +581,11 @@ void
signotify(struct thread *td)
{
struct proc *p;
- sigset_t set;
p = td->td_proc;
PROC_LOCK_ASSERT(p, MA_OWNED);
- /*
- * If our mask changed we may have to move signal that were
- * previously masked by all threads to our sigqueue.
- */
- set = p->p_sigqueue.sq_signals;
- SIGSETNAND(set, td->td_sigmask);
- if (! SIGISEMPTY(set))
- sigqueue_move_set(&p->p_sigqueue, &td->td_sigqueue, &set);
if (SIGPENDING(td)) {
thread_lock(td);
td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING;
@@ -1985,17 +1976,9 @@ tdsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
KNOTE_LOCKED(&p->p_klist, NOTE_SIGNAL | sig);
prop = sigprop(sig);
- /*
- * If the signal is blocked and not destined for this thread, then
- * assign it to the process so that we can find it later in the first
- * thread that unblocks it. Otherwise, assign it to this thread now.
- */
if (td == NULL) {
td = sigtd(p, sig, prop);
- if (SIGISMEMBER(td->td_sigmask, sig))
- sigqueue = &p->p_sigqueue;
- else
- sigqueue = &td->td_sigqueue;
+ sigqueue = &p->p_sigqueue;
} else {
KASSERT(td->td_proc == p, ("invalid thread"));
sigqueue = &td->td_sigqueue;
@@ -2409,8 +2392,9 @@ issignal(struct thread *td, int stop_allowed)
{
struct proc *p;
struct sigacts *ps;
+ struct sigqueue *queue;
sigset_t sigpending;
- int sig, prop, newsig;
+ int sig, prop, newsig, signo;
p = td->td_proc;
ps = p->p_sigacts;
@@ -2420,6 +2404,7 @@ issignal(struct thread *td, int stop_allowed)
int traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG);
sigpending = td->td_sigqueue.sq_signals;
+ SIGSETOR(sigpending, p->p_sigqueue.sq_signals);
SIGSETNAND(sigpending, td->td_sigmask);
if (p->p_flag & P_PPWAIT)
@@ -2440,6 +2425,7 @@ issignal(struct thread *td, int stop_allowed)
*/
if (SIGISMEMBER(ps->ps_sigignore, sig) && (traced == 0)) {
sigqueue_delete(&td->td_sigqueue, sig);
+ sigqueue_delete(&p->p_sigqueue, sig);
continue;
}
if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) {
@@ -2452,12 +2438,18 @@ issignal(struct thread *td, int stop_allowed)
if (sig != newsig) {
ksiginfo_t ksi;
+
+ queue = &td->td_sigqueue;
/*
* clear old signal.
* XXX shrug off debugger, it causes siginfo to
* be thrown away.
*/
- sigqueue_get(&td->td_sigqueue, sig, &ksi);
+ if (sigqueue_get(queue, sig, &ksi) == 0) {
+ queue = &p->p_sigqueue;
+ signo = sigqueue_get(queue, sig, &ksi);
+ KASSERT(signo == sig, ("signo != sig"));
+ }
/*
* If parent wants us to take the signal,
@@ -2472,7 +2464,7 @@ issignal(struct thread *td, int stop_allowed)
* Put the new signal into td_sigqueue. If the
* signal is being masked, look for other signals.
*/
- SIGADDSET(td->td_sigqueue.sq_signals, sig);
+ SIGADDSET(queue->sq_signals, sig);
if (SIGISMEMBER(td->td_sigmask, sig))
continue;
signotify(td);
@@ -2567,6 +2559,7 @@ issignal(struct thread *td, int stop_allowed)
return (sig);
}
sigqueue_delete(&td->td_sigqueue, sig); /* take the signal! */
+ sigqueue_delete(&p->p_sigqueue, sig);
}
/* NOTREACHED */
}
@@ -2613,7 +2606,9 @@ postsig(sig)
ps = p->p_sigacts;
mtx_assert(&ps->ps_mtx, MA_OWNED);
ksiginfo_init(&ksi);
- sigqueue_get(&td->td_sigqueue, sig, &ksi);
+ if (sigqueue_get(&td->td_sigqueue, sig, &ksi) == 0 &&
+ sigqueue_get(&p->p_sigqueue, sig, &ksi) == 0)
+ return;
ksi.ksi_signo = sig;
if (ksi.ksi_code == SI_TIMER)
itimer_accept(p, ksi.ksi_timerid, &ksi);
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 6d60ddb..ccf6479 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -90,6 +90,7 @@ userret(struct thread *td, struct trapframe *frame)
CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
td->td_name);
+#if 0
#ifdef DIAGNOSTIC
/* Check that we called signotify() enough. */
PROC_LOCK(p);
@@ -100,6 +101,7 @@ userret(struct thread *td, struct trapframe *frame)
thread_unlock(td);
PROC_UNLOCK(p);
#endif
+#endif
#ifdef KTRACE
KTRUSERRET(td);
#endif
@@ -218,7 +220,14 @@ ast(struct trapframe *framep)
ktrcsw(0, 1);
#endif
}
- if (flags & TDF_NEEDSIGCHK) {
+
+ /*
+ * Check for signals. Unlocked reads of p_pendingcnt or
+ * p_siglist might cause process-directed signal to be handled
+ * later.
+ */
+ if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
+ !SIGISEMPTY(p->p_siglist)) {
PROC_LOCK(p);
mtx_lock(&p->p_sigacts->ps_mtx);
while ((sig = cursig(td, SIG_STOP_ALLOWED)) != 0)
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 89b40f0..680da40 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -252,9 +252,10 @@ typedef struct sigqueue {
/* Return nonzero if process p has an unmasked pending signal. */
#define SIGPENDING(td) \
- (!SIGISEMPTY((td)->td_siglist) && \
- !sigsetmasked(&(td)->td_siglist, &(td)->td_sigmask))
-
+ ((!SIGISEMPTY((td)->td_siglist) && \
+ !sigsetmasked(&(td)->td_siglist, &(td)->td_sigmask)) || \
+ (!SIGISEMPTY((td)->td_proc->p_siglist) && \
+ !sigsetmasked(&(td)->td_proc->p_siglist, &(td)->td_sigmask)))
/*
* Return the value of the pseudo-expression ((*set & ~*mask) != 0). This
* is an optimized version of SIGISEMPTY() on a temporary variable
diff --git a/tools/regression/sigqueue/sigqtest1/sigqtest1.c b/tools/regression/sigqueue/sigqtest1/sigqtest1.c
index 0f40021..f0201c3 100644
--- a/tools/regression/sigqueue/sigqtest1/sigqtest1.c
+++ b/tools/regression/sigqueue/sigqtest1/sigqtest1.c
@@ -1,12 +1,14 @@
/* $FreeBSD$ */
-#include <signal.h>
-#include <stdio.h>
#include <err.h>
#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
int received;
-void handler(int sig, siginfo_t *si, void *ctx)
+void
+handler(int sig, siginfo_t *si, void *ctx)
{
if (si->si_code != SI_QUEUE)
errx(1, "si_code != SI_QUEUE");
@@ -15,7 +17,8 @@ void handler(int sig, siginfo_t *si, void *ctx)
received++;
}
-int main()
+int
+main()
{
struct sigaction sa;
union sigval val;
diff --git a/tools/regression/sigqueue/sigqtest2/sigqtest2.c b/tools/regression/sigqueue/sigqtest2/sigqtest2.c
index 078ea81..50b579d 100644
--- a/tools/regression/sigqueue/sigqtest2/sigqtest2.c
+++ b/tools/regression/sigqueue/sigqtest2/sigqtest2.c
@@ -1,24 +1,29 @@
/* $FreeBSD$ */
-#include <signal.h>
-#include <stdio.h>
-#include <err.h>
-#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
int stop_received;
int exit_received;
int cont_received;
-void job_handler(int sig, siginfo_t *si, void *ctx)
+void
+job_handler(int sig, siginfo_t *si, void *ctx)
{
int status;
int ret;
if (si->si_code == CLD_STOPPED) {
+ printf("%d: stop received\n", si->si_pid);
stop_received = 1;
kill(si->si_pid, SIGCONT);
} else if (si->si_code == CLD_EXITED) {
+ printf("%d: exit received\n", si->si_pid);
ret = waitpid(si->si_pid, &status, 0);
if (ret == -1)
errx(1, "waitpid");
@@ -26,11 +31,13 @@ void job_handler(int sig, siginfo_t *si, void *ctx)
errx(1, "!WIFEXITED(status)");
exit_received = 1;
} else if (si->si_code == CLD_CONTINUED) {
+ printf("%d: cont received\n", si->si_pid);
cont_received = 1;
}
}
-void job_control_test()
+void
+job_control_test(void)
{
struct sigaction sa;
pid_t pid;
@@ -43,9 +50,12 @@ void job_control_test()
stop_received = 0;
cont_received = 0;
exit_received = 0;
+ fflush(stdout);
pid = fork();
if (pid == 0) {
+ printf("child %d\n", getpid());
kill(getpid(), SIGSTOP);
+ sleep(2);
exit(1);
}
@@ -60,11 +70,13 @@ void job_control_test()
printf("job control test OK.\n");
}
-void rtsig_handler(int sig, siginfo_t *si, void *ctx)
+void
+rtsig_handler(int sig, siginfo_t *si, void *ctx)
{
}
-int main()
+int
+main()
{
struct sigaction sa;
sigset_t set;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-current/attachments/20091005/e9aeadff/attachment.pgp
More information about the freebsd-current
mailing list