suspending threads before devices
Konstantin Belousov
kostikbel at gmail.com
Thu Nov 20 14:28:18 UTC 2014
On Wed, Nov 19, 2014 at 02:08:20PM -0500, John Baldwin wrote:
> On Tuesday, November 18, 2014 08:43:09 PM Warner Losh wrote:
> > On Nov 18, 2014, at 3:21 PM, John Baldwin <jhb at FreeBSD.org> wrote:
> > > I would certainly like a way to quiesce threads before entering the real
> > > suspend path. I would also like to cleanly unmount filesystems during
> > > suspend as well and the thread issue is a prerequisite for that.
> > > However, reusing "stop at boundary" may not be quite correct because you
> > > probably don't want to block suspend because you have an NFS request that
> > > is retrying due to a down NFS server. For NFS I think you want any
> > > threads asleep to just not get a chance to run again until after resume
> > > completes.
> >
> > I'm almost certain you don't want to "unmount" the filesystems. This would
> > invalidate all open file handles and would be mondo-bado, and would only
> > succeed if you forced this issue due to all the open references. Perhaps
> > you're being imprecise.
>
> Yes, there should have been quotes around unmount. I want a
> VFS_SUSPEND/VFS_RESUME that for local filesystems (e.g. UFS) does the on-disk
> equivalent of unmount. (Flush dirty buffers and mark filesystem as clean.)
> You really want this for S4 and even for S3 so you don't have to fsck if
> resume fails. BTW, I think for network or userland filesystems you just punt
> on this (i.e. VFS_SUSPEND is a no-op or best-effort at most)
I think I will use MNT_LOCAL for start. Filesystems would come out clean,
but still marked mounted. We cannot avoid fsck, e.g. due to unlinked opened
files. I think it is fine to guarantee that the volume is in best-possible
persistent state, i.e. no filesystem structure damage could happen if resume
failed, but fsck might be still needed. VFS_SYNC() would do this.
Below is the prototyped patch for global process suspension. There is
debugging sysctl debug.total_stop, which demonstrates the KPI, also I
used it for light testing. It successfully survived suspend/resume of
usermode threads in multiuser with mt processes running.
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 7ae7d4e..19c33b6 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -289,7 +289,7 @@ kern_execve(td, args, mac_p)
args->endp - args->begin_envv);
if (p->p_flag & P_HADTHREADS) {
PROC_LOCK(p);
- if (thread_single(SINGLE_BOUNDARY)) {
+ if (thread_single(p, SINGLE_BOUNDARY)) {
PROC_UNLOCK(p);
exec_free_args(args);
return (ERESTART); /* Try again later. */
@@ -308,9 +308,9 @@ kern_execve(td, args, mac_p)
* force other threads to suicide.
*/
if (error == 0)
- thread_single(SINGLE_EXIT);
+ thread_single(p, SINGLE_EXIT);
else
- thread_single_end();
+ thread_single_end(p, SINGLE_BOUNDARY);
PROC_UNLOCK(p);
}
if ((td->td_pflags & TDP_EXECVMSPC) != 0) {
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 1e4c095..b58e830 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -206,7 +206,7 @@ exit1(struct thread *td, int rv)
* re-check all suspension request, the thread should
* either be suspended there or exit.
*/
- if (!thread_single(SINGLE_EXIT))
+ if (!thread_single(p, SINGLE_EXIT))
break;
/*
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 62f43ba..80d7f82 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -317,7 +317,7 @@ fork_norfproc(struct thread *td, int flags)
if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) &&
(flags & (RFCFDG | RFFDG))) {
PROC_LOCK(p1);
- if (thread_single(SINGLE_BOUNDARY)) {
+ if (thread_single(p1, SINGLE_BOUNDARY)) {
PROC_UNLOCK(p1);
return (ERESTART);
}
@@ -348,7 +348,7 @@ fail:
if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) &&
(flags & (RFCFDG | RFFDG))) {
PROC_LOCK(p1);
- thread_single_end();
+ thread_single_end(p1, SINGLE_BOUNDARY);
PROC_UNLOCK(p1);
}
return (error);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 495139f..9f28ae5 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -2893,3 +2893,114 @@ static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
"Process signal trampoline location");
+
+void
+proc_stop_total(void)
+{
+ struct proc *cp, *p;
+ int r;
+ bool restart, met_stopped, did_stop;
+
+ cp = curproc;
+allproc_loop:
+ sx_xlock(&allproc_lock);
+ met_stopped = did_stop = restart = false;
+ LIST_REMOVE(cp, p_list);
+ LIST_INSERT_HEAD(&allproc, cp, p_list);
+ for (;;) {
+ p = LIST_NEXT(cp, p_list);
+ if (p == NULL)
+ break;
+ LIST_REMOVE(cp, p_list);
+ LIST_INSERT_AFTER(p, cp, p_list);
+ PROC_LOCK(p);
+ if ((p->p_flag & (P_KTHREAD | P_SYSTEM |
+ P_TOTAL_STOP)) != 0) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ if (P_SHOULDSTOP(p)) {
+ /*
+ * Stopped processes are only tolerated when
+ * there is no other processes which might
+ * continue them.
+ */
+ met_stopped = true;
+ PROC_UNLOCK(p);
+ continue;
+ }
+ _PHOLD(p);
+ sx_xunlock(&allproc_lock);
+ r = thread_single(p, SINGLE_TOTAL);
+ if (r != 0)
+ restart = true;
+ else
+ did_stop = true;
+ _PRELE(p);
+ PROC_UNLOCK(p);
+ sx_xlock(&allproc_lock);
+ }
+ sx_xunlock(&allproc_lock);
+ if (restart || (met_stopped && did_stop)) {
+ kern_yield(PRI_USER);
+ goto allproc_loop;
+ }
+}
+
+void
+proc_resume_total(void)
+{
+ struct proc *cp, *p;
+
+ cp = curproc;
+ sx_xlock(&allproc_lock);
+ LIST_REMOVE(cp, p_list);
+ LIST_INSERT_HEAD(&allproc, cp, p_list);
+ for (;;) {
+ p = LIST_NEXT(cp, p_list);
+ if (p == NULL)
+ break;
+ LIST_REMOVE(cp, p_list);
+ LIST_INSERT_AFTER(p, cp, p_list);
+ PROC_LOCK(p);
+ if ((p->p_flag & P_TOTAL_STOP) != 0) {
+ sx_xunlock(&allproc_lock);
+ _PHOLD(p);
+ thread_single_end(p, SINGLE_TOTAL);
+ _PRELE(p);
+ PROC_UNLOCK(p);
+ sx_xlock(&allproc_lock);
+ } else {
+ PROC_UNLOCK(p);
+ }
+ }
+ sx_xunlock(&allproc_lock);
+}
+
+#define TOTAL_STOP_DEBUG 1
+#ifdef TOTAL_STOP_DEBUG
+volatile static int ts_resume;
+
+static int
+sysctl_debug_total_stop(SYSCTL_HANDLER_ARGS)
+{
+ int error, val;
+
+ val = 0;
+ ts_resume = 0;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ if (val != 0) {
+ proc_stop_total();
+ while (ts_resume == 0)
+ ;
+ proc_resume_total();
+ }
+ return (0);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, total_stop, CTLTYPE_INT | CTLFLAG_RW |
+ CTLFLAG_MPSAFE, (void *)&ts_resume, 0, sysctl_debug_total_stop, "I",
+ "");
+#endif
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 5cdc2ce..eb00129 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2919,7 +2919,7 @@ sigexit(td, sig)
* XXX If another thread attempts to single-thread before us
* (e.g. via fork()), we won't get a dump at all.
*/
- if ((sigprop(sig) & SA_CORE) && (thread_single(SINGLE_NO_EXIT) == 0)) {
+ if ((sigprop(sig) & SA_CORE) && thread_single(p, SINGLE_NO_EXIT) == 0) {
p->p_sig = sig;
/*
* Log signals which would cause core dumps
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index ec084ed..92643d5 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE(proc, , , lwp__exit);
+static void thread_suspend_switch_ext(struct thread *td, struct proc *p);
/*
* thread related storage.
@@ -446,7 +447,7 @@ thread_exit(void)
if (p->p_numthreads == p->p_suspcount) {
thread_lock(p->p_singlethread);
wakeup_swapper = thread_unsuspend_one(
- p->p_singlethread);
+ p->p_singlethread, p);
thread_unlock(p->p_singlethread);
if (wakeup_swapper)
kick_proc0();
@@ -575,13 +576,20 @@ calc_remaining(struct proc *p, int mode)
remaining = p->p_numthreads;
else if (mode == SINGLE_BOUNDARY)
remaining = p->p_numthreads - p->p_boundary_count;
- else if (mode == SINGLE_NO_EXIT)
+ else if (mode == SINGLE_NO_EXIT || mode == SINGLE_TOTAL)
remaining = p->p_numthreads - p->p_suspcount;
else
panic("calc_remaining: wrong mode %d", mode);
return (remaining);
}
+static int
+remain_for_mode(int mode)
+{
+
+ return (mode == SINGLE_TOTAL ? 0 : 1);
+}
+
/*
* Enforce single-threading.
*
@@ -596,19 +604,20 @@ calc_remaining(struct proc *p, int mode)
* any sleeping threads that are interruptable. (PCATCH).
*/
int
-thread_single(int mode)
+thread_single(struct proc *p, int mode)
{
struct thread *td;
struct thread *td2;
- struct proc *p;
int remaining, wakeup_swapper;
td = curthread;
- p = td->td_proc;
+ KASSERT((mode == SINGLE_TOTAL && td->td_proc != p) ||
+ (mode != SINGLE_TOTAL && td->td_proc == p),
+ ("mode %d proc %p curproc %p", mode, p, td->td_proc));
mtx_assert(&Giant, MA_NOTOWNED);
PROC_LOCK_ASSERT(p, MA_OWNED);
- if ((p->p_flag & P_HADTHREADS) == 0)
+ if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_TOTAL)
return (0);
/* Is someone already single threading? */
@@ -625,11 +634,13 @@ thread_single(int mode)
else
p->p_flag &= ~P_SINGLE_BOUNDARY;
}
+ if (mode == SINGLE_TOTAL)
+ p->p_flag |= P_TOTAL_STOP;
p->p_flag |= P_STOPPED_SINGLE;
PROC_SLOCK(p);
p->p_singlethread = td;
remaining = calc_remaining(p, mode);
- while (remaining != 1) {
+ while (remaining != remain_for_mode(mode)) {
if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
goto stopme;
wakeup_swapper = 0;
@@ -643,7 +654,8 @@ thread_single(int mode)
case SINGLE_EXIT:
if (TD_IS_SUSPENDED(td2))
wakeup_swapper |=
- thread_unsuspend_one(td2);
+ thread_unsuspend_one(td2,
+ p);
if (TD_ON_SLEEPQ(td2) &&
(td2->td_flags & TDF_SINTR))
wakeup_swapper |=
@@ -653,17 +665,20 @@ thread_single(int mode)
if (TD_IS_SUSPENDED(td2) &&
!(td2->td_flags & TDF_BOUNDARY))
wakeup_swapper |=
- thread_unsuspend_one(td2);
+ thread_unsuspend_one(td2,
+ p);
if (TD_ON_SLEEPQ(td2) &&
(td2->td_flags & TDF_SINTR))
wakeup_swapper |=
sleepq_abort(td2, ERESTART);
break;
+ case SINGLE_TOTAL:
case SINGLE_NO_EXIT:
if (TD_IS_SUSPENDED(td2) &&
!(td2->td_flags & TDF_BOUNDARY))
wakeup_swapper |=
- thread_unsuspend_one(td2);
+ thread_unsuspend_one(td2,
+ p);
if (TD_ON_SLEEPQ(td2) &&
(td2->td_flags & TDF_SINTR))
wakeup_swapper |=
@@ -687,7 +702,7 @@ thread_single(int mode)
/*
* Maybe we suspended some threads.. was it enough?
*/
- if (remaining == 1)
+ if (remaining == remain_for_mode(mode))
break;
stopme:
@@ -695,7 +710,10 @@ stopme:
* Wake us up when everyone else has suspended.
* In the mean time we suspend as well.
*/
- thread_suspend_switch(td);
+ if (mode == SINGLE_TOTAL)
+ thread_suspend_switch_ext(td, p);
+ else
+ thread_suspend_switch(td);
remaining = calc_remaining(p, mode);
}
if (mode == SINGLE_EXIT) {
@@ -821,7 +839,7 @@ thread_suspend_check(int return_instead)
if (p->p_numthreads == p->p_suspcount + 1) {
thread_lock(p->p_singlethread);
wakeup_swapper =
- thread_unsuspend_one(p->p_singlethread);
+ thread_unsuspend_one(p->p_singlethread, p);
thread_unlock(p->p_singlethread);
if (wakeup_swapper)
kick_proc0();
@@ -882,6 +900,27 @@ thread_suspend_switch(struct thread *td)
PROC_SLOCK(p);
}
+static void
+thread_suspend_switch_ext(struct thread *td, struct proc *p)
+{
+
+ KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ PROC_SLOCK_ASSERT(p, MA_OWNED);
+ PROC_UNLOCK(p);
+ thread_lock(td);
+ td->td_flags &= ~TDF_NEEDSUSPCHK;
+ TD_SET_SUSPENDED(td);
+ sched_sleep(td, 0);
+ PROC_SUNLOCK(p);
+ DROP_GIANT();
+ mi_switch(SW_VOL | SWT_SUSPEND, NULL);
+ thread_unlock(td);
+ PICKUP_GIANT();
+ PROC_LOCK(p);
+ PROC_SLOCK(p);
+}
+
void
thread_suspend_one(struct thread *td)
{
@@ -897,15 +936,16 @@ thread_suspend_one(struct thread *td)
}
int
-thread_unsuspend_one(struct thread *td)
+thread_unsuspend_one(struct thread *td, struct proc *p)
{
- struct proc *p = td->td_proc;
- PROC_SLOCK_ASSERT(p, MA_OWNED);
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
TD_CLR_SUSPENDED(td);
- p->p_suspcount--;
+ if (td->td_proc == p) {
+ PROC_SLOCK_ASSERT(p, MA_OWNED);
+ p->p_suspcount--;
+ }
return (setrunnable(td));
}
@@ -925,7 +965,7 @@ thread_unsuspend(struct proc *p)
FOREACH_THREAD_IN_PROC(p, td) {
thread_lock(td);
if (TD_IS_SUSPENDED(td)) {
- wakeup_swapper |= thread_unsuspend_one(td);
+ wakeup_swapper |= thread_unsuspend_one(td, p);
}
thread_unlock(td);
}
@@ -936,9 +976,12 @@ thread_unsuspend(struct proc *p)
* threading request. Now we've downgraded to single-threaded,
* let it continue.
*/
- thread_lock(p->p_singlethread);
- wakeup_swapper = thread_unsuspend_one(p->p_singlethread);
- thread_unlock(p->p_singlethread);
+ if (p->p_singlethread->td_proc == p) {
+ thread_lock(p->p_singlethread);
+ wakeup_swapper = thread_unsuspend_one(
+ p->p_singlethread, p);
+ thread_unlock(p->p_singlethread);
+ }
}
if (wakeup_swapper)
kick_proc0();
@@ -948,16 +991,14 @@ thread_unsuspend(struct proc *p)
* End the single threading mode..
*/
void
-thread_single_end(void)
+thread_single_end(struct proc *p, int mode)
{
struct thread *td;
- struct proc *p;
int wakeup_swapper;
- td = curthread;
- p = td->td_proc;
PROC_LOCK_ASSERT(p, MA_OWNED);
- p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
+ p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY |
+ P_TOTAL_STOP);
PROC_SLOCK(p);
p->p_singlethread = NULL;
wakeup_swapper = 0;
@@ -967,11 +1008,11 @@ thread_single_end(void)
* on the process. The single threader must be allowed
* to continue however as this is a bad place to stop.
*/
- if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
+ if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) {
FOREACH_THREAD_IN_PROC(p, td) {
thread_lock(td);
if (TD_IS_SUSPENDED(td)) {
- wakeup_swapper |= thread_unsuspend_one(td);
+ wakeup_swapper |= thread_unsuspend_one(td, p);
}
thread_unlock(td);
}
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index fac0915..161223b 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -635,7 +635,7 @@ struct proc {
#define P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */
#define P_HWPMC 0x800000 /* Process is using HWPMCs */
#define P_JAILED 0x1000000 /* Process is in jail. */
-#define P_UNUSED1 0x2000000
+#define P_TOTAL_STOP 0x2000000 /* Stopped in proc_stop_total. */
#define P_INEXEC 0x4000000 /* Process is in execve(). */
#define P_STATCHILD 0x8000000 /* Child process stopped or exited. */
#define P_INMEM 0x10000000 /* Loaded into memory. */
@@ -696,6 +696,7 @@ struct proc {
#define SINGLE_NO_EXIT 0
#define SINGLE_EXIT 1
#define SINGLE_BOUNDARY 2
+#define SINGLE_TOTAL 3
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_PARGS);
@@ -899,6 +900,8 @@ struct proc *proc_realparent(struct proc *child);
void proc_reap(struct thread *td, struct proc *p, int *status, int options);
void proc_reparent(struct proc *child, struct proc *newparent);
struct pstats *pstats_alloc(void);
+void proc_stop_total(void);
+void proc_resume_total(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
void pstats_free(struct pstats *ps);
int securelevel_ge(struct ucred *cr, int level);
@@ -945,8 +948,8 @@ void thread_exit(void) __dead2;
void thread_free(struct thread *td);
void thread_link(struct thread *td, struct proc *p);
void thread_reap(void);
-int thread_single(int how);
-void thread_single_end(void);
+int thread_single(struct proc *p, int how);
+void thread_single_end(struct proc *p, int how);
void thread_stash(struct thread *td);
void thread_stopped(struct proc *p);
void childproc_stopped(struct proc *child, int reason);
@@ -957,7 +960,7 @@ void thread_suspend_switch(struct thread *);
void thread_suspend_one(struct thread *td);
void thread_unlink(struct thread *td);
void thread_unsuspend(struct proc *p);
-int thread_unsuspend_one(struct thread *td);
+int thread_unsuspend_one(struct thread *td, struct proc *p);
void thread_wait(struct proc *p);
struct thread *thread_find(struct proc *p, lwpid_t tid);
More information about the freebsd-arch
mailing list