suspending threads before devices

Konstantin Belousov kostikbel at gmail.com
Thu Nov 20 14:28:18 UTC 2014


On Wed, Nov 19, 2014 at 02:08:20PM -0500, John Baldwin wrote:
> On Tuesday, November 18, 2014 08:43:09 PM Warner Losh wrote:
> > On Nov 18, 2014, at 3:21 PM, John Baldwin <jhb at FreeBSD.org> wrote:
> > > I would certainly like a way to quiesce threads before entering the real
> > > suspend path.  I would also like to cleanly unmount filesystems during
> > > suspend as well and the thread issue is a prerequisite for that. 
> > > However, reusing "stop at boundary" may not be quite correct because you
> > > probably don't want to block suspend because you have an NFS request that
> > > is retrying due to a down NFS server.  For NFS I think you want any
> > > threads asleep to just not get a chance to run again until after resume
> > > completes.
> > 
> > I'm almost certain you don't want to "unmount" the filesystems. This would
> > invalidate all open file handles and would be mondo-bado, and would only
> > succeed if you forced this issue due to all the open references. Perhaps
> > you're being imprecise.
> 
> Yes, there should have been quotes around unmount.  I want a 
> VFS_SUSPEND/VFS_RESUME that for local filesystems (e.g. UFS) does the on-disk 
> equivalent of unmount.  (Flush dirty buffers and mark filesystem as clean.)  
> You really want this for S4 and even for S3 so you don't have to fsck if 
> resume fails.  BTW, I think for network or userland filesystems you just punt 
> on this (i.e. VFS_SUSPEND is a no-op or best-effort at most)

I think I will use MNT_LOCAL for a start.  Filesystems would come out clean,
but still marked mounted.  We cannot avoid fsck entirely, e.g. due to unlinked
but still-open files.  I think it is fine to guarantee that the volume is in
the best possible persistent state, i.e. no filesystem structure damage can
happen if resume fails, but fsck might still be needed.  VFS_SYNC() would do
this.

Below is a prototype patch for global process suspension.  There is a
debugging sysctl, debug.total_stop, which demonstrates the KPI; I also used it
for light testing.  It successfully survived suspend/resume of usermode
threads in multiuser mode with multithreaded processes running.
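
For context, the intended consumer of the KPI is the suspend path itself:
park userland first, then suspend devices, and undo both on the way back.
The sketch below is hypothetical and not part of the patch; the function
name, its placement in the ACPI/suspend code, and the error handling are all
placeholders.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/proc.h>

static int
example_enter_sleep(void)
{
	int error;

	proc_stop_total();			/* park all user threads */
	error = DEVICE_SUSPEND(root_bus);	/* then suspend devices */
	if (error == 0) {
		/* ... enter the hardware sleep state, later wake up ... */
		DEVICE_RESUME(root_bus);
	}
	proc_resume_total();			/* let user threads run again */
	return (error);
}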

diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 7ae7d4e..19c33b6 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -289,7 +289,7 @@ kern_execve(td, args, mac_p)
 	    args->endp - args->begin_envv);
 	if (p->p_flag & P_HADTHREADS) {
 		PROC_LOCK(p);
-		if (thread_single(SINGLE_BOUNDARY)) {
+		if (thread_single(p, SINGLE_BOUNDARY)) {
 			PROC_UNLOCK(p);
 	       		exec_free_args(args);
 			return (ERESTART);	/* Try again later. */
@@ -308,9 +308,9 @@ kern_execve(td, args, mac_p)
 		 * force other threads to suicide.
 		 */
 		if (error == 0)
-			thread_single(SINGLE_EXIT);
+			thread_single(p, SINGLE_EXIT);
 		else
-			thread_single_end();
+			thread_single_end(p, SINGLE_BOUNDARY);
 		PROC_UNLOCK(p);
 	}
 	if ((td->td_pflags & TDP_EXECVMSPC) != 0) {
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 1e4c095..b58e830 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -206,7 +206,7 @@ exit1(struct thread *td, int rv)
 		 * re-check all suspension request, the thread should
 		 * either be suspended there or exit.
 		 */
-		if (!thread_single(SINGLE_EXIT))
+		if (!thread_single(p, SINGLE_EXIT))
 			break;
 
 		/*
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 62f43ba..80d7f82 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -317,7 +317,7 @@ fork_norfproc(struct thread *td, int flags)
 	if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) &&
 	    (flags & (RFCFDG | RFFDG))) {
 		PROC_LOCK(p1);
-		if (thread_single(SINGLE_BOUNDARY)) {
+		if (thread_single(p1, SINGLE_BOUNDARY)) {
 			PROC_UNLOCK(p1);
 			return (ERESTART);
 		}
@@ -348,7 +348,7 @@ fail:
 	if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) &&
 	    (flags & (RFCFDG | RFFDG))) {
 		PROC_LOCK(p1);
-		thread_single_end();
+		thread_single_end(p1, SINGLE_BOUNDARY);
 		PROC_UNLOCK(p1);
 	}
 	return (error);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 495139f..9f28ae5 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -2893,3 +2893,114 @@ static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
 	"Process signal trampoline location");
+
+void
+proc_stop_total(void)
+{
+	struct proc *cp, *p;
+	int r;
+	bool restart, met_stopped, did_stop;
+
+	cp = curproc;
+allproc_loop:
+	sx_xlock(&allproc_lock);
+	met_stopped = did_stop = restart = false;
+	LIST_REMOVE(cp, p_list);
+	LIST_INSERT_HEAD(&allproc, cp, p_list);
+	for (;;) {
+		p = LIST_NEXT(cp, p_list);
+		if (p == NULL)
+			break;
+		LIST_REMOVE(cp, p_list);
+		LIST_INSERT_AFTER(p, cp, p_list);
+		PROC_LOCK(p);
+		if ((p->p_flag & (P_KTHREAD | P_SYSTEM |
+		    P_TOTAL_STOP)) != 0) {
+			PROC_UNLOCK(p);
+			continue;
+		}
+		if (P_SHOULDSTOP(p)) {
+			/*
+			 * Stopped processes are only tolerated when
+			 * there are no other processes which might
+			 * continue them.
+			 */
+			met_stopped = true;
+			PROC_UNLOCK(p);
+			continue;
+		}
+		_PHOLD(p);
+		sx_xunlock(&allproc_lock);
+		r = thread_single(p, SINGLE_TOTAL);
+		if (r != 0)
+			restart = true;
+		else
+			did_stop = true;
+		_PRELE(p);
+		PROC_UNLOCK(p);
+		sx_xlock(&allproc_lock);
+	}
+	sx_xunlock(&allproc_lock);
+	if (restart || (met_stopped && did_stop)) {
+		kern_yield(PRI_USER);
+		goto allproc_loop;
+	}
+}
+
+void
+proc_resume_total(void)
+{
+	struct proc *cp, *p;
+
+	cp = curproc;
+	sx_xlock(&allproc_lock);
+	LIST_REMOVE(cp, p_list);
+	LIST_INSERT_HEAD(&allproc, cp, p_list);
+	for (;;) {
+		p = LIST_NEXT(cp, p_list);
+		if (p == NULL)
+			break;
+		LIST_REMOVE(cp, p_list);
+		LIST_INSERT_AFTER(p, cp, p_list);
+		PROC_LOCK(p);
+		if ((p->p_flag & P_TOTAL_STOP) != 0) {
+			sx_xunlock(&allproc_lock);
+			_PHOLD(p);
+			thread_single_end(p, SINGLE_TOTAL);
+			_PRELE(p);
+			PROC_UNLOCK(p);
+			sx_xlock(&allproc_lock);
+		} else {
+			PROC_UNLOCK(p);
+		}
+	}
+	sx_xunlock(&allproc_lock);
+}
+
+#define	TOTAL_STOP_DEBUG	1
+#ifdef TOTAL_STOP_DEBUG
+volatile static int ts_resume;
+
+static int
+sysctl_debug_total_stop(SYSCTL_HANDLER_ARGS)
+{
+	int error, val;
+
+	val = 0;
+	ts_resume = 0;
+	error = sysctl_handle_int(oidp, &val, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (val != 0) {
+		proc_stop_total();
+		while (ts_resume == 0)
+			;
+		proc_resume_total();
+	}
+	return (0);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, total_stop, CTLTYPE_INT | CTLFLAG_RW |
+    CTLFLAG_MPSAFE, (void *)&ts_resume, 0, sysctl_debug_total_stop, "I",
+    "");
+#endif
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 5cdc2ce..eb00129 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2919,7 +2919,7 @@ sigexit(td, sig)
 	 * XXX If another thread attempts to single-thread before us
 	 *     (e.g. via fork()), we won't get a dump at all.
 	 */
-	if ((sigprop(sig) & SA_CORE) && (thread_single(SINGLE_NO_EXIT) == 0)) {
+	if ((sigprop(sig) & SA_CORE) && thread_single(p, SINGLE_NO_EXIT) == 0) {
 		p->p_sig = sig;
 		/*
 		 * Log signals which would cause core dumps
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index ec084ed..92643d5 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
 SDT_PROVIDER_DECLARE(proc);
 SDT_PROBE_DEFINE(proc, , , lwp__exit);
 
+static void thread_suspend_switch_ext(struct thread *td, struct proc *p);
 
 /*
  * thread related storage.
@@ -446,7 +447,7 @@ thread_exit(void)
 				if (p->p_numthreads == p->p_suspcount) {
 					thread_lock(p->p_singlethread);
 					wakeup_swapper = thread_unsuspend_one(
-						p->p_singlethread);
+						p->p_singlethread, p);
 					thread_unlock(p->p_singlethread);
 					if (wakeup_swapper)
 						kick_proc0();
@@ -575,13 +576,20 @@ calc_remaining(struct proc *p, int mode)
 		remaining = p->p_numthreads;
 	else if (mode == SINGLE_BOUNDARY)
 		remaining = p->p_numthreads - p->p_boundary_count;
-	else if (mode == SINGLE_NO_EXIT)
+	else if (mode == SINGLE_NO_EXIT || mode == SINGLE_TOTAL)
 		remaining = p->p_numthreads - p->p_suspcount;
 	else
 		panic("calc_remaining: wrong mode %d", mode);
 	return (remaining);
 }
 
+static int
+remain_for_mode(int mode)
+{
+
+	return (mode == SINGLE_TOTAL ? 0 : 1);
+}
+
 /*
  * Enforce single-threading.
  *
@@ -596,19 +604,20 @@ calc_remaining(struct proc *p, int mode)
  * any sleeping threads that are interruptable. (PCATCH).
  */
 int
-thread_single(int mode)
+thread_single(struct proc *p, int mode)
 {
 	struct thread *td;
 	struct thread *td2;
-	struct proc *p;
 	int remaining, wakeup_swapper;
 
 	td = curthread;
-	p = td->td_proc;
+	KASSERT((mode == SINGLE_TOTAL && td->td_proc != p) ||
+	    (mode != SINGLE_TOTAL && td->td_proc == p),
+	    ("mode %d proc %p curproc %p", mode, p, td->td_proc));
 	mtx_assert(&Giant, MA_NOTOWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
-	if ((p->p_flag & P_HADTHREADS) == 0)
+	if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_TOTAL)
 		return (0);
 
 	/* Is someone already single threading? */
@@ -625,11 +634,13 @@ thread_single(int mode)
 		else
 			p->p_flag &= ~P_SINGLE_BOUNDARY;
 	}
+	if (mode == SINGLE_TOTAL)
+		p->p_flag |= P_TOTAL_STOP;
 	p->p_flag |= P_STOPPED_SINGLE;
 	PROC_SLOCK(p);
 	p->p_singlethread = td;
 	remaining = calc_remaining(p, mode);
-	while (remaining != 1) {
+	while (remaining != remain_for_mode(mode)) {
 		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
 			goto stopme;
 		wakeup_swapper = 0;
@@ -643,7 +654,8 @@ thread_single(int mode)
 				case SINGLE_EXIT:
 					if (TD_IS_SUSPENDED(td2))
 						wakeup_swapper |=
-						    thread_unsuspend_one(td2);
+						    thread_unsuspend_one(td2,
+						    p);
 					if (TD_ON_SLEEPQ(td2) &&
 					    (td2->td_flags & TDF_SINTR))
 						wakeup_swapper |=
@@ -653,17 +665,20 @@ thread_single(int mode)
 					if (TD_IS_SUSPENDED(td2) &&
 					    !(td2->td_flags & TDF_BOUNDARY))
 						wakeup_swapper |=
-						    thread_unsuspend_one(td2);
+						    thread_unsuspend_one(td2,
+						    p);
 					if (TD_ON_SLEEPQ(td2) &&
 					    (td2->td_flags & TDF_SINTR))
 						wakeup_swapper |=
 						    sleepq_abort(td2, ERESTART);
 					break;
+				case SINGLE_TOTAL:
 				case SINGLE_NO_EXIT:
 					if (TD_IS_SUSPENDED(td2) &&
 					    !(td2->td_flags & TDF_BOUNDARY))
 						wakeup_swapper |=
-						    thread_unsuspend_one(td2);
+						    thread_unsuspend_one(td2,
+						    p);
 					if (TD_ON_SLEEPQ(td2) &&
 					    (td2->td_flags & TDF_SINTR))
 						wakeup_swapper |=
@@ -687,7 +702,7 @@ thread_single(int mode)
 		/*
 		 * Maybe we suspended some threads.. was it enough?
 		 */
-		if (remaining == 1)
+		if (remaining == remain_for_mode(mode))
 			break;
 
 stopme:
@@ -695,7 +710,10 @@ stopme:
 		 * Wake us up when everyone else has suspended.
 		 * In the mean time we suspend as well.
 		 */
-		thread_suspend_switch(td);
+		if (mode == SINGLE_TOTAL)
+			thread_suspend_switch_ext(td, p);
+		else
+			thread_suspend_switch(td);
 		remaining = calc_remaining(p, mode);
 	}
 	if (mode == SINGLE_EXIT) {
@@ -821,7 +839,7 @@ thread_suspend_check(int return_instead)
 			if (p->p_numthreads == p->p_suspcount + 1) {
 				thread_lock(p->p_singlethread);
 				wakeup_swapper =
-				    thread_unsuspend_one(p->p_singlethread);
+				    thread_unsuspend_one(p->p_singlethread, p);
 				thread_unlock(p->p_singlethread);
 				if (wakeup_swapper)
 					kick_proc0();
@@ -882,6 +900,27 @@ thread_suspend_switch(struct thread *td)
 	PROC_SLOCK(p);
 }
 
+static void
+thread_suspend_switch_ext(struct thread *td, struct proc *p)
+{
+
+	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	PROC_SLOCK_ASSERT(p, MA_OWNED);
+	PROC_UNLOCK(p);
+	thread_lock(td);
+	td->td_flags &= ~TDF_NEEDSUSPCHK;
+	TD_SET_SUSPENDED(td);
+	sched_sleep(td, 0);
+	PROC_SUNLOCK(p);
+	DROP_GIANT();
+	mi_switch(SW_VOL | SWT_SUSPEND, NULL);
+	thread_unlock(td);
+	PICKUP_GIANT();
+	PROC_LOCK(p);
+	PROC_SLOCK(p);
+}
+
 void
 thread_suspend_one(struct thread *td)
 {
@@ -897,15 +936,16 @@ thread_suspend_one(struct thread *td)
 }
 
 int
-thread_unsuspend_one(struct thread *td)
+thread_unsuspend_one(struct thread *td, struct proc *p)
 {
-	struct proc *p = td->td_proc;
 
-	PROC_SLOCK_ASSERT(p, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
 	TD_CLR_SUSPENDED(td);
-	p->p_suspcount--;
+	if (td->td_proc == p) {
+		PROC_SLOCK_ASSERT(p, MA_OWNED);
+		p->p_suspcount--;
+	}
 	return (setrunnable(td));
 }
 
@@ -925,7 +965,7 @@ thread_unsuspend(struct proc *p)
                 FOREACH_THREAD_IN_PROC(p, td) {
 			thread_lock(td);
 			if (TD_IS_SUSPENDED(td)) {
-				wakeup_swapper |= thread_unsuspend_one(td);
+				wakeup_swapper |= thread_unsuspend_one(td, p);
 			}
 			thread_unlock(td);
 		}
@@ -936,9 +976,12 @@ thread_unsuspend(struct proc *p)
 		 * threading request. Now we've downgraded to single-threaded,
 		 * let it continue.
 		 */
-		thread_lock(p->p_singlethread);
-		wakeup_swapper = thread_unsuspend_one(p->p_singlethread);
-		thread_unlock(p->p_singlethread);
+		if (p->p_singlethread->td_proc == p) {
+			thread_lock(p->p_singlethread);
+			wakeup_swapper = thread_unsuspend_one(
+			    p->p_singlethread, p);
+			thread_unlock(p->p_singlethread);
+		}
 	}
 	if (wakeup_swapper)
 		kick_proc0();
@@ -948,16 +991,14 @@ thread_unsuspend(struct proc *p)
  * End the single threading mode..
  */
 void
-thread_single_end(void)
+thread_single_end(struct proc *p, int mode)
 {
 	struct thread *td;
-	struct proc *p;
 	int wakeup_swapper;
 
-	td = curthread;
-	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
-	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
+	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY |
+	    P_TOTAL_STOP);
 	PROC_SLOCK(p);
 	p->p_singlethread = NULL;
 	wakeup_swapper = 0;
@@ -967,11 +1008,11 @@ thread_single_end(void)
 	 * on the process. The single threader must be allowed
 	 * to continue however as this is a bad place to stop.
 	 */
-	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
+	if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) {
                 FOREACH_THREAD_IN_PROC(p, td) {
 			thread_lock(td);
 			if (TD_IS_SUSPENDED(td)) {
-				wakeup_swapper |= thread_unsuspend_one(td);
+				wakeup_swapper |= thread_unsuspend_one(td, p);
 			}
 			thread_unlock(td);
 		}
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index fac0915..161223b 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -635,7 +635,7 @@ struct proc {
 #define	P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */
 #define	P_HWPMC		0x800000 /* Process is using HWPMCs */
 #define	P_JAILED	0x1000000 /* Process is in jail. */
-#define	P_UNUSED1	0x2000000
+#define	P_TOTAL_STOP	0x2000000 /* Stopped in proc_stop_total. */
 #define	P_INEXEC	0x4000000 /* Process is in execve(). */
 #define	P_STATCHILD	0x8000000 /* Child process stopped or exited. */
 #define	P_INMEM		0x10000000 /* Loaded into memory. */
@@ -696,6 +696,7 @@ struct proc {
 #define	SINGLE_NO_EXIT	0
 #define	SINGLE_EXIT	1
 #define	SINGLE_BOUNDARY	2
+#define	SINGLE_TOTAL	3
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_PARGS);
@@ -899,6 +900,8 @@ struct proc *proc_realparent(struct proc *child);
 void	proc_reap(struct thread *td, struct proc *p, int *status, int options);
 void	proc_reparent(struct proc *child, struct proc *newparent);
 struct	pstats *pstats_alloc(void);
+void	proc_stop_total(void);
+void	proc_resume_total(void);
 void	pstats_fork(struct pstats *src, struct pstats *dst);
 void	pstats_free(struct pstats *ps);
 int	securelevel_ge(struct ucred *cr, int level);
@@ -945,8 +948,8 @@ void	thread_exit(void) __dead2;
 void	thread_free(struct thread *td);
 void	thread_link(struct thread *td, struct proc *p);
 void	thread_reap(void);
-int	thread_single(int how);
-void	thread_single_end(void);
+int	thread_single(struct proc *p, int how);
+void	thread_single_end(struct proc *p, int how);
 void	thread_stash(struct thread *td);
 void	thread_stopped(struct proc *p);
 void	childproc_stopped(struct proc *child, int reason);
@@ -957,7 +960,7 @@ void	thread_suspend_switch(struct thread *);
 void	thread_suspend_one(struct thread *td);
 void	thread_unlink(struct thread *td);
 void	thread_unsuspend(struct proc *p);
-int	thread_unsuspend_one(struct thread *td);
+int	thread_unsuspend_one(struct thread *td, struct proc *p);
 void	thread_wait(struct proc *p);
 struct thread	*thread_find(struct proc *p, lwpid_t tid);
 

