git: a2cfe535771d - main - exit1(9): do not deadlock if exit is called due to PT_SC_REMOTERQ

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Mon, 08 Jun 2026 20:14:44 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=a2cfe535771ded3ca8526bae405a5b61f71f1f33

commit a2cfe535771ded3ca8526bae405a5b61f71f1f33
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2026-06-05 20:21:59 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2026-06-08 20:14:20 +0000

    exit1(9): do not deadlock if exit is called due to PT_SC_REMOTERQ
    
    The remote syscall is executed in a context where the debugger owns
    a p_lock hold on the target.  Because of this, exit1() waits for
    p_lock to drop to zero, which never happens.
    
    In that case, postpone the exit1() call to the AST handler, saving
    the provided rval and signo in struct proc.  Mark the asynchronously
    exiting process with the new p_flag P_ASYNC_EXIT.
    
    While p_xexit can be reused, p_xsig may only be set by the actual
    exit1(); otherwise the ptrace mechanism breaks.  Allocate a
    dedicated p_asig field to hold the pending signal instead.
    
    Reviewed by:    markj
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D57482
---
 sys/compat/linux/linux_fork.c |  4 +--
 sys/compat/linux/linux_misc.c |  4 +--
 sys/kern/kern_exec.c          | 13 +++++-----
 sys/kern/kern_exit.c          | 57 ++++++++++++++++++++++++++++++++++++++++---
 sys/kern/kern_fork.c          |  3 ++-
 sys/kern/kern_ucoredump.c     |  4 +--
 sys/sys/proc.h                |  4 ++-
 sys/sys/signalvar.h           |  2 +-
 sys/sys/syscallsubr.h         |  1 +
 9 files changed, 72 insertions(+), 20 deletions(-)

diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c
index 1c9189162a09..24c5d3351623 100644
--- a/sys/compat/linux/linux_fork.c
+++ b/sys/compat/linux/linux_fork.c
@@ -486,8 +486,8 @@ linux_exit(struct thread *td, struct linux_exit_args *args)
 	 * exit via pthread_exit() try thr_exit() first.
 	 */
 	kern_thr_exit(td);
-	exit1(td, args->rval, 0);
-		/* NOTREACHED */
+	kern_exit(td, args->rval, 0);
+	return (0);
 }
 
 int
diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c
index eafba4f4bd19..c863e1db8b02 100644
--- a/sys/compat/linux/linux_misc.c
+++ b/sys/compat/linux/linux_misc.c
@@ -1545,8 +1545,8 @@ linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
 	 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
 	 * as it doesnt occur often.
 	 */
-	exit1(td, args->error_code, 0);
-		/* NOTREACHED */
+	kern_exit(td, args->error_code, 0);
+	return (0);
 }
 
 #define _LINUX_CAPABILITY_VERSION_1  0x19980330
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 4066682cbcc5..8ea00543989e 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -341,11 +341,11 @@ post_execve(struct thread *td, int error, struct vmspace *oldvmspace)
 }
 
 /*
- * kern_execve() has the astonishing property of not always returning to
- * the caller.  If sufficiently bad things happen during the call to
- * do_execve(), it can end up calling exit1(); as a result, callers must
- * avoid doing anything which they might need to undo (e.g., allocating
- * memory).
+ * kern_execve() has the astonishing property of not always returning
+ * to the caller.  If sufficiently bad things happen during the call
+ * to do_execve(), it can end up calling kern_exit(). Callers must avoid
+ * doing anything which they might need to undo (e.g., allocating
+ * memory), unless called from the ptrace(PT_SC_REMOTERQ) handler.
  */
 int
 kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
@@ -1042,8 +1042,7 @@ exec_fail:
 	if (error && imgp->vmspace_destroyed) {
 		/* sorry, no more process anymore. exit gracefully */
 		exec_cleanup(td, oldvmspace);
-		exit1(td, 0, SIGABRT);
-		/* NOT REACHED */
+		kern_exit(td, 0, SIGABRT);
 	}
 
 #ifdef KTRACE
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 18ea3a7bd29d..63e46dcf46f7 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -204,9 +204,8 @@ exit_onexit(struct proc *p)
 int
 sys__exit(struct thread *td, struct _exit_args *uap)
 {
-
-	exit1(td, uap->rval, 0);
-	__unreachable();
+	kern_exit(td, uap->rval, 0);
+	return (0);
 }
 
 void
@@ -216,6 +215,48 @@ proc_set_p2_wexit(struct proc *p)
 	p->p_flag2 |= P2_WEXIT;
 }
 
+static void
+ast_async_exit(struct thread *td, int asts)
+{
+	struct proc *p;
+
+	p = td->td_proc;
+	if ((p->p_flag & P_ASYNC_EXIT) != 0)
+		exit1(td, p->p_xexit, p->p_asig);
+}
+
+/*
+ * A variant of exit1() intended for use in syscall handlers.  Unlike
+ * exit1(), it may defer the exit of the current process to an AST.
+ * This is needed e.g. when _exit(2) is executed on behalf of
+ * ptrace(PT_SC_REMOTERQ), which must do more work after the syscall
+ * handler returns.
+ */
+void
+kern_exit(struct thread *td, int rval, int signo)
+{
+	struct proc *p;
+
+	KASSERT(rval == 0 || signo == 0,
+	    ("kern_exit rv %d sig %d", rval, signo));
+
+	p = td->td_proc;
+	if ((td->td_dbgflags & TDB_SCREMOTEREQ) != 0) {
+		PROC_LOCK(p);
+		p->p_xexit = rval;
+		p->p_asig = signo;
+		p->p_flag |= P_ASYNC_EXIT;
+		ast_sched(td, TDA_ASYNC_EXIT);
+		PROC_UNLOCK(p);
+		return;
+	}
+	if ((p->p_flag & P_ASYNC_EXIT) != 0) {
+		rval = p->p_xexit;
+		signo = p->p_asig;
+	}
+	exit1(td, rval, signo);
+}
+
 /*
  * Exit: deallocate address space and other resources, change proc state to
  * zombie, and unlink proc from allproc and parent's lists.  Save exit status
@@ -231,6 +272,7 @@ exit1(struct thread *td, int rval, int signo)
 
 	mtx_assert(&Giant, MA_NOTOWNED);
 	KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo));
+	MPASS((td->td_dbgflags & TDB_SCREMOTEREQ) == 0);
 	TSPROCEXIT(td->td_proc->p_pid);
 
 	p = td->td_proc;
@@ -828,7 +870,7 @@ out:
 	sbuf_delete(sb);
 	PROC_LOCK(p);
 	sigexit(td, sig);
-	/* NOTREACHED */
+	return (0);
 }
 
 #ifdef COMPAT_43
@@ -1627,3 +1669,10 @@ proc_reparent(struct proc *child, struct proc *parent, bool set_oppid)
 	if (set_oppid)
 		child->p_oppid = parent->p_pid;
 }
+
+static void
+initexit(void *dummy __unused)
+{
+	ast_register(TDA_ASYNC_EXIT, ASTR_ASTF_REQUIRED, 0, ast_async_exit);
+}
+SYSINIT(exit, SI_SUB_EXEC, SI_ORDER_ANY, initexit, NULL);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 75f8413e5f36..2fb4d9d4274d 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -66,6 +66,7 @@
 #include <sys/signalvar.h>
 #include <sys/sx.h>
 #include <sys/syscall.h>
+#include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
@@ -1258,7 +1259,7 @@ fork_return(struct thread *td, struct trapframe *frame)
 	 * If the prison was killed mid-fork, die along with it.
 	 */
 	if (!prison_isalive(td->td_ucred->cr_prison))
-		exit1(td, 0, SIGKILL);
+		kern_exit(td, 0, SIGKILL);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_SYSRET))
diff --git a/sys/kern/kern_ucoredump.c b/sys/kern/kern_ucoredump.c
index d425596b5f24..e08490fbf7b1 100644
--- a/sys/kern/kern_ucoredump.c
+++ b/sys/kern/kern_ucoredump.c
@@ -46,6 +46,7 @@
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/rmlock.h>
+#include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/ucoredump.h>
@@ -197,8 +198,7 @@ sigexit(struct thread *td, int sig)
 			    err != NULL ? err : "");
 	} else
 		PROC_UNLOCK(p);
-	exit1(td, 0, sig);
-	/* NOTREACHED */
+	kern_exit(td, 0, sig);
 }
 
 
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index ed69a09422e2..5f017e6ece2c 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -504,6 +504,7 @@ enum {
 	TDA_MOD3,		/* .. and after */
 	TDA_MOD4,
 	TDA_SCHED_PRIV,
+	TDA_ASYNC_EXIT,
 	TDA_MAX,
 };
 #define	TDAI(tda)		(1U << (tda))
@@ -777,6 +778,7 @@ struct proc {
 
 	TAILQ_HEAD(, kq_timer_cb_data)	p_kqtim_stop;	/* (c) */
 	LIST_ENTRY(proc) p_jaillist;	/* (d) Jail process linkage. */
+	u_int		p_asig;		/* (c) ASYNCEXIT pending signal. */
 };
 
 #define	p_session	p_pgrp->pg_session
@@ -842,7 +844,7 @@ struct proc {
 #define	P_INEXEC	0x04000000	/* Process is in execve(). */
 #define	P_STATCHILD	0x08000000	/* Child process stopped or exited. */
 #define	P_INMEM		0x10000000	/* Loaded into memory, always set. */
-#define	P_UNUSED1	0x20000000	/* --available-- */
+#define	P_ASYNC_EXIT	0x20000000	/* XXX */
 #define	P_UNUSED2	0x40000000	/* --available-- */
 #define	P_PPTRACE	0x80000000	/* PT_TRACEME by vforked child. */
 
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 9a4009d269af..c7b3b620a459 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -399,7 +399,7 @@ int	sigacts_shared(struct sigacts *ps);
 int	sig_ast_checksusp(struct thread *td);
 int	sig_ast_needsigchk(struct thread *td);
 void	sig_drop_caught(struct proc *p);
-void	sigexit(struct thread *td, int sig) __dead2;
+void	sigexit(struct thread *td, int sig);
 int	sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **);
 void	sigfastblock_clear(struct thread *td);
 void	sigfastblock_fetch(struct thread *td);
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
index 8d546428820e..0eb471cc9dde 100644
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -136,6 +136,7 @@ int	kern_cpuset_setid(struct thread *td, cpuwhich_t which,
 int	kern_dup(struct thread *td, u_int mode, int flags, int old, int new);
 int	kern_execve(struct thread *td, struct image_args *args,
 	    struct mac *mac_p, struct vmspace *oldvmspace);
+void	kern_exit(struct thread *, int, int);
 int	kern_extattr_delete_fd(struct thread *td, int fd, int attrnamespace,
 	    const char *attrname);
 int	kern_extattr_delete_path(struct thread *td, const char *path,