git: a2cfe535771d - main - exit1(9): do not deadlock if exit is called due to PT_SC_REMOTERQ
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 08 Jun 2026 20:14:44 UTC
The branch main has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=a2cfe535771ded3ca8526bae405a5b61f71f1f33
commit a2cfe535771ded3ca8526bae405a5b61f71f1f33
Author: Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2026-06-05 20:21:59 +0000
Commit: Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2026-06-08 20:14:20 +0000
exit1(9): do not deadlock if exit is called due to PT_SC_REMOTERQ
The remote syscall is executed in a context where the debugger owns a
p_lock hold on the target. Because of this, exit1() would wait forever
for p_lock to drop to zero.
Postpone the exit1() call to ast in this case, saving the provided rval
and signo in struct proc. Mark the asynchronously exiting process with
the new p_flag P_ASYNC_EXIT.
While p_xexit can be reused to hold rval, p_xsig can only be set by the
actual exit1(), otherwise the ptrace mechanism breaks. Allocate a
dedicated p_asig field for the pending signal.
Reviewed by: markj
Sponsored by: The FreeBSD Foundation
MFC after: 1 week
Differential revision: https://reviews.freebsd.org/D57482
---
sys/compat/linux/linux_fork.c | 4 +--
sys/compat/linux/linux_misc.c | 4 +--
sys/kern/kern_exec.c | 13 +++++-----
sys/kern/kern_exit.c | 57 ++++++++++++++++++++++++++++++++++++++++---
sys/kern/kern_fork.c | 3 ++-
sys/kern/kern_ucoredump.c | 4 +--
sys/sys/proc.h | 4 ++-
sys/sys/signalvar.h | 2 +-
sys/sys/syscallsubr.h | 1 +
9 files changed, 72 insertions(+), 20 deletions(-)
diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c
index 1c9189162a09..24c5d3351623 100644
--- a/sys/compat/linux/linux_fork.c
+++ b/sys/compat/linux/linux_fork.c
@@ -486,8 +486,8 @@ linux_exit(struct thread *td, struct linux_exit_args *args)
* exit via pthread_exit() try thr_exit() first.
*/
kern_thr_exit(td);
- exit1(td, args->rval, 0);
- /* NOTREACHED */
+ kern_exit(td, args->rval, 0);
+ return (0);
}
int
diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c
index eafba4f4bd19..c863e1db8b02 100644
--- a/sys/compat/linux/linux_misc.c
+++ b/sys/compat/linux/linux_misc.c
@@ -1545,8 +1545,8 @@ linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
* SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
* as it doesnt occur often.
*/
- exit1(td, args->error_code, 0);
- /* NOTREACHED */
+ kern_exit(td, args->error_code, 0);
+ return (0);
}
#define _LINUX_CAPABILITY_VERSION_1 0x19980330
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 4066682cbcc5..8ea00543989e 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -341,11 +341,11 @@ post_execve(struct thread *td, int error, struct vmspace *oldvmspace)
}
/*
- * kern_execve() has the astonishing property of not always returning to
- * the caller. If sufficiently bad things happen during the call to
- * do_execve(), it can end up calling exit1(); as a result, callers must
- * avoid doing anything which they might need to undo (e.g., allocating
- * memory).
+ * kern_execve() has the astonishing property of not always returning
+ * to the caller. If sufficiently bad things happen during the call
+ * to do_execve(), it can end up calling kern_exit(). Callers must avoid
+ * doing anything which they might need to undo (e.g., allocating
+ * memory), unless called from the ptrace(PT_SC_REMOTERQ) handler.
*/
int
kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
@@ -1042,8 +1042,7 @@ exec_fail:
if (error && imgp->vmspace_destroyed) {
/* sorry, no more process anymore. exit gracefully */
exec_cleanup(td, oldvmspace);
- exit1(td, 0, SIGABRT);
- /* NOT REACHED */
+ kern_exit(td, 0, SIGABRT);
}
#ifdef KTRACE
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 18ea3a7bd29d..63e46dcf46f7 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -204,9 +204,8 @@ exit_onexit(struct proc *p)
int
sys__exit(struct thread *td, struct _exit_args *uap)
{
-
- exit1(td, uap->rval, 0);
- __unreachable();
+ kern_exit(td, uap->rval, 0);
+ return (0);
}
void
@@ -216,6 +215,48 @@ proc_set_p2_wexit(struct proc *p)
p->p_flag2 |= P2_WEXIT;
}
+static void
+ast_async_exit(struct thread *td, int asts)
+{
+ struct proc *p;
+
+ p = td->td_proc;
+ if ((p->p_flag & P_ASYNC_EXIT) != 0)
+ exit1(td, p->p_xexit, p->p_asig);
+}
+
+/*
+ * A variation on exit1() intended to be used in syscall handlers.
+ * Unlike exit1(), it might delay the current process exit to ast.
+ * This is needed e.g. when _exit(2) is executed due to
+ * ptrace(PT_SC_REMOTERQ), which must do more work after the syscall
+ * handler call.
+ */
+void
+kern_exit(struct thread *td, int rval, int signo)
+{
+ struct proc *p;
+
+ KASSERT(rval == 0 || signo == 0,
+ ("kern_exit rv %d sig %d", rval, signo));
+
+ p = td->td_proc;
+ if ((td->td_dbgflags & TDB_SCREMOTEREQ) != 0) {
+ PROC_LOCK(p);
+ p->p_xexit = rval;
+ p->p_asig = signo;
+ p->p_flag |= P_ASYNC_EXIT;
+ ast_sched(td, TDA_ASYNC_EXIT);
+ PROC_UNLOCK(p);
+ return;
+ }
+ if ((p->p_flag & P_ASYNC_EXIT) != 0) {
+ rval = p->p_xexit;
+ signo = p->p_asig;
+ }
+ exit1(td, rval, signo);
+}
+
/*
* Exit: deallocate address space and other resources, change proc state to
* zombie, and unlink proc from allproc and parent's lists. Save exit status
@@ -231,6 +272,7 @@ exit1(struct thread *td, int rval, int signo)
mtx_assert(&Giant, MA_NOTOWNED);
KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo));
+ MPASS((td->td_dbgflags & TDB_SCREMOTEREQ) == 0);
TSPROCEXIT(td->td_proc->p_pid);
p = td->td_proc;
@@ -828,7 +870,7 @@ out:
sbuf_delete(sb);
PROC_LOCK(p);
sigexit(td, sig);
- /* NOTREACHED */
+ return (0);
}
#ifdef COMPAT_43
@@ -1627,3 +1669,10 @@ proc_reparent(struct proc *child, struct proc *parent, bool set_oppid)
if (set_oppid)
child->p_oppid = parent->p_pid;
}
+
+static void
+initexit(void *dummy __unused)
+{
+ ast_register(TDA_ASYNC_EXIT, ASTR_ASTF_REQUIRED, 0, ast_async_exit);
+}
+SYSINIT(exit, SI_SUB_EXEC, SI_ORDER_ANY, initexit, NULL);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 75f8413e5f36..2fb4d9d4274d 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -66,6 +66,7 @@
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/syscall.h>
+#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
@@ -1258,7 +1259,7 @@ fork_return(struct thread *td, struct trapframe *frame)
* If the prison was killed mid-fork, die along with it.
*/
if (!prison_isalive(td->td_ucred->cr_prison))
- exit1(td, 0, SIGKILL);
+ kern_exit(td, 0, SIGKILL);
#ifdef KTRACE
if (KTRPOINT(td, KTR_SYSRET))
diff --git a/sys/kern/kern_ucoredump.c b/sys/kern/kern_ucoredump.c
index d425596b5f24..e08490fbf7b1 100644
--- a/sys/kern/kern_ucoredump.c
+++ b/sys/kern/kern_ucoredump.c
@@ -46,6 +46,7 @@
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rmlock.h>
+#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/ucoredump.h>
@@ -197,8 +198,7 @@ sigexit(struct thread *td, int sig)
err != NULL ? err : "");
} else
PROC_UNLOCK(p);
- exit1(td, 0, sig);
- /* NOTREACHED */
+ kern_exit(td, 0, sig);
}
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index ed69a09422e2..5f017e6ece2c 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -504,6 +504,7 @@ enum {
TDA_MOD3, /* .. and after */
TDA_MOD4,
TDA_SCHED_PRIV,
+ TDA_ASYNC_EXIT,
TDA_MAX,
};
#define TDAI(tda) (1U << (tda))
@@ -777,6 +778,7 @@ struct proc {
TAILQ_HEAD(, kq_timer_cb_data) p_kqtim_stop; /* (c) */
LIST_ENTRY(proc) p_jaillist; /* (d) Jail process linkage. */
+ u_int p_asig; /* (c) ASYNCEXIT pending signal. */
};
#define p_session p_pgrp->pg_session
@@ -842,7 +844,7 @@ struct proc {
#define P_INEXEC 0x04000000 /* Process is in execve(). */
#define P_STATCHILD 0x08000000 /* Child process stopped or exited. */
#define P_INMEM 0x10000000 /* Loaded into memory, always set. */
-#define P_UNUSED1 0x20000000 /* --available-- */
+#define P_ASYNC_EXIT 0x20000000 /* Exit is postponed to ast. */
#define P_UNUSED2 0x40000000 /* --available-- */
#define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. */
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 9a4009d269af..c7b3b620a459 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -399,7 +399,7 @@ int sigacts_shared(struct sigacts *ps);
int sig_ast_checksusp(struct thread *td);
int sig_ast_needsigchk(struct thread *td);
void sig_drop_caught(struct proc *p);
-void sigexit(struct thread *td, int sig) __dead2;
+void sigexit(struct thread *td, int sig);
int sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **);
void sigfastblock_clear(struct thread *td);
void sigfastblock_fetch(struct thread *td);
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
index 8d546428820e..0eb471cc9dde 100644
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -136,6 +136,7 @@ int kern_cpuset_setid(struct thread *td, cpuwhich_t which,
int kern_dup(struct thread *td, u_int mode, int flags, int old, int new);
int kern_execve(struct thread *td, struct image_args *args,
struct mac *mac_p, struct vmspace *oldvmspace);
+void kern_exit(struct thread *, int, int);
int kern_extattr_delete_fd(struct thread *td, int fd, int attrnamespace,
const char *attrname);
int kern_extattr_delete_path(struct thread *td, const char *path,