git: e1a84b7708c2 - main - execve_block(): a mechanism for mutual exclusion with execve() on the process

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Sun, 21 Jun 2026 11:47:58 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=e1a84b7708c2514769625c2af6c5034694013b6a

commit e1a84b7708c2514769625c2af6c5034694013b6a
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2026-05-26 17:36:20 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2026-06-21 11:46:52 +0000

    execve_block(): a mechanism for mutual exclusion with execve() on the process
    
    A consumer of execve_block(9) is synchronized with the execution of
    the execve(2) family of syscalls, ensuring that the execve_block region
    is mutually exclusive with execve processing. Either execve_block() or
    execve() sleeps until the other finishes.
    
    Reviewed by:    markj
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D57497
---
 sys/kern/kern_exec.c   | 69 ++++++++++++++++++++++++++++++++++++++++++++++++--
 sys/kern/kern_exit.c   |  1 +
 sys/kern/kern_thread.c |  4 +--
 sys/sys/imgact.h       |  3 +++
 sys/sys/proc.h         |  3 ++-
 5 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index de6d8b08cec3..7c25dc60960d 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -385,6 +385,62 @@ execve_nosetid(struct image_params *imgp)
 	}
 }
 
+/*
+ * Block execve() on p.  Returns true with the process lock still held
+ * after bumping p_execblock.  Returns false if another process p has
+ * P_INEXEC set: the function slept and the lock was dropped (PDROP).
+ */
+bool
+execve_block(struct thread *td, struct proc *p)
+{
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	MPASS(td == curthread);
+	MPASS(p != td->td_proc || (p->p_flag & P_INEXEC) == 0);
+
+	if (p != td->td_proc && (p->p_flag & P_INEXEC) != 0) {
+		p->p_flag |= P_INEXEC_WAIT;	/* request a wakeup */
+		msleep(&p->p_execblock, &p->p_mtx, PDROP, "inexec", 0);
+		return (false);
+	}
+	MPASS(p->p_execblock < UINT_MAX);	/* counter must not wrap */
+	p->p_execblock++;
+	return (true);
+}
+
+/*
+ * Retry execve_block() until it succeeds, returning with the lock held.
+ * The lock may be dropped in between; callers must re-check invariants.
+ */
+void
+execve_block_wait(struct thread *td, struct proc *p)
+{
+	bool first;
+
+	PROC_ASSERT_HELD(p);
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	for (first = true;; first = false) {
+		if (!first)
+			PROC_LOCK(p);	/* failed try dropped the lock */
+		if (execve_block(td, p))
+			return;
+	}
+}
+
+void
+execve_unblock(struct thread *td, struct proc *p)
+{
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	MPASS(td == curthread);
+	/* Drop one block; the last one out wakes sleepers on p_execblock. */
+	MPASS(p->p_execblock > 0);	/* must pair with execve_block() */
+	p->p_execblock--;
+	if (p->p_execblock == 0 && (p->p_flag & P_INEXEC_WAIT) != 0) {
+		p->p_flag &= ~P_INEXEC_WAIT;	/* wakeup request consumed */
+		wakeup(&p->p_execblock);
+	}
+}
+
 /*
  * In-kernel implementation of execve().  All arguments are assumed to be
  * userspace pointers from the passed thread.
@@ -440,6 +496,10 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
 	PROC_LOCK(p);
 	KASSERT((p->p_flag & P_INEXEC) == 0,
 	    ("%s(): process already has P_INEXEC flag", __func__));
+	while (p->p_execblock != 0) {
+		p->p_flag |= P_INEXEC_WAIT;
+		msleep(&p->p_execblock, &p->p_mtx, 0, "exeblk", 0);
+	}
 	p->p_flag |= P_INEXEC;
 	PROC_UNLOCK(p);
 
@@ -911,7 +971,10 @@ interpret:
 	 * as we're now a bona fide freshly-execed process.
 	 */
 	KNOTE_LOCKED(p->p_klist, NOTE_EXEC);
-	p->p_flag &= ~P_INEXEC;
+	MPASS(p->p_execblock == 0);
+	if ((p->p_flag & P_INEXEC_WAIT) != 0)
+		wakeup(&p->p_execblock);
+	p->p_flag &= ~(P_INEXEC | P_INEXEC_WAIT);
 
 	/* clear "fork but no exec" flag, as we _are_ execing */
 	p->p_acflag &= ~AFORK;
@@ -1007,7 +1070,9 @@ exec_fail_dealloc:
 exec_fail:
 		/* we're done here, clear P_INEXEC */
 		PROC_LOCK(p);
-		p->p_flag &= ~P_INEXEC;
+		if ((p->p_flag & P_INEXEC_WAIT) != 0)
+			wakeup(&p->p_execblock);
+		p->p_flag &= ~(P_INEXEC | P_INEXEC_WAIT);
 		PROC_UNLOCK(p);
 
 		SDT_PROBE1(proc, , , exec__failure, error);
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 2f94386c9101..8f9007752b3b 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -365,6 +365,7 @@ exit1(struct thread *td, int rval, int signo)
 	while (p->p_lock > 0)
 		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);
 
+	MPASS(p->p_execblock == 0);
 	PROC_UNLOCK(p);
 	/* Drain the limit callout while we don't have the proc locked */
 	callout_drain(&p->p_limco);
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 15a327e66c7d..28c6dcff0ceb 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -100,7 +100,7 @@ _Static_assert(offsetof(struct proc, p_pid) == 0xc4,
     "struct proc KBI p_pid");
 _Static_assert(offsetof(struct proc, p_filemon) == 0x3c8,
     "struct proc KBI p_filemon");
-_Static_assert(offsetof(struct proc, p_comm) == 0x3e0,
+_Static_assert(offsetof(struct proc, p_comm) == 0x3e4,
     "struct proc KBI p_comm");
 _Static_assert(offsetof(struct proc, p_emuldata) == 0x4d0,
     "struct proc KBI p_emuldata");
@@ -120,7 +120,7 @@ _Static_assert(offsetof(struct proc, p_pid) == 0x78,
     "struct proc KBI p_pid");
 _Static_assert(offsetof(struct proc, p_filemon) == 0x270,
     "struct proc KBI p_filemon");
-_Static_assert(offsetof(struct proc, p_comm) == 0x284,
+_Static_assert(offsetof(struct proc, p_comm) == 0x288,
     "struct proc KBI p_comm");
 _Static_assert(offsetof(struct proc, p_emuldata) == 0x318,
     "struct proc KBI p_emuldata");
diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h
index f99b1273ee02..dbb76a1d9b93 100644
--- a/sys/sys/imgact.h
+++ b/sys/sys/imgact.h
@@ -124,6 +124,9 @@ int	exec_shell_imgact(struct image_params *);
 int	exec_copyin_args(struct image_args *, const char *, char **, char **);
 int	pre_execve(struct thread *td, struct vmspace **oldvmspace);
 void	post_execve(struct thread *td, int error, struct vmspace *oldvmspace);
+bool	execve_block(struct thread *td, struct proc *p);
+void	execve_block_wait(struct thread *td, struct proc *p);
+void	execve_unblock(struct thread *td, struct proc *p);
 #endif
 
 #endif /* !_SYS_IMGACT_H_ */
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index ff6e944a5ad2..20389c728f37 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -724,6 +724,7 @@ struct proc {
 	int		p_pendingexits; /* (c) Count of pending thread exits. */
 	struct filemon	*p_filemon;	/* (c) filemon-specific data. */
 	int		p_pdeathsig;	/* (c) Signal from parent on exit. */
+	u_int		p_execblock;	/* (c) Blockers for execve. */
 /* End area that is zeroed on creation. */
 #define	p_endzero	p_magic
 
@@ -852,7 +853,7 @@ struct proc {
 #define	P_STATCHILD	0x08000000	/* Child process stopped or exited. */
 #define	P_INMEM		0x10000000	/* Loaded into memory, always set. */
 #define	P_ASYNC_EXIT	0x20000000	/* XXX */
-#define	P_UNUSED2	0x40000000	/* --available-- */
+#define	P_INEXEC_WAIT	0x40000000	/* Waiters for P_INEXEC/p_execblock */
 #define	P_PPTRACE	0x80000000	/* PT_TRACEME by vforked child. */
 
 #define	P_STOPPED	(P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE)