git: e1b0d051bbf7 - main - proc: Allow to make proc_rwmem() operate on a consistent address space

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Sun, 21 Jun 2026 11:48:00 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=e1b0d051bbf7e4e730470cbd6622f71a639834c3

commit e1b0d051bbf7e4e730470cbd6622f71a639834c3
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2026-06-07 17:57:06 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2026-06-21 11:46:52 +0000

    proc: Allow to make proc_rwmem() operate on a consistent address space
    
    Provide proc_vmspace_ref() to safely obtain the reference to the target
    process vmspace, optionally
    - requiring that the target cannot execve(2) and thus cannot change its
      vmspace until the vmspace is unreferenced
    - requiring the check of permissions of the caller after the vmspace
      reference is obtained, since the process lock might have been dropped
      in the process.
    
    Reviewed by:    markj
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D57497
---
 sys/fs/cuse/cuse.c         |   4 +-
 sys/fs/procfs/procfs_mem.c |   2 +-
 sys/kern/kern_prot.c       |  10 ++--
 sys/kern/sys_process.c     | 127 ++++++++++++++++++++++++++++++++++++++-------
 sys/sys/ptrace.h           |  15 +++++-
 5 files changed, 129 insertions(+), 29 deletions(-)

diff --git a/sys/fs/cuse/cuse.c b/sys/fs/cuse/cuse.c
index b914b2d5017c..8f67c4b5572b 100644
--- a/sys/fs/cuse/cuse.c
+++ b/sys/fs/cuse/cuse.c
@@ -916,7 +916,7 @@ cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
 		};
 
 		PHOLD(proc_s);
-		error = proc_rwmem(proc_s, &uio);
+		error = proc_rwmem(proc_s, &uio, 0);
 		PRELE(proc_s);
 
 	} else if (proc_cur == proc_s) {
@@ -935,7 +935,7 @@ cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
 		};
 
 		PHOLD(proc_d);
-		error = proc_rwmem(proc_d, &uio);
+		error = proc_rwmem(proc_d, &uio, 0);
 		PRELE(proc_d);
 	} else {
 		error = EINVAL;
diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c
index 0020b8f8a8d8..824fcd33b2ef 100644
--- a/sys/fs/procfs/procfs_mem.c
+++ b/sys/fs/procfs/procfs_mem.c
@@ -64,7 +64,7 @@ procfs_doprocmem(PFS_FILL_ARGS)
 	error = p_candebug(td, p);
 	PROC_UNLOCK(p);
 	if (error == 0)
-		error = proc_rwmem(p, uio);
+		error = proc_rwmem(p, uio, 0);
 
 	return (error);
 }
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
index c8dc05f0ebbd..47744f6b07fe 100644
--- a/sys/kern/kern_prot.c
+++ b/sys/kern/kern_prot.c
@@ -2360,11 +2360,11 @@ p_candebug(struct thread *td, struct proc *p)
 	}
 
 	/*
-	 * Can't trace a process that's currently exec'ing.
-	 *
-	 * XXX: Note, this is not a security policy decision, it's a
-	 * basic correctness/functionality decision.  Therefore, this check
-	 * should be moved to the caller's of p_candebug().
+	 * Can't trace a process that's currently exec'ing.  Otherwise
+	 * the process vmspace might change, and the target might be
+	 * loading a setugid image.  execve_block(9) and
+	 * proc_vmspace_ref(9) allow the caller to obtain stable
+	 * credentials and a stable vmspace reference.
 	 */
 	if ((p->p_flag & P_INEXEC) != 0)
 		return (EBUSY);
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 5effc6fbe2d7..81cff03250a9 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -33,6 +33,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/imgact.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
@@ -367,25 +368,93 @@ proc_sstep(struct thread *td)
 	return (ptrace_single_step(td));
 }
 
+static int
+proc_vmspace_check_access(struct thread *td, struct proc *p, int flags)
+{
+	PROC_ASSERT_HELD(p);
+	if ((flags & PRVM_CHECK_DEBUG) != 0)
+		return (p_candebug(td, p));
+	if ((flags & PRVM_CHECK_VISIBILITY) != 0)
+		return (p_cansee(td, p));
+	return (0);
+}
+
 int
-proc_rwmem(struct proc *p, struct uio *uio)
+proc_vmspace_ref(struct thread *td, struct proc *p, int flags,
+    struct vmspace **vmp)
+{
+	struct vmspace *vm;
+	int error;
+
+	MPASS((flags & ~(PRVM_BLOCK_EXEC | PRVM_CHECK_VISIBILITY |
+	    PRVM_CHECK_DEBUG)) == 0);
+	MPASS((flags & (PRVM_CHECK_VISIBILITY | PRVM_CHECK_DEBUG)) !=
+	    (PRVM_CHECK_VISIBILITY | PRVM_CHECK_DEBUG));
+
+	PROC_LOCK(p);
+	if (p != td->td_proc) {
+		PROC_ASSERT_HELD(p);
+
+		/*
+		 * Make sure that the vmspace doesn't switch out from
+		 * under us.
+		 */
+		if ((flags & PRVM_BLOCK_EXEC) != 0) {
+			for (;;) {
+				if (!execve_block(td, p)) {
+					PROC_LOCK(p);
+					continue;
+				}
+				error = proc_vmspace_check_access(td, p, flags);
+				if (error != 0) {
+					execve_unblock(td, p);
+					PROC_UNLOCK(p);
+					return (error);
+				}
+				break;
+			}
+		} else {
+			error = proc_vmspace_check_access(td, p, flags);
+			if (error != 0) {
+				PROC_UNLOCK(p);
+				return (error);
+			}
+		}
+	}
+	vm = vmspace_acquire_ref(p);
+	if (vm == NULL) {
+		if (p != td->td_proc && (flags & PRVM_BLOCK_EXEC) != 0)
+			execve_unblock(td, p);
+		PROC_UNLOCK(p);
+		return (ESRCH);
+	}
+	PROC_UNLOCK(p);
+	*vmp = vm;
+	return (0);
+}
+
+void
+proc_vmspace_unref(struct thread *td, struct proc *p, int flags,
+    struct vmspace *vm)
+{
+	vmspace_free(vm);
+	if (p != td->td_proc && (flags & PRVM_BLOCK_EXEC) != 0) {
+		PROC_LOCK(p);
+		PROC_ASSERT_HELD(p);
+		execve_unblock(td, p);
+		PROC_UNLOCK(p);
+	}
+}
+
+static int
+vmspace_rwmem(struct vmspace *vm, struct uio *uio)
 {
 	vm_map_t map;
 	vm_offset_t pageno;		/* page number */
 	vm_prot_t reqprot;
 	int error, fault_flags, page_offset, writing;
 
-	/*
-	 * Make sure that the process' vmspace remains live.
-	 */
-	if (p != curproc)
-		PROC_ASSERT_HELD(p);
-	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
-
-	/*
-	 * The map we want...
-	 */
-	map = &p->p_vmspace->vm_map;
+	map = &vm->vm_map;
 
 	/*
 	 * If we are writing, then we request vm_fault() to create a private
@@ -399,7 +468,7 @@ proc_rwmem(struct proc *p, struct uio *uio)
 	if (writing) {
 		error = priv_check(curthread, PRIV_PROC_MEM_WRITE);
 		if (error != 0)
-			return (error);
+			goto out;
 	}
 
 	/*
@@ -457,16 +526,34 @@ proc_rwmem(struct proc *p, struct uio *uio)
 
 	} while (error == 0 && uio->uio_resid > 0);
 
+out:
 	return (error);
 }
 
-static ssize_t
-proc_iop(struct thread *td, struct proc *p, vm_offset_t va, void *buf,
+int
+proc_rwmem(struct proc *p, struct uio *uio, int flags)
+{
+	struct vmspace *vm;
+	struct thread *td;
+	int error;
+
+	td = curthread;
+	error = proc_vmspace_ref(td, p, flags, &vm);
+	if (error != 0)
+		return (error);
+	error = vmspace_rwmem(vm, uio);
+	proc_vmspace_unref(td, p, flags, vm);
+	return (error);
+}
+
+ssize_t
+vmspace_iop(struct thread *td, struct vmspace *vm, vm_offset_t va, void *buf,
     size_t len, enum uio_rw rw)
 {
 	struct iovec iov;
 	struct uio uio;
 	ssize_t slen;
+	int error;
 
 	MPASS(len < SSIZE_MAX);
 	slen = (ssize_t)len;
@@ -480,8 +567,8 @@ proc_iop(struct thread *td, struct proc *p, vm_offset_t va, void *buf,
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = rw;
 	uio.uio_td = td;
-	proc_rwmem(p, &uio);
-	if (uio.uio_resid == slen)
+	error = vmspace_rwmem(vm, &uio);
+	if (error != 0 || uio.uio_resid == slen)
 		return (-1);
 	return (slen - uio.uio_resid);
 }
@@ -491,7 +578,7 @@ proc_readmem(struct thread *td, struct proc *p, vm_offset_t va, void *buf,
     size_t len)
 {
 
-	return (proc_iop(td, p, va, buf, len, UIO_READ));
+	return (vmspace_iop(td, p->p_vmspace, va, buf, len, UIO_READ));
 }
 
 ssize_t
@@ -499,7 +586,7 @@ proc_writemem(struct thread *td, struct proc *p, vm_offset_t va, void *buf,
     size_t len)
 {
 
-	return (proc_iop(td, p, va, buf, len, UIO_WRITE));
+	return (vmspace_iop(td, p->p_vmspace, va, buf, len, UIO_WRITE));
 }
 
 static int
@@ -1492,7 +1579,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 			goto out;
 		}
 		PROC_UNLOCK(p);
-		error = proc_rwmem(p, &uio);
+		error = proc_rwmem(p, &uio, 0);
 		piod->piod_len -= uio.uio_resid;
 		PROC_LOCK(p);
 		break;
diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h
index cdde142d0487..2ef52b22044c 100644
--- a/sys/sys/ptrace.h
+++ b/sys/sys/ptrace.h
@@ -247,7 +247,20 @@ int	proc_write_fpregs(struct thread *_td, struct fpreg *_fpreg);
 int	proc_read_dbregs(struct thread *_td, struct dbreg *_dbreg);
 int	proc_write_dbregs(struct thread *_td, struct dbreg *_dbreg);
 int	proc_sstep(struct thread *_td);
-int	proc_rwmem(struct proc *_p, struct uio *_uio);
+
+#define	PRVM_BLOCK_EXEC		0x00000001
+#define	PRVM_CHECK_VISIBILITY	0x00000002
+#define	PRVM_CHECK_DEBUG	0x00000004
+
+#include <sys/_uio.h>
+struct vmspace;
+int	proc_vmspace_ref(struct thread *_td, struct proc *_p, int _flags,
+	    struct vmspace **_vmp);
+void	proc_vmspace_unref(struct thread *_td, struct proc *_p, int _flags,
+	    struct vmspace *_vm);
+ssize_t	vmspace_iop(struct thread *td, struct vmspace *vm, vm_offset_t va,
+	    void *buf, size_t len, enum uio_rw rw);
+int	proc_rwmem(struct proc *_p, struct uio *_uio, int _flags);
 ssize_t	proc_readmem(struct thread *_td, struct proc *_p, vm_offset_t _va,
 	    void *_buf, size_t _len);
 ssize_t	proc_writemem(struct thread *_td, struct proc *_p, vm_offset_t _va,