git: 351d5f7fc516 - main - exec: store parent directory and hardlink name of the binary in struct proc

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Thu, 28 Oct 2021 17:50:42 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=351d5f7fc5161ededeaa226ee3f21a438ee4a632

commit 351d5f7fc5161ededeaa226ee3f21a438ee4a632
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2021-10-23 18:44:22 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2021-10-28 17:49:56 +0000

    exec: store parent directory and hardlink name of the binary in struct proc
    
    While doing it, also move all the code to resolve pathnames and obtain
    text vp and dvp, into single place.   Besides simplifying the code, it
    avoids spurious vnode relocks and validates the explanation why
    a transient text reference on the script vnode is not harmful.
    
    Reviewed by:    markj
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D32611
---
 sys/kern/kern_exec.c   | 108 ++++++++++++++++++++++++++++---------------------
 sys/kern/kern_exit.c   |  10 ++++-
 sys/kern/kern_fork.c   |  12 +++++-
 sys/kern/kern_thread.c |   6 +--
 sys/sys/proc.h         |   2 +
 5 files changed, 85 insertions(+), 53 deletions(-)

diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index d61a9d5b0b1c..52119036e95b 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -420,7 +420,9 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
 #ifdef KTRACE
 	struct ktr_io_params *kiop;
 #endif
-	struct vnode *oldtextvp = NULL, *newtextvp;
+	struct vnode *oldtextvp, *newtextvp;
+	struct vnode *oldtextdvp, *newtextdvp;
+	char *oldbinname, *newbinname;
 	bool credential_changing;
 #ifdef MAC
 	struct label *interpvplabel = NULL;
@@ -436,6 +438,9 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
 	static const char fexecv_proc_title[] = "(fexecv)";
 
 	imgp = &image_params;
+	oldtextvp = oldtextdvp = NULL;
+	newtextvp = newtextdvp = NULL;
+	newbinname = oldbinname = NULL;
 #ifdef KTRACE
 	kiop = NULL;
 #endif
@@ -471,19 +476,6 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
 		goto exec_fail;
 #endif
 
-	/*
-	 * Translate the file name. namei() returns a vnode pointer
-	 *	in ni_vp among other things.
-	 *
-	 * XXXAUDIT: It would be desirable to also audit the name of the
-	 * interpreter if this is an interpreted binary.
-	 */
-	if (args->fname != NULL) {
-		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW |
-		    SAVENAME | AUDITVNODE1 | WANTPARENT,
-		    UIO_SYSSPACE, args->fname, td);
-	}
-
 	SDT_PROBE1(proc, , , exec, args->fname);
 
 interpret:
@@ -500,12 +492,42 @@ interpret:
 			goto exec_fail;
 		}
 #endif
+
+		/*
+		 * Translate the file name. namei() returns a vnode
+		 * pointer in ni_vp among other things.
+		 */
+		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW |
+		    SAVENAME | AUDITVNODE1 | WANTPARENT, UIO_SYSSPACE,
+		    args->fname, td);
+
 		error = namei(&nd);
 		if (error)
 			goto exec_fail;
 
 		newtextvp = nd.ni_vp;
+		newtextdvp = nd.ni_dvp;
+		nd.ni_dvp = NULL;
+		newbinname = malloc(nd.ni_cnd.cn_namelen + 1, M_PARGS,
+		    M_WAITOK);
+		memcpy(newbinname, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen);
+		newbinname[nd.ni_cnd.cn_namelen] = '\0';
 		imgp->vp = newtextvp;
+
+		/*
+		 * Do the best to calculate the full path to the image file.
+		 */
+		if (args->fname[0] == '/') {
+			imgp->execpath = args->fname;
+		} else {
+			VOP_UNLOCK(imgp->vp);
+			freepath_size = MAXPATHLEN;
+			if (vn_fullpath_hardlink(newtextvp, newtextdvp,
+			    newbinname, nd.ni_cnd.cn_namelen, &imgp->execpath,
+			    &imgp->freepath, &freepath_size) != 0)
+				imgp->execpath = args->fname;
+			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
+		}
 	} else {
 		AUDIT_ARG_FD(args->fd);
 		/*
@@ -515,6 +537,9 @@ interpret:
 		    &newtextvp);
 		if (error)
 			goto exec_fail;
+		if (vn_fullpath(imgp->vp, &imgp->execpath,
+		    &imgp->freepath) != 0)
+			imgp->execpath = args->fname;
 		vn_lock(newtextvp, LK_SHARED | LK_RETRY);
 		AUDIT_ARG_VNODE1(newtextvp);
 		imgp->vp = newtextvp;
@@ -624,28 +649,6 @@ interpret:
 	}
 	/* The new credentials are installed into the process later. */
 
-	/*
-	 * Do the best to calculate the full path to the image file.
-	 */
-	if (args->fname != NULL) {
-		if (args->fname[0] == '/') {
-			imgp->execpath = args->fname;
-		} else {
-			VOP_UNLOCK(imgp->vp);
-			freepath_size = MAXPATHLEN;
-			if (vn_fullpath_hardlink(&nd, &imgp->execpath,
-			    &imgp->freepath, &freepath_size) != 0)
-				imgp->execpath = args->fname;
-			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
-		}
-	} else {
-		VOP_UNLOCK(imgp->vp);
-		if (vn_fullpath(imgp->vp, &imgp->execpath,
-		    &imgp->freepath) != 0)
-			imgp->execpath = args->fname;
-		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
-	}
-
 	/*
 	 *	If the current process has a special image activator it
 	 *	wants to try first, call it.   For example, emulating shell
@@ -699,10 +702,15 @@ interpret:
 			imgp->opened = false;
 		}
 		vput(newtextvp);
+		imgp->vp = newtextvp = NULL;
 		if (args->fname != NULL) {
-			if (nd.ni_dvp != NULL)
-				vrele(nd.ni_dvp);
+			if (newtextdvp != NULL) {
+				vrele(newtextdvp);
+				newtextdvp = NULL;
+			}
 			NDFREE(&nd, NDF_ONLY_PNBUF);
+			free(newbinname, M_PARGS);
+			newbinname = NULL;
 		}
 		vm_object_deallocate(imgp->object);
 		imgp->object = NULL;
@@ -712,9 +720,6 @@ interpret:
 		imgp->freepath = NULL;
 		/* set new name to that of the interpreter */
 		args->fname = imgp->interpreter_name;
-		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW |
-		    SAVENAME | WANTPARENT,
-		    UIO_SYSSPACE, imgp->interpreter_name, td);
 		goto interpret;
 	}
 
@@ -875,11 +880,17 @@ interpret:
 	}
 
 	/*
-	 * Store the vp for use in procfs.  This vnode was referenced by namei
-	 * or fgetvp_exec.
+	 * Store the vp for use in kern.proc.pathname.  This vnode was
+	 * referenced by namei() or fgetvp_exec().
 	 */
 	oldtextvp = p->p_textvp;
 	p->p_textvp = newtextvp;
+	oldtextdvp = p->p_textdvp;
+	p->p_textdvp = newtextdvp;
+	newtextdvp = NULL;
+	oldbinname = p->p_binname;
+	p->p_binname = newbinname;
+	newbinname = NULL;
 
 #ifdef KDTRACE_HOOKS
 	/*
@@ -953,11 +964,11 @@ exec_fail_dealloc:
 			vput(imgp->vp);
 		else
 			VOP_UNLOCK(imgp->vp);
-		if (args->fname != NULL) {
-			if (nd.ni_dvp != NULL)
-				vrele(nd.ni_dvp);
+		if (args->fname != NULL)
 			NDFREE(&nd, NDF_ONLY_PNBUF);
-		}
+		if (newtextdvp != NULL)
+			vrele(newtextdvp);
+		free(newbinname, M_PARGS);
 	}
 
 	if (imgp->object != NULL)
@@ -996,6 +1007,9 @@ exec_fail:
 	 */
 	if (oldtextvp != NULL)
 		vrele(oldtextvp);
+	if (oldtextdvp != NULL)
+		vrele(oldtextdvp);
+	free(oldbinname, M_PARGS);
 #ifdef KTRACE
 	ktr_io_params_free(kiop);
 #endif
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 79f1d5bd63e5..44203435fa68 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -424,12 +424,20 @@ exit1(struct thread *td, int rval, int signo)
 	ktrprocexit(td);
 #endif
 	/*
-	 * Release reference to text vnode
+	 * Release reference to text vnode etc
 	 */
 	if (p->p_textvp != NULL) {
 		vrele(p->p_textvp);
 		p->p_textvp = NULL;
 	}
+	if (p->p_textdvp != NULL) {
+		vrele(p->p_textdvp);
+		p->p_textdvp = NULL;
+	}
+	if (p->p_binname != NULL) {
+		free(p->p_binname, M_PARGS);
+		p->p_binname = NULL;
+	}
 
 	/*
 	 * Release our limits structure.
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index e8c34a31e6a1..5b518a0183d8 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -528,6 +528,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
 	}
 
 	p2->p_textvp = p1->p_textvp;
+	p2->p_textdvp = p1->p_textdvp;
 	p2->p_fd = fd;
 	p2->p_fdtol = fdtol;
 	p2->p_pd = pd;
@@ -549,9 +550,16 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
 	PROC_UNLOCK(p1);
 	PROC_UNLOCK(p2);
 
-	/* Bump references to the text vnode (for procfs). */
-	if (p2->p_textvp)
+	/*
+	 * Bump references to the text vnode and directory, and copy
+	 * the hardlink name.
+	 */
+	if (p2->p_textvp != NULL)
 		vrefact(p2->p_textvp);
+	if (p2->p_textdvp != NULL)
+		vrefact(p2->p_textdvp);
+	p2->p_binname = p1->p_binname == NULL ? NULL :
+	    strdup(p1->p_binname, M_PARGS);
 
 	/*
 	 * Set up linkage for kernel based threading.
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 65c5cc65c87e..71e3c501dddf 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -97,11 +97,11 @@ _Static_assert(offsetof(struct proc, p_flag) == 0xb8,
     "struct proc KBI p_flag");
 _Static_assert(offsetof(struct proc, p_pid) == 0xc4,
     "struct proc KBI p_pid");
-_Static_assert(offsetof(struct proc, p_filemon) == 0x3b8,
+_Static_assert(offsetof(struct proc, p_filemon) == 0x3c8,
     "struct proc KBI p_filemon");
-_Static_assert(offsetof(struct proc, p_comm) == 0x3d0,
+_Static_assert(offsetof(struct proc, p_comm) == 0x3e0,
     "struct proc KBI p_comm");
-_Static_assert(offsetof(struct proc, p_emuldata) == 0x4b8,
+_Static_assert(offsetof(struct proc, p_emuldata) == 0x4c8,
     "struct proc KBI p_emuldata");
 #endif
 #ifdef __i386__
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index e83f98646451..177efd5257af 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -666,6 +666,8 @@ struct proc {
 	int		p_traceflag;	/* (o) Kernel trace points. */
 	struct ktr_io_params	*p_ktrioparms;	/* (c + o) Params for ktrace. */
 	struct vnode	*p_textvp;	/* (b) Vnode of executable. */
+	struct vnode	*p_textdvp;	/* (b) Dir containing textvp. */
+	char		*p_binname;	/* (b) Binary hardlink name. */
 	u_int		p_lock;		/* (c) Proclock (prevent swap) count. */
 	struct sigiolst	p_sigiolst;	/* (c) List of sigio sources. */
 	int		p_sigparent;	/* (c) Signal to parent on exit. */