git: 5eeb4f737f11 - main - imgact_binmisc: Optionally pre-open the interpreter vnode

From: Doug Rabson <dfr_at_FreeBSD.org>
Date: Thu, 08 Dec 2022 14:32:35 UTC
The branch main has been updated by dfr:

URL: https://cgit.FreeBSD.org/src/commit/?id=5eeb4f737f11b253ac330ae459b05e30fd16d0e8

commit 5eeb4f737f11b253ac330ae459b05e30fd16d0e8
Author:     Doug Rabson <dfr@FreeBSD.org>
AuthorDate: 2022-11-17 10:48:20 +0000
Commit:     Doug Rabson <dfr@FreeBSD.org>
CommitDate: 2022-12-08 14:32:03 +0000

    imgact_binmisc: Optionally pre-open the interpreter vnode
    
    This allows the use of chroot and/or jail environments which depend on
    interpreters registed with imgact_binmisc to use emulator binaries from
    the host to emulate programs inside the chroot.
    
    Reviewed by:    imp
    MFC after:      2 weeks
    Differential Revision: https://reviews.freebsd.org/D37432
---
 sys/kern/imgact_binmisc.c        | 49 ++++++++++++++++++++++++++++++++++++----
 sys/kern/kern_exec.c             | 18 ++++++++++++++-
 sys/sys/imgact.h                 |  1 +
 sys/sys/imgact_binmisc.h         |  3 ++-
 usr.sbin/binmiscctl/binmiscctl.8 |  8 +++++++
 usr.sbin/binmiscctl/binmiscctl.c | 15 ++++++++----
 6 files changed, 83 insertions(+), 11 deletions(-)

diff --git a/sys/kern/imgact_binmisc.c b/sys/kern/imgact_binmisc.c
index 951822df06b1..65b2e8e409a6 100644
--- a/sys/kern/imgact_binmisc.c
+++ b/sys/kern/imgact_binmisc.c
@@ -30,15 +30,18 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/ctype.h>
 #include <sys/exec.h>
+#include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/imgact_binmisc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
+#include <sys/namei.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/sx.h>
+#include <sys/vnode.h>
 
 #include <machine/atomic.h>
 
@@ -63,6 +66,7 @@ typedef struct imgact_binmisc_entry {
 	uint8_t				 *ibe_magic;
 	uint8_t				 *ibe_mask;
 	uint8_t				 *ibe_interpreter;
+	struct vnode			 *ibe_interpreter_vnode;
 	ssize_t				  ibe_interp_offset;
 	uint32_t			  ibe_interp_argcnt;
 	uint32_t			  ibe_interp_length;
@@ -114,7 +118,7 @@ static struct sx interp_list_sx;
  * Populate the entry with the information about the interpreter.
  */
 static void
-imgact_binmisc_populate_interp(char *str, imgact_binmisc_entry_t *ibe)
+imgact_binmisc_populate_interp(char *str, imgact_binmisc_entry_t *ibe, int flags)
 {
 	uint32_t len = 0, argc = 1;
 	char t[IBE_INTERP_LEN_MAX];
@@ -150,6 +154,30 @@ imgact_binmisc_populate_interp(char *str, imgact_binmisc_entry_t *ibe)
 	memcpy(ibe->ibe_interpreter, t, len);
 	ibe->ibe_interp_argcnt = argc;
 	ibe->ibe_interp_length = len;
+
+	ibe->ibe_interpreter_vnode = NULL;
+	if (flags & IBF_PRE_OPEN) {
+		struct nameidata nd;
+		int error;
+
+		tp = t;
+		while (*tp != '\0' && *tp != ' ') {
+			tp++;
+		}
+		*tp = '\0';
+		NDINIT(&nd, LOOKUP, FOLLOW | ISOPEN, UIO_SYSSPACE, t);
+
+		/*
+		 * If there is an error, just stop now and fall back
+		 * to the non pre-open case where we lookup during
+		 * exec.
+		 */
+		error = namei(&nd);
+		if (error)
+			return;
+
+		ibe->ibe_interpreter_vnode = nd.ni_vp;
+	}
 }
 
 /*
@@ -167,7 +195,7 @@ imgact_binmisc_new_entry(ximgact_binmisc_entry_t *xbe, ssize_t interp_offset,
 	ibe->ibe_name = malloc(namesz, M_BINMISC, M_WAITOK|M_ZERO);
 	strlcpy(ibe->ibe_name, xbe->xbe_name, namesz);
 
-	imgact_binmisc_populate_interp(xbe->xbe_interpreter, ibe);
+	imgact_binmisc_populate_interp(xbe->xbe_interpreter, ibe, xbe->xbe_flags);
 
 	ibe->ibe_magic = malloc(xbe->xbe_msize, M_BINMISC, M_WAITOK|M_ZERO);
 	memcpy(ibe->ibe_magic, xbe->xbe_magic, xbe->xbe_msize);
@@ -199,6 +227,8 @@ imgact_binmisc_destroy_entry(imgact_binmisc_entry_t *ibe)
 		free(ibe->ibe_interpreter, M_BINMISC);
 	if (ibe->ibe_name)
 		free(ibe->ibe_name, M_BINMISC);
+	if (ibe->ibe_interpreter_vnode)
+		vrele(ibe->ibe_interpreter_vnode);
 	if (ibe)
 		free(ibe, M_BINMISC);
 }
@@ -271,15 +301,20 @@ imgact_binmisc_add_entry(ximgact_binmisc_entry_t *xbe)
 		}
 	}
 
+	/*
+	 * Preallocate a new entry. We do this without holding the
+	 * lock to avoid lock-order problems if IBF_PRE_OPEN is
+	 * set.
+	 */
+	ibe = imgact_binmisc_new_entry(xbe, interp_offset, argv0_cnt);
+
 	INTERP_LIST_WLOCK();
 	if (imgact_binmisc_find_entry(xbe->xbe_name) != NULL) {
 		INTERP_LIST_WUNLOCK();
+		imgact_binmisc_destroy_entry(ibe);
 		return (EEXIST);
 	}
 
-	/* Preallocate a new entry. */
-	ibe = imgact_binmisc_new_entry(xbe, interp_offset, argv0_cnt);
-
 	SLIST_INSERT_HEAD(&interpreter_list, ibe, link);
 	interp_list_entry_count++;
 	INTERP_LIST_WUNLOCK();
@@ -698,6 +733,10 @@ imgact_binmisc_exec(struct image_params *imgp)
 	/* Catch ibe->ibe_argv0_cnt counting more #a than we did. */
 	MPASS(ibe->ibe_argv0_cnt == argv0_cnt);
 	imgp->interpreter_name = imgp->args->begin_argv;
+	if (ibe->ibe_interpreter_vnode) {
+		imgp->interpreter_vp = ibe->ibe_interpreter_vnode;
+		vref(imgp->interpreter_vp);
+	}
 
 done:
 	INTERP_LIST_RUNLOCK();
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index cb45d18fbb85..68dab42c25cc 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -504,6 +504,18 @@ interpret:
 				imgp->execpath = args->fname;
 			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		}
+	} else if (imgp->interpreter_vp) {
+		/*
+		 * An image activator has already provided an open vnode
+		 */
+		newtextvp = imgp->interpreter_vp;
+		imgp->interpreter_vp = NULL;
+		if (vn_fullpath(newtextvp, &imgp->execpath,
+		    &imgp->freepath) != 0)
+			imgp->execpath = args->fname;
+		vn_lock(newtextvp, LK_SHARED | LK_RETRY);
+		AUDIT_ARG_VNODE1(newtextvp);
+		imgp->vp = newtextvp;
 	} else {
 		AUDIT_ARG_FD(args->fd);
 
@@ -702,7 +714,11 @@ interpret:
 		free(imgp->freepath, M_TEMP);
 		imgp->freepath = NULL;
 		/* set new name to that of the interpreter */
-		args->fname = imgp->interpreter_name;
+		if (imgp->interpreter_vp) {
+			args->fname = NULL;
+		} else {
+			args->fname = imgp->interpreter_name;
+		}
 		goto interpret;
 	}
 
diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h
index 0be3e71604bf..963f53aa387b 100644
--- a/sys/sys/imgact.h
+++ b/sys/sys/imgact.h
@@ -94,6 +94,7 @@ struct image_params {
 	u_int map_flags;
 #define IMGP_ASLR_SHARED_PAGE	0x1
 	uint32_t imgp_flags;
+	struct vnode *interpreter_vp;	/* vnode of the interpreter */
 };
 
 #ifdef _KERNEL
diff --git a/sys/sys/imgact_binmisc.h b/sys/sys/imgact_binmisc.h
index 74ff68622075..37ae9dcdd774 100644
--- a/sys/sys/imgact_binmisc.h
+++ b/sys/sys/imgact_binmisc.h
@@ -57,8 +57,9 @@ _Static_assert(IBE_MAGIC_MAX <= IBE_MATCH_MAX,
  */
 #define	IBF_ENABLED	0x0001	/* Entry is active. */
 #define	IBF_USE_MASK	0x0002	/* Use mask on header magic field. */
+#define	IBF_PRE_OPEN	0x0004	/* Cache the vnode for interpreter */
 
-#define	IBF_VALID_UFLAGS	0x0003	/* Bits allowed from userland. */
+#define	IBF_VALID_UFLAGS	0x0007	/* Bits allowed from userland. */
 
 /*
  * Used with sysctlbyname() to pass imgact bin misc entries in and out of the
diff --git a/usr.sbin/binmiscctl/binmiscctl.8 b/usr.sbin/binmiscctl/binmiscctl.8
index 82624cccd9ed..198997d22058 100644
--- a/usr.sbin/binmiscctl/binmiscctl.8
+++ b/usr.sbin/binmiscctl/binmiscctl.8
@@ -46,6 +46,7 @@
 .Op Fl -mask Ar mask
 .Op Fl -offset Ar offset
 .Op Fl -set-enabled
+.Op Fl -pre-open
 .Nm
 .Cm disable
 .Ar name
@@ -87,6 +88,7 @@ Operation must be one of the following:
 .Op Fl -mask Ar mask
 .Op Fl -offset Ar offset
 .Op Fl -set-enabled
+.Op Fl -pre-open
 .Xc
 Add a new activator entry in the kernel.
 You must specify a
@@ -124,6 +126,12 @@ To enable the activator entry the
 option is used.
 The activator default state is disabled.
 .Pp
+To make the interpreter automatically available in jails and chroots,
+use the
+.Fl -pre-open
+option to allow the kernel to open the binary at configuration time
+rather then lazily when the the interpreted program is started.
+.Pp
 The interpreter
 .Ar path
 may also contain arguments for the interpreter including
diff --git a/usr.sbin/binmiscctl/binmiscctl.c b/usr.sbin/binmiscctl/binmiscctl.c
index 3c5e19def67b..2ec2cc3f64d4 100644
--- a/usr.sbin/binmiscctl/binmiscctl.c
+++ b/usr.sbin/binmiscctl/binmiscctl.c
@@ -75,7 +75,8 @@ static const struct {
 		"<name> --interpreter <path_and_arguments> \\\n"
 		"\t\t--magic <magic_bytes> [--mask <mask_bytes>] \\\n"
 		"\t\t--size <magic_size> [--offset <magic_offset>] \\\n"
-		"\t\t[--set-enabled]"
+		"\t\t[--set-enabled] \\\n"
+		"\t\t[--pre-open]"
 	},
 	{
 		CMD_REMOVE,
@@ -122,6 +123,7 @@ add_opts[] = {
 	{ "magic",		required_argument,	NULL,	'm' },
 	{ "offset",		required_argument,	NULL,	'o' },
 	{ "size",		required_argument,	NULL,	's' },
+	{ "pre-open",		no_argument,		NULL,	'p' },
 	{ NULL,			0,			NULL,	0   }
 };
 
@@ -192,8 +194,9 @@ printxbe(ximgact_binmisc_entry_t *xbe)
 
 	printf("name: %s\n", xbe->xbe_name);
 	printf("interpreter: %s\n", xbe->xbe_interpreter);
-	printf("flags: %s%s\n", (flags & IBF_ENABLED) ? "ENABLED " : "",
-	    (flags & IBF_USE_MASK) ? "USE_MASK " : "");
+	printf("flags: %s%s%s\n", (flags & IBF_ENABLED) ? "ENABLED " : "",
+	    (flags & IBF_USE_MASK) ? "USE_MASK " : "",
+	    (flags & IBF_PRE_OPEN) ? "PRE_OPEN " : "");
 	printf("magic size: %u\n", xbe->xbe_msize);
 	printf("magic offset: %u\n", xbe->xbe_moffset);
 
@@ -291,7 +294,7 @@ add_cmd(__unused int argc, char *argv[], ximgact_binmisc_entry_t *xbe)
 		    IBE_NAME_MAX);
 	strlcpy(&xbe->xbe_name[0], argv[0], IBE_NAME_MAX);
 
-	while ((ch = getopt_long(argc, argv, "ei:m:M:o:s:", add_opts, NULL))
+	while ((ch = getopt_long(argc, argv, "epi:m:M:o:s:", add_opts, NULL))
 	    != -1) {
 
 		switch(ch) {
@@ -328,6 +331,10 @@ add_cmd(__unused int argc, char *argv[], ximgact_binmisc_entry_t *xbe)
 				    xbe->xbe_msize);
 			break;
 
+		case 'p':
+			xbe->xbe_flags |= IBF_PRE_OPEN;
+			break;
+
 		default:
 			usage("Unknown argument: '%c'", ch);
 		}