Re: git: 5eeb4f737f11 - main - imgact_binmisc: Optionally pre-open the interpreter vnode

From: Mateusz Guzik <mjguzik_at_gmail.com>
Date: Thu, 08 Dec 2022 17:30:18 UTC
On 12/8/22, Doug Rabson <dfr@freebsd.org> wrote:
> The branch main has been updated by dfr:
>
> URL:
> https://cgit.FreeBSD.org/src/commit/?id=5eeb4f737f11b253ac330ae459b05e30fd16d0e8
>
> commit 5eeb4f737f11b253ac330ae459b05e30fd16d0e8
> Author:     Doug Rabson <dfr@FreeBSD.org>
> AuthorDate: 2022-11-17 10:48:20 +0000
> Commit:     Doug Rabson <dfr@FreeBSD.org>
> CommitDate: 2022-12-08 14:32:03 +0000
>
>     imgact_binmisc: Optionally pre-open the interpreter vnode
>
>     This allows the use of chroot and/or jail environments which depend on
>     interpreters registed with imgact_binmisc to use emulator binaries from
>     the host to emulate programs inside the chroot.
>
>     Reviewed by:    imp
>     MFC after:      2 weeks
>     Differential Revision: https://reviews.freebsd.org/D37432
> ---
>  sys/kern/imgact_binmisc.c        | 49
> ++++++++++++++++++++++++++++++++++++----
>  sys/kern/kern_exec.c             | 18 ++++++++++++++-
>  sys/sys/imgact.h                 |  1 +
>  sys/sys/imgact_binmisc.h         |  3 ++-
>  usr.sbin/binmiscctl/binmiscctl.8 |  8 +++++++
>  usr.sbin/binmiscctl/binmiscctl.c | 15 ++++++++----
>  6 files changed, 83 insertions(+), 11 deletions(-)
>
> diff --git a/sys/kern/imgact_binmisc.c b/sys/kern/imgact_binmisc.c
> index 951822df06b1..65b2e8e409a6 100644
> --- a/sys/kern/imgact_binmisc.c
> +++ b/sys/kern/imgact_binmisc.c
> @@ -30,15 +30,18 @@ __FBSDID("$FreeBSD$");
>  #include <sys/param.h>
>  #include <sys/ctype.h>
>  #include <sys/exec.h>
> +#include <sys/fcntl.h>
>  #include <sys/imgact.h>
>  #include <sys/imgact_binmisc.h>
>  #include <sys/kernel.h>
>  #include <sys/lock.h>
>  #include <sys/malloc.h>
>  #include <sys/mutex.h>
> +#include <sys/namei.h>
>  #include <sys/sbuf.h>
>  #include <sys/sysctl.h>
>  #include <sys/sx.h>
> +#include <sys/vnode.h>
>
>  #include <machine/atomic.h>
>
> @@ -63,6 +66,7 @@ typedef struct imgact_binmisc_entry {
>  	uint8_t				 *ibe_magic;
>  	uint8_t				 *ibe_mask;
>  	uint8_t				 *ibe_interpreter;
> +	struct vnode			 *ibe_interpreter_vnode;
>  	ssize_t				  ibe_interp_offset;
>  	uint32_t			  ibe_interp_argcnt;
>  	uint32_t			  ibe_interp_length;
> @@ -114,7 +118,7 @@ static struct sx interp_list_sx;
>   * Populate the entry with the information about the interpreter.
>   */
>  static void
> -imgact_binmisc_populate_interp(char *str, imgact_binmisc_entry_t *ibe)
> +imgact_binmisc_populate_interp(char *str, imgact_binmisc_entry_t *ibe, int
> flags)
>  {
>  	uint32_t len = 0, argc = 1;
>  	char t[IBE_INTERP_LEN_MAX];
> @@ -150,6 +154,30 @@ imgact_binmisc_populate_interp(char *str,
> imgact_binmisc_entry_t *ibe)
>  	memcpy(ibe->ibe_interpreter, t, len);
>  	ibe->ibe_interp_argcnt = argc;
>  	ibe->ibe_interp_length = len;
> +
> +	ibe->ibe_interpreter_vnode = NULL;
> +	if (flags & IBF_PRE_OPEN) {
> +		struct nameidata nd;
> +		int error;
> +
> +		tp = t;
> +		while (*tp != '\0' && *tp != ' ') {
> +			tp++;
> +		}
> +		*tp = '\0';
> +		NDINIT(&nd, LOOKUP, FOLLOW | ISOPEN, UIO_SYSSPACE, t);
> +
> +		/*
> +		 * If there is an error, just stop now and fall back
> +		 * to the non pre-open case where we lookup during
> +		 * exec.
> +		 */
> +		error = namei(&nd);
> +		if (error)
> +			return;

you need to add NDFREE_PNBUF(&nd)

> +
> +		ibe->ibe_interpreter_vnode = nd.ni_vp;
> +	}
>  }
>
>  /*
> @@ -167,7 +195,7 @@ imgact_binmisc_new_entry(ximgact_binmisc_entry_t *xbe,
> ssize_t interp_offset,
>  	ibe->ibe_name = malloc(namesz, M_BINMISC, M_WAITOK|M_ZERO);
>  	strlcpy(ibe->ibe_name, xbe->xbe_name, namesz);
>
> -	imgact_binmisc_populate_interp(xbe->xbe_interpreter, ibe);
> +	imgact_binmisc_populate_interp(xbe->xbe_interpreter, ibe,
> xbe->xbe_flags);
>
>  	ibe->ibe_magic = malloc(xbe->xbe_msize, M_BINMISC, M_WAITOK|M_ZERO);
>  	memcpy(ibe->ibe_magic, xbe->xbe_magic, xbe->xbe_msize);
> @@ -199,6 +227,8 @@ imgact_binmisc_destroy_entry(imgact_binmisc_entry_t
> *ibe)
>  		free(ibe->ibe_interpreter, M_BINMISC);
>  	if (ibe->ibe_name)
>  		free(ibe->ibe_name, M_BINMISC);
> +	if (ibe->ibe_interpreter_vnode)
> +		vrele(ibe->ibe_interpreter_vnode);
>  	if (ibe)
>  		free(ibe, M_BINMISC);
>  }
> @@ -271,15 +301,20 @@ imgact_binmisc_add_entry(ximgact_binmisc_entry_t
> *xbe)
>  		}
>  	}
>
> +	/*
> +	 * Preallocate a new entry. We do this without holding the
> +	 * lock to avoid lock-order problems if IBF_PRE_OPEN is
> +	 * set.
> +	 */
> +	ibe = imgact_binmisc_new_entry(xbe, interp_offset, argv0_cnt);
> +
>  	INTERP_LIST_WLOCK();
>  	if (imgact_binmisc_find_entry(xbe->xbe_name) != NULL) {
>  		INTERP_LIST_WUNLOCK();
> +		imgact_binmisc_destroy_entry(ibe);
>  		return (EEXIST);
>  	}
>
> -	/* Preallocate a new entry. */
> -	ibe = imgact_binmisc_new_entry(xbe, interp_offset, argv0_cnt);
> -
>  	SLIST_INSERT_HEAD(&interpreter_list, ibe, link);
>  	interp_list_entry_count++;
>  	INTERP_LIST_WUNLOCK();
> @@ -698,6 +733,10 @@ imgact_binmisc_exec(struct image_params *imgp)
>  	/* Catch ibe->ibe_argv0_cnt counting more #a than we did. */
>  	MPASS(ibe->ibe_argv0_cnt == argv0_cnt);
>  	imgp->interpreter_name = imgp->args->begin_argv;
> +	if (ibe->ibe_interpreter_vnode) {
> +		imgp->interpreter_vp = ibe->ibe_interpreter_vnode;
> +		vref(imgp->interpreter_vp);
> +	}
>
>  done:
>  	INTERP_LIST_RUNLOCK();
> diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
> index cb45d18fbb85..68dab42c25cc 100644
> --- a/sys/kern/kern_exec.c
> +++ b/sys/kern/kern_exec.c
> @@ -504,6 +504,18 @@ interpret:
>  				imgp->execpath = args->fname;
>  			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
>  		}
> +	} else if (imgp->interpreter_vp) {
> +		/*
> +		 * An image activator has already provided an open vnode
> +		 */
> +		newtextvp = imgp->interpreter_vp;
> +		imgp->interpreter_vp = NULL;
> +		if (vn_fullpath(newtextvp, &imgp->execpath,
> +		    &imgp->freepath) != 0)
> +			imgp->execpath = args->fname;
> +		vn_lock(newtextvp, LK_SHARED | LK_RETRY);
> +		AUDIT_ARG_VNODE1(newtextvp);
> +		imgp->vp = newtextvp;
>  	} else {
>  		AUDIT_ARG_FD(args->fd);
>
> @@ -702,7 +714,11 @@ interpret:
>  		free(imgp->freepath, M_TEMP);
>  		imgp->freepath = NULL;
>  		/* set new name to that of the interpreter */
> -		args->fname = imgp->interpreter_name;
> +		if (imgp->interpreter_vp) {
> +			args->fname = NULL;
> +		} else {
> +			args->fname = imgp->interpreter_name;
> +		}
>  		goto interpret;
>  	}
>
> diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h
> index 0be3e71604bf..963f53aa387b 100644
> --- a/sys/sys/imgact.h
> +++ b/sys/sys/imgact.h
> @@ -94,6 +94,7 @@ struct image_params {
>  	u_int map_flags;
>  #define IMGP_ASLR_SHARED_PAGE	0x1
>  	uint32_t imgp_flags;
> +	struct vnode *interpreter_vp;	/* vnode of the interpreter */
>  };
>
>  #ifdef _KERNEL
> diff --git a/sys/sys/imgact_binmisc.h b/sys/sys/imgact_binmisc.h
> index 74ff68622075..37ae9dcdd774 100644
> --- a/sys/sys/imgact_binmisc.h
> +++ b/sys/sys/imgact_binmisc.h
> @@ -57,8 +57,9 @@ _Static_assert(IBE_MAGIC_MAX <= IBE_MATCH_MAX,
>   */
>  #define	IBF_ENABLED	0x0001	/* Entry is active. */
>  #define	IBF_USE_MASK	0x0002	/* Use mask on header magic field. */
> +#define	IBF_PRE_OPEN	0x0004	/* Cache the vnode for interpreter */
>
> -#define	IBF_VALID_UFLAGS	0x0003	/* Bits allowed from userland. */
> +#define	IBF_VALID_UFLAGS	0x0007	/* Bits allowed from userland. */
>
>  /*
>   * Used with sysctlbyname() to pass imgact bin misc entries in and out of
> the
> diff --git a/usr.sbin/binmiscctl/binmiscctl.8
> b/usr.sbin/binmiscctl/binmiscctl.8
> index 82624cccd9ed..198997d22058 100644
> --- a/usr.sbin/binmiscctl/binmiscctl.8
> +++ b/usr.sbin/binmiscctl/binmiscctl.8
> @@ -46,6 +46,7 @@
>  .Op Fl -mask Ar mask
>  .Op Fl -offset Ar offset
>  .Op Fl -set-enabled
> +.Op Fl -pre-open
>  .Nm
>  .Cm disable
>  .Ar name
> @@ -87,6 +88,7 @@ Operation must be one of the following:
>  .Op Fl -mask Ar mask
>  .Op Fl -offset Ar offset
>  .Op Fl -set-enabled
> +.Op Fl -pre-open
>  .Xc
>  Add a new activator entry in the kernel.
>  You must specify a
> @@ -124,6 +126,12 @@ To enable the activator entry the
>  option is used.
>  The activator default state is disabled.
>  .Pp
> +To make the interpreter automatically available in jails and chroots,
> +use the
> +.Fl -pre-open
> +option to allow the kernel to open the binary at configuration time
> +rather then lazily when the the interpreted program is started.
> +.Pp
>  The interpreter
>  .Ar path
>  may also contain arguments for the interpreter including
> diff --git a/usr.sbin/binmiscctl/binmiscctl.c
> b/usr.sbin/binmiscctl/binmiscctl.c
> index 3c5e19def67b..2ec2cc3f64d4 100644
> --- a/usr.sbin/binmiscctl/binmiscctl.c
> +++ b/usr.sbin/binmiscctl/binmiscctl.c
> @@ -75,7 +75,8 @@ static const struct {
>  		"<name> --interpreter <path_and_arguments> \\\n"
>  		"\t\t--magic <magic_bytes> [--mask <mask_bytes>] \\\n"
>  		"\t\t--size <magic_size> [--offset <magic_offset>] \\\n"
> -		"\t\t[--set-enabled]"
> +		"\t\t[--set-enabled] \\\n"
> +		"\t\t[--pre-open]"
>  	},
>  	{
>  		CMD_REMOVE,
> @@ -122,6 +123,7 @@ add_opts[] = {
>  	{ "magic",		required_argument,	NULL,	'm' },
>  	{ "offset",		required_argument,	NULL,	'o' },
>  	{ "size",		required_argument,	NULL,	's' },
> +	{ "pre-open",		no_argument,		NULL,	'p' },
>  	{ NULL,			0,			NULL,	0   }
>  };
>
> @@ -192,8 +194,9 @@ printxbe(ximgact_binmisc_entry_t *xbe)
>
>  	printf("name: %s\n", xbe->xbe_name);
>  	printf("interpreter: %s\n", xbe->xbe_interpreter);
> -	printf("flags: %s%s\n", (flags & IBF_ENABLED) ? "ENABLED " : "",
> -	    (flags & IBF_USE_MASK) ? "USE_MASK " : "");
> +	printf("flags: %s%s%s\n", (flags & IBF_ENABLED) ? "ENABLED " : "",
> +	    (flags & IBF_USE_MASK) ? "USE_MASK " : "",
> +	    (flags & IBF_PRE_OPEN) ? "PRE_OPEN " : "");
>  	printf("magic size: %u\n", xbe->xbe_msize);
>  	printf("magic offset: %u\n", xbe->xbe_moffset);
>
> @@ -291,7 +294,7 @@ add_cmd(__unused int argc, char *argv[],
> ximgact_binmisc_entry_t *xbe)
>  		    IBE_NAME_MAX);
>  	strlcpy(&xbe->xbe_name[0], argv[0], IBE_NAME_MAX);
>
> -	while ((ch = getopt_long(argc, argv, "ei:m:M:o:s:", add_opts, NULL))
> +	while ((ch = getopt_long(argc, argv, "epi:m:M:o:s:", add_opts, NULL))
>  	    != -1) {
>
>  		switch(ch) {
> @@ -328,6 +331,10 @@ add_cmd(__unused int argc, char *argv[],
> ximgact_binmisc_entry_t *xbe)
>  				    xbe->xbe_msize);
>  			break;
>
> +		case 'p':
> +			xbe->xbe_flags |= IBF_PRE_OPEN;
> +			break;
> +
>  		default:
>  			usage("Unknown argument: '%c'", ch);
>  		}
>


-- 
Mateusz Guzik <mjguzik gmail.com>