Re: git: 851dc7f859c2 - main - jail: add jail descriptors
- In reply to: Jamie Gritton : "git: 851dc7f859c2 - main - jail: add jail descriptors"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 05 Sep 2025 13:33:22 UTC
On Thu, 4 Sep 2025, Jamie Gritton wrote:
> The branch main has been updated by jamie:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=851dc7f859c23cab09a348bca03ab655534fb7e0
>
> commit 851dc7f859c23cab09a348bca03ab655534fb7e0
> Author: Jamie Gritton <jamie@FreeBSD.org>
> AuthorDate: 2025-09-04 20:27:47 +0000
> Commit: Jamie Gritton <jamie@FreeBSD.org>
> CommitDate: 2025-09-04 20:27:47 +0000
>
> jail: add jail descriptors
>
> Similar to process descriptors, jail descriptors allow jail
> administration using the file descriptor interface instead of JIDs.
> They come from and can be used by jail_set(2) and jail_get(2),
> and there are two new system calls, jail_attach_jd(2) and
> jail_remove_jd(2).
>
> Reviewed by: bz, brooks
Just for the record, I looked at this more than a year ago, but I have
not done any (final) review.
Still happy that this landed.
> Relnotes: yes
> Differential Revision: https://reviews.freebsd.org/D43696
> ---
> lib/libjail/jail.c | 64 +++-
> lib/libsys/Symbol.sys.map | 2 +
> lib/libsys/_libsys.h | 4 +
> lib/libsys/jail.2 | 267 ++++++++++++++++-
> lib/libsys/syscalls.map | 4 +
> sys/compat/freebsd32/freebsd32_syscall.h | 4 +-
> sys/compat/freebsd32/freebsd32_syscalls.c | 2 +
> sys/compat/freebsd32/freebsd32_sysent.c | 2 +
> sys/compat/freebsd32/freebsd32_systrace_args.c | 44 +++
> sys/conf/files | 1 +
> sys/kern/init_sysent.c | 2 +
> sys/kern/kern_descrip.c | 2 +
> sys/kern/kern_jail.c | 396 +++++++++++++++++++++++--
> sys/kern/kern_jaildesc.c | 337 +++++++++++++++++++++
> sys/kern/syscalls.c | 2 +
> sys/kern/syscalls.master | 10 +
> sys/kern/systrace_args.c | 44 +++
> sys/sys/file.h | 1 +
> sys/sys/jail.h | 15 +-
> sys/sys/jaildesc.h | 85 ++++++
> sys/sys/syscall.h | 4 +-
> sys/sys/syscall.mk | 4 +-
> sys/sys/sysproto.h | 10 +
> sys/sys/user.h | 4 +
> 24 files changed, 1256 insertions(+), 54 deletions(-)
>
> diff --git a/lib/libjail/jail.c b/lib/libjail/jail.c
> index 30282e67866c..931391055919 100644
> --- a/lib/libjail/jail.c
> +++ b/lib/libjail/jail.c
> @@ -75,8 +75,9 @@ int
> jail_setv(int flags, ...)
> {
> va_list ap, tap;
> - struct jailparam *jp;
> - const char *name, *value;
> + struct jailparam *jp, *jp_desc;
> + const char *name;
> + char *value, *desc_value;
> int njp, jid;
>
> /* Create the parameter list and import the parameters. */
> @@ -86,15 +87,24 @@ jail_setv(int flags, ...)
> (void)va_arg(tap, char *);
> va_end(tap);
> jp = alloca(njp * sizeof(struct jailparam));
> - for (njp = 0; (name = va_arg(ap, char *)) != NULL;) {
> + jp_desc = NULL;
> + desc_value = NULL;
> + for (njp = 0; (name = va_arg(ap, char *)) != NULL; njp++) {
> value = va_arg(ap, char *);
> if (jailparam_init(jp + njp, name) < 0)
> goto error;
> - if (jailparam_import(jp + njp++, value) < 0)
> + if (jailparam_import(jp + njp, value) < 0)
> goto error;
> + if (!strcmp(name, "desc")
> + && (flags & (JAIL_GET_DESC | JAIL_OWN_DESC))) {
> + jp_desc = jp + njp;
> + desc_value = value;
> + }
> }
> va_end(ap);
> jid = jailparam_set(jp, njp, flags);
> + if (jid > 0 && jp_desc != NULL)
> + sprintf(desc_value, "%d", *(int *)jp_desc->jp_value);
> jailparam_free(jp, njp);
> return (jid);
>
> @@ -112,9 +122,10 @@ int
> jail_getv(int flags, ...)
> {
> va_list ap, tap;
> - struct jailparam *jp, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
> + struct jailparam *jp, *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
> char *valarg, *value;
> - const char *name, *key_value, *lastjid_value, *jid_value, *name_value;
> + const char *name, *key_value, *desc_value, *lastjid_value, *jid_value;
> + const char *name_value;
> int njp, i, jid;
>
> /* Create the parameter list and find the key. */
> @@ -126,15 +137,19 @@ jail_getv(int flags, ...)
>
> jp = alloca(njp * sizeof(struct jailparam));
> va_copy(tap, ap);
> - jp_lastjid = jp_jid = jp_name = NULL;
> - lastjid_value = jid_value = name_value = NULL;
> + jp_desc = jp_lastjid = jp_jid = jp_name = NULL;
> + desc_value = lastjid_value = jid_value = name_value = NULL;
> for (njp = 0; (name = va_arg(tap, char *)) != NULL; njp++) {
> value = va_arg(tap, char *);
> if (jailparam_init(jp + njp, name) < 0) {
> va_end(tap);
> goto error;
> }
> - if (!strcmp(jp[njp].jp_name, "lastjid")) {
> + if (!strcmp(jp[njp].jp_name, "desc")
> + && (flags & (JAIL_USE_DESC | JAIL_AT_DESC))) {
> + jp_desc = jp + njp;
> + desc_value = value;
> + } else if (!strcmp(jp[njp].jp_name, "lastjid")) {
> jp_lastjid = jp + njp;
> lastjid_value = value;
> } else if (!strcmp(jp[njp].jp_name, "jid")) {
> @@ -147,7 +162,10 @@ jail_getv(int flags, ...)
> }
> va_end(tap);
> /* Import the key parameter. */
> - if (jp_lastjid != NULL) {
> + if (jp_desc != NULL && (flags & JAIL_USE_DESC)) {
> + jp_key = jp_desc;
> + key_value = desc_value;
> + } else if (jp_lastjid != NULL) {
> jp_key = jp_lastjid;
> key_value = lastjid_value;
> } else if (jp_jid != NULL && strtol(jid_value, NULL, 10) != 0) {
> @@ -163,6 +181,9 @@ jail_getv(int flags, ...)
> }
> if (jailparam_import(jp_key, key_value) < 0)
> goto error;
> + if (jp_desc != NULL && jp_desc != jp_key
> + && jailparam_import(jp_desc, desc_value) < 0)
> + goto error;
> /* Get the jail and export the parameters. */
> jid = jailparam_get(jp, njp, flags);
> if (jid < 0)
> @@ -571,7 +592,7 @@ int
> jailparam_get(struct jailparam *jp, unsigned njp, int flags)
> {
> struct iovec *jiov;
> - struct jailparam *jp_lastjid, *jp_jid, *jp_name, *jp_key;
> + struct jailparam *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
> int i, ai, ki, jid, arrays, sanity;
> unsigned j;
>
> @@ -580,10 +601,13 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
> * Find the key and any array parameters.
> */
> jiov = alloca(sizeof(struct iovec) * 2 * (njp + 1));
> - jp_lastjid = jp_jid = jp_name = NULL;
> + jp_desc = jp_lastjid = jp_jid = jp_name = NULL;
> arrays = 0;
> for (ai = j = 0; j < njp; j++) {
> - if (!strcmp(jp[j].jp_name, "lastjid"))
> + if (!strcmp(jp[j].jp_name, "desc")
> + && (flags & (JAIL_USE_DESC | JAIL_AT_DESC)))
> + jp_desc = jp + j;
> + else if (!strcmp(jp[j].jp_name, "lastjid"))
> jp_lastjid = jp + j;
> else if (!strcmp(jp[j].jp_name, "jid"))
> jp_jid = jp + j;
> @@ -599,7 +623,9 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
> ai++;
> }
> }
> - jp_key = jp_lastjid ? jp_lastjid :
> + jp_key = jp_desc && jp_desc->jp_valuelen == sizeof(int) &&
> + jp_desc->jp_value && (flags & JAIL_USE_DESC) ? jp_desc :
> + jp_lastjid ? jp_lastjid :
> jp_jid && jp_jid->jp_valuelen == sizeof(int) &&
> jp_jid->jp_value && *(int *)jp_jid->jp_value ? jp_jid : jp_name;
> if (jp_key == NULL || jp_key->jp_value == NULL) {
> @@ -622,6 +648,14 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
> jiov[ki].iov_len = JAIL_ERRMSGLEN;
> ki++;
> jail_errmsg[0] = 0;
> + if (jp_desc != NULL && jp_desc != jp_key) {
> + jiov[ki].iov_base = jp_desc->jp_name;
> + jiov[ki].iov_len = strlen(jp_desc->jp_name) + 1;
> + ki++;
> + jiov[ki].iov_base = jp_desc->jp_value;
> + jiov[ki].iov_len = jp_desc->jp_valuelen;
> + ki++;
> + }
> if (arrays && jail_get(jiov, ki, flags) < 0) {
> if (!jail_errmsg[0])
> snprintf(jail_errmsg, sizeof(jail_errmsg),
> @@ -649,7 +683,7 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
> jiov[ai].iov_base = jp[j].jp_value;
> memset(jiov[ai].iov_base, 0, jiov[ai].iov_len);
> ai++;
> - } else if (jp + j != jp_key) {
> + } else if (jp + j != jp_key && jp + j != jp_desc) {
> jiov[i].iov_base = jp[j].jp_name;
> jiov[i].iov_len = strlen(jp[j].jp_name) + 1;
> i++;
> diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map
> index 1a297f9df581..e3fd8ac10621 100644
> --- a/lib/libsys/Symbol.sys.map
> +++ b/lib/libsys/Symbol.sys.map
> @@ -382,6 +382,8 @@ FBSD_1.8 {
> getrlimitusage;
> inotify_add_watch_at;
> inotify_rm_watch;
> + jail_attach_jd;
> + jail_remove_jd;
> kcmp;
> setcred;
> setgroups;
> diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h
> index 34eebc1aa67a..6bd768708a78 100644
> --- a/lib/libsys/_libsys.h
> +++ b/lib/libsys/_libsys.h
> @@ -468,6 +468,8 @@ typedef int (__sys_inotify_add_watch_at_t)(int, int, const char *, uint32_t);
> typedef int (__sys_inotify_rm_watch_t)(int, int);
> typedef int (__sys_getgroups_t)(int, gid_t *);
> typedef int (__sys_setgroups_t)(int, const gid_t *);
> +typedef int (__sys_jail_attach_jd_t)(int);
> +typedef int (__sys_jail_remove_jd_t)(int);
>
> _Noreturn void __sys__exit(int rval);
> int __sys_fork(void);
> @@ -872,6 +874,8 @@ int __sys_inotify_add_watch_at(int fd, int dfd, const char * path, uint32_t mask
> int __sys_inotify_rm_watch(int fd, int wd);
> int __sys_getgroups(int gidsetsize, gid_t * gidset);
> int __sys_setgroups(int gidsetsize, const gid_t * gidset);
> +int __sys_jail_attach_jd(int fd);
> +int __sys_jail_remove_jd(int fd);
> __END_DECLS
>
> #endif /* __LIBSYS_H_ */
> diff --git a/lib/libsys/jail.2 b/lib/libsys/jail.2
> index 8f8b9925c712..a0f47cc61cb3 100644
> --- a/lib/libsys/jail.2
> +++ b/lib/libsys/jail.2
> @@ -23,7 +23,7 @@
> .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> .\" SUCH DAMAGE.
> .\"
> -.Dd November 29, 2023
> +.Dd September 4, 2025
> .Dt JAIL 2
> .Os
> .Sh NAME
> @@ -31,7 +31,9 @@
> .Nm jail_get ,
> .Nm jail_set ,
> .Nm jail_remove ,
> -.Nm jail_attach
> +.Nm jail_attach ,
> +.Nm jail_remove_jd ,
> +.Nm jail_attach_jd
> .Nd create and manage system jails
> .Sh LIBRARY
> .Lb libc
> @@ -44,6 +46,10 @@
> .Fn jail_attach "int jid"
> .Ft int
> .Fn jail_remove "int jid"
> +.Ft int
> +.Fn jail_attach_jd "int fd"
> +.Ft int
> +.Fn jail_remove_jd "int fd"
> .In sys/uio.h
> .Ft int
> .Fn jail_get "struct iovec *iov" "u_int niov" "int flags"
> @@ -188,6 +194,29 @@ system call.
> This is deprecated in
> .Fn jail_set
> and has no effect.
> +.It Dv JAIL_USE_DESC
> +Identify the jail by a descriptor in the
> +.Va desc
> +parameter.
> +.It Dv JAIL_AT_DESC
> +Operate in the context of the jail described by the
> +.Va desc
> +parameter, instead of the current jail.
> +Only one of
> +.Dv JAIL_USE_DESC
> +or
> +.Dv JAIL_AT_DESC
> +may be specified.
> +.It Dv JAIL_GET_DESC
> +Return a new jail descriptor for the jail in the
> +.Va desc
> +parameter.
> +.It Dv JAIL_OWN_DESC
> +Return an
> +.Dq owning
> +jail descriptor in the
> +.Va desc
> +parameter.
> .El
> .Pp
> The
> @@ -221,6 +250,9 @@ arguments consists of one or more following flags:
> .Bl -tag -width indent
> .It Dv JAIL_DYING
> Allow getting a jail that is in the process of being removed.
> +.It Dv JAIL_USE_DESC , Dv JAIL_AT_DESC , Dv JAIL_GET_DESC , Dv JAIL_OWN_DESC
> +These have the same meaning as they do in
> +.Fn jail_set .
> .El
> .Pp
> The
> @@ -238,6 +270,101 @@ system call removes the jail identified by
> .Fa jid .
> It will kill all processes belonging to the jail, and remove any children
> of that jail.
> +.Pp
> +The
> +.Fn jail_attach_jd
> +and
> +.Fn jail_remove_jd
> +system calls work the same as
> +.Fn jail_attach
> +and
> +.Fn jail_remove ,
> +except that they operate on the jail identified by jail descriptor
> +.Fa fd .
> +.Ss Jail Descriptors
> +In addition to the jail ID,
> +jails can be referred to using a jail descriptor,
> +a type of file descriptor tied to a particular jail.
> +Jail descriptors are created by calling
> +.Fn jail_set
> +or
> +.Fn jail_get
> +with the special parameter
> +.Va desc ,
> +and either the
> +.Dv JAIL_GET_DESC
> +or
> +.Dv JAIL_OWN_DESC
> +flags set.
> +The difference between the two flags is that descriptors created with
> +.Dv JAIL_OWN_DESC
> +.Po
> +called
> +.Dq owning
> +descriptors
> +.Pc
> +will automatically remove the jail when the descriptor is closed.
> +.Pp
> +Jail descriptors can be passed back to
> +.Fn jail_set
> +or
> +.Fn jail_get
> +with the
> +.Va desc
> +parameter,
> +and either the
> +.Dv JAIL_USE_DESC
> +or
> +.Dv JAIL_AT_DESC
> +flags set.
> +With
> +.Dv JAIL_USE_DESC ,
> +the descriptor identifies the jail to operate on,
> +instead of the
> +.Va jid
> +or
> +.Va name
> +parameter.
> +With
> +.Dv JAIL_AT_DESC ,
> +the descriptor is used in place of the current jail,
> +allowing accessing or creating jails that are children of the
> +descriptor jail.
> +.Pp
> +The system calls
> +.Fn jail_attach_jd
> +and
> +.Fn jail_remove_jd
> +work the same as
> +.Fn jail_attach
> +and
> +.Fn jail_remove ,
> +except that they operate on the jail referred to by the passed descriptor.
> +.Pp
> +Jail operations via descriptors can be done by processes that do not
> +normally have permission to see or affect the jail,
> +as long as they are allowed by the file permissions of the jail
> +descriptor itself.
> +These permissions can be changed by the descriptor owner via
> +.Xr fchmod 2
> +and
> +.Xr fchown 2 .
> +.Fn jail_get
> +requires read permission,
> +.Fn jail_set
> +and
> +.Fn jail_remove
> +require write permission,
> +and
> +.Fn jail_attach
> +requires execute permission.
> +Also, use of a descriptor with the
> +.Dv JAIL_AT_DESC
> +flag requires execute permission.
> +An owning descriptor is identified by the
> +.Em sticky bit ,
> +which may also be changed via
> +.Xr fchmod 2 .
> .Sh RETURN VALUES
> If successful,
> .Fn jail ,
> @@ -249,7 +376,7 @@ They return \-1 on failure, and set
> .Va errno
> to indicate the error.
> .Pp
> -.Rv -std jail_attach jail_remove
> +.Rv -std jail_attach jail_remove jail_attach_jd jail_remove_jd
> .Sh ERRORS
> The
> .Fn jail
> @@ -275,12 +402,44 @@ The
> system call
> will fail if:
> .Bl -tag -width Er
> +.It Bq Er EBADF
> +The
> +.Va desc
> +parameter does not refer to a valid jail descriptor,
> +and either the
> +.Dv JAIL_USE_DESC
> +or
> +.Dv JAIL_AT_DESC
> +flag was set.
> +.It Bq Er EACCES
> +Write permission is denied on the jail descriptor in the
> +.Va desc
> +parameter,
> +and the
> +.Dv JAIL_USE_DESC
> +flag was set.
> +.It Bq Er EACCES
> +Execute permission is denied on the jail descriptor in the
> +.Va desc
> +parameter,
> +and either the
> +.Dv JAIL_AT_DESC
> +or
> +.Dv JAIL_ATTACH
> +flag was set.
> .It Bq Er EPERM
> This process is not allowed to create a jail, either because it is not
> the super-user, or because it would exceed the jail's
> .Va children.max
> limit.
> .It Bq Er EPERM
> +The jail descriptor in the
> +.Va desc
> +parameter was created by a user other than the super-user,
> +and the
> +.Dv JAIL_USE_DESC
> +flag was set.
> +.It Bq Er EPERM
> A jail parameter was set to a less restrictive value then the current
> environment.
> .It Bq Er EFAULT
> @@ -298,8 +457,12 @@ flag is not set.
> .It Bq Er ENOENT
> The jail referred to by a
> .Va jid
> -is not accessible by the process, because the process is in a different
> -jail.
> +parameter is not accessible by the process, because the process is in a
> +different jail.
> +.It Bq Er ENOENT
> +The jail referred to by a
> +.Va desc
> +parameter has been removed.
> .It Bq Er EEXIST
> The jail referred to by a
> .Va jid
> @@ -326,6 +489,24 @@ flags is not set.
> A supplied string parameter is longer than allowed.
> .It Bq Er EAGAIN
> There are no jail IDs left.
> +.It Bq Er EMFILE
> +A jail descriptor could not be created for the
> +.Va desc
> +parameter with either the
> +.Dv JAIL_GET_DESC
> +or
> +.Dv JAIL_OWN_DESC
> +flag set,
> +because the process has already reached its limit for open file descriptors.
> +.It Bq Er ENFILE
> +A jail descriptor could not be created for the
> +.Va desc
> +parameter with either the
> +.Dv JAIL_GET_DESC
> +or
> +.Dv JAIL_OWN_DESC
> +flag set,
> +because the system file table is full.
> .El
> .Pp
> The
> @@ -333,6 +514,29 @@ The
> system call
> will fail if:
> .Bl -tag -width Er
> +.It Bq Er EBADF
> +The
> +.Va desc
> +parameter does not refer to a valid jail descriptor,
> +and either the
> +.Dv JAIL_USE_DESC
> +or
> +.Dv JAIL_AT_DESC
> +flag was set.
> +.It Bq Er EACCES
> +Read permission is denied on the jail descriptor in the
> +.Va desc
> +parameter,
> +and the
> +.Dv JAIL_USE_DESC
> +flag was set.
> +.It Bq Er EACCES
> +Execute permission is denied on the jail descriptor in the
> +.Va desc
> +parameter,
> +and the
> +.Dv JAIL_AT_DESC
> +flag was set.
> .It Bq Er EFAULT
> .Fa Iov ,
> or one of the addresses contained within it,
> @@ -352,10 +556,33 @@ jail.
> The
> .Va lastjid
> parameter is greater than the highest current jail ID.
> +.It Bq Er ENOENT
> +The jail referred to by a
> +.Va desc
> +parameter has been removed
> +.Pq even if the Dv JAIL_CREATE flag has been set .
> .It Bq Er EINVAL
> A supplied parameter is the wrong size.
> .It Bq Er EINVAL
> A supplied parameter name does not match any known parameters.
> +.It Bq Er EMFILE
> +A jail descriptor could not be created for the
> +.Va desc
> +parameter with either the
> +.Dv JAIL_GET_DESC
> +or
> +.Dv JAIL_OWN_DESC
> +flag set,
> +because the process has already reached its limit for open file descriptors.
> +.It Bq Er ENFILE
> +A jail descriptor could not be created for the
> +.Va desc
> +parameter with either the
> +.Dv JAIL_GET_DESC
> +or
> +.Dv JAIL_OWN_DESC
> +flag set,
> +because the system file table is full.
> .El
> .Pp
> The
> @@ -373,11 +600,39 @@ The jail specified by
> does not exist.
> .El
> .Pp
> +The
> +.Fn jail_attach_jd
> +and
> +.Fn jail_remove_jd
> +system calls
> +will fail if:
> +.Bl -tag -width Er
> +.It Bq Er EBADF
> +The
> +.Fa fd
> +argument is not a valid jail descriptor.
> +.It Bq Er EACCES
> +Permission is denied on the jail descriptor
> +.Po
> +execute permission for
> +.Fn jail_attach_jd ,
> +or write permission for
> +.Fn jail_remove_jd
> +.Pc .
> +.It Bq Er EPERM
> +The jail descriptor was created by a user other than the super-user.
> +.It Bq Er EINVAL
> +The jail specified by
> +.Fa fd
> +has been removed.
> +.El
> +.Pp
> Further
> .Fn jail ,
> .Fn jail_set ,
> +.Fn jail_attach ,
> and
> -.Fn jail_attach
> +.Fn jail_attach_jd
> call
> .Xr chroot 2
> internally, so they can fail for all the same reasons.
> diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map
> index 4cf80a2ffc69..b5400b9849b3 100644
> --- a/lib/libsys/syscalls.map
> +++ b/lib/libsys/syscalls.map
> @@ -813,4 +813,8 @@ FBSDprivate_1.0 {
> __sys_getgroups;
> _setgroups;
> __sys_setgroups;
> + _jail_attach_jd;
> + __sys_jail_attach_jd;
> + _jail_remove_jd;
> + __sys_jail_remove_jd;
> };
> diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
> index 90cd21a80923..54063150eef9 100644
> --- a/sys/compat/freebsd32/freebsd32_syscall.h
> +++ b/sys/compat/freebsd32/freebsd32_syscall.h
> @@ -515,4 +515,6 @@
> #define FREEBSD32_SYS_inotify_rm_watch 594
> #define FREEBSD32_SYS_getgroups 595
> #define FREEBSD32_SYS_setgroups 596
> -#define FREEBSD32_SYS_MAXSYSCALL 597
> +#define FREEBSD32_SYS_jail_attach_jd 597
> +#define FREEBSD32_SYS_jail_remove_jd 598
> +#define FREEBSD32_SYS_MAXSYSCALL 599
> diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
> index f0f8d26554b5..f7cc4c284e4d 100644
> --- a/sys/compat/freebsd32/freebsd32_syscalls.c
> +++ b/sys/compat/freebsd32/freebsd32_syscalls.c
> @@ -602,4 +602,6 @@ const char *freebsd32_syscallnames[] = {
> "inotify_rm_watch", /* 594 = inotify_rm_watch */
> "getgroups", /* 595 = getgroups */
> "setgroups", /* 596 = setgroups */
> + "jail_attach_jd", /* 597 = jail_attach_jd */
> + "jail_remove_jd", /* 598 = jail_remove_jd */
> };
> diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
> index 12f1a346c3e9..18f809ef04e3 100644
> --- a/sys/compat/freebsd32/freebsd32_sysent.c
> +++ b/sys/compat/freebsd32/freebsd32_sysent.c
> @@ -664,4 +664,6 @@ struct sysent freebsd32_sysent[] = {
> { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
> { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
> { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
> + { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
> + { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
> };
> diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c
> index e471c5148021..29a5497e9efa 100644
> --- a/sys/compat/freebsd32/freebsd32_systrace_args.c
> +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
> @@ -3413,6 +3413,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
> *n_args = 2;
> break;
> }
> + /* jail_attach_jd */
> + case 597: {
> + struct jail_attach_jd_args *p = params;
> + iarg[a++] = p->fd; /* int */
> + *n_args = 1;
> + break;
> + }
> + /* jail_remove_jd */
> + case 598: {
> + struct jail_remove_jd_args *p = params;
> + iarg[a++] = p->fd; /* int */
> + *n_args = 1;
> + break;
> + }
> default:
> *n_args = 0;
> break;
> @@ -9222,6 +9236,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
> break;
> };
> break;
> + /* jail_attach_jd */
> + case 597:
> + switch (ndx) {
> + case 0:
> + p = "int";
> + break;
> + default:
> + break;
> + };
> + break;
> + /* jail_remove_jd */
> + case 598:
> + switch (ndx) {
> + case 0:
> + p = "int";
> + break;
> + default:
> + break;
> + };
> + break;
> default:
> break;
> };
> @@ -11130,6 +11164,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
> if (ndx == 0 || ndx == 1)
> p = "int";
> break;
> + /* jail_attach_jd */
> + case 597:
> + if (ndx == 0 || ndx == 1)
> + p = "int";
> + break;
> + /* jail_remove_jd */
> + case 598:
> + if (ndx == 0 || ndx == 1)
> + p = "int";
> + break;
> default:
> break;
> };
> diff --git a/sys/conf/files b/sys/conf/files
> index d89813c70355..9661bafea8f9 100644
> --- a/sys/conf/files
> +++ b/sys/conf/files
> @@ -3808,6 +3808,7 @@ kern/kern_hhook.c standard
> kern/kern_idle.c standard
> kern/kern_intr.c standard
> kern/kern_jail.c standard
> +kern/kern_jaildesc.c standard
> kern/kern_jailmeta.c standard
> kern/kern_kcov.c optional kcov \
> compile-with "${NOSAN_C} ${MSAN_CFLAGS}"
> diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
> index fcd232cde21e..e42e7dcf8b44 100644
> --- a/sys/kern/init_sysent.c
> +++ b/sys/kern/init_sysent.c
> @@ -663,4 +663,6 @@ struct sysent sysent[] = {
> { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
> { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
> { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
> + { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
> + { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
> };
> diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
> index a27ab33b34da..057235574eb5 100644
> --- a/sys/kern/kern_descrip.c
> +++ b/sys/kern/kern_descrip.c
> @@ -5250,6 +5250,8 @@ file_type_to_name(short type)
> return ("eventfd");
> case DTYPE_TIMERFD:
> return ("timerfd");
> + case DTYPE_JAILDESC:
> + return ("jail");
> default:
> return ("unkn");
> }
> diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
> index 52210553016b..5a1fbe23ddeb 100644
> --- a/sys/kern/kern_jail.c
> +++ b/sys/kern/kern_jail.c
> @@ -39,6 +39,7 @@
> #include <sys/kernel.h>
> #include <sys/systm.h>
> #include <sys/errno.h>
> +#include <sys/file.h>
> #include <sys/sysproto.h>
> #include <sys/malloc.h>
> #include <sys/osd.h>
> @@ -49,6 +50,7 @@
> #include <sys/taskqueue.h>
> #include <sys/fcntl.h>
> #include <sys/jail.h>
> +#include <sys/jaildesc.h>
> #include <sys/linker.h>
> #include <sys/lock.h>
> #include <sys/mman.h>
> @@ -988,6 +990,8 @@ prison_ip_cnt(const struct prison *pr, const pr_family_t af)
> int
> kern_jail_set(struct thread *td, struct uio *optuio, int flags)
> {
> + struct file *jfp_out;
> + struct jaildesc *desc_in;
> struct nameidata nd;
> #ifdef INET
> struct prison_ip *ip4;
> @@ -998,6 +1002,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
> struct vfsopt *opt;
> struct vfsoptlist *opts;
> struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr;
> + struct ucred *jdcred;
> struct vnode *root;
> char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
> char *g_path, *osrelstr;
> @@ -1011,7 +1016,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
> int created, cuflags, descend, drflags, enforce;
> int error, errmsg_len, errmsg_pos;
> int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
> - int deadid, jid, jsys, len, level;
> + int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level;
> int childmax, osreldt, rsnum, slevel;
> #ifdef INET
> int ip4s;
> @@ -1027,17 +1032,26 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
> unsigned tallow;
> char numbuf[12];
>
> - error = priv_check(td, PRIV_JAIL_SET);
> - if (!error && (flags & JAIL_ATTACH))
> - error = priv_check(td, PRIV_JAIL_ATTACH);
> - if (error)
> - return (error);
> mypr = td->td_ucred->cr_prison;
> - if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
> + if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE)
> + && mypr->pr_childmax == 0)
> return (EPERM);
> if (flags & ~JAIL_SET_MASK)
> return (EINVAL);
> + if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC))
> + == (JAIL_USE_DESC | JAIL_AT_DESC))
> + return (EINVAL);
> + prison_hold(mypr);
>
> +#ifdef INET
> + ip4 = NULL;
> +#endif
> +#ifdef INET6
> + ip6 = NULL;
> +#endif
> + g_path = NULL;
> + jfp_out = NULL;
> + jfd_out = -1;
> /*
> * Check all the parameters before committing to anything. Not all
> * errors can be caught early, but we may as well try. Also, this
> @@ -1050,14 +1064,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
> */
> error = vfs_buildopts(optuio, &opts);
> if (error)
> - return (error);
> -#ifdef INET
> - ip4 = NULL;
> -#endif
> -#ifdef INET6
> - ip6 = NULL;
> -#endif
> - g_path = NULL;
> + goto done_free;
>
> cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
> if (!cuflags) {
> @@ -1066,6 +1073,72 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
> goto done_errmsg;
> }
>
> + error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
> + if (error == ENOENT) {
> + if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
> + JAIL_OWN_DESC)) {
> + vfs_opterror(opts, "missing desc");
> + goto done_errmsg;
> + }
> + jfd_in = -1;
> + } else if (error != 0)
> + goto done_free;
> + else {
> + if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
> + JAIL_OWN_DESC))) {
> + vfs_opterror(opts, "unexpected desc");
> + goto done_errmsg;
> + }
> + if (flags & JAIL_AT_DESC) {
> + /*
> + * Look up and create jails based on the
> + * descriptor's prison.
> + */
> + prison_free(mypr);
> + error = jaildesc_find(td, jfd_in, &desc_in, &mypr,
> + NULL);
> + if (error != 0) {
> + vfs_opterror(opts, error == ENOENT
> + ? "descriptor to dead jail"
> + : "not a jail descriptor");
> + goto done_errmsg;
> + }
> + /*
> + * Check file permissions using the current
> + * credentials, and operation permissions
> + * using the descriptor's credentials.
> + */
> + error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
> + desc_in->jd_gid, VEXEC, td->td_ucred);
> + JAILDESC_UNLOCK(desc_in);
> + if (error != 0)
> + goto done_free;
> + if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) {
> + error = EPERM;
> + goto done_free;
> + }
> + }
> + if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
> + /* Allocate a jail descriptor to return later. */
> + error = jaildesc_alloc(td, &jfp_out, &jfd_out,
> + flags & JAIL_OWN_DESC);
> + if (error)
> + goto done_free;
> + }
> + }
> +
> + /*
> + * Delay the permission check if using a jail descriptor,
> + * until we get the descriptor's credentials.
> + */
> + if (!(flags & JAIL_USE_DESC)) {
> + error = priv_check(td, PRIV_JAIL_SET);
> + if (error == 0 && (flags & JAIL_ATTACH))
> + error = priv_check(td, PRIV_JAIL_ATTACH);
> + if (error)
> + goto done_free;
> + }
> +
> error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
> if (error == ENOENT)
> jid = 0;
> @@ -1441,7 +1514,57 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
> error = EAGAIN;
> goto done_deref;
> }
> - if (jid != 0) {
> + if (flags & JAIL_USE_DESC) {
> + /* Get the jail from its descriptor. */
> + error = jaildesc_find(td, jfd_in, &desc_in, &pr, &jdcred);
> + if (error) {
> + vfs_opterror(opts, error == ENOENT
> + ? "descriptor to dead jail"
> + : "not a jail descriptor");
> + goto done_deref;
> + }
> + drflags |= PD_DEREF;
> + /*
> + * Check file permissions using the current credentials,
> + * and operation permissions using the descriptor's
> + * credentials.
> + */
> + error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
> + desc_in->jd_gid, VWRITE, td->td_ucred);
> + if (error == 0 && (flags & JAIL_ATTACH))
> + error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
> + desc_in->jd_gid, VEXEC, td->td_ucred);
> + JAILDESC_UNLOCK(desc_in);
> + if (error == 0)
> + error = priv_check_cred(jdcred, PRIV_JAIL_SET);
> + if (error == 0 && (flags & JAIL_ATTACH))
> + error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
> + crfree(jdcred);
> + if (error)
> + goto done_deref;
> + mtx_lock(&pr->pr_mtx);
> + drflags |= PD_LOCKED;
> + if (cuflags == JAIL_CREATE) {
> + error = EEXIST;
> + vfs_opterror(opts, "jail %d already exists",
> + pr->pr_id);
> + goto done_deref;
> + }
> + if (!prison_isalive(pr)) {
> + /* While a jid can be resurrected, the prison
> + * itself cannot.
> + */
> + error = ENOENT;
> + vfs_opterror(opts, "jail %d is dying", pr->pr_id);
> *** 1065 LINES SKIPPED ***
>
--
Bjoern A. Zeeb r15:7