git: 851dc7f859c2 - main - jail: add jail descriptors
Date: Thu, 04 Sep 2025 20:31:51 UTC
The branch main has been updated by jamie:
URL: https://cgit.FreeBSD.org/src/commit/?id=851dc7f859c23cab09a348bca03ab655534fb7e0
commit 851dc7f859c23cab09a348bca03ab655534fb7e0
Author: Jamie Gritton <jamie@FreeBSD.org>
AuthorDate: 2025-09-04 20:27:47 +0000
Commit: Jamie Gritton <jamie@FreeBSD.org>
CommitDate: 2025-09-04 20:27:47 +0000
jail: add jail descriptors
Similar to process descriptors, jail descriptors allow jail
administration using the file descriptor interface instead of JIDs.
They come from and can be used by jail_set(2) and jail_get(2),
and there are two new system calls, jail_attach_jd(2) and
jail_remove_jd(2).
Reviewed by: bz, brooks
Relnotes: yes
Differential Revision: https://reviews.freebsd.org/D43696
---
lib/libjail/jail.c | 64 +++-
lib/libsys/Symbol.sys.map | 2 +
lib/libsys/_libsys.h | 4 +
lib/libsys/jail.2 | 267 ++++++++++++++++-
lib/libsys/syscalls.map | 4 +
sys/compat/freebsd32/freebsd32_syscall.h | 4 +-
sys/compat/freebsd32/freebsd32_syscalls.c | 2 +
sys/compat/freebsd32/freebsd32_sysent.c | 2 +
sys/compat/freebsd32/freebsd32_systrace_args.c | 44 +++
sys/conf/files | 1 +
sys/kern/init_sysent.c | 2 +
sys/kern/kern_descrip.c | 2 +
sys/kern/kern_jail.c | 396 +++++++++++++++++++++++--
sys/kern/kern_jaildesc.c | 337 +++++++++++++++++++++
sys/kern/syscalls.c | 2 +
sys/kern/syscalls.master | 10 +
sys/kern/systrace_args.c | 44 +++
sys/sys/file.h | 1 +
sys/sys/jail.h | 15 +-
sys/sys/jaildesc.h | 85 ++++++
sys/sys/syscall.h | 4 +-
sys/sys/syscall.mk | 4 +-
sys/sys/sysproto.h | 10 +
sys/sys/user.h | 4 +
24 files changed, 1256 insertions(+), 54 deletions(-)
diff --git a/lib/libjail/jail.c b/lib/libjail/jail.c
index 30282e67866c..931391055919 100644
--- a/lib/libjail/jail.c
+++ b/lib/libjail/jail.c
@@ -75,8 +75,9 @@ int
jail_setv(int flags, ...)
{
va_list ap, tap;
- struct jailparam *jp;
- const char *name, *value;
+ struct jailparam *jp, *jp_desc;
+ const char *name;
+ char *value, *desc_value;
int njp, jid;
/* Create the parameter list and import the parameters. */
@@ -86,15 +87,24 @@ jail_setv(int flags, ...)
(void)va_arg(tap, char *);
va_end(tap);
jp = alloca(njp * sizeof(struct jailparam));
- for (njp = 0; (name = va_arg(ap, char *)) != NULL;) {
+ jp_desc = NULL;
+ desc_value = NULL;
+ for (njp = 0; (name = va_arg(ap, char *)) != NULL; njp++) {
value = va_arg(ap, char *);
if (jailparam_init(jp + njp, name) < 0)
goto error;
- if (jailparam_import(jp + njp++, value) < 0)
+ if (jailparam_import(jp + njp, value) < 0)
goto error;
+ if (!strcmp(name, "desc")
+ && (flags & (JAIL_GET_DESC | JAIL_OWN_DESC))) {
+ jp_desc = jp + njp;
+ desc_value = value;
+ }
}
va_end(ap);
jid = jailparam_set(jp, njp, flags);
+ if (jid > 0 && jp_desc != NULL)
+ sprintf(desc_value, "%d", *(int *)jp_desc->jp_value);
jailparam_free(jp, njp);
return (jid);
@@ -112,9 +122,10 @@ int
jail_getv(int flags, ...)
{
va_list ap, tap;
- struct jailparam *jp, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
+ struct jailparam *jp, *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
char *valarg, *value;
- const char *name, *key_value, *lastjid_value, *jid_value, *name_value;
+ const char *name, *key_value, *desc_value, *lastjid_value, *jid_value;
+ const char *name_value;
int njp, i, jid;
/* Create the parameter list and find the key. */
@@ -126,15 +137,19 @@ jail_getv(int flags, ...)
jp = alloca(njp * sizeof(struct jailparam));
va_copy(tap, ap);
- jp_lastjid = jp_jid = jp_name = NULL;
- lastjid_value = jid_value = name_value = NULL;
+ jp_desc = jp_lastjid = jp_jid = jp_name = NULL;
+ desc_value = lastjid_value = jid_value = name_value = NULL;
for (njp = 0; (name = va_arg(tap, char *)) != NULL; njp++) {
value = va_arg(tap, char *);
if (jailparam_init(jp + njp, name) < 0) {
va_end(tap);
goto error;
}
- if (!strcmp(jp[njp].jp_name, "lastjid")) {
+ if (!strcmp(jp[njp].jp_name, "desc")
+ && (flags & (JAIL_USE_DESC | JAIL_AT_DESC))) {
+ jp_desc = jp + njp;
+ desc_value = value;
+ } else if (!strcmp(jp[njp].jp_name, "lastjid")) {
jp_lastjid = jp + njp;
lastjid_value = value;
} else if (!strcmp(jp[njp].jp_name, "jid")) {
@@ -147,7 +162,10 @@ jail_getv(int flags, ...)
}
va_end(tap);
/* Import the key parameter. */
- if (jp_lastjid != NULL) {
+ if (jp_desc != NULL && (flags & JAIL_USE_DESC)) {
+ jp_key = jp_desc;
+ key_value = desc_value;
+ } else if (jp_lastjid != NULL) {
jp_key = jp_lastjid;
key_value = lastjid_value;
} else if (jp_jid != NULL && strtol(jid_value, NULL, 10) != 0) {
@@ -163,6 +181,9 @@ jail_getv(int flags, ...)
}
if (jailparam_import(jp_key, key_value) < 0)
goto error;
+ if (jp_desc != NULL && jp_desc != jp_key
+ && jailparam_import(jp_desc, desc_value) < 0)
+ goto error;
/* Get the jail and export the parameters. */
jid = jailparam_get(jp, njp, flags);
if (jid < 0)
@@ -571,7 +592,7 @@ int
jailparam_get(struct jailparam *jp, unsigned njp, int flags)
{
struct iovec *jiov;
- struct jailparam *jp_lastjid, *jp_jid, *jp_name, *jp_key;
+ struct jailparam *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
int i, ai, ki, jid, arrays, sanity;
unsigned j;
@@ -580,10 +601,13 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
* Find the key and any array parameters.
*/
jiov = alloca(sizeof(struct iovec) * 2 * (njp + 1));
- jp_lastjid = jp_jid = jp_name = NULL;
+ jp_desc = jp_lastjid = jp_jid = jp_name = NULL;
arrays = 0;
for (ai = j = 0; j < njp; j++) {
- if (!strcmp(jp[j].jp_name, "lastjid"))
+ if (!strcmp(jp[j].jp_name, "desc")
+ && (flags & (JAIL_USE_DESC | JAIL_AT_DESC)))
+ jp_desc = jp + j;
+ else if (!strcmp(jp[j].jp_name, "lastjid"))
jp_lastjid = jp + j;
else if (!strcmp(jp[j].jp_name, "jid"))
jp_jid = jp + j;
@@ -599,7 +623,9 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
ai++;
}
}
- jp_key = jp_lastjid ? jp_lastjid :
+ jp_key = jp_desc && jp_desc->jp_valuelen == sizeof(int) &&
+ jp_desc->jp_value && (flags & JAIL_USE_DESC) ? jp_desc :
+ jp_lastjid ? jp_lastjid :
jp_jid && jp_jid->jp_valuelen == sizeof(int) &&
jp_jid->jp_value && *(int *)jp_jid->jp_value ? jp_jid : jp_name;
if (jp_key == NULL || jp_key->jp_value == NULL) {
@@ -622,6 +648,14 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
jiov[ki].iov_len = JAIL_ERRMSGLEN;
ki++;
jail_errmsg[0] = 0;
+ if (jp_desc != NULL && jp_desc != jp_key) {
+ jiov[ki].iov_base = jp_desc->jp_name;
+ jiov[ki].iov_len = strlen(jp_desc->jp_name) + 1;
+ ki++;
+ jiov[ki].iov_base = jp_desc->jp_value;
+ jiov[ki].iov_len = jp_desc->jp_valuelen;
+ ki++;
+ }
if (arrays && jail_get(jiov, ki, flags) < 0) {
if (!jail_errmsg[0])
snprintf(jail_errmsg, sizeof(jail_errmsg),
@@ -649,7 +683,7 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
jiov[ai].iov_base = jp[j].jp_value;
memset(jiov[ai].iov_base, 0, jiov[ai].iov_len);
ai++;
- } else if (jp + j != jp_key) {
+ } else if (jp + j != jp_key && jp + j != jp_desc) {
jiov[i].iov_base = jp[j].jp_name;
jiov[i].iov_len = strlen(jp[j].jp_name) + 1;
i++;
diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map
index 1a297f9df581..e3fd8ac10621 100644
--- a/lib/libsys/Symbol.sys.map
+++ b/lib/libsys/Symbol.sys.map
@@ -382,6 +382,8 @@ FBSD_1.8 {
getrlimitusage;
inotify_add_watch_at;
inotify_rm_watch;
+ jail_attach_jd;
+ jail_remove_jd;
kcmp;
setcred;
setgroups;
diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h
index 34eebc1aa67a..6bd768708a78 100644
--- a/lib/libsys/_libsys.h
+++ b/lib/libsys/_libsys.h
@@ -468,6 +468,8 @@ typedef int (__sys_inotify_add_watch_at_t)(int, int, const char *, uint32_t);
typedef int (__sys_inotify_rm_watch_t)(int, int);
typedef int (__sys_getgroups_t)(int, gid_t *);
typedef int (__sys_setgroups_t)(int, const gid_t *);
+typedef int (__sys_jail_attach_jd_t)(int);
+typedef int (__sys_jail_remove_jd_t)(int);
_Noreturn void __sys__exit(int rval);
int __sys_fork(void);
@@ -872,6 +874,8 @@ int __sys_inotify_add_watch_at(int fd, int dfd, const char * path, uint32_t mask
int __sys_inotify_rm_watch(int fd, int wd);
int __sys_getgroups(int gidsetsize, gid_t * gidset);
int __sys_setgroups(int gidsetsize, const gid_t * gidset);
+int __sys_jail_attach_jd(int fd);
+int __sys_jail_remove_jd(int fd);
__END_DECLS
#endif /* __LIBSYS_H_ */
diff --git a/lib/libsys/jail.2 b/lib/libsys/jail.2
index 8f8b9925c712..a0f47cc61cb3 100644
--- a/lib/libsys/jail.2
+++ b/lib/libsys/jail.2
@@ -23,7 +23,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd November 29, 2023
+.Dd September 4, 2025
.Dt JAIL 2
.Os
.Sh NAME
@@ -31,7 +31,9 @@
.Nm jail_get ,
.Nm jail_set ,
.Nm jail_remove ,
-.Nm jail_attach
+.Nm jail_attach ,
+.Nm jail_remove_jd ,
+.Nm jail_attach_jd
.Nd create and manage system jails
.Sh LIBRARY
.Lb libc
@@ -44,6 +46,10 @@
.Fn jail_attach "int jid"
.Ft int
.Fn jail_remove "int jid"
+.Ft int
+.Fn jail_attach_jd "int fd"
+.Ft int
+.Fn jail_remove_jd "int fd"
.In sys/uio.h
.Ft int
.Fn jail_get "struct iovec *iov" "u_int niov" "int flags"
@@ -188,6 +194,29 @@ system call.
This is deprecated in
.Fn jail_set
and has no effect.
+.It Dv JAIL_USE_DESC
+Identify the jail by a descriptor in the
+.Va desc
+parameter.
+.It Dv JAIL_AT_DESC
+Operate in the context of the jail described by the
+.Va desc
+parameter, instead of the current jail.
+Only one of
+.Dv JAIL_USE_DESC
+or
+.Dv JAIL_AT_DESC
+may be specified.
+.It Dv JAIL_GET_DESC
+Return a new jail descriptor for the jail in the
+.Va desc
+parameter.
+.It Dv JAIL_OWN_DESC
+Return an
+.Dq owning
+jail descriptor in the
+.Va desc
+parameter.
.El
.Pp
The
@@ -221,6 +250,9 @@ arguments consists of one or more following flags:
.Bl -tag -width indent
.It Dv JAIL_DYING
Allow getting a jail that is in the process of being removed.
+.It Dv JAIL_USE_DESC , Dv JAIL_AT_DESC , Dv JAIL_GET_DESC , Dv JAIL_OWN_DESC
+These have the same meaning as they do in
+.Fn jail_set .
.El
.Pp
The
@@ -238,6 +270,101 @@ system call removes the jail identified by
.Fa jid .
It will kill all processes belonging to the jail, and remove any children
of that jail.
+.Pp
+The
+.Fn jail_attach_jd
+and
+.Fn jail_remove_jd
+system calls work the same as
+.Fn jail_attach
+and
+.Fn jail_remove ,
+except that they operate on the jail identified by jail descriptor
+.Fa fd .
+.Ss Jail Descriptors
+In addition to the jail ID,
+jails can be referred to using a jail descriptor,
+a type of file descriptor tied to a particular jail.
+Jail descriptors are created by calling
+.Fn jail_set
+or
+.Fn jail_get
+with the special parameter
+.Va desc ,
+and either the
+.Dv JAIL_GET_DESC
+or
+.Dv JAIL_OWN_DESC
+flags set.
+The difference between the two flags is that descriptors created with
+.Dv JAIL_OWN_DESC
+.Po
+called
+.Dq owning
+descriptors
+.Pc
+will automatically remove the jail when the descriptor is closed.
+.Pp
+Jail descriptors can be passed back to
+.Fn jail_set
+or
+.Fn jail_get
+with the
+.Va desc
+parameter,
+and either the
+.Dv JAIL_USE_DESC
+or
+.Dv JAIL_AT_DESC
+flags set.
+With
+.Dv JAIL_USE_DESC ,
+the descriptor identifies the jail to operate on,
+instead of the
+.Va jid
+or
+.Va name
+parameter.
+With
+.Dv JAIL_AT_DESC ,
+the descriptor is used in place of the current jail,
+allowing accessing or creating jails that are children of the
+descriptor jail.
+.Pp
+The system calls
+.Fn jail_attach_jd
+and
+.Fn jail_remove_jd
+work the same as
+.Fn jail_attach
+and
+.Fn jail_remove ,
+except that they operate on the jail referred to by the passed descriptor.
+.Pp
+Jail operations via descriptors can be done by processes that do not
+normally have permission to see or affect the jail,
+as long as they are allowed by the file permissions of the jail
+descriptor itself.
+These permissions can be changed by the descriptor owner via
+.Xr fchmod 2
+and
+.Xr fchown 2 .
+.Fn jail_get
+requires read permission,
+.Fn jail_set
+and
+.Fn jail_remove
+require write permission,
+and
+.Fn jail_attach
+requires execute permission.
+Also, use of a descriptor with the
+.Dv JAIL_AT_DESC
+flag requires execute permission.
+An owning descriptor is identified by the
+.Em sticky bit ,
+which may also be changed via
+.Xr fchmod 2 .
.Sh RETURN VALUES
If successful,
.Fn jail ,
@@ -249,7 +376,7 @@ They return \-1 on failure, and set
.Va errno
to indicate the error.
.Pp
-.Rv -std jail_attach jail_remove
+.Rv -std jail_attach jail_remove jail_attach_jd jail_remove_jd
.Sh ERRORS
The
.Fn jail
@@ -275,12 +402,44 @@ The
system call
will fail if:
.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Va desc
+parameter does not refer to a valid jail descriptor,
+and either the
+.Dv JAIL_USE_DESC
+or
+.Dv JAIL_AT_DESC
+flag was set.
+.It Bq Er EACCES
+Write permission is denied on the jail descriptor in the
+.Va desc
+parameter,
+and the
+.Dv JAIL_USE_DESC
+flag was set.
+.It Bq Er EACCES
+Execute permission is denied on the jail descriptor in the
+.Va desc
+parameter,
+and either the
+.Dv JAIL_AT_DESC
+or
+.Dv JAIL_ATTACH
+flag was set.
.It Bq Er EPERM
This process is not allowed to create a jail, either because it is not
the super-user, or because it would exceed the jail's
.Va children.max
limit.
.It Bq Er EPERM
+The jail descriptor in the
+.Va desc
+parameter was created by a user other than the super-user,
+and the
+.Dv JAIL_USE_DESC
+flag was set.
+.It Bq Er EPERM
A jail parameter was set to a less restrictive value then the current
environment.
.It Bq Er EFAULT
@@ -298,8 +457,12 @@ flag is not set.
.It Bq Er ENOENT
The jail referred to by a
.Va jid
-is not accessible by the process, because the process is in a different
-jail.
+parameter is not accessible by the process, because the process is in a
+different jail.
+.It Bq Er ENOENT
+The jail referred to by a
+.Va desc
+parameter has been removed.
.It Bq Er EEXIST
The jail referred to by a
.Va jid
@@ -326,6 +489,24 @@ flags is not set.
A supplied string parameter is longer than allowed.
.It Bq Er EAGAIN
There are no jail IDs left.
+.It Bq Er EMFILE
+A jail descriptor could not be created for the
+.Va desc
+parameter with either the
+.Dv JAIL_GET_DESC
+or
+.Dv JAIL_OWN_DESC
+flag set,
+because the process has already reached its limit for open file descriptors.
+.It Bq Er ENFILE
+A jail descriptor could not be created for the
+.Va desc
+parameter with either the
+.Dv JAIL_GET_DESC
+or
+.Dv JAIL_OWN_DESC
+flag set,
+because the system file table is full.
.El
.Pp
The
@@ -333,6 +514,29 @@ The
system call
will fail if:
.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Va desc
+parameter does not refer to a valid jail descriptor,
+and either the
+.Dv JAIL_USE_DESC
+or
+.Dv JAIL_AT_DESC
+flag was set.
+.It Bq Er EACCES
+Read permission is denied on the jail descriptor in the
+.Va desc
+parameter,
+and the
+.Dv JAIL_USE_DESC
+flag was set.
+.It Bq Er EACCES
+Execute permission is denied on the jail descriptor in the
+.Va desc
+parameter,
+and the
+.Dv JAIL_AT_DESC
+flag was set.
.It Bq Er EFAULT
.Fa Iov ,
or one of the addresses contained within it,
@@ -352,10 +556,33 @@ jail.
The
.Va lastjid
parameter is greater than the highest current jail ID.
+.It Bq Er ENOENT
+The jail referred to by a
+.Va desc
+parameter has been removed
+.Pq even if the Dv JAIL_CREATE flag has been set .
.It Bq Er EINVAL
A supplied parameter is the wrong size.
.It Bq Er EINVAL
A supplied parameter name does not match any known parameters.
+.It Bq Er EMFILE
+A jail descriptor could not be created for the
+.Va desc
+parameter with either the
+.Dv JAIL_GET_DESC
+or
+.Dv JAIL_OWN_DESC
+flag set,
+because the process has already reached its limit for open file descriptors.
+.It Bq Er ENFILE
+A jail descriptor could not be created for the
+.Va desc
+parameter with either the
+.Dv JAIL_GET_DESC
+or
+.Dv JAIL_OWN_DESC
+flag set,
+because the system file table is full.
.El
.Pp
The
@@ -373,11 +600,39 @@ The jail specified by
does not exist.
.El
.Pp
+The
+.Fn jail_attach_jd
+and
+.Fn jail_remove_jd
+system calls
+will fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid jail descriptor.
+.It Bq Er EACCES
+Permission is denied on the jail descriptor
+.Po
+execute permission for
+.Fn jail_attach_jd ,
+or write permission for
+.Fn jail_remove_jd
+.Pc .
+.It Bq Er EPERM
+The jail descriptor was created by a user other than the super-user.
+.It Bq Er EINVAL
+The jail specified by
+.Fa fd
+has been removed.
+.El
+.Pp
Further
.Fn jail ,
.Fn jail_set ,
+.Fn jail_attach ,
and
-.Fn jail_attach
+.Fn jail_attach_jd
call
.Xr chroot 2
internally, so they can fail for all the same reasons.
diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map
index 4cf80a2ffc69..b5400b9849b3 100644
--- a/lib/libsys/syscalls.map
+++ b/lib/libsys/syscalls.map
@@ -813,4 +813,8 @@ FBSDprivate_1.0 {
__sys_getgroups;
_setgroups;
__sys_setgroups;
+ _jail_attach_jd;
+ __sys_jail_attach_jd;
+ _jail_remove_jd;
+ __sys_jail_remove_jd;
};
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
index 90cd21a80923..54063150eef9 100644
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -515,4 +515,6 @@
#define FREEBSD32_SYS_inotify_rm_watch 594
#define FREEBSD32_SYS_getgroups 595
#define FREEBSD32_SYS_setgroups 596
-#define FREEBSD32_SYS_MAXSYSCALL 597
+#define FREEBSD32_SYS_jail_attach_jd 597
+#define FREEBSD32_SYS_jail_remove_jd 598
+#define FREEBSD32_SYS_MAXSYSCALL 599
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
index f0f8d26554b5..f7cc4c284e4d 100644
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -602,4 +602,6 @@ const char *freebsd32_syscallnames[] = {
"inotify_rm_watch", /* 594 = inotify_rm_watch */
"getgroups", /* 595 = getgroups */
"setgroups", /* 596 = setgroups */
+ "jail_attach_jd", /* 597 = jail_attach_jd */
+ "jail_remove_jd", /* 598 = jail_remove_jd */
};
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
index 12f1a346c3e9..18f809ef04e3 100644
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -664,4 +664,6 @@ struct sysent freebsd32_sysent[] = {
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
+ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
+ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
};
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c
index e471c5148021..29a5497e9efa 100644
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3413,6 +3413,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 2;
break;
}
+ /* jail_attach_jd */
+ case 597: {
+ struct jail_attach_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* jail_remove_jd */
+ case 598: {
+ struct jail_remove_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9222,6 +9236,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* jail_attach_jd */
+ case 597:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* jail_remove_jd */
+ case 598:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11130,6 +11164,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* jail_attach_jd */
+ case 597:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* jail_remove_jd */
+ case 598:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/conf/files b/sys/conf/files
index d89813c70355..9661bafea8f9 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3808,6 +3808,7 @@ kern/kern_hhook.c standard
kern/kern_idle.c standard
kern/kern_intr.c standard
kern/kern_jail.c standard
+kern/kern_jaildesc.c standard
kern/kern_jailmeta.c standard
kern/kern_kcov.c optional kcov \
compile-with "${NOSAN_C} ${MSAN_CFLAGS}"
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index fcd232cde21e..e42e7dcf8b44 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -663,4 +663,6 @@ struct sysent sysent[] = {
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
+ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
+ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
};
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index a27ab33b34da..057235574eb5 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -5250,6 +5250,8 @@ file_type_to_name(short type)
return ("eventfd");
case DTYPE_TIMERFD:
return ("timerfd");
+ case DTYPE_JAILDESC:
+ return ("jail");
default:
return ("unkn");
}
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 52210553016b..5a1fbe23ddeb 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -39,6 +39,7 @@
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
+#include <sys/file.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/osd.h>
@@ -49,6 +50,7 @@
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
+#include <sys/jaildesc.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/mman.h>
@@ -988,6 +990,8 @@ prison_ip_cnt(const struct prison *pr, const pr_family_t af)
int
kern_jail_set(struct thread *td, struct uio *optuio, int flags)
{
+ struct file *jfp_out;
+ struct jaildesc *desc_in;
struct nameidata nd;
#ifdef INET
struct prison_ip *ip4;
@@ -998,6 +1002,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
struct vfsopt *opt;
struct vfsoptlist *opts;
struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr;
+ struct ucred *jdcred;
struct vnode *root;
char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
char *g_path, *osrelstr;
@@ -1011,7 +1016,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
int created, cuflags, descend, drflags, enforce;
int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
- int deadid, jid, jsys, len, level;
+ int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level;
int childmax, osreldt, rsnum, slevel;
#ifdef INET
int ip4s;
@@ -1027,17 +1032,26 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
unsigned tallow;
char numbuf[12];
- error = priv_check(td, PRIV_JAIL_SET);
- if (!error && (flags & JAIL_ATTACH))
- error = priv_check(td, PRIV_JAIL_ATTACH);
- if (error)
- return (error);
mypr = td->td_ucred->cr_prison;
- if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
+ if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE)
+ && mypr->pr_childmax == 0)
return (EPERM);
if (flags & ~JAIL_SET_MASK)
return (EINVAL);
+ if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC))
+ == (JAIL_USE_DESC | JAIL_AT_DESC))
+ return (EINVAL);
+ prison_hold(mypr);
+#ifdef INET
+ ip4 = NULL;
+#endif
+#ifdef INET6
+ ip6 = NULL;
+#endif
+ g_path = NULL;
+ jfp_out = NULL;
+ jfd_out = -1;
/*
* Check all the parameters before committing to anything. Not all
* errors can be caught early, but we may as well try. Also, this
@@ -1050,14 +1064,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
*/
error = vfs_buildopts(optuio, &opts);
if (error)
- return (error);
-#ifdef INET
- ip4 = NULL;
-#endif
-#ifdef INET6
- ip6 = NULL;
-#endif
- g_path = NULL;
+ goto done_free;
cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
if (!cuflags) {
@@ -1066,6 +1073,72 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_errmsg;
}
+ error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
+ if (error == ENOENT) {
+ if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC)) {
+ vfs_opterror(opts, "missing desc");
+ goto done_errmsg;
+ }
+ jfd_in = -1;
+ } else if (error != 0)
+ goto done_free;
+ else {
+ if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC))) {
+ vfs_opterror(opts, "unexpected desc");
+ goto done_errmsg;
+ }
+ if (flags & JAIL_AT_DESC) {
+ /*
+ * Look up and create jails based on the
+ * descriptor's prison.
+ */
+ prison_free(mypr);
+ error = jaildesc_find(td, jfd_in, &desc_in, &mypr,
+ NULL);
+ if (error != 0) {
+ vfs_opterror(opts, error == ENOENT
+ ? "descriptor to dead jail"
+ : "not a jail descriptor");
+ goto done_errmsg;
+ }
+ /*
+ * Check file permissions using the current
+ * credentials, and operation permissions
+ * using the descriptor's credentials.
+ */
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VEXEC, td->td_ucred);
+ JAILDESC_UNLOCK(desc_in);
+ if (error != 0)
+ goto done_free;
+ if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) {
+ error = EPERM;
+ goto done_free;
+ }
+ }
+ if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ /* Allocate a jail descriptor to return later. */
+ error = jaildesc_alloc(td, &jfp_out, &jfd_out,
+ flags & JAIL_OWN_DESC);
+ if (error)
+ goto done_free;
+ }
+ }
+
+ /*
+ * Delay the permission check if using a jail descriptor,
+ * until we get the descriptor's credentials.
+ */
+ if (!(flags & JAIL_USE_DESC)) {
+ error = priv_check(td, PRIV_JAIL_SET);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = priv_check(td, PRIV_JAIL_ATTACH);
+ if (error)
+ goto done_free;
+ }
+
error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
if (error == ENOENT)
jid = 0;
@@ -1441,7 +1514,57 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
error = EAGAIN;
goto done_deref;
}
- if (jid != 0) {
+ if (flags & JAIL_USE_DESC) {
+ /* Get the jail from its descriptor. */
+ error = jaildesc_find(td, jfd_in, &desc_in, &pr, &jdcred);
+ if (error) {
+ vfs_opterror(opts, error == ENOENT
+ ? "descriptor to dead jail"
+ : "not a jail descriptor");
+ goto done_deref;
+ }
+ drflags |= PD_DEREF;
+ /*
+ * Check file permissions using the current credentials,
+ * and operation permissions using the descriptor's
+ * credentials.
+ */
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VWRITE, td->td_ucred);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VEXEC, td->td_ucred);
+ JAILDESC_UNLOCK(desc_in);
+ if (error == 0)
+ error = priv_check_cred(jdcred, PRIV_JAIL_SET);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
+ crfree(jdcred);
+ if (error)
+ goto done_deref;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ if (cuflags == JAIL_CREATE) {
+ error = EEXIST;
+ vfs_opterror(opts, "jail %d already exists",
+ pr->pr_id);
+ goto done_deref;
+ }
+ if (!prison_isalive(pr)) {
+ /* While a jid can be resurrected, the prison
+ * itself cannot.
+ */
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d is dying", pr->pr_id);
*** 1065 LINES SKIPPED ***