git: f35525ff2053 - main - file: Add a fd flag with O_RESOLVE_BENEATH semantics
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 24 Jun 2025 21:04:34 UTC
The branch main has been updated by markj:
URL: https://cgit.FreeBSD.org/src/commit/?id=f35525ff2053e026a423e852136d73ed93c95803
commit f35525ff2053e026a423e852136d73ed93c95803
Author: Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2025-06-24 20:17:07 +0000
Commit: Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-06-24 21:04:18 +0000
file: Add a fd flag with O_RESOLVE_BENEATH semantics
The O_RESOLVE_BENEATH openat(2) flag restricts name lookups such that
they remain under the directory referenced by the dirfd. This commit
introduces an implicit version of the flag, FD_RESOLVE_BENEATH, stored
in the file descriptor entry. When the flag is set, any lookup relative
to that fd automatically has O_RESOLVE_BENEATH semantics. Furthermore,
the flag is sticky, meaning that it cannot be cleared, and it is copied
by dup() and openat().
File descriptors with FD_RESOLVE_BENEATH set may not be passed to
fchdir(2) or fchroot(2). Various fd lookup routines are modified to
return fd flags to the caller.
This flag will be used to address a case where jails with different root
directories and the ability to pass SCM_RIGHTS messages across the jail
boundary can transfer directory fds in such a way as to allow a
filesystem escape.
PR: 262180
Reviewed by: kib
MFC after: 3 weeks
Differential Revision: https://reviews.freebsd.org/D50371
---
lib/libsys/fcntl.2 | 63 +++++++++++++++++++----------
sys/fs/fdescfs/fdesc_vnops.c | 4 +-
sys/kern/kern_descrip.c | 95 ++++++++++++++++++++++++++++++++------------
sys/kern/uipc_syscalls.c | 2 +-
sys/kern/vfs_acl.c | 4 +-
sys/kern/vfs_cache.c | 14 +++++--
sys/kern/vfs_extattr.c | 8 ++--
sys/kern/vfs_syscalls.c | 28 +++++++++----
sys/sys/fcntl.h | 2 +
sys/sys/file.h | 2 +-
sys/sys/filedesc.h | 8 +++-
sys/sys/namei.h | 1 +
12 files changed, 162 insertions(+), 69 deletions(-)
diff --git a/lib/libsys/fcntl.2 b/lib/libsys/fcntl.2
index b5d4abe35aeb..604de43e5e8c 100644
--- a/lib/libsys/fcntl.2
+++ b/lib/libsys/fcntl.2
@@ -25,7 +25,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd December 7, 2021
+.Dd June 5, 2025
.Dt FCNTL 2
.Os
.Sh NAME
@@ -80,6 +80,11 @@ associated with the new file descriptor is cleared, so the file descriptor is
to remain open across
.Xr execve 2
system calls.
+.It
+The
+.Dv FD_RESOLVE_BENEATH
+flag, described below, will be set if it was set on the original
+descriptor.
.El
.It Dv F_DUPFD_CLOEXEC
Like
@@ -113,29 +118,47 @@ Use
instead of
.Dv F_DUP2FD .
.It Dv F_GETFD
-Get the close-on-exec flag associated with the file descriptor
-.Fa fd
-as
-.Dv FD_CLOEXEC .
-If the returned value ANDed with
-.Dv FD_CLOEXEC
-is 0,
-the file will remain open across
-.Fn exec ,
-otherwise the file will be closed upon execution of
+Get the flags associated with the file descriptor
+.Fa fd .
+The following flags are defined:
+.Bl -tag -width FD_RESOLVE_BENEATH
+.It Dv FD_CLOEXEC
+The file will be closed upon execution of
.Fn exec
.Fa ( arg
is ignored).
+Otherwise, the file descriptor will remain open.
+.It Dv FD_RESOLVE_BENEATH
+All path name lookups relative to that file descriptor
+will behave as if the lookup had
+.Dv O_RESOLVE_BENEATH
+or
+.Dv AT_RESOLVE_BENEATH
+semantics.
+It is not permitted to call
+.Xr fchdir 2
+or
+.Xr fchroot 2
+on such a file descriptor.
+The
+.Dv FD_RESOLVE_BENEATH
+flag is sticky, meaning that it is preserved by
+.Xr dup 2
+and similar operations, and opening a directory with
+.Xr openat 2
+where the directory descriptor has the flag set causes the new directory
+descriptor to also have the flag set.
+.El
.It Dv F_SETFD
-Set the close-on-exec flag associated with
-.Fa fd
-to
-.Fa arg ,
-where
-.Fa arg
-is either 0 or
-.Dv FD_CLOEXEC ,
-as described above.
+Set flags associated with
+.Fa fd .
+The available flags are
+.Dv FD_CLOEXEC
+and
+.Dv FD_RESOLVE_BENEATH .
+The
+.Dv FD_RESOLVE_BENEATH
+flag cannot be cleared once set.
.It Dv F_GETFL
Get descriptor status flags, as described below
.Fa ( arg
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index 9ec80794e795..676ea5de12b8 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -502,7 +502,7 @@ fdesc_setattr(struct vop_setattr_args *ap)
cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
} else {
error = getvnode_path(td, fd,
- cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
+ cap_rights_init_one(&rights, CAP_EXTATTR_SET), NULL, &fp);
}
if (error) {
/*
@@ -639,7 +639,7 @@ fdesc_readlink(struct vop_readlink_args *va)
VOP_UNLOCK(vn);
td = curthread;
- error = fget_cap(td, fd_fd, &cap_no_rights, &fp, NULL);
+ error = fget_cap(td, fd_fd, &cap_no_rights, NULL, &fp, NULL);
if (error != 0)
goto out;
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index e70f2d248365..bbd6d530f478 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -109,8 +109,8 @@ static void fdgrowtable_exp(struct filedesc *fdp, int nfd);
static void fdunused(struct filedesc *fdp, int fd);
static void fdused(struct filedesc *fdp, int fd);
static int fget_unlocked_seq(struct thread *td, int fd,
- const cap_rights_t *needrightsp, struct file **fpp,
- seqc_t *seqp);
+ const cap_rights_t *needrightsp, uint8_t *flagsp,
+ struct file **fpp, seqc_t *seqp);
static int getmaxfd(struct thread *td);
static u_long *filecaps_copy_prep(const struct filecaps *src);
static void filecaps_copy_finish(const struct filecaps *src,
@@ -527,7 +527,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
fde = fdeget_noref(fdp, fd);
if (fde != NULL) {
td->td_retval[0] =
- (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0;
+ ((fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0) |
+ ((fde->fde_flags & UF_RESOLVE_BENEATH) ?
+ FD_RESOLVE_BENEATH : 0);
error = 0;
}
FILEDESC_SUNLOCK(fdp);
@@ -538,8 +540,13 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
FILEDESC_XLOCK(fdp);
fde = fdeget_noref(fdp, fd);
if (fde != NULL) {
+ /*
+ * UF_RESOLVE_BENEATH is sticky and cannot be cleared.
+ */
fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) |
- (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
+ ((arg & FD_CLOEXEC) != 0 ? UF_EXCLOSE : 0) |
+ ((arg & FD_RESOLVE_BENEATH) != 0 ?
+ UF_RESOLVE_BENEATH : 0);
error = 0;
}
FILEDESC_XUNLOCK(fdp);
@@ -2164,7 +2171,8 @@ _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags,
seqc_write_begin(&fde->fde_seqc);
#endif
fde->fde_file = fp;
- fde->fde_flags = (flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0;
+ fde->fde_flags = ((flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0) |
+ ((flags & O_RESOLVE_BENEATH) != 0 ? UF_RESOLVE_BENEATH : 0);
if (fcaps != NULL)
filecaps_move(fcaps, &fde->fde_caps);
else
@@ -2912,7 +2920,7 @@ out:
#ifdef CAPABILITIES
int
fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
- struct file **fpp, struct filecaps *havecapsp)
+ uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp)
{
struct filedesc *fdp = td->td_proc->p_fd;
int error;
@@ -2921,7 +2929,8 @@ fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
*fpp = NULL;
for (;;) {
- error = fget_unlocked_seq(td, fd, needrightsp, &fp, &seq);
+ error = fget_unlocked_seq(td, fd, needrightsp, flagsp, &fp,
+ &seq);
if (error != 0)
return (error);
@@ -2952,10 +2961,10 @@ get_locked:
#else
int
fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
- struct file **fpp, struct filecaps *havecapsp)
+ uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp)
{
int error;
- error = fget_unlocked(td, fd, needrightsp, fpp);
+ error = fget_unlocked(td, fd, needrightsp, flagsp, fpp);
if (havecapsp != NULL && error == 0)
filecaps_fill(havecapsp);
@@ -3038,7 +3047,7 @@ out:
#ifdef CAPABILITIES
int
-fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp)
{
const struct filedescent *fde;
const struct fdescenttbl *fdt;
@@ -3048,7 +3057,7 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
const cap_rights_t *haverights;
cap_rights_t rights;
seqc_t seq;
- int fd;
+ int fd, flags;
VFS_SMR_ASSERT_ENTERED();
@@ -3068,7 +3077,9 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
return (EAGAIN);
if (__predict_false(cap_check_inline_transient(haverights, &rights)))
return (EAGAIN);
- *fsearch = ((fp->f_flag & FSEARCH) != 0);
+ flags = fp->f_flag & FSEARCH;
+ flags |= (fde->fde_flags & UF_RESOLVE_BENEATH) != 0 ?
+ O_RESOLVE_BENEATH : 0;
vp = fp->f_vnode;
if (__predict_false(vp == NULL)) {
return (EAGAIN);
@@ -3102,17 +3113,19 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
#endif
}
*vpp = vp;
+ *flagsp = flags;
return (0);
}
#else
int
-fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp)
{
+ const struct filedescent *fde;
const struct fdescenttbl *fdt;
struct filedesc *fdp;
struct file *fp;
struct vnode *vp;
- int fd;
+ int fd, flags;
VFS_SMR_ASSERT_ENTERED();
@@ -3121,9 +3134,13 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
fdt = fdp->fd_files;
if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
return (EBADF);
- fp = fdt->fdt_ofiles[fd].fde_file;
+ fde = &fdt->fdt_ofiles[fd];
+ fp = fde->fde_file;
if (__predict_false(fp == NULL))
return (EAGAIN);
+ flags = fp->f_flag & FSEARCH;
+ flags |= (fde->fde_flags & UF_RESOLVE_BENEATH) != 0 ?
+ O_RESOLVE_BENEATH : 0;
*fsearch = ((fp->f_flag & FSEARCH) != 0);
vp = fp->f_vnode;
if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
@@ -3139,6 +3156,7 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
return (EAGAIN);
filecaps_fill(&ndp->ni_filecaps);
*vpp = vp;
+ *flagsp = flags;
return (0);
}
#endif
@@ -3152,13 +3170,15 @@ fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp)
struct componentname *cnp;
cap_rights_t rights;
int error;
+ uint8_t flags;
td = curthread;
rights = *ndp->ni_rightsneeded;
cap_rights_set_one(&rights, CAP_LOOKUP);
cnp = &ndp->ni_cnd;
- error = fget_cap(td, ndp->ni_dirfd, &rights, &fp, &ndp->ni_filecaps);
+ error = fget_cap(td, ndp->ni_dirfd, &rights, &flags, &fp,
+ &ndp->ni_filecaps);
if (__predict_false(error != 0))
return (error);
if (__predict_false(fp->f_ops == &badfileops)) {
@@ -3176,6 +3196,10 @@ fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp)
*/
if ((fp->f_flag & FSEARCH) != 0)
cnp->cn_flags |= NOEXECCHECK;
+ if ((flags & UF_RESOLVE_BENEATH) != 0) {
+ cnp->cn_flags |= RBENEATH;
+ ndp->ni_resflags |= NIRES_BENEATH;
+ }
fdrop(fp, td);
#ifdef CAPABILITIES
@@ -3223,7 +3247,7 @@ out_free:
#ifdef CAPABILITIES
static int
fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
- struct file **fpp, seqc_t *seqp)
+ uint8_t *flagsp, struct file **fpp, seqc_t *seqp)
{
struct filedesc *fdp;
const struct filedescent *fde;
@@ -3232,6 +3256,7 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
seqc_t seq;
cap_rights_t haverights;
int error;
+ uint8_t flags;
fdp = td->td_proc->p_fd;
fdt = fdp->fd_files;
@@ -3243,6 +3268,7 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
fde = &fdt->fdt_ofiles[fd];
haverights = *cap_rights_fde_inline(fde);
fp = fde->fde_file;
+ flags = fde->fde_flags;
if (__predict_false(fp == NULL)) {
if (seqc_consistent(fd_seqc(fdt, fd), seq))
return (EBADF);
@@ -3271,19 +3297,21 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
fdrop(fp, td);
}
*fpp = fp;
- if (seqp != NULL) {
+ if (flagsp != NULL)
+ *flagsp = flags;
+ if (seqp != NULL)
*seqp = seq;
- }
return (0);
}
#else
static int
fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
- struct file **fpp, seqc_t *seqp __unused)
+ uint8_t *flagsp, struct file **fpp, seqc_t *seqp __unused)
{
struct filedesc *fdp;
const struct fdescenttbl *fdt;
struct file *fp;
+ uint8_t flags;
fdp = td->td_proc->p_fd;
fdt = fdp->fd_files;
@@ -3292,6 +3320,7 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
for (;;) {
fp = fdt->fdt_ofiles[fd].fde_file;
+ flags = fdt->fdt_ofiles[fd].fde_flags;
if (__predict_false(fp == NULL))
return (EBADF);
if (__predict_false(!refcount_acquire_if_not_zero(&fp->f_count))) {
@@ -3308,6 +3337,8 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
break;
fdrop(fp, td);
}
+ if (flagsp != NULL)
+ *flagsp = flags;
*fpp = fp;
return (0);
}
@@ -3321,8 +3352,8 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
* racing with itself.
*/
int
-fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp,
- struct file **fpp)
+fget_unlocked_flags(struct thread *td, int fd, const cap_rights_t *needrightsp,
+ uint8_t *flagsp, struct file **fpp)
{
struct filedesc *fdp;
#ifdef CAPABILITIES
@@ -3334,6 +3365,7 @@ fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp,
seqc_t seq;
const cap_rights_t *haverights;
#endif
+ uint8_t flags;
fdp = td->td_proc->p_fd;
fdt = fdp->fd_files;
@@ -3346,8 +3378,10 @@ fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp,
fde = &fdt->fdt_ofiles[fd];
haverights = cap_rights_fde_inline(fde);
fp = fde->fde_file;
+ flags = fde->fde_flags;
#else
fp = fdt->fdt_ofiles[fd].fde_file;
+ flags = fdt->fdt_ofiles[fd].fde_flags;
#endif
if (__predict_false(fp == NULL))
goto out_fallback;
@@ -3371,12 +3405,21 @@ fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp,
#endif
goto out_fdrop;
*fpp = fp;
+ if (flagsp != NULL)
+ *flagsp = flags;
return (0);
out_fdrop:
fdrop(fp, td);
out_fallback:
*fpp = NULL;
- return (fget_unlocked_seq(td, fd, needrightsp, fpp, NULL));
+ return (fget_unlocked_seq(td, fd, needrightsp, flagsp, fpp, NULL));
+}
+
+int
+fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp,
+ struct file **fpp)
+{
+ return (fget_unlocked_flags(td, fd, needrightsp, NULL, fpp));
}
/*
@@ -3528,7 +3571,7 @@ fget_mmap(struct thread *td, int fd, const cap_rights_t *rightsp,
fdp = td->td_proc->p_fd;
MPASS(cap_rights_is_set(rightsp, CAP_MMAP));
for (;;) {
- error = fget_unlocked_seq(td, fd, rightsp, &fp, &seq);
+ error = fget_unlocked_seq(td, fd, rightsp, NULL, &fp, &seq);
if (__predict_false(error != 0))
return (error);
if (__predict_false(fp->f_ops == &badfileops)) {
@@ -3583,7 +3626,7 @@ fget_fcntl(struct thread *td, int fd, const cap_rights_t *rightsp,
*fpp = NULL;
MPASS(cap_rights_is_set(rightsp, CAP_FCNTL));
for (;;) {
- error = fget_unlocked_seq(td, fd, rightsp, &fp, &seq);
+ error = fget_unlocked_seq(td, fd, rightsp, NULL, &fp, &seq);
if (error != 0)
return (error);
error = cap_fcntl_check(fdp, fd, needfcntl);
@@ -3645,7 +3688,7 @@ fgetvp_rights(struct thread *td, int fd, const cap_rights_t *needrightsp,
struct file *fp;
int error;
- error = fget_cap(td, fd, needrightsp, &fp, &caps);
+ error = fget_cap(td, fd, needrightsp, NULL, &fp, &caps);
if (error != 0)
return (error);
if (fp->f_ops == &badfileops) {
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 0bbf13936bf9..ad8485028987 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -91,7 +91,7 @@ getsock_cap(struct thread *td, int fd, const cap_rights_t *rightsp,
struct file *fp;
int error;
- error = fget_cap(td, fd, rightsp, &fp, havecapsp);
+ error = fget_cap(td, fd, rightsp, NULL, &fp, havecapsp);
if (__predict_false(error != 0))
return (error);
if (__predict_false(fp->f_type != DTYPE_SOCKET)) {
diff --git a/sys/kern/vfs_acl.c b/sys/kern/vfs_acl.c
index 3106218abce6..6076a5f7bdf8 100644
--- a/sys/kern/vfs_acl.c
+++ b/sys/kern/vfs_acl.c
@@ -434,7 +434,7 @@ sys___acl_get_fd(struct thread *td, struct __acl_get_fd_args *uap)
AUDIT_ARG_FD(uap->filedes);
error = getvnode_path(td, uap->filedes,
- cap_rights_init_one(&rights, CAP_ACL_GET), &fp);
+ cap_rights_init_one(&rights, CAP_ACL_GET), NULL, &fp);
if (error == 0) {
error = vacl_get_acl(td, fp->f_vnode, uap->type, uap->aclp);
fdrop(fp, td);
@@ -569,7 +569,7 @@ sys___acl_aclcheck_fd(struct thread *td, struct __acl_aclcheck_fd_args *uap)
AUDIT_ARG_FD(uap->filedes);
error = getvnode_path(td, uap->filedes,
- cap_rights_init_one(&rights, CAP_ACL_CHECK), &fp);
+ cap_rights_init_one(&rights, CAP_ACL_CHECK), NULL, &fp);
if (error == 0) {
error = vacl_aclcheck(td, fp->f_vnode, uap->type, uap->aclp);
fdrop(fp, td);
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 4ab00698b311..883beaf6d1da 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -4528,17 +4528,23 @@ cache_fplookup_dirfd(struct cache_fpl *fpl, struct vnode **vpp)
{
struct nameidata *ndp;
struct componentname *cnp;
- int error;
- bool fsearch;
+ int error, flags;
ndp = fpl->ndp;
cnp = fpl->cnp;
- error = fgetvp_lookup_smr(ndp, vpp, &fsearch);
+ error = fgetvp_lookup_smr(ndp, vpp, &flags);
if (__predict_false(error != 0)) {
return (cache_fpl_aborted(fpl));
}
- fpl->fsearch = fsearch;
+ if (__predict_false((flags & O_RESOLVE_BENEATH) != 0)) {
+ _Static_assert((CACHE_FPL_SUPPORTED_CN_FLAGS & RBENEATH) == 0,
+ "RBENEATH supported by fplookup");
+ cache_fpl_smr_exit(fpl);
+ cache_fpl_aborted(fpl);
+ return (EOPNOTSUPP);
+ }
+ fpl->fsearch = (flags & FSEARCH) != 0;
if ((*vpp)->v_type != VDIR) {
if (!((cnp->cn_flags & EMPTYPATH) != 0 && cnp->cn_pnbuf[0] == '\0')) {
cache_fpl_smr_exit(fpl);
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 8debf487cc54..1fe7494f3998 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -253,7 +253,7 @@ kern_extattr_set_fd(struct thread *td, int fd, int attrnamespace,
AUDIT_ARG_TEXT(attrname);
error = getvnode_path(td, fd,
- cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
+ cap_rights_init_one(&rights, CAP_EXTATTR_SET), NULL, &fp);
if (error)
return (error);
@@ -441,7 +441,7 @@ kern_extattr_get_fd(struct thread *td, int fd, int attrnamespace,
AUDIT_ARG_TEXT(attrname);
error = getvnode_path(td, fd,
- cap_rights_init_one(&rights, CAP_EXTATTR_GET), &fp);
+ cap_rights_init_one(&rights, CAP_EXTATTR_GET), NULL, &fp);
if (error)
return (error);
@@ -597,7 +597,7 @@ kern_extattr_delete_fd(struct thread *td, int fd, int attrnamespace,
AUDIT_ARG_TEXT(attrname);
error = getvnode_path(td, fd,
- cap_rights_init_one(&rights, CAP_EXTATTR_DELETE), &fp);
+ cap_rights_init_one(&rights, CAP_EXTATTR_DELETE), NULL, &fp);
if (error)
return (error);
@@ -764,7 +764,7 @@ kern_extattr_list_fd(struct thread *td, int fd, int attrnamespace,
AUDIT_ARG_FD(fd);
AUDIT_ARG_VALUE(attrnamespace);
error = getvnode_path(td, fd,
- cap_rights_init_one(&rights, CAP_EXTATTR_LIST), &fp);
+ cap_rights_init_one(&rights, CAP_EXTATTR_LIST), NULL, &fp);
if (error)
return (error);
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 0e4847e1c634..c236f241bf20 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -370,7 +370,7 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
int error;
AUDIT_ARG_FD(fd);
- error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp);
+ error = getvnode_path(td, fd, &cap_fstatfs_rights, NULL, &fp);
if (error != 0)
return (error);
vp = fp->f_vnode;
@@ -893,12 +893,17 @@ sys_fchdir(struct thread *td, struct fchdir_args *uap)
struct mount *mp;
struct file *fp;
int error;
+ uint8_t fdflags;
AUDIT_ARG_FD(uap->fd);
- error = getvnode_path(td, uap->fd, &cap_fchdir_rights,
+ error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fdflags,
&fp);
if (error != 0)
return (error);
+ if ((fdflags & UF_RESOLVE_BENEATH) != 0) {
+ fdrop(fp, td);
+ return (ENOTCAPABLE);
+ }
vp = fp->f_vnode;
vrefact(vp);
fdrop(fp, td);
@@ -1041,10 +1046,15 @@ sys_fchroot(struct thread *td, struct fchroot_args *uap)
struct vnode *vp;
struct file *fp;
int error;
+ uint8_t fdflags;
- error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fp);
+ error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fdflags, &fp);
if (error != 0)
return (error);
+ if ((fdflags & UF_RESOLVE_BENEATH) != 0) {
+ fdrop(fp, td);
+ return (ENOTCAPABLE);
+ }
vp = fp->f_vnode;
vrefact(vp);
fdrop(fp, td);
@@ -1309,6 +1319,10 @@ success:
else
#endif
fcaps = NULL;
+ if ((nd.ni_resflags & NIRES_BENEATH) != 0)
+ flags |= O_RESOLVE_BENEATH;
+ else
+ flags &= ~O_RESOLVE_BENEATH;
error = finstall_refed(td, fp, &indx, flags, fcaps);
/* On success finstall_refed() consumes fcaps. */
if (error != 0) {
@@ -2013,7 +2027,7 @@ kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
fp = NULL;
if (fd != FD_NONE) {
- error = getvnode_path(td, fd, &cap_no_rights, &fp);
+ error = getvnode_path(td, fd, &cap_no_rights, NULL, &fp);
if (error != 0)
return (error);
}
@@ -4409,12 +4423,12 @@ out:
*/
int
getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp,
- struct file **fpp)
+ uint8_t *flagsp, struct file **fpp)
{
struct file *fp;
int error;
- error = fget_unlocked(td, fd, rightsp, &fp);
+ error = fget_unlocked_flags(td, fd, rightsp, flagsp, &fp);
if (error != 0)
return (error);
@@ -4451,7 +4465,7 @@ getvnode(struct thread *td, int fd, const cap_rights_t *rightsp,
{
int error;
- error = getvnode_path(td, fd, rightsp, fpp);
+ error = getvnode_path(td, fd, rightsp, NULL, fpp);
if (__predict_false(error != 0))
return (error);
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index c23c7eba0544..9329ecc29c47 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -289,6 +289,8 @@ typedef __pid_t pid_t;
/* file descriptor flags (F_GETFD, F_SETFD) */
#define FD_CLOEXEC 1 /* close-on-exec flag */
+#define FD_RESOLVE_BENEATH 2 /* all lookups relative to fd have
+ O_RESOLVE_BENEATH semantics */
/* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */
#define F_RDLCK 1 /* shared or read lock */
diff --git a/sys/sys/file.h b/sys/sys/file.h
index c79759a3f966..284d523147b6 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -301,7 +301,7 @@ int fgetvp_read(struct thread *td, int fd, const cap_rights_t *rightsp,
struct vnode **vpp);
int fgetvp_write(struct thread *td, int fd, const cap_rights_t *rightsp,
struct vnode **vpp);
-int fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch);
+int fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp);
int fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp);
static __inline __result_use_check bool
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
index 602d236ff853..55969b2ff4b3 100644
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -148,6 +148,7 @@ struct filedesc_to_leader {
* Per-process open flags.
*/
#define UF_EXCLOSE 0x01 /* auto-close on exec */
+#define UF_RESOLVE_BENEATH 0x02 /* lookups must be beneath this dir */
#ifdef _KERNEL
@@ -278,17 +279,20 @@ struct filedesc_to_leader *
int getvnode(struct thread *td, int fd, const cap_rights_t *rightsp,
struct file **fpp);
int getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp,
- struct file **fpp);
+ uint8_t *flagsp, struct file **fpp);
void mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
int fget_cap_noref(struct filedesc *fdp, int fd,
const cap_rights_t *needrightsp, struct file **fpp,
struct filecaps *havecapsp);
int fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
- struct file **fpp, struct filecaps *havecapsp);
+ uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp);
/* Return a referenced file from an unlocked descriptor. */
int fget_unlocked(struct thread *td, int fd,
const cap_rights_t *needrightsp, struct file **fpp);
+int fget_unlocked_flags(struct thread *td, int fd,
+ const cap_rights_t *needrightsp, uint8_t *flagsp,
+ struct file **fpp);
/* Return a file pointer without a ref. FILEDESC_IS_ONLY_USER must be true. */
int fget_only_user(struct filedesc *fdp, int fd,
const cap_rights_t *needrightsp, struct file **fpp);
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index eda3cc9b6f24..5c245235ace5 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -196,6 +196,7 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
#define NIRES_ABS 0x00000001 /* Path was absolute */
#define NIRES_STRICTREL 0x00000002 /* Restricted lookup result */
#define NIRES_EMPTYPATH 0x00000004 /* EMPTYPATH used */
+#define NIRES_BENEATH 0x00000008 /* O_RESOLVE_BENEATH is to be inherited */
/*
* Flags in ni_lcf, valid for the duration of the namei call.