svn commit: r366597 - in head/sys: kern sys

Mateusz Guzik mjg at FreeBSD.org
Sat Oct 10 03:48:18 UTC 2020


Author: mjg
Date: Sat Oct 10 03:48:17 2020
New Revision: 366597
URL: https://svnweb.freebsd.org/changeset/base/366597

Log:
  vfs: support lockless dirfd lookups

Modified:
  head/sys/kern/kern_descrip.c
  head/sys/kern/vfs_cache.c
  head/sys/sys/file.h

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c	Sat Oct 10 01:13:14 2020	(r366596)
+++ head/sys/kern/kern_descrip.c	Sat Oct 10 03:48:17 2020	(r366597)
@@ -2708,6 +2708,111 @@ get_locked:
 	return (error);
 }
 
+#ifdef CAPABILITIES
+int
+fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+{
+	const struct filedescent *fde;
+	const struct fdescenttbl *fdt;
+	struct filedesc *fdp;
+	struct file *fp;
+	struct vnode *vp;
+	const cap_rights_t *haverights;
+	cap_rights_t rights;
+	seqc_t seq;
+
+	VFS_SMR_ASSERT_ENTERED();
+
+	rights = *ndp->ni_rightsneeded;
+	cap_rights_set_one(&rights, CAP_LOOKUP);
+
+	fdp = curproc->p_fd;
+	fdt = fdp->fd_files;
+	if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
+		return (EBADF);
+	seq = seqc_read_any(fd_seqc(fdt, fd));
+	if (__predict_false(seqc_in_modify(seq)))
+		return (EAGAIN);
+	fde = &fdt->fdt_ofiles[fd];
+	haverights = cap_rights_fde_inline(fde);
+	fp = fde->fde_file;
+	if (__predict_false(fp == NULL))
+		return (EAGAIN);
+	if (__predict_false(cap_check_inline_transient(haverights, &rights)))
+		return (EAGAIN);
+	*fsearch = ((fp->f_flag & FSEARCH) != 0);
+	vp = fp->f_vnode;
+	if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
+		return (EAGAIN);
+	}
+	if (!filecaps_copy(&fde->fde_caps, &ndp->ni_filecaps, false)) {
+		return (EAGAIN);
+	}
+	/*
+	 * The acquire fence orders the loads above before fdt is re-read
+	 * below, so the entry is verified against the current table.
+	 */
+	atomic_thread_fence_acq();
+	fdt = fdp->fd_files;
+	if (__predict_false(!seqc_consistent_nomb(fd_seqc(fdt, fd), seq)))
+		return (EAGAIN);
+	/*
+	 * If file descriptor doesn't have all rights,
+	 * all lookups relative to it must also be
+	 * strictly relative.
+	 *
+	 * Not yet supported by fast path.
+	 */
+	CAP_ALL(&rights);
+	if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, &rights) ||
+	    ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
+	    ndp->ni_filecaps.fc_nioctls != -1) {
+#ifdef notyet
+		ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
+#else
+		return (EAGAIN);
+#endif
+	}
+	*vpp = vp;
+	return (0);
+}
+#else
+int
+fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+{
+	const struct fdescenttbl *fdt;
+	struct filedesc *fdp;
+	struct file *fp;
+	struct vnode *vp;
+
+	VFS_SMR_ASSERT_ENTERED();
+
+	fdp = curproc->p_fd;
+	fdt = fdp->fd_files;
+	if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
+		return (EBADF);
+	fp = fdt->fdt_ofiles[fd].fde_file;
+	if (__predict_false(fp == NULL))
+		return (EAGAIN);
+	*fsearch = ((fp->f_flag & FSEARCH) != 0);
+	vp = fp->f_vnode;
+	if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
+		return (EAGAIN);
+	}
+	/*
+	 * The acquire fence orders the loads above before fdt is re-read
+	 * below, so the entry is verified against the current table.
+	 */
+	atomic_thread_fence_acq();
+	fdt = fdp->fd_files;
+	if (__predict_false(fp != fdt->fdt_ofiles[fd].fde_file))
+		return (EAGAIN);
+	filecaps_fill(&ndp->ni_filecaps);
+	*vpp = vp;
+	return (0);
+}
+#endif
+
 int
 fget_unlocked_seq(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
     struct file **fpp, seqc_t *seqp)

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c	Sat Oct 10 01:13:14 2020	(r366596)
+++ head/sys/kern/vfs_cache.c	Sat Oct 10 03:48:17 2020	(r366597)
@@ -3189,6 +3189,7 @@ struct cache_fpl {
 	int line;
 	enum cache_fpl_status status:8;
 	bool in_smr;
+	bool fsearch;
 };
 
 static void
@@ -3346,10 +3347,6 @@ cache_can_fplookup(struct cache_fpl *fpl)
 		cache_fpl_aborted(fpl);
 		return (false);
 	}
-	if (ndp->ni_dirfd != AT_FDCWD) {
-		cache_fpl_aborted(fpl);
-		return (false);
-	}
 	if (IN_CAPABILITY_MODE(td)) {
 		cache_fpl_aborted(fpl);
 		return (false);
@@ -3365,6 +3362,23 @@ cache_can_fplookup(struct cache_fpl *fpl)
 	return (true);
 }
 
+static int
+cache_fplookup_dirfd(struct cache_fpl *fpl, struct vnode **vpp)
+{
+	struct nameidata *ndp;
+	int error;
+	bool fsearch;
+
+	ndp = fpl->ndp;
+	error = fgetvp_lookup_smr(ndp->ni_dirfd, ndp, vpp, &fsearch);
+	if (__predict_false(error != 0)) {
+		cache_fpl_smr_exit(fpl);
+		return (cache_fpl_aborted(fpl));
+	}
+	fpl->fsearch = fsearch;
+	return (0);
+}
+
 static bool
 cache_fplookup_vnode_supported(struct vnode *vp)
 {
@@ -4046,9 +4060,11 @@ cache_fplookup_parse_advance(struct cache_fpl *fpl)
 static int __noinline
 cache_fplookup_failed_vexec(struct cache_fpl *fpl, int error)
 {
+	struct componentname *cnp;
 	struct vnode *dvp;
 	seqc_t dvp_seqc;
 
+	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 	dvp_seqc = fpl->dvp_seqc;
 
@@ -4070,6 +4086,32 @@ cache_fplookup_failed_vexec(struct cache_fpl *fpl, int
 		error = ENOTDIR;
 	}
 
+	/*
+	 * Hack: handle O_SEARCH.
+	 *
+	 * Open Group Base Specifications Issue 7, 2018 edition states:
+	 * If the access mode of the open file description associated with the
+	 * file descriptor is not O_SEARCH, the function shall check whether
+	 * directory searches are permitted using the current permissions of
+	 * the directory underlying the file descriptor. If the access mode is
+	 * O_SEARCH, the function shall not perform the check.
+	 *
+	 * Regular lookup tests for the NOEXECCHECK flag for every path
+	 * component to decide whether to do the permission check. However,
+	 * since most lookups never have the flag (and when they do it is only
+	 * present for the first path component), lockless lookup only acts on
+	 * it if there is a permission problem. Here the flag is represented
+	 * with a boolean so that we don't have to clear it on the way out.
+	 *
+	 * For simplicity this always aborts.
+	 * TODO: check if this is the first lookup and ignore the permission
+	 * problem. Note the flag has to survive fallback (if it happens to be
+	 * performed).
+	 */
+	if (fpl->fsearch) {
+		return (cache_fpl_aborted(fpl));
+	}
+
 	switch (error) {
 	case EAGAIN:
 		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
@@ -4308,6 +4350,7 @@ cache_fplookup(struct nameidata *ndp, enum cache_fpl_s
 	cache_fpl_checkpoint(&fpl, &orig);
 
 	cache_fpl_smr_enter_initial(&fpl);
+	fpl.fsearch = false;
 	pwd = pwd_get_smr();
 	fpl.pwd = pwd;
 	ndp->ni_rootdir = pwd->pwd_rdir;
@@ -4318,13 +4361,20 @@ cache_fplookup(struct nameidata *ndp, enum cache_fpl_s
 	if (cnp->cn_pnbuf[0] == '/') {
 		cache_fpl_handle_root(ndp, &dvp);
 	} else {
-		MPASS(ndp->ni_dirfd == AT_FDCWD);
-		dvp = pwd->pwd_cdir;
+		if (ndp->ni_dirfd == AT_FDCWD) {
+			dvp = pwd->pwd_cdir;
+		} else {
+			error = cache_fplookup_dirfd(&fpl, &dvp);
+			if (__predict_false(error != 0)) {
+				goto out;
+			}
+		}
 	}
 
 	SDT_PROBE4(vfs, namei, lookup, entry, dvp, cnp->cn_pnbuf, cnp->cn_flags, true);
 
 	error = cache_fplookup_impl(dvp, &fpl);
+out:
 	cache_fpl_smr_assert_not_entered(&fpl);
 	SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status);
 

Modified: head/sys/sys/file.h
==============================================================================
--- head/sys/sys/file.h	Sat Oct 10 01:13:14 2020	(r366596)
+++ head/sys/sys/file.h	Sat Oct 10 03:48:17 2020	(r366597)
@@ -52,6 +52,7 @@ struct thread;
 struct uio;
 struct knote;
 struct vnode;
+struct nameidata;
 
 #endif /* _KERNEL */
 
@@ -279,6 +280,7 @@ int fgetvp_read(struct thread *td, int fd, cap_rights_
     struct vnode **vpp);
 int fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp,
     struct vnode **vpp);
+int fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch);
 
 static __inline __result_use_check bool
 fhold(struct file *fp)


More information about the svn-src-all mailing list