git: 6dae49bb3b4d - stable/13 - vfs: vn_dir_next_dirent(): Simplify interface and harden

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Fri, 05 May 2023 06:38:56 UTC
The branch stable/13 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=6dae49bb3b4de1de5648485530558d4a868d9b61

commit 6dae49bb3b4de1de5648485530558d4a868d9b61
Author:     Olivier Certner <olce.freebsd@certner.fr>
AuthorDate: 2023-04-24 08:25:15 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2023-05-05 06:20:58 +0000

    vfs: vn_dir_next_dirent(): Simplify interface and harden
    
    (cherry picked from commit 3d8450db4c603d18aa45422159170e133c95214d)
---
 sys/kern/vfs_default.c |  94 ++++++++++++++++-------------
 sys/kern/vfs_vnops.c   | 156 ++++++++++++++++++++++++++++++++++++++++---------
 sys/sys/dirent.h       |   5 +-
 sys/sys/vnode.h        |   6 +-
 4 files changed, 188 insertions(+), 73 deletions(-)

diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 502a81d3036c..75738aba359b 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -275,49 +275,56 @@ vop_nostrategy (struct vop_strategy_args *ap)
 }
 
 /*
- * Check if a named file exists in a given directory vnode.
+ * Check if a named file exists in a given directory vnode
+ *
+ * Returns 0 if the file exists, ENOENT if it doesn't, or errors returned by
+ * vn_dir_next_dirent().
  */
 static int
 dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
 {
-	char *dirbuf, *cpos;
-	int error, eofflag, dirbuflen, len, found;
+	char *dirbuf;
+	int error, eofflag;
+	size_t dirbuflen, len;
 	off_t off;
 	struct dirent *dp;
 	struct vattr va;
 
-	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
+	ASSERT_VOP_LOCKED(vp, "vnode not locked");
 	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
 
-	found = 0;
-
 	error = VOP_GETATTR(vp, &va, td->td_ucred);
-	if (error)
-		return (found);
+	if (error != 0)
+		return (error);
 
-	dirbuflen = DEV_BSIZE;
+	dirbuflen = MAX(DEV_BSIZE, GENERIC_MAXDIRSIZ);
 	if (dirbuflen < va.va_blocksize)
 		dirbuflen = va.va_blocksize;
-	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
+	dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
 
-	off = 0;
 	len = 0;
-	do {
-		error = vn_dir_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
-		    &cpos, &len, &eofflag, td);
-		if (error)
+	off = 0;
+	eofflag = 0;
+
+	for (;;) {
+		error = vn_dir_next_dirent(vp, td, dirbuf, dirbuflen,
+		    &dp, &len, &off, &eofflag);
+		if (error != 0)
 			goto out;
 
+		if (len == 0)
+			break;
+
 		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
-		    strcmp(dp->d_name, dirname) == 0) {
-			found = 1;
+		    strcmp(dp->d_name, dirname) == 0)
 			goto out;
-		}
-	} while (len > 0 || !eofflag);
+	}
+
+	error = ENOENT;
 
 out:
 	free(dirbuf, M_TEMP);
-	return (found);
+	return (error);
 }
 
 int
@@ -670,27 +677,24 @@ vop_stdvptofh(struct vop_vptofh_args *ap)
 int
 vop_stdvptocnp(struct vop_vptocnp_args *ap)
 {
-	struct vnode *vp = ap->a_vp;
-	struct vnode **dvp = ap->a_vpp;
-	struct ucred *cred;
+	struct vnode *const vp = ap->a_vp;
+	struct vnode **const dvp = ap->a_vpp;
 	char *buf = ap->a_buf;
 	size_t *buflen = ap->a_buflen;
-	char *dirbuf, *cpos;
-	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
+	char *dirbuf;
+	int i = *buflen;
+	int error = 0, covered = 0;
+	int eofflag, flags, locked;
+	size_t dirbuflen, len;
 	off_t off;
 	ino_t fileno;
 	struct vattr va;
 	struct nameidata nd;
-	struct thread *td;
+	struct thread *const td = curthread;
+	struct ucred *const cred = td->td_ucred;
 	struct dirent *dp;
 	struct vnode *mvp;
 
-	i = *buflen;
-	error = 0;
-	covered = 0;
-	td = curthread;
-	cred = td->td_ucred;
-
 	if (vp->v_type != VDIR)
 		return (ENOENT);
 
@@ -727,31 +731,38 @@ vop_stdvptocnp(struct vop_vptocnp_args *ap)
 
 	fileno = va.va_fileid;
 
-	dirbuflen = DEV_BSIZE;
+	dirbuflen = MAX(DEV_BSIZE, GENERIC_MAXDIRSIZ);
 	if (dirbuflen < va.va_blocksize)
 		dirbuflen = va.va_blocksize;
-	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
+	dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
 
 	if ((*dvp)->v_type != VDIR) {
 		error = ENOENT;
 		goto out;
 	}
 
-	off = 0;
 	len = 0;
-	do {
+	off = 0;
+	eofflag = 0;
+
+	for (;;) {
 		/* call VOP_READDIR of parent */
-		error = vn_dir_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
-		    &cpos, &len, &eofflag, td);
-		if (error)
+		error = vn_dir_next_dirent(*dvp, td,
+		    dirbuf, dirbuflen, &dp, &len, &off, &eofflag);
+		if (error != 0)
+			goto out;
+
+		if (len == 0) {
+			error = ENOENT;
 			goto out;
+		}
 
 		if ((dp->d_type != DT_WHT) &&
 		    (dp->d_fileno == fileno)) {
 			if (covered) {
 				VOP_UNLOCK(*dvp);
 				vn_lock(mvp, LK_SHARED | LK_RETRY);
-				if (dirent_exists(mvp, dp->d_name, td)) {
+				if (dirent_exists(mvp, dp->d_name, td) == 0) {
 					error = ENOENT;
 					VOP_UNLOCK(mvp);
 					vn_lock(*dvp, LK_SHARED | LK_RETRY);
@@ -774,8 +785,7 @@ vop_stdvptocnp(struct vop_vptocnp_args *ap)
 			}
 			goto out;
 		}
-	} while (len > 0 || !eofflag);
-	error = ENOENT;
+	}
 
 out:
 	free(dirbuf, M_TEMP);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index c35370d9d6b3..6206c521ba48 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -3597,20 +3597,112 @@ vn_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td)
 
 #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)
 
+/*
+ * Keep this assert as long as sizeof(struct dirent) is used as the maximum
+ * entry size.
+ */
+_Static_assert(_GENERIC_MAXDIRSIZ == sizeof(struct dirent),
+    "'struct dirent' size must be a multiple of its alignment "
+    "(see _GENERIC_DIRLEN())");
+
+/*
+ * Returns successive directory entries through some caller's provided buffer
+ *
+ * This function automatically refills the provided buffer with calls to
+ * VOP_READDIR() (after MAC permission checks).
+ *
+ * 'td' is used for credentials and passed to uiomove(). 'dirbuf' is the
+ * caller's buffer to fill and 'dirbuflen' its allocated size. 'dirbuf' must be
+ * properly aligned to access 'struct dirent' structures and 'dirbuflen' must
+ * be greater than GENERIC_MAXDIRSIZ to avoid VOP_READDIR() returning EINVAL
+ * (the latter is not a strong guarantee (yet); but EINVAL will always be
+ * returned if this requirement is not verified). '*dpp' points to the current
+ * directory entry in the buffer and '*len' contains the remaining valid bytes
+ * in 'dirbuf' after 'dpp' (including the pointed entry).
+ *
+ * At first call (or when restarting the read), '*len' must have been set to 0,
+ * '*off' to 0 (or any valid start offset) and '*eofflag' to 0. There are no
+ * more entries as soon as '*len' is 0 after a call that returned 0. Calling
+ * again this function after such a condition is considered an error and EINVAL
+ * will be returned. Other possible error codes are those of VOP_READDIR(),
+ * EINTEGRITY if the returned entries do not pass coherency tests, or EINVAL
+ * (bad call). All errors are unrecoverable, i.e., the state ('*len', '*off'
+ * and '*eofflag') must be re-initialized before a subsequent call. On error or
+ * at end of directory, '*dpp' is reset to NULL.
+ *
+ * '*len', '*off' and '*eofflag' are internal state the caller should not
+ * tamper with except as explained above. '*off' is the next directory offset
+ * to read from to refill the buffer. '*eofflag' is set to 0 or 1 by the last
+ * internal call to VOP_READDIR() that returned without error, indicating
+ * whether it reached the end of the directory, and to 2 by this function after
+ * all entries have been read.
+ */
 int
-vn_dir_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
-    int dirbuflen, off_t *off, char **cpos, int *len,
-    int *eofflag, struct thread *td)
+vn_dir_next_dirent(struct vnode *vp, struct thread *td,
+    char *dirbuf, size_t dirbuflen,
+    struct dirent **dpp, size_t *len, off_t *off, int *eofflag)
 {
-	int error, reclen;
+	struct dirent *dp = NULL;
+	int reclen;
+	int error;
 	struct uio uio;
 	struct iovec iov;
-	struct dirent *dp;
 
-	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
-	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
+	ASSERT_VOP_LOCKED(vp, "vnode not locked");
+	VNASSERT(vp->v_type == VDIR, vp, ("vnode is not a directory"));
+	MPASS2((uintptr_t)dirbuf < (uintptr_t)dirbuf + dirbuflen,
+	    "Address space overflow");
+
+	if (__predict_false(dirbuflen < GENERIC_MAXDIRSIZ)) {
+		/* Don't take any chances in this case */
+		error = EINVAL;
+		goto out;
+	}
+
+	if (*len != 0) {
+		dp = *dpp;
+
+		/*
+		 * The caller continued to call us after an error (we set dp to
+		 * NULL in a previous iteration). Bail out right now.
+		 */
+		if (__predict_false(dp == NULL))
+			return (EINVAL);
+
+		MPASS(*len <= dirbuflen);
+		MPASS2((uintptr_t)dirbuf <= (uintptr_t)dp &&
+		    (uintptr_t)dp + *len <= (uintptr_t)dirbuf + dirbuflen,
+		    "Filled range not inside buffer");
+
+		reclen = dp->d_reclen;
+		if (reclen >= *len) {
+			/* End of buffer reached */
+			*len = 0;
+		} else {
+			dp = (struct dirent *)((char *)dp + reclen);
+			*len -= reclen;
+		}
+	}
 
 	if (*len == 0) {
+		dp = NULL;
+
+		/* Have to refill. */
+		switch (*eofflag) {
+		case 0:
+			break;
+
+		case 1:
+			/* Nothing more to read. */
+			*eofflag = 2; /* Remember the caller reached EOF. */
+			goto success;
+
+		default:
+			/* The caller didn't test for EOF. */
+			error = EINVAL;
+			goto out;
+		}
+
 		iov.iov_base = dirbuf;
 		iov.iov_len = dirbuflen;
 
@@ -3622,40 +3714,50 @@ vn_dir_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
 		uio.uio_rw = UIO_READ;
 		uio.uio_td = td;
 
-		*eofflag = 0;
-
 #ifdef MAC
 		error = mac_vnode_check_readdir(td->td_ucred, vp);
 		if (error == 0)
 #endif
 			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
-		    		NULL, NULL);
-		if (error)
-			return (error);
+			    NULL, NULL);
+		if (error != 0)
+			goto out;
 
+		*len = dirbuflen - uio.uio_resid;
 		*off = uio.uio_offset;
 
-		*cpos = dirbuf;
-		*len = (dirbuflen - uio.uio_resid);
-
-		if (*len == 0)
-			return (ENOENT);
-	}
+		if (*len == 0) {
+			/* Sanity check on INVARIANTS. */
+			MPASS(*eofflag != 0);
+			*eofflag = 1;
+			goto success;
+		}
 
-	dp = (struct dirent *)(*cpos);
-	reclen = dp->d_reclen;
-	*dpp = dp;
+		/*
+		 * Normalize the flag returned by VOP_READDIR(), since we use 2
+		 * as a sentinel value.
+		 */
+		if (*eofflag != 0)
+			*eofflag = 1;
 
-	/* check for malformed directory.. */
-	if (reclen < DIRENT_MINSIZE)
-		return (EINVAL);
+		dp = (struct dirent *)dirbuf;
+	}
 
-	*cpos += reclen;
-	*len -= reclen;
+	if (__predict_false(*len < GENERIC_MINDIRSIZ ||
+	    dp->d_reclen < GENERIC_MINDIRSIZ)) {
+		error = EINTEGRITY;
+		dp = NULL;
+		goto out;
+	}
 
-	return (0);
+success:
+	error = 0;
+out:
+	*dpp = dp;
+	return (error);
 }
 
+
 static u_long vn_lock_pair_pause_cnt;
 SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD,
     &vn_lock_pair_pause_cnt, 0,
diff --git a/sys/sys/dirent.h b/sys/sys/dirent.h
index b3c5e00cd9ad..9087b01fa597 100644
--- a/sys/sys/dirent.h
+++ b/sys/sys/dirent.h
@@ -122,11 +122,14 @@ struct freebsd11_dirent {
 #define	_GENERIC_DIRLEN(namlen)					\
 	((__offsetof(struct dirent, d_name) + (namlen) + 1 + 7) & ~7)
 #define	_GENERIC_DIRSIZ(dp)	_GENERIC_DIRLEN((dp)->d_namlen)
+#define	_GENERIC_MINDIRSIZ	_GENERIC_DIRLEN(1) /* Name must not be empty */
+#define	_GENERIC_MAXDIRSIZ	_GENERIC_DIRLEN(MAXNAMLEN)
 #endif /* __BSD_VISIBLE */
 
 #ifdef _KERNEL
 #define	GENERIC_DIRSIZ(dp)	_GENERIC_DIRSIZ(dp)
-
+#define	GENERIC_MINDIRSIZ	_GENERIC_MINDIRSIZ
+#define	GENERIC_MAXDIRSIZ	_GENERIC_MAXDIRSIZ
 /*
  * Ensure that padding bytes are zeroed and that the name is NUL-terminated.
  */
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index d5d2776b2f5e..7b15cb95f63f 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -1096,9 +1096,9 @@ void vfs_hash_remove(struct vnode *vp);
 
 int vfs_kqfilter(struct vop_kqfilter_args *);
 struct dirent;
-int vn_dir_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
-    int dirbuflen, off_t *off, char **cpos, int *len,
-    int *eofflag, struct thread *td);
+int vn_dir_next_dirent(struct vnode *vp, struct thread *td,
+    char *dirbuf, size_t dirbuflen,
+    struct dirent **dpp, size_t *len, off_t *off, int *eofflag);
 int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off);
 int vfs_emptydir(struct vnode *vp);