svn commit: r364044 - in head: share/man/man9 sys/compat/linuxkpi/common/src sys/kern sys/security/audit sys/sys

Mateusz Guzik mjg at FreeBSD.org
Fri Aug 7 23:06:43 UTC 2020


Author: mjg
Date: Fri Aug  7 23:06:40 2020
New Revision: 364044
URL: https://svnweb.freebsd.org/changeset/base/364044

Log:
  vfs: add VOP_STAT
  
  The current scheme of calling VOP_GETATTR adds avoidable overhead.
  
  An example with tmpfs doing fstat (ops/s):
  before: 7488958
  after:  7913833
  
  Reviewed by:	kib (previous version)
  Differential Revision:	https://reviews.freebsd.org/D25910

Modified:
  head/share/man/man9/Makefile
  head/share/man/man9/VOP_ATTRIB.9
  head/sys/compat/linuxkpi/common/src/linux_compat.c
  head/sys/kern/vfs_default.c
  head/sys/kern/vfs_syscalls.c
  head/sys/kern/vfs_vnops.c
  head/sys/kern/vnode_if.src
  head/sys/security/audit/audit_arg.c
  head/sys/sys/vnode.h

Modified: head/share/man/man9/Makefile
==============================================================================
--- head/share/man/man9/Makefile	Fri Aug  7 19:58:16 2020	(r364043)
+++ head/share/man/man9/Makefile	Fri Aug  7 23:06:40 2020	(r364044)
@@ -2308,7 +2308,8 @@ MLINKS+=vm_page_insert.9 vm_page_remove.9
 MLINKS+=vm_page_wire.9 vm_page_unwire.9
 MLINKS+=VOP_ACCESS.9 VOP_ACCESSX.9
 MLINKS+=VOP_ATTRIB.9 VOP_GETATTR.9 \
-	VOP_ATTRIB.9 VOP_SETATTR.9
+	VOP_ATTRIB.9 VOP_SETATTR.9 \
+	VOP_ATTRIB.9 VOP_STAT.9
 MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \
 	VOP_CREATE.9 VOP_MKNOD.9 \
 	VOP_CREATE.9 VOP_SYMLINK.9

Modified: head/share/man/man9/VOP_ATTRIB.9
==============================================================================
--- head/share/man/man9/VOP_ATTRIB.9	Fri Aug  7 19:58:16 2020	(r364043)
+++ head/share/man/man9/VOP_ATTRIB.9	Fri Aug  7 23:06:40 2020	(r364044)
@@ -28,7 +28,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 29, 2008
+.Dd August 8, 2020
 .Dt VOP_ATTRIB 9
 .Os
 .Sh NAME
@@ -42,21 +42,51 @@
 .Fn VOP_GETATTR "struct vnode *vp" "struct vattr *vap" "struct ucred *cred"
 .Ft int
 .Fn VOP_SETATTR "struct vnode *vp" "struct vattr *vap" "struct ucred *cred"
+.Ft int
+.Fn VOP_STAT "struct vnode *vp" "struct stat *sb" "struct ucred *active_cred" \
+"struct ucred *file_cred" "struct thread *td"
 .Sh DESCRIPTION
 These entry points manipulate various attributes of a file or directory,
 including file permissions, owner, group, size,
 access time and modification time.
 .Pp
-The arguments are:
+.Fn VOP_STAT
+returns data in a format suitable for the
+.Xr stat 2
+system call and by default is implemented as a wrapper around
+.Fn VOP_GETATTR .
+Filesystems may want to implement their own variant for performance reasons.
+.Pp
+For
+.Fn VOP_GETATTR
+and
+.Fn VOP_SETATTR
+the arguments are:
 .Bl -tag -width cred
 .It Fa vp
 The vnode of the file.
 .It Fa vap
 The attributes of the file.
 .It Fa cred
-The user credentials of the calling process.
+The user credentials of the calling thread.
 .El
 .Pp
+For
+.Fn VOP_STAT
+the arguments are:
+.Bl -tag -width active_cred
+.It Fa vp
+The vnode of the file.
+.It Fa sb
+The attributes of the file.
+.It Fa active_cred
+The user credentials of the calling thread.
+.It Fa file_cred
+The credentials installed on the file description pointing to the vnode, or NOCRED.
+.It Fa td
+The calling thread.
+.El
+.Pp
 Attributes which are not being modified by
 .Fn VOP_SETATTR
 should be set to the value
@@ -67,8 +97,11 @@ the contents of
 .Fa *vap
 prior to setting specific values.
 .Sh LOCKS
+Both
 .Fn VOP_GETATTR
-expects the vnode to be locked on entry and will leave the vnode locked on
+and
+.Fn VOP_STAT
+expect the vnode to be locked on entry and will leave the vnode locked on
 return.
 The lock type can be either shared or exclusive.
 .Pp
@@ -84,6 +117,10 @@ otherwise an appropriate error is returned.
 .Fn VOP_SETATTR
 returns zero if the attributes were changed successfully, otherwise an
 appropriate error is returned.
+.Fn VOP_STAT
+returns 0 if it was able to retrieve the attribute data via
+.Fa *sb ,
+otherwise an appropriate error is returned.
 .Sh ERRORS
 .Bl -tag -width Er
 .It Bq Er EPERM

Modified: head/sys/compat/linuxkpi/common/src/linux_compat.c
==============================================================================
--- head/sys/compat/linuxkpi/common/src/linux_compat.c	Fri Aug  7 19:58:16 2020	(r364043)
+++ head/sys/compat/linuxkpi/common/src/linux_compat.c	Fri Aug  7 23:06:40 2020	(r364044)
@@ -1691,7 +1691,7 @@ linux_file_stat(struct file *fp, struct stat *sb, stru
 	vp = filp->f_vnode;
 
 	vn_lock(vp, LK_SHARED | LK_RETRY);
-	error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
+	error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td);
 	VOP_UNLOCK(vp);
 
 	return (error);

Modified: head/sys/kern/vfs_default.c
==============================================================================
--- head/sys/kern/vfs_default.c	Fri Aug  7 19:58:16 2020	(r364043)
+++ head/sys/kern/vfs_default.c	Fri Aug  7 23:06:40 2020	(r364044)
@@ -57,6 +57,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/poll.h>
+#include <sys/stat.h>
+#include <security/audit/audit.h>
+#include <sys/priv.h>
 
 #include <security/mac/mac_framework.h>
 
@@ -87,6 +90,7 @@ static int vop_stdadd_writecount(struct vop_add_writec
 static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
 static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
 static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
+static int vop_stdstat(struct vop_stat_args *ap);
 
 /*
  * This vnode table stores what we want to do if the filesystem doesn't
@@ -114,6 +118,7 @@ struct vop_vector default_vnodeops = {
 	.vop_bmap =		vop_stdbmap,
 	.vop_close =		VOP_NULL,
 	.vop_fsync =		VOP_NULL,
+	.vop_stat =		vop_stdstat,
 	.vop_fdatasync =	vop_stdfdatasync,
 	.vop_getpages =		vop_stdgetpages,
 	.vop_getpages_async =	vop_stdgetpages_async,
@@ -1460,4 +1465,112 @@ vop_sigdefer(struct vop_vector *vop, struct vop_generi
 	rc = bp(a);
 	sigallowstop(prev_stops);
 	return (rc);
+}
+
+static int
+vop_stdstat(struct vop_stat_args *a)
+{
+	struct vattr vattr;
+	struct vattr *vap;
+	struct vnode *vp;
+	struct stat *sb;
+	int error;
+	u_short mode;
+
+	vp = a->a_vp;
+	sb = a->a_sb;
+
+	error = vop_stat_helper_pre(a);
+	if (error != 0)
+		return (error);
+
+	vap = &vattr;
+
+	/*
+	 * Initialize defaults for new and unusual fields, so that file
+	 * systems which don't support these fields don't need to know
+	 * about them.
+	 */
+	vap->va_birthtime.tv_sec = -1;
+	vap->va_birthtime.tv_nsec = 0;
+	vap->va_fsid = VNOVAL;
+	vap->va_rdev = NODEV;
+
+	error = VOP_GETATTR(vp, vap, a->a_active_cred);
+	if (error)
+		goto out;
+
+	/*
+	 * Zero the spare stat fields
+	 */
+	bzero(sb, sizeof *sb);
+
+	/*
+	 * Copy from vattr table
+	 */
+	if (vap->va_fsid != VNOVAL)
+		sb->st_dev = vap->va_fsid;
+	else
+		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
+	sb->st_ino = vap->va_fileid;
+	mode = vap->va_mode;
+	switch (vap->va_type) {
+	case VREG:
+		mode |= S_IFREG;
+		break;
+	case VDIR:
+		mode |= S_IFDIR;
+		break;
+	case VBLK:
+		mode |= S_IFBLK;
+		break;
+	case VCHR:
+		mode |= S_IFCHR;
+		break;
+	case VLNK:
+		mode |= S_IFLNK;
+		break;
+	case VSOCK:
+		mode |= S_IFSOCK;
+		break;
+	case VFIFO:
+		mode |= S_IFIFO;
+		break;
+	default:
+		error = EBADF;
+		goto out;
+	}
+	sb->st_mode = mode;
+	sb->st_nlink = vap->va_nlink;
+	sb->st_uid = vap->va_uid;
+	sb->st_gid = vap->va_gid;
+	sb->st_rdev = vap->va_rdev;
+	if (vap->va_size > OFF_MAX) {
+		error = EOVERFLOW;
+		goto out;
+	}
+	sb->st_size = vap->va_size;
+	sb->st_atim.tv_sec = vap->va_atime.tv_sec;
+	sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
+	sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
+	sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
+	sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
+	sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
+	sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
+	sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;
+
+	/*
+	 * According to www.opengroup.org, the meaning of st_blksize is
+	 *   "a filesystem-specific preferred I/O block size for this
+	 *    object.  In some filesystem types, this may vary from file
+	 *    to file"
+	 * Use minimum/default of PAGE_SIZE (e.g. for VCHR).
+	 */
+
+	sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
+	sb->st_flags = vap->va_flags;
+	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
+	sb->st_gen = vap->va_gen;
+out:
+	return (vop_stat_helper_post(a, error));
 }

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c	Fri Aug  7 19:58:16 2020	(r364043)
+++ head/sys/kern/vfs_syscalls.c	Fri Aug  7 23:06:40 2020	(r364044)
@@ -1867,7 +1867,7 @@ restart:
 	if (vp->v_type == VDIR && oldinum == 0) {
 		error = EPERM;		/* POSIX */
 	} else if (oldinum != 0 &&
-		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
+		  ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 		  sb.st_ino != oldinum) {
 		error = EIDRM;	/* Identifier removed */
 	} else if (fp != NULL && fp->f_vnode != vp) {
@@ -2381,7 +2381,7 @@ kern_statat(struct thread *td, int flag, int fd, const
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	error = vn_stat(nd.ni_vp, sbp, td->td_ucred, NOCRED, td);
+	error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED, td);
 	if (error == 0) {
 		SDT_PROBE2(vfs, , stat, mode, path, sbp->st_mode);
 		if (S_ISREG(sbp->st_mode))
@@ -4566,7 +4566,7 @@ kern_fhstat(struct thread *td, struct fhandle fh, stru
 	vfs_unbusy(mp);
 	if (error != 0)
 		return (error);
-	error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
+	error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td);
 	vput(vp);
 	return (error);
 }

Modified: head/sys/kern/vfs_vnops.c
==============================================================================
--- head/sys/kern/vfs_vnops.c	Fri Aug  7 19:58:16 2020	(r364043)
+++ head/sys/kern/vfs_vnops.c	Fri Aug  7 23:06:40 2020	(r364044)
@@ -1455,121 +1455,10 @@ vn_statfile(struct file *fp, struct stat *sb, struct u
 	int error;
 
 	vn_lock(vp, LK_SHARED | LK_RETRY);
-	error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
+	error = VOP_STAT(vp, sb, active_cred, fp->f_cred, td);
 	VOP_UNLOCK(vp);
 
 	return (error);
-}
-
-/*
- * Stat a vnode; implementation for the stat syscall
- */
-int
-vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
-    struct ucred *file_cred, struct thread *td)
-{
-	struct vattr vattr;
-	struct vattr *vap;
-	int error;
-	u_short mode;
-
-	AUDIT_ARG_VNODE1(vp);
-#ifdef MAC
-	error = mac_vnode_check_stat(active_cred, file_cred, vp);
-	if (error)
-		return (error);
-#endif
-
-	vap = &vattr;
-
-	/*
-	 * Initialize defaults for new and unusual fields, so that file
-	 * systems which don't support these fields don't need to know
-	 * about them.
-	 */
-	vap->va_birthtime.tv_sec = -1;
-	vap->va_birthtime.tv_nsec = 0;
-	vap->va_fsid = VNOVAL;
-	vap->va_rdev = NODEV;
-
-	error = VOP_GETATTR(vp, vap, active_cred);
-	if (error)
-		return (error);
-
-	/*
-	 * Zero the spare stat fields
-	 */
-	bzero(sb, sizeof *sb);
-
-	/*
-	 * Copy from vattr table
-	 */
-	if (vap->va_fsid != VNOVAL)
-		sb->st_dev = vap->va_fsid;
-	else
-		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
-	sb->st_ino = vap->va_fileid;
-	mode = vap->va_mode;
-	switch (vap->va_type) {
-	case VREG:
-		mode |= S_IFREG;
-		break;
-	case VDIR:
-		mode |= S_IFDIR;
-		break;
-	case VBLK:
-		mode |= S_IFBLK;
-		break;
-	case VCHR:
-		mode |= S_IFCHR;
-		break;
-	case VLNK:
-		mode |= S_IFLNK;
-		break;
-	case VSOCK:
-		mode |= S_IFSOCK;
-		break;
-	case VFIFO:
-		mode |= S_IFIFO;
-		break;
-	default:
-		return (EBADF);
-	}
-	sb->st_mode = mode;
-	sb->st_nlink = vap->va_nlink;
-	sb->st_uid = vap->va_uid;
-	sb->st_gid = vap->va_gid;
-	sb->st_rdev = vap->va_rdev;
-	if (vap->va_size > OFF_MAX)
-		return (EOVERFLOW);
-	sb->st_size = vap->va_size;
-	sb->st_atim.tv_sec = vap->va_atime.tv_sec;
-	sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
-	sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
-	sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
-	sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
-	sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
-	sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
-	sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;
-
-	/*
-	 * According to www.opengroup.org, the meaning of st_blksize is 
-	 *   "a filesystem-specific preferred I/O block size for this 
-	 *    object.  In some filesystem types, this may vary from file
-	 *    to file"
-	 * Use minimum/default of PAGE_SIZE (e.g. for VCHR).
-	 */
-
-	sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
-
-	sb->st_flags = vap->va_flags;
-	if (priv_check_cred_vfs_generation(td->td_ucred))
-		sb->st_gen = 0;
-	else
-		sb->st_gen = vap->va_gen;
-
-	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
-	return (0);
 }
 
 /*

Modified: head/sys/kern/vnode_if.src
==============================================================================
--- head/sys/kern/vnode_if.src	Fri Aug  7 19:58:16 2020	(r364043)
+++ head/sys/kern/vnode_if.src	Fri Aug  7 23:06:40 2020	(r364044)
@@ -177,6 +177,17 @@ vop_accessx {
 };
 
 
+%% stat	vp	L L L
+
+vop_stat {
+	IN struct vnode *vp;
+	OUT struct stat *sb;
+	IN struct ucred *active_cred;
+	IN struct ucred *file_cred;
+	IN struct thread *td;
+};
+
+
 %% getattr	vp	L L L
 
 vop_getattr {

Modified: head/sys/security/audit/audit_arg.c
==============================================================================
--- head/sys/security/audit/audit_arg.c	Fri Aug  7 19:58:16 2020	(r364043)
+++ head/sys/security/audit/audit_arg.c	Fri Aug  7 23:06:40 2020	(r364044)
@@ -854,7 +854,7 @@ audit_arg_upath2_canon(char *upath)
  * It is assumed that the caller will hold any vnode locks necessary to
  * perform a VOP_GETATTR() on the passed vnode.
  *
- * XXX: The attr code is very similar to vfs_vnops.c:vn_stat(), but always
+ * XXX: The attr code is very similar to vfs_default.c:vop_stdstat(), but always
  * provides access to the generation number as we need that to construct the
  * BSM file ID.
  *

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h	Fri Aug  7 19:58:16 2020	(r364043)
+++ head/sys/sys/vnode.h	Fri Aug  7 23:06:40 2020	(r364044)
@@ -737,8 +737,6 @@ int	vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp,
 	    struct thread *td);
 int	vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio,
 	    struct thread *td);
-int	vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
-	    struct ucred *file_cred, struct thread *td);
 int	vn_start_write(struct vnode *vp, struct mount **mpp, int flags);
 int	vn_start_secondary_write(struct vnode *vp, struct mount **mpp,
 	    int flags);
@@ -892,6 +890,22 @@ void	vop_need_inactive_debugpost(void *a, int rc);
 #endif
 
 void	vop_rename_fail(struct vop_rename_args *ap);
+
+#define	vop_stat_helper_pre(ap)	({						\
+	int _error;								\
+	AUDIT_ARG_VNODE1(ap->a_vp);						\
+	_error = mac_vnode_check_stat(ap->a_active_cred, ap->a_file_cred, ap->a_vp);\
+	if (__predict_true(_error == 0))					\
+		bzero(ap->a_sb, sizeof(*ap->a_sb));				\
+	_error;									\
+})
+
+#define	vop_stat_helper_post(ap, error)	({					\
+	int _error = (error);							\
+	if (priv_check_cred_vfs_generation(ap->a_td->td_ucred))			\
+		ap->a_sb->st_gen = 0;						\
+	_error;									\
+})
 
 #define	VOP_WRITE_PRE(ap)						\
 	struct vattr va;						\


More information about the svn-src-head mailing list