svn commit: r211932 - in head/sys/cddl: compat/opensolaris/kern compat/opensolaris/sys contrib/opensolaris/uts/common contrib/opensolaris/uts/common/fs/zfs contrib/opensolaris/uts/common/fs/zfs/sys...

Martin Matuska mm at FreeBSD.org
Sat Aug 28 09:24:11 UTC 2010


Author: mm
Date: Sat Aug 28 09:24:11 2010
New Revision: 211932
URL: http://svn.freebsd.org/changeset/base/211932

Log:
  Import changes from OpenSolaris that provide
  - better ACL caching and speedup of ACL permission checks
  - faster handling of stat()
  - lowered mutex contention in the reader/writer lock (rrwlock)
  - several related bugfixes
  
  Detailed information (OpenSolaris onnv changesets and Bug IDs):
  
  9749:105f407a2680
  6802734	Support for Access Based Enumeration (not used on FreeBSD)
  6844861	inconsistent xattr readdir behavior with too-small buffer
  
  9866:ddc5f1d8eb4e
  6848431	zfs with rstchown=0 or file_chown_self privilege allows user to "take" ownership
  
  9981:b4907297e740
  6775100	stat() performance on files on zfs should be improved
  6827779	rrwlock is overly protective of its counters
  
  10143:d2d432dfe597
  6857433	memory leaks found at: zfs_acl_alloc/zfs_acl_node_alloc
  6860318	truncate() on zfsroot succeeds when file has a component of its path set without access permission
  
  10232:f37b85f7e03e
  6865875	zfs sometimes incorrectly giving search access to a dir
  
  10250:b179ceb34b62
  6867395	zpool_upgrade_007_pos testcase panic'd with BAD TRAP: type=e (#pf Page fault)
  
  10269:2788675568fd
  6868276	zfs_rezget() can be hazardous when znode has a cached ACL
  
  10295:f7a18a1e9610
  6870564	panic in zfs_getsecattr
  
  Approved by:	delphij (mentor)
  Obtained from:	OpenSolaris (multiple Bug IDs)
  MFC after:	2 weeks

Modified:
  head/sys/cddl/compat/opensolaris/kern/opensolaris_policy.c
  head/sys/cddl/compat/opensolaris/sys/policy.h
  head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
  head/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h

Modified: head/sys/cddl/compat/opensolaris/kern/opensolaris_policy.c
==============================================================================
--- head/sys/cddl/compat/opensolaris/kern/opensolaris_policy.c	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/compat/opensolaris/kern/opensolaris_policy.c	Sat Aug 28 09:24:11 2010	(r211932)
@@ -332,7 +332,7 @@ secpolicy_vnode_owner(struct vnode *vp, 
 }
 
 int
-secpolicy_vnode_chown(struct vnode *vp, cred_t *cred, boolean_t check_self)
+secpolicy_vnode_chown(struct vnode *vp, cred_t *cred, uid_t owner)
 {
 
 	if (secpolicy_fs_owner(vp->v_mount, cred) == 0)

Modified: head/sys/cddl/compat/opensolaris/sys/policy.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/policy.h	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/compat/opensolaris/sys/policy.h	Sat Aug 28 09:24:11 2010	(r211932)
@@ -47,8 +47,7 @@ int	secpolicy_zinject(struct ucred *cred
 int	secpolicy_fs_unmount(struct ucred *cred, struct mount *vfsp);
 int	secpolicy_basic_link(struct vnode *vp, struct ucred *cred);
 int	secpolicy_vnode_owner(struct vnode *vp, cred_t *cred, uid_t owner);
-int	secpolicy_vnode_chown(struct vnode *vp, cred_t *cred,
-	    boolean_t check_self);
+int	secpolicy_vnode_chown(struct vnode *vp, cred_t *cred, uid_t owner);
 int	secpolicy_vnode_stky_modify(struct ucred *cred);
 int	secpolicy_vnode_remove(struct vnode *vp, struct ucred *cred);
 int	secpolicy_vnode_access(struct ucred *cred, struct vnode *vp,

Modified: head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files	Sat Aug 28 09:24:11 2010	(r211932)
@@ -19,6 +19,9 @@
 # CDDL HEADER END
 #
 
+ZUT_OBJS +=			\
+	zut.o
+
 #
 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c	Sat Aug 28 09:24:11 2010	(r211932)
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/refcount.h>
 #include <sys/rrwlock.h>
 
@@ -84,7 +82,7 @@ rrn_find(rrwlock_t *rrl)
 	rrw_node_t *rn;
 
 	if (refcount_count(&rrl->rr_linked_rcount) == 0)
-		return (NULL);
+		return (B_FALSE);
 
 	for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
 		if (rn->rn_rrl == rrl)
@@ -159,6 +157,14 @@ static void
 rrw_enter_read(rrwlock_t *rrl, void *tag)
 {
 	mutex_enter(&rrl->rr_lock);
+#if !defined(DEBUG) && defined(_KERNEL)
+	if (!rrl->rr_writer && !rrl->rr_writer_wanted) {
+		rrl->rr_anon_rcount.rc_count++;
+		mutex_exit(&rrl->rr_lock);
+		return;
+	}
+	DTRACE_PROBE(zfs__rrwfastpath__rdmiss);
+#endif
 	ASSERT(rrl->rr_writer != curthread);
 	ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0);
 
@@ -208,19 +214,28 @@ void
 rrw_exit(rrwlock_t *rrl, void *tag)
 {
 	mutex_enter(&rrl->rr_lock);
+#if !defined(DEBUG) && defined(_KERNEL)
+	if (!rrl->rr_writer && rrl->rr_linked_rcount.rc_count == 0) {
+		rrl->rr_anon_rcount.rc_count--;
+		if (rrl->rr_anon_rcount.rc_count == 0)
+			cv_broadcast(&rrl->rr_cv);
+		mutex_exit(&rrl->rr_lock);
+		return;
+	}
+	DTRACE_PROBE(zfs__rrwfastpath__exitmiss);
+#endif
 	ASSERT(!refcount_is_zero(&rrl->rr_anon_rcount) ||
 	    !refcount_is_zero(&rrl->rr_linked_rcount) ||
 	    rrl->rr_writer != NULL);
 
 	if (rrl->rr_writer == NULL) {
-		if (rrn_find_and_remove(rrl)) {
-			if (refcount_remove(&rrl->rr_linked_rcount, tag) == 0)
-				cv_broadcast(&rrl->rr_cv);
-
-		} else {
-			if (refcount_remove(&rrl->rr_anon_rcount, tag) == 0)
-				cv_broadcast(&rrl->rr_cv);
-		}
+		int64_t count;
+		if (rrn_find_and_remove(rrl))
+			count = refcount_remove(&rrl->rr_linked_rcount, tag);
+		else
+			count = refcount_remove(&rrl->rr_anon_rcount, tag);
+		if (count == 0)
+			cv_broadcast(&rrl->rr_cv);
 	} else {
 		ASSERT(rrl->rr_writer == curthread);
 		ASSERT(refcount_is_zero(&rrl->rr_anon_rcount) &&

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h	Sat Aug 28 09:24:11 2010	(r211932)
@@ -200,7 +200,9 @@ int zfs_setacl(struct znode *, vsecattr_
 void zfs_acl_rele(void *);
 void zfs_oldace_byteswap(ace_t *, int);
 void zfs_ace_byteswap(void *, size_t, boolean_t);
+extern boolean_t zfs_has_access(struct znode *zp, cred_t *cr);
 extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
+int zfs_fastaccesschk_execute(struct znode *, cred_t *);
 extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
 extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
 extern int zfs_acl_access(struct znode *, int, cred_t *);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h	Sat Aug 28 09:24:11 2010	(r211932)
@@ -74,6 +74,7 @@ extern "C" {
 #define	ZFS_ACL_DEFAULTED	0x20		/* ACL should be defaulted */
 #define	ZFS_ACL_AUTO_INHERIT	0x40		/* ACL should be inherited */
 #define	ZFS_BONUS_SCANSTAMP	0x80		/* Scanstamp in bonus area */
+#define	ZFS_NO_EXECS_DENIED	0x100		/* exec was given to everyone */
 
 /*
  * Is ID ephemeral?
@@ -202,6 +203,7 @@ typedef struct znode {
 	uint64_t	z_gen;		/* generation (same as zp_gen) */
 	uint32_t	z_sync_cnt;	/* synchronous open count */
 	kmutex_t	z_acl_lock;	/* acl data lock */
+	zfs_acl_t	*z_acl_cached;	/* cached acl */
 	list_node_t	z_link_node;	/* all znodes in fs link */
 	/*
 	 * These are dmu managed fields.

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c	Sat Aug 28 09:24:11 2010	(r211932)
@@ -91,6 +91,8 @@
 #define	ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
     ZFS_ACL_OBJ_ACE)
 
+#define	ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
+
 static uint16_t
 zfs_ace_v0_get_type(void *acep)
 {
@@ -779,6 +781,7 @@ zfs_mode_compute(znode_t *zp, zfs_acl_t 
 	uint64_t	who;
 	uint16_t	iflags, type;
 	uint32_t	access_mask;
+	boolean_t	an_exec_denied = B_FALSE;
 
 	mode = (zp->z_phys->zp_mode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
 
@@ -903,8 +906,32 @@ zfs_mode_compute(znode_t *zp, zfs_acl_t 
 					}
 				}
 			}
+		} else {
+			/*
+			 * Only care if this IDENTIFIER_GROUP or
+			 * USER ACE denies execute access to someone;
+			 * the mode is not affected.
+			 */
+			if ((access_mask & ACE_EXECUTE) && type == DENY)
+				an_exec_denied = B_TRUE;
 		}
 	}
+
+	/*
+	 * Failure to allow is effectively a deny, so execute permission
+	 * is denied if it was never mentioned or if we explicitly
+	 * weren't allowed it.
+	 */
+	if (!an_exec_denied &&
+	    ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
+	    (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
+		an_exec_denied = B_TRUE;
+
+	if (an_exec_denied)
+		zp->z_phys->zp_flags &= ~ZFS_NO_EXECS_DENIED;
+	else
+		zp->z_phys->zp_flags |= ZFS_NO_EXECS_DENIED;
+
 	return (mode);
 }
 
@@ -944,7 +971,8 @@ zfs_acl_node_read_internal(znode_t *zp, 
 }
 
 /*
- * Read an external acl object.
+ * Read an external acl object.  If the intent is to modify, always
+ * create a new acl and leave any cached acl in place.
  */
 static int
 zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
@@ -958,8 +986,15 @@ zfs_acl_node_read(znode_t *zp, zfs_acl_t
 
 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
 
+	if (zp->z_acl_cached && !will_modify) {
+		*aclpp = zp->z_acl_cached;
+		return (0);
+	}
+
 	if (zp->z_phys->zp_acl.z_acl_extern_obj == 0) {
 		*aclpp = zfs_acl_node_read_internal(zp, will_modify);
+		if (!will_modify)
+			zp->z_acl_cached = *aclpp;
 		return (0);
 	}
 
@@ -993,6 +1028,8 @@ zfs_acl_node_read(znode_t *zp, zfs_acl_t
 	}
 
 	*aclpp = aclp;
+	if (!will_modify)
+		zp->z_acl_cached = aclp;
 	return (0);
 }
 
@@ -1017,11 +1054,16 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t
 
 	dmu_buf_will_dirty(zp->z_dbuf, tx);
 
+	if (zp->z_acl_cached) {
+		zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = NULL;
+	}
+
 	zphys->zp_mode = zfs_mode_compute(zp, aclp);
 
 	/*
-	 * Decide which opbject type to use.  If we are forced to
-	 * use old ACL format than transform ACL into zfs_oldace_t
+	 * Decide which object type to use.  If we are forced to
+	 * use old ACL format then transform ACL into zfs_oldace_t
 	 * layout.
 	 */
 	if (!zfsvfs->z_use_fuids) {
@@ -1871,7 +1913,6 @@ zfs_acl_ids_create(znode_t *dzp, int fla
 			mutex_exit(&dzp->z_acl_lock);
 			acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
 			    vap->va_type, paclp, acl_ids->z_mode, &need_chmod);
-			zfs_acl_free(paclp);
 		} else {
 			acl_ids->z_aclp =
 			    zfs_acl_alloc(zfs_acl_version_zp(dzp));
@@ -1972,8 +2013,6 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsec
 	if (mask & VSA_ACE) {
 		size_t aclsz;
 
-		zfs_acl_node_t *aclnode = list_head(&aclp->z_acl);
-
 		aclsz = count * sizeof (ace_t) +
 		    sizeof (ace_object_t) * largeace;
 
@@ -1984,8 +2023,17 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsec
 			zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr,
 			    vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
 		else {
-			bcopy(aclnode->z_acldata, vsecp->vsa_aclentp,
-			    count * sizeof (ace_t));
+			zfs_acl_node_t *aclnode;
+			void *start = vsecp->vsa_aclentp;
+
+			for (aclnode = list_head(&aclp->z_acl); aclnode;
+			    aclnode = list_next(&aclp->z_acl, aclnode)) {
+				bcopy(aclnode->z_acldata, start,
+				    aclnode->z_size);
+				start = (caddr_t)start + aclnode->z_size;
+			}
+			ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
+			    aclp->z_acl_bytes);
 		}
 	}
 	if (mask & VSA_ACE_ACLFLAGS) {
@@ -2000,8 +2048,6 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsec
 
 	mutex_exit(&zp->z_acl_lock);
 
-	zfs_acl_free(aclp);
-
 	return (0);
 }
 
@@ -2097,11 +2143,6 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsec
 		aclp->z_hints |= (zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS);
 	}
 top:
-	if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)) {
-		zfs_acl_free(aclp);
-		return (error);
-	}
-
 	mutex_enter(&zp->z_lock);
 	mutex_enter(&zp->z_acl_lock);
 
@@ -2147,6 +2188,7 @@ top:
 
 	error = zfs_aclset_common(zp, aclp, cr, tx);
 	ASSERT(error == 0);
+	zp->z_acl_cached = aclp;
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
@@ -2156,7 +2198,6 @@ top:
 
 	if (fuidp)
 		zfs_fuid_info_free(fuidp);
-	zfs_acl_free(aclp);
 	dmu_tx_commit(tx);
 done:
 	mutex_exit(&zp->z_acl_lock);
@@ -2166,46 +2207,17 @@ done:
 }
 
 /*
- * working_mode returns the permissions that were not granted
+ * Check accesses of interest (AoI) against attributes of the dataset
+ * such as read-only.  Returns zero if no AoI conflict with dataset
+ * attributes, otherwise an appropriate errno is returned.
  */
 static int
-zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
-    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
+zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
 {
-	zfs_acl_t	*aclp;
-	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
-	int		error;
-	uid_t		uid = crgetuid(cr);
-	uint64_t 	who;
-	uint16_t	type, iflags;
-	uint16_t	entry_type;
-	uint32_t	access_mask;
-	uint32_t	deny_mask = 0;
-	zfs_ace_hdr_t	*acep = NULL;
-	boolean_t	checkit;
-	uid_t		fowner;
-	uid_t		gowner;
-
-	/*
-	 * Short circuit empty requests
-	 */
-	if (v4_mode == 0)
-		return (0);
-
-	*check_privs = B_TRUE;
-
-	if (zfsvfs->z_replay) {
-		*working_mode = 0;
-		return (0);
-	}
-
-	*working_mode = v4_mode;
-
 	if ((v4_mode & WRITE_MASK) &&
 	    (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
 	    (!IS_DEVVP(ZTOV(zp)) ||
 	    (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) {
-		*check_privs = B_FALSE;
 		return (EROFS);
 	}
 
@@ -2217,14 +2229,12 @@ zfs_zaccess_common(znode_t *zp, uint32_t
 	    (zp->z_phys->zp_flags & (ZFS_READONLY | ZFS_IMMUTABLE))) ||
 	    (ZTOV(zp)->v_type == VDIR &&
 	    (zp->z_phys->zp_flags & ZFS_IMMUTABLE)))) {
-		*check_privs = B_FALSE;
 		return (EPERM);
 	}
 
 #ifdef sun
 	if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
 	    (zp->z_phys->zp_flags & ZFS_NOUNLINK)) {
-		*check_privs = B_FALSE;
 		return (EPERM);
 	}
 #else
@@ -2235,26 +2245,60 @@ zfs_zaccess_common(znode_t *zp, uint32_t
 	 */
 	if ((v4_mode & ACE_DELETE) &&
 	    (zp->z_phys->zp_flags & ZFS_NOUNLINK)) {
-		*check_privs = B_FALSE;
 		return (EPERM);
 	}
 #endif
 
 	if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
 	    (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED))) {
-		*check_privs = B_FALSE;
 		return (EACCES);
 	}
 
-	/*
-	 * The caller requested that the ACL check be skipped.  This
-	 * would only happen if the caller checked VOP_ACCESS() with a
-	 * 32 bit ACE mask and already had the appropriate permissions.
-	 */
-	if (skipaclchk) {
-		*working_mode = 0;
-		return (0);
-	}
+	return (0);
+}
+
+/*
+ * The primary usage of this function is to loop through all of the
+ * ACEs in the znode, determining what accesses of interest (AoI) to
+ * the caller are allowed or denied.  The AoI are expressed as bits in
+ * the working_mode parameter.  As each ACE is processed, bits covered
+ * by that ACE are removed from the working_mode.  This removal
+ * facilitates two things.  The first is that when the working mode is
+ * empty (= 0), we know we've looked at all the AoI. The second is
+ * that the ACE interpretation rules don't allow a later ACE to undo
+ * something granted or denied by an earlier ACE.  Removing the
+ * discovered access or denial enforces this rule.  At the end of
+ * processing the ACEs, all AoI that were found to be denied are
+ * placed into the working_mode, giving the caller a mask of denied
+ * accesses.  Returns:
+ *	0		if all AoI granted
+ *	EACCES		if the denied mask is non-zero
+ *	other error	if abnormal failure (e.g., IO error)
+ *
+ * A secondary usage of the function is to determine if any of the
+ * AoI are granted.  If an ACE grants any access in
+ * the working_mode, we immediately short circuit out of the function.
+ * This mode is chosen by setting anyaccess to B_TRUE.  The
+ * working_mode is not a denied access mask upon exit if the function
+ * is used in this manner.
+ */
+static int
+zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
+    boolean_t anyaccess, cred_t *cr)
+{
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+	zfs_acl_t	*aclp;
+	int		error;
+	uid_t		uid = crgetuid(cr);
+	uint64_t 	who;
+	uint16_t	type, iflags;
+	uint16_t	entry_type;
+	uint32_t	access_mask;
+	uint32_t	deny_mask = 0;
+	zfs_ace_hdr_t	*acep = NULL;
+	boolean_t	checkit;
+	uid_t		fowner;
+	uid_t		gowner;
 
 	zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
 
@@ -2268,6 +2312,7 @@ zfs_zaccess_common(znode_t *zp, uint32_t
 
 	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
 	    &iflags, &type)) {
+		uint32_t mask_matched;
 
 		if (!zfs_acl_valid_ace_type(type, iflags))
 			continue;
@@ -2275,6 +2320,11 @@ zfs_zaccess_common(znode_t *zp, uint32_t
 		if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
 			continue;
 
+		/* Skip ACE if it does not affect any AoI */
+		mask_matched = (access_mask & *working_mode);
+		if (!mask_matched)
+			continue;
+
 		entry_type = (iflags & ACE_TYPE_FLAGS);
 
 		checkit = B_FALSE;
@@ -2306,21 +2356,29 @@ zfs_zaccess_common(znode_t *zp, uint32_t
 					checkit = B_TRUE;
 				break;
 			} else {
-				zfs_acl_free(aclp);
 				mutex_exit(&zp->z_acl_lock);
 				return (EIO);
 			}
 		}
 
 		if (checkit) {
-			uint32_t mask_matched = (access_mask & *working_mode);
-
-			if (mask_matched) {
-				if (type == DENY)
-					deny_mask |= mask_matched;
-
-				*working_mode &= ~mask_matched;
+			if (type == DENY) {
+				DTRACE_PROBE3(zfs__ace__denies,
+				    znode_t *, zp,
+				    zfs_ace_hdr_t *, acep,
+				    uint32_t, mask_matched);
+				deny_mask |= mask_matched;
+			} else {
+				DTRACE_PROBE3(zfs__ace__allows,
+				    znode_t *, zp,
+				    zfs_ace_hdr_t *, acep,
+				    uint32_t, mask_matched);
+				if (anyaccess) {
+					mutex_exit(&zp->z_acl_lock);
+					return (0);
+				}
 			}
+			*working_mode &= ~mask_matched;
 		}
 
 		/* Are we done? */
@@ -2329,7 +2387,6 @@ zfs_zaccess_common(znode_t *zp, uint32_t
 	}
 
 	mutex_exit(&zp->z_acl_lock);
-	zfs_acl_free(aclp);
 
 	/* Put the found 'denies' back on the working mode */
 	if (deny_mask) {
@@ -2342,6 +2399,68 @@ zfs_zaccess_common(znode_t *zp, uint32_t
 	return (0);
 }
 
+/*
+ * Return true if any access whatsoever is granted; we don't actually
+ * care what access is granted.
+ */
+boolean_t
+zfs_has_access(znode_t *zp, cred_t *cr)
+{
+	uint32_t have = ACE_ALL_PERMS;
+
+	if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
+		uid_t		owner;
+
+		owner = zfs_fuid_map_id(zp->z_zfsvfs,
+		    zp->z_phys->zp_uid, cr, ZFS_OWNER);
+
+		return (
+		    secpolicy_vnode_access(cr, ZTOV(zp), owner, VREAD) == 0 ||
+		    secpolicy_vnode_access(cr, ZTOV(zp), owner, VWRITE) == 0 ||
+		    secpolicy_vnode_access(cr, ZTOV(zp), owner, VEXEC) == 0 ||
+		    secpolicy_vnode_chown(ZTOV(zp), cr, owner) == 0 ||
+		    secpolicy_vnode_setdac(ZTOV(zp), cr, owner) == 0 ||
+		    secpolicy_vnode_remove(ZTOV(zp), cr) == 0);
+	}
+	return (B_TRUE);
+}
+
+static int
+zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
+    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	int err;
+
+	*working_mode = v4_mode;
+	*check_privs = B_TRUE;
+
+	/*
+	 * Short circuit empty requests
+	 */
+	if (v4_mode == 0 || zfsvfs->z_replay) {
+		*working_mode = 0;
+		return (0);
+	}
+
+	if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
+		*check_privs = B_FALSE;
+		return (err);
+	}
+
+	/*
+	 * The caller requested that the ACL check be skipped.  This
+	 * would only happen if the caller checked VOP_ACCESS() with a
+	 * 32 bit ACE mask and already had the appropriate permissions.
+	 */
+	if (skipaclchk) {
+		*working_mode = 0;
+		return (0);
+	}
+
+	return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
+}
+
 static int
 zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
     cred_t *cr)
@@ -2353,6 +2472,78 @@ zfs_zaccess_append(znode_t *zp, uint32_t
 	    check_privs, B_FALSE, cr));
 }
 
+int
+zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
+{
+	boolean_t owner = B_FALSE;
+	boolean_t groupmbr = B_FALSE;
+	boolean_t is_attr;
+	uid_t fowner;
+	uid_t gowner;
+	uid_t uid = crgetuid(cr);
+	int error;
+
+	if (zdp->z_phys->zp_flags & ZFS_AV_QUARANTINED)
+		return (EACCES);
+
+	is_attr = ((zdp->z_phys->zp_flags & ZFS_XATTR) &&
+	    (ZTOV(zdp)->v_type == VDIR));
+	if (is_attr)
+		goto slow;
+
+	mutex_enter(&zdp->z_acl_lock);
+
+	if (zdp->z_phys->zp_flags & ZFS_NO_EXECS_DENIED) {
+		mutex_exit(&zdp->z_acl_lock);
+		return (0);
+	}
+
+	if (FUID_INDEX(zdp->z_phys->zp_uid) != 0 ||
+	    FUID_INDEX(zdp->z_phys->zp_gid) != 0) {
+		mutex_exit(&zdp->z_acl_lock);
+		goto slow;
+	}
+
+	fowner = (uid_t)zdp->z_phys->zp_uid;
+	gowner = (uid_t)zdp->z_phys->zp_gid;
+
+	if (uid == fowner) {
+		owner = B_TRUE;
+		if (zdp->z_phys->zp_mode & S_IXUSR) {
+			mutex_exit(&zdp->z_acl_lock);
+			return (0);
+		} else {
+			mutex_exit(&zdp->z_acl_lock);
+			goto slow;
+		}
+	}
+	if (groupmember(gowner, cr)) {
+		groupmbr = B_TRUE;
+		if (zdp->z_phys->zp_mode & S_IXGRP) {
+			mutex_exit(&zdp->z_acl_lock);
+			return (0);
+		} else {
+			mutex_exit(&zdp->z_acl_lock);
+			goto slow;
+		}
+	}
+	if (!owner && !groupmbr) {
+		if (zdp->z_phys->zp_mode & S_IXOTH) {
+			mutex_exit(&zdp->z_acl_lock);
+			return (0);
+		}
+	}
+
+	mutex_exit(&zdp->z_acl_lock);
+
+slow:
+	DTRACE_PROBE(zfs__fastpath__execute__access__miss);
+	ZFS_ENTER(zdp->z_zfsvfs);
+	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
+	ZFS_EXIT(zdp->z_zfsvfs);
+	return (error);
+}
+
 /*
  * Determine whether Access should be granted/denied, invoking least
  * priv subsytem when a deny is determined.
@@ -2457,7 +2648,7 @@ zfs_zaccess(znode_t *zp, int mode, int f
 			    owner, checkmode);
 
 		if (error == 0 && (working_mode & ACE_WRITE_OWNER))
-			error = secpolicy_vnode_chown(ZTOV(check_zp), cr, B_TRUE);
+			error = secpolicy_vnode_chown(ZTOV(check_zp), cr, owner);
 		if (error == 0 && (working_mode & ACE_WRITE_ACL))
 			error = secpolicy_vnode_setdac(ZTOV(check_zp), cr, owner);
 
@@ -2466,7 +2657,7 @@ zfs_zaccess(znode_t *zp, int mode, int f
 			error = secpolicy_vnode_remove(ZTOV(check_zp), cr);
 
 		if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
-			error = secpolicy_vnode_chown(ZTOV(check_zp), cr, B_FALSE);
+			error = secpolicy_vnode_chown(ZTOV(check_zp), cr, owner);
 		}
 		if (error == 0) {
 			/*

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Sat Aug 28 09:24:11 2010	(r211932)
@@ -994,6 +994,7 @@ zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
+		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
 	}
 }
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Sat Aug 28 09:24:11 2010	(r211932)
@@ -1164,6 +1164,27 @@ zfs_access(vnode_t *vp, int mode, int fl
 }
 
 /*
+ * If vnode is for a device return a specfs vnode instead.
+ */
+static int
+specvp_check(vnode_t **vpp, cred_t *cr)
+{
+	int error = 0;
+
+	if (IS_DEVVP(*vpp)) {
+		struct vnode *svp;
+
+		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
+		VN_RELE(*vpp);
+		if (svp == NULL)
+			error = ENOSYS;
+		*vpp = svp;
+	}
+	return (error);
+}
+
+
+/*
  * Lookup an entry in a directory, or an extended attribute directory.
  * If it exists, return a held vnode reference for it.
  *
@@ -1192,10 +1213,49 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode
 {
 	znode_t *zdp = VTOZ(dvp);
 	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
-	int	error;
+	int	error = 0;
 	int *direntflags = NULL;
 	void *realpnp = NULL;
 
+	/* fast path */
+	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
+
+		if (dvp->v_type != VDIR) {
+			return (ENOTDIR);
+		} else if (zdp->z_dbuf == NULL) {
+			return (EIO);
+		}
+
+		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
+			error = zfs_fastaccesschk_execute(zdp, cr);
+			if (!error) {
+				*vpp = dvp;
+				VN_HOLD(*vpp);
+				return (0);
+			}
+			return (error);
+		} else {
+			vnode_t *tvp = dnlc_lookup(dvp, nm);
+
+			if (tvp) {
+				error = zfs_fastaccesschk_execute(zdp, cr);
+				if (error) {
+					VN_RELE(tvp);
+					return (error);
+				}
+				if (tvp == DNLC_NO_VNODE) {
+					VN_RELE(tvp);
+					return (ENOENT);
+				} else {
+					*vpp = tvp;
+					return (specvp_check(vpp, cr));
+				}
+			}
+		}
+	}
+
+	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);
+
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zdp);
 
@@ -1261,21 +1321,8 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode
 	}
 
 	error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
-	if (error == 0) {
-		/*
-		 * Convert device special files
-		 */
-		if (IS_DEVVP(*vpp)) {
-			vnode_t	*svp;
-
-			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
-			VN_RELE(*vpp);
-			if (svp == NULL)
-				error = ENOSYS;
-			else
-				*vpp = svp;
-		}
-	}
+	if (error == 0)
+		error = specvp_check(vpp, cr);
 
 	/* Translate errors and add SAVENAME when needed. */
 	if (cnp->cn_flags & ISLASTCN) {
@@ -1468,6 +1515,7 @@ top:
 		    &acl_ids)) != 0)
 			goto out;
 		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+			zfs_acl_ids_free(&acl_ids);
 			error = EDQUOT;
 			goto out;
 		}
@@ -1564,19 +1612,7 @@ out:
 			VN_RELE(ZTOV(zp));
 	} else {
 		*vpp = ZTOV(zp);
-		/*
-		 * If vnode is for a device return a specfs vnode instead.
-		 */
-		if (IS_DEVVP(*vpp)) {
-			struct vnode *svp;
-
-			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
-			VN_RELE(*vpp);
-			if (svp == NULL) {
-				error = ENOSYS;
-			}
-			*vpp = svp;
-		}
+		error = specvp_check(vpp, cr);
 	}
 
 	ZFS_EXIT(zfsvfs);
@@ -1883,6 +1919,7 @@ top:
 		return (error);
 	}
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		ZFS_EXIT(zfsvfs);
 		return (EDQUOT);
@@ -2274,6 +2311,21 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cre
 			}
 		}
 
+		if (flags & V_RDDIR_ACCFILTER) {
+			/*
+			 * If we have no access at all, don't include
+			 * this entry in the returned information
+			 */
+			znode_t	*ezp;
+			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
+				goto skip_entry;
+			if (!zfs_has_access(ezp, cr)) {
+				VN_RELE(ZTOV(ezp));
+				goto skip_entry;
+			}
+			VN_RELE(ZTOV(ezp));
+		}
+
 		if (flags & V_RDDIR_ENTFLAGS)
 			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
 		else
@@ -2324,6 +2376,7 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cre
 		if (prefetch)
 			dmu_prefetch(os, objnum, 0, 0);
 
+	skip_entry:
 		/*
 		 * Move to the next entry, fill in the previous offset.
 		 */
@@ -2712,6 +2765,7 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, i
 top:
 	attrzp = NULL;
 
+	/* Can this be moved to before the top label? */
 	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
 		ZFS_EXIT(zfsvfs);
 		return (EROFS);
@@ -3036,6 +3090,8 @@ top:
 		zp->z_phys->zp_mode = new_mode;
 		err = zfs_aclset_common(zp, aclp, cr, tx);
 		ASSERT3U(err, ==, 0);
+		zp->z_acl_cached = aclp;
+		aclp = NULL;
 		mutex_exit(&zp->z_acl_lock);
 	}
 
@@ -3127,10 +3183,8 @@ out:
 	if (attrzp)
 		VN_RELE(ZTOV(attrzp));
 
-	if (aclp) {
+	if (aclp)
 		zfs_acl_free(aclp);
-		aclp = NULL;
-	}
 
 	if (fuidp) {
 		zfs_fuid_info_free(fuidp);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c	Sat Aug 28 09:24:11 2010	(r211932)
@@ -177,6 +177,7 @@ zfs_znode_cache_constructor(void *buf, v
 
 	zp->z_dbuf = NULL;
 	zp->z_dirlocks = NULL;
+	zp->z_acl_cached = NULL;
 	return (0);
 }
 
@@ -199,6 +200,7 @@ zfs_znode_cache_destructor(void *buf, vo
 
 	ASSERT(zp->z_dbuf == NULL);
 	ASSERT(zp->z_dirlocks == NULL);
+	ASSERT(zp->z_acl_cached == NULL);
 }
 
 #ifdef	ZNODE_STATS
@@ -244,6 +246,15 @@ zfs_znode_move_impl(znode_t *ozp, znode_
 	nzp->z_phys = ozp->z_phys;
 	nzp->z_dbuf = ozp->z_dbuf;
 
+	/*
+	 * Since this is just an idle znode and kmem is already dealing with
+	 * memory pressure, release any cached ACL.
+	 */
+	if (ozp->z_acl_cached) {
+		zfs_acl_free(ozp->z_acl_cached);
+		ozp->z_acl_cached = NULL;
+	}
+
 	/* Update back pointers. */
 	(void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys,
 	    znode_evict_error);
@@ -497,6 +508,7 @@ zfs_znode_dmu_init(zfsvfs_t *zfsvfs, zno
 	mutex_enter(&zp->z_lock);
 
 	ASSERT(zp->z_dbuf == NULL);
+	ASSERT(zp->z_acl_cached == NULL);
 	zp->z_dbuf = db;
 	nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_evict_error);
 
@@ -980,6 +992,13 @@ zfs_rezget(znode_t *zp)
 		return (EIO);
 	}
 
+	mutex_enter(&zp->z_acl_lock);
+	if (zp->z_acl_cached) {
+		zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = NULL;
+	}
+	mutex_exit(&zp->z_acl_lock);
+
 	zfs_znode_dmu_init(zfsvfs, zp, db);
 	zp->z_unlinked = (zp->z_phys->zp_links == 0);
 	zp->z_blksz = doi.doi_data_block_size;
@@ -1065,6 +1084,11 @@ zfs_znode_free(znode_t *zp)
 	list_remove(&zfsvfs->z_all_znodes, zp);
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
+	if (zp->z_acl_cached) {
+		zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = NULL;
+	}
+
 	kmem_cache_free(znode_cache, zp);
 
 	VFS_RELE(zfsvfs->z_vfs);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h	Sat Aug 28 08:59:55 2010	(r211931)
+++ head/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h	Sat Aug 28 09:24:11 2010	(r211932)
@@ -378,6 +378,7 @@ struct taskq;
  * Flags for VOP_READDIR
  */
 #define	V_RDDIR_ENTFLAGS	0x01	/* request dirent flags */
+#define	V_RDDIR_ACCFILTER	0x02	/* filter out inaccessible dirents */
 
 /*
  * Public vnode manipulation functions.


More information about the svn-src-all mailing list