svn commit: r201723 - in stable/7/sys/cddl: compat/opensolaris/sys contrib/opensolaris/uts/common/fs contrib/opensolaris/uts/common/fs/zfs contrib/opensolaris/uts/common/fs/zfs/sys contrib/opensola...

Thu Jan 7 09:38:00 UTC 2010

Author: netchild
Date: Thu Jan  7 09:37:59 2010
New Revision: 201723
URL: http://svn.freebsd.org/changeset/base/201723

Log:
  MFC several ZFS related commits:
  
  r197459:
  ---snip---
      Before calling vflush(FORCECLOSE) mark file system as unmounted so the
      following vnops will fail. This is very important, because without this change
      vnode could be reclaimed at any point, even if we increased usecount. The only
      way to ensure that vnode won't be reclaimed was to lock it, which would be very
      hard to do in ZFS without changing a lot of code. With this change simply
      increasing usecount is enough to be sure vnode won't be reclaimed from under
      us. To be precise it can still be reclaimed but we won't be able to see it,
      because every attempt to enter ZFS through VFS will result in EIO.
  
      The only function that cannot return EIO, because it is needed for vflush() is
      zfs_root(). Introduce ZFS_ENTER_NOERROR() macro that only locks
      z_teardown_lock and never returns EIO.
  ---snip---
  
  r197512:
  ---snip---
      - Don't depend on value returned by gfs_*_inactive(), it doesn't work
        well with forced unmounts when GFS vnodes are referenced.
      - Make other preparations to GFS for forced unmounts.
  
      PR:			kern/139062
      Reported by:	trasz
  ---snip---
  
  r197513:
  ---snip---
      Use traverse() function to find and return mount point's vnode instead of
      covered vnode when snapshot is already mounted.
  ---snip---
  
  r197513:
  ---snip---
      Handle cases where virtual (GFS) vnodes are referenced when doing forced
      unmount. In that case we cannot depend on the proper order of invalidating
      vnodes, so we have to free resources when we have a chance.
  
      PR:			kern/139062
      Reported by:	trasz
  ---snip---
  
  r197683:
  ---snip---
      Return EOPNOTSUPP instead of EINVAL when doing chflags(2) over an old
      format ZFS, as defined in the manual page.
  
      Submitted by:	pjd (response of my original patch but bugs are mine)
  ---snip---
  
  r198703:
  ---snip---
      - zfs_zaccess() can handle VAPPEND too, so map V_APPEND to VAPPEND and call
        zfs_access() instead of vaccess() in this case as well.
      - If VADMIN is specified with another V* flag (unlikely) call both
        zfs_access() and vaccess() after splitting V* flags.
  
      This fixes "dirtying snapshot!" panic.
  
      PR:				kern/139806
      Reported by:		Carl Chave <carl at chave.us>
      In co-operation with:	jh
  ---snip---
  While I'm here: fix two comments regarding the members of vop_access_args
  to comply with what is in RELENG_7.
  
  r199156:
  ---snip---
      Avoid passing invalid mountpoint to getnewvnode().
  
      Reported by:	rwatson
      Tested by:	rwatson
  ---snip---
  
  r200724:
  ---snip---
      Apply fix for Solaris bug 6462803: zfs snapshot -r failed because
      filesystem was busy. (onnv-gate revision 8989)
  
      Submitted by:	mm
      Approved by:	pjd
  ---snip---
  
  r200726:
  ---snip---
      Apply fix for Solaris bug 6801979: zfs recv can fail with E2BIG
      (onnv revision 8986)
  
      Requested by:	mm
      Submitted by:	pjd
      Obtained from:	OpenSolaris
  ---snip---
  
  r200727 (following is the corrected commit log):
  ---snip---
      Apply fix for Solaris bug 6764159: restore_object() makes a call
      that can block while having a tx open but not yet committed
      (onnv revision 7994)
  
      Submitted by:	mm
      Obtained from:	OpenSolaris
  ---snip---

Modified:
  stable/7/sys/cddl/compat/opensolaris/sys/vnode.h
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/cddl/compat/opensolaris/sys/vnode.h
==============================================================================

--- stable/7/sys/cddl/compat/opensolaris/sys/vnode.h	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/compat/opensolaris/sys/vnode.h	Thu Jan  7 09:37:59 2010	(r201723)
@@ -57,6 +57,8 @@ typedef	struct vop_vector	vnodeops_t;
 
 #define	v_count	v_usecount
 
+#define	V_APPEND	VAPPEND
+
 static __inline int
 vn_is_readonly(vnode_t *vp)
 {

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c	Thu Jan  7 09:37:59 2010	(r201723)
@@ -595,7 +595,6 @@ found:
 	if (vp->v_flag & V_XATTRDIR)
 		VI_LOCK(fp->gfs_parent);
 	VI_LOCK(vp);
-	ASSERT(vp->v_count < 2);
 	/*
 	 * Really remove this vnode
 	 */
@@ -607,12 +606,7 @@ found:
 		 */
 		ge->gfse_vnode = NULL;
 	}
-	if (vp->v_count == 1) {
-		vp->v_usecount--;
-		vdropl(vp);
-	} else {
-		VI_UNLOCK(vp);
-	}
+	VI_UNLOCK(vp);
 
 	/*
 	 * Free vnode and release parent
@@ -1084,18 +1078,16 @@ gfs_vop_inactive(ap)
 {
 	vnode_t *vp = ap->a_vp;
 	gfs_file_t *fp = vp->v_data;
-	void *data;
 
 	if (fp->gfs_type == GFS_DIR)
-		data = gfs_dir_inactive(vp);
+		gfs_dir_inactive(vp);
 	else
-		data = gfs_file_inactive(vp);
-
-	if (data != NULL)
-		kmem_free(data, fp->gfs_size);
+		gfs_file_inactive(vp);
 
 	VI_LOCK(vp);
 	vp->v_data = NULL;
 	VI_UNLOCK(vp);
+	kmem_free(fp, fp->gfs_size);
+
 	return (0);
 }

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	Thu Jan  7 09:37:59 2010	(r201723)
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_tx.h>
@@ -108,19 +106,51 @@ dmu_object_claim(objset_t *os, uint64_t 
 
 int
 dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
-    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+    int blocksize, dmu_object_type_t bonustype, int bonuslen)
 {
 	dnode_t *dn;
+	dmu_tx_t *tx;
+	int nblkptr;
 	int err;
 
-	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
+	if (object == DMU_META_DNODE_OBJECT)
 		return (EBADF);
 
 	err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
 	    FTAG, &dn);
 	if (err)
 		return (err);
+
+	if (dn->dn_type == ot && dn->dn_datablksz == blocksize &&
+	    dn->dn_bonustype == bonustype && dn->dn_bonuslen == bonuslen) {
+		/* nothing is changing, this is a noop */
+		dnode_rele(dn, FTAG);
+		return (0);
+	}
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_bonus(tx, object);
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err) {
+		dmu_tx_abort(tx);
+		dnode_rele(dn, FTAG);
+		return (err);
+	}
+
+	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
+
+	/*
+	 * If we are losing blkptrs or changing the block size this must
+	 * be a new file instance.   We must clear out the previous file
+	 * contents before we can change this type of metadata in the dnode.
+	 */
+	if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize)
+		dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
+
 	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
+
+	dmu_tx_commit(tx);
+
 	dnode_rele(dn, FTAG);
 
 	return (0);

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	Thu Jan  7 09:37:59 2010	(r201723)
@@ -828,11 +828,7 @@ restore_object(struct restorearg *ra, ob
 {
 	int err;
 	dmu_tx_t *tx;
-
-	err = dmu_object_info(os, drro->drr_object, NULL);
-
-	if (err != 0 && err != ENOENT)
-		return (EINVAL);
+	void *data = NULL;
 
 	if (drro->drr_type == DMU_OT_NONE ||
 	    drro->drr_type >= DMU_OT_NUMTYPES ||
@@ -846,12 +842,15 @@ restore_object(struct restorearg *ra, ob
 		return (EINVAL);
 	}
 
-	tx = dmu_tx_create(os);
+	err = dmu_object_info(os, drro->drr_object, NULL);
+
+	if (err != 0 && err != ENOENT)
+		return (EINVAL);
 
 	if (err == ENOENT) {
 		/* currently free, want to be allocated */
+		tx = dmu_tx_create(os);
 		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
-		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1);
 		err = dmu_tx_assign(tx, TXG_WAIT);
 		if (err) {
 			dmu_tx_abort(tx);
@@ -860,45 +859,41 @@ restore_object(struct restorearg *ra, ob
 		err = dmu_object_claim(os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen, tx);
+		dmu_tx_commit(tx);
 	} else {
 		/* currently allocated, want to be allocated */
-		dmu_tx_hold_bonus(tx, drro->drr_object);
-		/*
-		 * We may change blocksize and delete old content,
-		 * so need to hold_write and hold_free.
-		 */
-		dmu_tx_hold_write(tx, drro->drr_object, 0, 1);
-		dmu_tx_hold_free(tx, drro->drr_object, 0, DMU_OBJECT_END);
-		err = dmu_tx_assign(tx, TXG_WAIT);
-		if (err) {
-			dmu_tx_abort(tx);
-			return (err);
-		}
 
 		err = dmu_object_reclaim(os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
-		    drro->drr_bonustype, drro->drr_bonuslen, tx);
+		    drro->drr_bonustype, drro->drr_bonuslen);
 	}
-	if (err) {
-		dmu_tx_commit(tx);
+	if (err)
 		return (EINVAL);
+
+	if (drro->drr_bonuslen) {
+		data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
+		if (ra->err)
+			return (ra->err);
+	}
+
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_bonus(tx, drro->drr_object);
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err) {
+		dmu_tx_abort(tx);
+		return (err);
 	}
 
 	dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx);
 	dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
 
-	if (drro->drr_bonuslen) {
+	if (data != NULL) {
 		dmu_buf_t *db;
-		void *data;
+
 		VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
 		dmu_buf_will_dirty(db, tx);
 
 		ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
-		data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
-		if (data == NULL) {
-			dmu_tx_commit(tx);
-			return (ra->err);
-		}
 		bcopy(data, db->db_data, drro->drr_bonuslen);
 		if (ra->byteswap) {
 			dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data,

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	Thu Jan  7 09:37:59 2010	(r201723)
@@ -415,8 +415,7 @@ void
 dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
-	int i, nblkptr;
-	dmu_buf_impl_t *db = NULL;
+	int nblkptr;
 
 	ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE);
 	ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE);
@@ -428,42 +427,25 @@ dnode_reallocate(dnode_t *dn, dmu_object
 	ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
 	ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
 
-	for (i = 0; i < TXG_SIZE; i++)
-		ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
-
 	/* clean up any unreferenced dbufs */
 	dnode_evict_dbufs(dn);
-	ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
-
-	/*
-	 * XXX I should really have a generation number to tell if we
-	 * need to do this...
-	 */
-	if (blocksize != dn->dn_datablksz ||
-	    dn->dn_bonustype != bonustype || dn->dn_bonuslen != bonuslen) {
-		/* free all old data */
-		dnode_free_range(dn, 0, -1ULL, tx);
-	}
-
-	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
 
-	/* change blocksize */
 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
-	if (blocksize != dn->dn_datablksz &&
-	    (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
-	    list_head(&dn->dn_dbufs) != NULL)) {
-		db = dbuf_hold(dn, 0, FTAG);
-		dbuf_new_size(db, blocksize, tx);
-	}
-	dnode_setdblksz(dn, blocksize);
 	dnode_setdirty(dn, tx);
-	dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
-	dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
+	if (dn->dn_datablksz != blocksize) {
+		/* change blocksize */
+		ASSERT(dn->dn_maxblkid == 0 &&
+		    (BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
+		    dnode_block_freed(dn, 0)));
+		dnode_setdblksz(dn, blocksize);
+		dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
+	}
+	if (dn->dn_bonuslen != bonuslen)
+		dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
+	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
 	if (dn->dn_nblkptr != nblkptr)
 		dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
 	rw_exit(&dn->dn_struct_rwlock);
-	if (db)
-		dbuf_rele(db, FTAG);
 
 	/* change type */
 	dn->dn_type = ot;
@@ -1187,11 +1169,6 @@ dnode_block_freed(dnode_t *dn, uint64_t 
 	if (dn->dn_free_txg)
 		return (TRUE);
 
-	/*
-	 * If dn_datablkshift is not set, then there's only a single
-	 * block, in which case there will never be a free range so it
-	 * won't matter.
-	 */
 	range_tofind.fr_blkid = blkid;
 	mutex_enter(&dn->dn_mtx);
 	for (i = 0; i < TXG_SIZE; i++) {

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	Thu Jan  7 09:37:59 2010	(r201723)
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -237,7 +237,7 @@ uint64_t dmu_object_alloc(objset_t *os, 
 int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
 int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
-    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+    int blocksize, dmu_object_type_t bonustype, int bonuslen);
 
 /*
  * Free an object from this objset.

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h	Thu Jan  7 09:37:59 2010	(r201723)
@@ -255,6 +255,7 @@ VTOZ(vnode_t *vp)
 
 /*
  * ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.
+ * ZFS_ENTER_NOERROR() is called when we can't return EIO.
  * ZFS_EXIT() must be called before exitting the vop.
  * ZFS_VERIFY_ZP() verifies the znode is valid.
  */
@@ -267,6 +268,9 @@ VTOZ(vnode_t *vp)
 		} \
 	}
 
+#define	ZFS_ENTER_NOERROR(zfsvfs) \
+	rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG)
+
 #define	ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
 
 #define	ZFS_VERIFY_ZP(zp) \

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h	Thu Jan  7 09:37:59 2010	(r201723)
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -56,10 +56,16 @@ typedef struct zil_header {
 	uint64_t zh_replay_seq;	/* highest replayed sequence number */
 	blkptr_t zh_log;	/* log chain */
 	uint64_t zh_claim_seq;	/* highest claimed sequence number */
-	uint64_t zh_pad[5];
+	uint64_t zh_flags;	/* header flags */
+	uint64_t zh_pad[4];
 } zil_header_t;
 
 /*
+ * zh_flags bit settings
+ */
+#define	ZIL_REPLAY_NEEDED 0x1	/* replay needed - internal only */
+
+/*
  * Log block trailer - structure at the end of the header and each log block
  *
  * The zit_bt contains a zbt_cksum which for the intent log is

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c	Thu Jan  7 09:37:59 2010	(r201723)
@@ -252,7 +252,7 @@ static int
 zfsctl_common_access(ap)
 	struct vop_access_args /* {
 		struct vnode *a_vp;
-		int  a_accmode;
+		int  a_mode;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap;
@@ -817,7 +817,11 @@ zfsctl_snapdir_lookup(ap)
 	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
 		*vpp = sep->se_root;
 		VN_HOLD(*vpp);
-		if ((*vpp)->v_mountedhere == NULL) {
+		err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY);
+		if (err) {
+			VN_RELE(*vpp);
+			*vpp = NULL;
+		} else if (*vpp == sep->se_root) {
 			/*
 			 * The snapshot was unmounted behind our backs,
 			 * try to remount it.
@@ -831,10 +835,9 @@ zfsctl_snapdir_lookup(ap)
 			 */
 			(*vpp)->v_flag &= ~VROOT;
 		}
-		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, curthread);
 		mutex_exit(&sdp->sd_lock);
 		ZFS_EXIT(zfsvfs);
-		return (0);
+		return (err);
 	}
 
 	/*
@@ -1001,15 +1004,24 @@ zfsctl_snapdir_inactive(ap)
 {
 	vnode_t *vp = ap->a_vp;
 	zfsctl_snapdir_t *sdp = vp->v_data;
-	void *private;
+	zfs_snapentry_t *sep;
 
-	private = gfs_dir_inactive(vp);
-	if (private != NULL) {
-		ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
-		mutex_destroy(&sdp->sd_lock);
-		avl_destroy(&sdp->sd_snaps);
-		kmem_free(private, sizeof (zfsctl_snapdir_t));
+	/*
+	 * On forced unmount we have to free snapshots from here.
+	 */
+	mutex_enter(&sdp->sd_lock);
+	while ((sep = avl_first(&sdp->sd_snaps)) != NULL) {
+		avl_remove(&sdp->sd_snaps, sep);
+		kmem_free(sep->se_name, strlen(sep->se_name) + 1);
+		kmem_free(sep, sizeof (zfs_snapentry_t));
 	}
+	mutex_exit(&sdp->sd_lock);
+	gfs_dir_inactive(vp);
+	ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
+	mutex_destroy(&sdp->sd_lock);
+	avl_destroy(&sdp->sd_snaps);
+	kmem_free(sdp, sizeof (zfsctl_snapdir_t));
+
 	return (0);
 }
 
@@ -1066,6 +1078,9 @@ zfsctl_snapshot_inactive(ap)
 	int locked;
 	vnode_t *dvp;
 
+	if (vp->v_count > 0)
+		goto end;
+
 	VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
 	sdp = dvp->v_data;
 	VOP_UNLOCK(dvp, 0, curthread);
@@ -1073,11 +1088,6 @@ zfsctl_snapshot_inactive(ap)
 	if (!(locked = MUTEX_HELD(&sdp->sd_lock)))
 		mutex_enter(&sdp->sd_lock);
 
-	if (vp->v_count > 1) {
-		if (!locked)
-			mutex_exit(&sdp->sd_lock);
-		return (0);
-	}
 	ASSERT(!vn_ismntpt(vp));
 
 	sep = avl_first(&sdp->sd_snaps);
@@ -1097,6 +1107,7 @@ zfsctl_snapshot_inactive(ap)
 	if (!locked)
 		mutex_exit(&sdp->sd_lock);
 	VN_RELE(dvp);
+end:
 	VFS_RELE(vp->v_vfsp);
 
 	/*

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Thu Jan  7 09:37:59 2010	(r201723)
@@ -872,7 +872,7 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t
 	znode_t *rootzp;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
+	ZFS_ENTER_NOERROR(zfsvfs);
 
 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
 	if (error == 0) {
@@ -1045,6 +1045,17 @@ zfs_umount(vfs_t *vfsp, int fflag, kthre
 		ASSERT(zfsvfs->z_ctldir == NULL);
 	}
 
+	if (fflag & MS_FORCE) {
+		/*
+		 * Mark file system as unmounted before calling
+		 * vflush(FORCECLOSE). This way we ensure no future vnops
+		 * will be called and risk operating on DOOMED vnodes.
+		 */
+		rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
+		zfsvfs->z_unmounted = B_TRUE;
+		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+	}
+
 	/*
 	 * Flush all the files.
 	 */
@@ -1111,10 +1122,7 @@ zfs_umount(vfs_t *vfsp, int fflag, kthre
 	if (zfsvfs->z_issnap) {
 		vnode_t *svp = vfsp->mnt_vnodecovered;
 
-		/*
-		 * We don't need an extra vn_rele if this is a manual snapshot mount
-		 */
-		if (svp->v_count == 2)
+		if (svp->v_count >= 2)
 			VN_RELE(svp);
 	}
 	zfs_freevfs(vfsp);

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Thu Jan  7 09:37:59 2010	(r201723)
@@ -3964,26 +3964,38 @@ static int
 zfs_freebsd_access(ap)
 	struct vop_access_args /* {
 		struct vnode *a_vp;
-		accmode_t a_accmode;
+		accmode_t a_mode;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap;
 {
+	accmode_t accmode;
+	int error = 0;
 
 	/*
-	 * ZFS itself only knowns about VREAD, VWRITE and VEXEC, the rest
-	 * we have to handle by calling vaccess().
+	 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
 	 */
-	if ((ap->a_mode & ~(VREAD|VWRITE|VEXEC)) != 0) {
-		vnode_t *vp = ap->a_vp;
-		znode_t *zp = VTOZ(vp);
-		znode_phys_t *zphys = zp->z_phys;
+	accmode = ap->a_mode & (VREAD|VWRITE|VEXEC|VAPPEND);
+	if (accmode != 0)
+		error = zfs_access(ap->a_vp, accmode, 0, ap->a_cred, NULL);
 
-		return (vaccess(vp->v_type, zphys->zp_mode, zphys->zp_uid,
-		    zphys->zp_gid, ap->a_mode, ap->a_cred, NULL));
+	/*
+	 * VADMIN has to be handled by vaccess().
+	 */
+	if (error == 0) {
+		accmode = ap->a_mode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
+		if (accmode != 0) {
+			vnode_t *vp = ap->a_vp;
+			znode_t *zp = VTOZ(vp);
+			znode_phys_t *zphys = zp->z_phys;
+
+			error = vaccess(vp->v_type, zphys->zp_mode,
+			    zphys->zp_uid, zphys->zp_gid, accmode, ap->a_cred,
+			    NULL);
+		}
 	}
 
-	return (zfs_access(ap->a_vp, ap->a_mode, 0, ap->a_cred, NULL));
+	return (error);
 }
 
 static int
@@ -4176,8 +4188,12 @@ zfs_freebsd_setattr(ap)
 	zflags = VTOZ(vp)->z_phys->zp_flags;
 
 	if (vap->va_flags != VNOVAL) {
+		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
 		int error;
 
+		if (zfsvfs->z_use_fuids == B_FALSE)
+			return (EOPNOTSUPP);
+
 		fflags = vap->va_flags;
 		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0)
 			return (EOPNOTSUPP);

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c	Thu Jan  7 09:37:59 2010	(r201723)
@@ -143,16 +143,19 @@ zfs_znode_cache_constructor(void *buf, v
 
 	POINTER_INVALIDATE(&zp->z_zfsvfs);
 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
-	ASSERT(vfsp != NULL);
 
-	error = getnewvnode("zfs", vfsp, &zfs_vnodeops, &vp);
-	if (error != 0 && (kmflags & KM_NOSLEEP))
-		return (-1);
-	ASSERT(error == 0);
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
-	zp->z_vnode = vp;
-	vp->v_data = (caddr_t)zp;
-	VN_LOCK_AREC(vp);
+	if (vfsp != NULL) {
+		error = getnewvnode("zfs", vfsp, &zfs_vnodeops, &vp);
+		if (error != 0 && (kmflags & KM_NOSLEEP))
+			return (-1);
+		ASSERT(error == 0);
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
+		zp->z_vnode = vp;
+		vp->v_data = (caddr_t)zp;
+		VN_LOCK_AREC(vp);
+	} else {
+		zp->z_vnode = NULL;
+	}
 
 	list_link_init(&zp->z_link_node);
 
@@ -1441,7 +1444,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, 
 	nvpair_t	*elem;
 	int		error;
 	znode_t		*rootzp = NULL;
-	vnode_t		*vp;
+	vnode_t		vnode;
 	vattr_t		vattr;
 	znode_t		*zp;
 
@@ -1510,13 +1513,13 @@ zfs_create_fs(objset_t *os, cred_t *cr, 
 	vattr.va_gid = crgetgid(cr);
 
 	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
-	zfs_znode_cache_constructor(rootzp, &zfsvfs, 0);
+	zfs_znode_cache_constructor(rootzp, NULL, 0);
 	rootzp->z_unlinked = 0;
 	rootzp->z_atime_dirty = 0;
 
-	vp = ZTOV(rootzp);
-	vp->v_type = VDIR;
-	VN_LOCK_ASHARE(vp);
+	vnode.v_type = VDIR;
+	vnode.v_data = rootzp;
+	rootzp->z_vnode = &vnode;
 
 	bzero(&zfsvfs, sizeof (zfsvfs_t));
 
@@ -1545,16 +1548,10 @@ zfs_create_fs(objset_t *os, cred_t *cr, 
 	ASSERT(error == 0);
 	POINTER_INVALIDATE(&rootzp->z_zfsvfs);
 
-	VI_LOCK(vp);
-	ZTOV(rootzp)->v_data = NULL;
-	ZTOV(rootzp)->v_count = 0;
-	ZTOV(rootzp)->v_holdcnt = 0;
-	rootzp->z_vnode = NULL;
-	VOP_UNLOCK(vp, 0, curthread);
-	vdestroy(vp);
 	dmu_buf_rele(rootzp->z_dbuf, NULL);
 	rootzp->z_dbuf = NULL;
 	mutex_destroy(&zfsvfs.z_znodes_lock);
+	rootzp->z_vnode = NULL;
 	kmem_cache_free(znode_cache, rootzp);
 }
 

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c	Thu Jan  7 09:37:59 2010	(r201723)
@@ -506,6 +506,25 @@ zil_rollback_destroy(zilog_t *zilog, dmu
 	    tx, zh->zh_claim_txg);
 }
 
+/*
+ * return true if the initial log block is not valid
+ */
+static boolean_t
+zil_empty(zilog_t *zilog)
+{
+	const zil_header_t *zh = zilog->zl_header;
+	arc_buf_t *abuf = NULL;
+
+	if (BP_IS_HOLE(&zh->zh_log))
+		return (B_TRUE);
+
+	if (zil_read_log_block(zilog, &zh->zh_log, &abuf) != 0)
+		return (B_TRUE);
+
+	VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
+	return (B_FALSE);
+}
+
 int
 zil_claim(char *osname, void *txarg)
 {
@@ -526,6 +545,21 @@ zil_claim(char *osname, void *txarg)
 	zh = zil_header_in_syncing_context(zilog);
 
 	/*
+	 * Record here whether the zil has any records to replay.
+	 * If the header block pointer is null or the block points
+	 * to the stubby then we know there are no valid log records.
+	 * We use the header to store this state as the the zilog gets
+	 * freed later in dmu_objset_close().
+	 * The flags (and the rest of the header fields) are cleared in
+	 * zil_sync() as a result of a zil_destroy(), after replaying the log.
+	 *
+	 * Note, the intent log can be empty but still need the
+	 * stubby to be claimed.
+	 */
+	if (!zil_empty(zilog))
+		zh->zh_flags |= ZIL_REPLAY_NEEDED;
+
+	/*
 	 * Claim all log blocks if we haven't already done so, and remember
 	 * the highest claimed sequence number.  This ensures that if we can
 	 * read only part of the log now (e.g. due to a missing device),
@@ -1349,25 +1383,6 @@ zil_free(zilog_t *zilog)
 }
 
 /*
- * return true if the initial log block is not valid
- */
-static boolean_t
-zil_empty(zilog_t *zilog)
-{
-	const zil_header_t *zh = zilog->zl_header;
-	arc_buf_t *abuf = NULL;
-
-	if (BP_IS_HOLE(&zh->zh_log))
-		return (B_TRUE);
-
-	if (zil_read_log_block(zilog, &zh->zh_log, &abuf) != 0)
-		return (B_TRUE);
-
-	VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
-	return (B_FALSE);
-}
-
-/*
  * Open an intent log.
  */
 zilog_t *
@@ -1422,7 +1437,7 @@ zil_suspend(zilog_t *zilog)
 	const zil_header_t *zh = zilog->zl_header;
 
 	mutex_enter(&zilog->zl_lock);
-	if (zh->zh_claim_txg != 0) {		/* unplayed log */
+	if (zh->zh_flags & ZIL_REPLAY_NEEDED) {		/* unplayed log */
 		mutex_exit(&zilog->zl_lock);
 		return (EBUSY);
 	}
@@ -1649,7 +1664,7 @@ zil_replay(objset_t *os, void *arg, uint
 	const zil_header_t *zh = zilog->zl_header;
 	zil_replay_arg_t zr;
 
-	if (zil_empty(zilog)) {
+	if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) {
 		zil_destroy(zilog, B_TRUE);
 		return;
 	}

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h	Thu Jan  7 09:28:17 2010	(r201722)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h	Thu Jan  7 09:37:59 2010	(r201723)
@@ -304,7 +304,6 @@ typedef struct xvattr {
  * VOP_ACCESS flags
  */
 #define	V_ACE_MASK	0x1	/* mask represents  NFSv4 ACE permissions */
-#define	V_APPEND	0x2	/* want to do append only check */
 
 /*
  * Flags for vnode operations.