svn commit: r201651 - in stable/7/sys/cddl: compat/opensolaris/kern compat/opensolaris/sys contrib/opensolaris/uts/common/fs/zfs contrib/opensolaris/uts/common/fs/zfs/sys

Wed Jan 6 16:09:59 UTC 2010

Author: netchild
Date: Wed Jan  6 16:09:58 2010
New Revision: 201651
URL: http://svn.freebsd.org/changeset/base/201651

Log:
  MFC several ZFS related commits:
  
  r196980:
  ---snip---
      When we automatically mount a snapshot we want to return the vnode of the
      mount point from the lookup and not the covered vnode. This is one of the
      fixes for using .zfs/ over NFS.
  ---snip---
  
  r196982:
  ---snip---
      We don't export individual snapshots, so mnt_export field in snapshot's
      mount point is NULL. That's why when we try to access snapshots over NFS
      use mnt_export field from the parent file system.
  ---snip---
  
  r197131:
  ---snip---
      Tighten up the check for race in zfs_zget() - ZTOV(zp) can not only contain
      NULL, but also can point to dead vnode, take that into account.
  
      PR:				kern/132068
      Reported by:		Edward Fisk <7ogcg7g02 at sneakemail.com>, kris
      Fix based on patch from:	Jaakko Heinonen <jh at saunalahti.fi>
  ---snip---
  
  r197133:
  ---snip---
      - Protect reclaim with z_teardown_inactive_lock.
      - Be prepared for dbuf to disappear in zfs_reclaim_complete() and check if
        z_dbuf field is NULL - this might happen in case of rollback or forced
        unmount between zfs_freebsd_reclaim() and zfs_reclaim_complete().
      - On forced unmount wait for all znodes to be destroyed - destruction can be
        done asynchronously via zfs_reclaim_complete().
  ---snip---
  
  r197153:
  ---snip---
      When zfs.ko is compiled with debug, make sure that znode and vnode point at
      each other.
  ---snip---
  
  r197167:
  ---snip---
      Work-around READDIRPLUS problem with .zfs/ and .zfs/snapshot/ directories
      by just returning EOPNOTSUPP. This will allow NFS server to fall back to
      regular READDIR.
  
      Note that converting inode number to snapshot's vnode is an expensive operation.
      Snapshots are stored in AVL tree, but based on their names, not inode numbers,
      so to convert inode to snapshot vnode we have to iterate over all snapshots.
  
      This is not a problem in OpenSolaris, because in their READDIRPLUS
      implementation they use VOP_LOOKUP() on d_name, instead of VFS_VGET() on
      d_fileno as we do.
  
      PR:			kern/125149
      Reported by:	Weldon Godfrey <wgodfrey at ena.com>
      Analysis by:	Jaakko Heinonen <jh at saunalahti.fi>
  ---snip---
  
  r197177:
  ---snip---
      Support both cases: when the snapshot is already mounted and when it is
      not yet mounted.
  ---snip---
  
  r197201:
  ---snip---
      - Mount ZFS snapshots with MNT_IGNORE flag, so they are not visible in regular
        df(1) and mount(8) output. This is a bit similar to OpenSolaris and follows
        ZFS route of not listing snapshots by default with 'zfs list' command.
      - Add UPDATING entry to note that ZFS snapshots are no longer visible in
        mount(8) and df(1) output by default.
  
      Reviewed by:	kib
  ---snip---
  Note: the MNT_IGNORE part is commented out in this commit and the UPDATING
  entry is not merged, as this would be a POLA violation on a stable branch.
  This revision is included here, as it also makes locking changes and makes
  sure that a snapshot is mounted RO.
  
  r197426:
  ---snip---
      Restore BSD behaviour - when creating new directory entry use parent directory
      gid to set group ownership and not process gid.
  
      This was overlooked during v6 -> v13 switch.
  
      PR:			kern/139076
      Reported by:	Sean Winn <sean at gothic.net.au>
  ---snip---
  
  r197458:
  ---snip---
      Close race in zfs_zget(). We have to increase usecount first and then
      check for VI_DOOMED flag. Before this change vnode could be reclaimed
      between checking for the flag and increasing usecount.
  ---snip---

Modified:
  stable/7/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
  stable/7/sys/cddl/compat/opensolaris/sys/vfs.h
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
==============================================================================

--- stable/7/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c	Wed Jan  6 16:05:33 2010	(r201650)
+++ stable/7/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c	Wed Jan  6 16:09:58 2010	(r201651)
@@ -115,12 +115,13 @@ extern struct mount *vfs_mount_alloc(str
     const char *fspath, struct thread *td);
 
 int
-domount(kthread_t *td, vnode_t *vp, const char *fstype, char *fspath,
+mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
     char *fspec, int fsflags)
 {
 	struct mount *mp;
 	struct vfsconf *vfsp;
 	struct ucred *cr;
+	vnode_t *vp;
 	int error;
 
 	/*
@@ -135,23 +136,28 @@ domount(kthread_t *td, vnode_t *vp, cons
 	if (vfsp == NULL)
 		return (ENODEV);
 
+	vp = *vpp;
 	if (vp->v_type != VDIR)
 		return (ENOTDIR);
+	/*
+	 * We need vnode lock to protect v_mountedhere and vnode interlock
+	 * to protect v_iflag.
+	 */
+	vn_lock(vp, LK_SHARED | LK_RETRY, td);
 	VI_LOCK(vp);
-	if ((vp->v_iflag & VI_MOUNT) != 0 ||
-	    vp->v_mountedhere != NULL) {
+	if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) {
 		VI_UNLOCK(vp);
+		VOP_UNLOCK(vp, 0, td);
 		return (EBUSY);
 	}
 	vp->v_iflag |= VI_MOUNT;
 	VI_UNLOCK(vp);
+	VOP_UNLOCK(vp, 0, td);
 
 	/*
 	 * Allocate and initialize the filesystem.
 	 */
-	vn_lock(vp, LK_SHARED | LK_RETRY, td);
 	mp = vfs_mount_alloc(vp, vfsp, fspath, td);
-	VOP_UNLOCK(vp, 0,td);
 
 	mp->mnt_optnew = NULL;
 	vfs_setmntopt(mp, "from", fspec, 0);
@@ -161,11 +167,20 @@ domount(kthread_t *td, vnode_t *vp, cons
 	/*
 	 * Set the mount level flags.
 	 */
-	if (fsflags & MNT_RDONLY)
-		mp->mnt_flag |= MNT_RDONLY;
-	mp->mnt_flag &=~ MNT_UPDATEMASK;
+	mp->mnt_flag &= ~MNT_UPDATEMASK;
 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS);
 	/*
+	 * Snapshots are always read-only.
+	 */
+	mp->mnt_flag |= MNT_RDONLY;
+#if 0
+	/*
+	 * We don't want snapshots to be visible in regular
+	 * mount(8) and df(1) output.
+	 */
+	mp->mnt_flag |= MNT_IGNORE;
+#endif
+	/*
 	 * Unprivileged user can trigger mounting a snapshot, but we don't want
 	 * him to unmount it, so we switch to privileged of original mount.
 	 */
@@ -173,11 +188,6 @@ domount(kthread_t *td, vnode_t *vp, cons
 	mp->mnt_cred = crdup(vp->v_mount->mnt_cred);
 	mp->mnt_stat.f_owner = mp->mnt_cred->cr_uid;
 	/*
-	 * Mount the filesystem.
-	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
-	 * get.  No freeing of cn_pnbuf.
-	 */
-	/*
 	 * XXX: This is evil, but we can't mount a snapshot as a regular user.
 	 * XXX: Is is safe when snapshot is mounted from within a jail?
 	 */
@@ -186,7 +196,7 @@ domount(kthread_t *td, vnode_t *vp, cons
 	error = VFS_MOUNT(mp, td);
 	td->td_ucred = cr;
 
-	if (!error) {
+	if (error == 0) {
 		if (mp->mnt_opt != NULL)
 			vfs_freeopts(mp->mnt_opt);
 		mp->mnt_opt = mp->mnt_optnew;
@@ -198,42 +208,33 @@ domount(kthread_t *td, vnode_t *vp, cons
 	*/
 	mp->mnt_optnew = NULL;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
-	/*
-	 * Put the new filesystem on the mount list after root.
-	 */
 #ifdef FREEBSD_NAMECACHE
 	cache_purge(vp);
 #endif
-	if (!error) {
+	VI_LOCK(vp);
+	vp->v_iflag &= ~VI_MOUNT;
+	VI_UNLOCK(vp);
+	if (error == 0) {
 		vnode_t *mvp;
 
-		VI_LOCK(vp);
-		vp->v_iflag &= ~VI_MOUNT;
-		VI_UNLOCK(vp);
 		vp->v_mountedhere = mp;
+		/*
+		 * Put the new filesystem on the mount list.
+		 */
 		mtx_lock(&mountlist_mtx);
 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 		mtx_unlock(&mountlist_mtx);
 		vfs_event_signal(NULL, VQ_MOUNT, 0);
 		if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp, td))
 			panic("mount: lost mount");
-		mountcheckdirs(vp, mvp);
-		vput(mvp);
-		VOP_UNLOCK(vp, 0, td);
-		if ((mp->mnt_flag & MNT_RDONLY) == 0)
-			error = vfs_allocate_syncvnode(mp);
+		vput(vp);
 		vfs_unbusy(mp, td);
-		if (error)
-			vrele(vp);
-		else
-			vfs_mountedfrom(mp, fspec);
+		*vpp = mvp;
 	} else {
-		VI_LOCK(vp);
-		vp->v_iflag &= ~VI_MOUNT;
-		VI_UNLOCK(vp);
-		VOP_UNLOCK(vp, 0, td);
+		vput(vp);
 		vfs_unbusy(mp, td);
 		vfs_mount_destroy(mp);
+		*vpp = NULL;
 	}
 	return (error);
 }

Modified: stable/7/sys/cddl/compat/opensolaris/sys/vfs.h
==============================================================================
--- stable/7/sys/cddl/compat/opensolaris/sys/vfs.h	Wed Jan  6 16:05:33 2010	(r201650)
+++ stable/7/sys/cddl/compat/opensolaris/sys/vfs.h	Wed Jan  6 16:09:58 2010	(r201651)
@@ -110,8 +110,8 @@ void vfs_setmntopt(vfs_t *vfsp, const ch
     int flags __unused);
 void vfs_clearmntopt(vfs_t *vfsp, const char *name);
 int vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp);
-int domount(kthread_t *td, vnode_t *vp, const char *fstype, char *fspath,
-    char *fspec, int fsflags);
+int mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype,
+    char *fspath, char *fspec, int fsflags);
 
 typedef	uint64_t	vfs_feature_t;
 

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h	Wed Jan  6 16:05:33 2010	(r201650)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h	Wed Jan  6 16:09:58 2010	(r201651)
@@ -231,8 +231,27 @@ typedef struct znode {
 /*
  * Convert between znode pointers and vnode pointers
  */
+#ifdef DEBUG
+static __inline vnode_t *
+ZTOV(znode_t *zp)
+{
+	vnode_t *vp = zp->z_vnode;
+
+	ASSERT(vp == NULL || vp->v_data == NULL || vp->v_data == zp);
+	return (vp);
+}
+static __inline znode_t *
+VTOZ(vnode_t *vp)
+{
+	znode_t *zp = (znode_t *)vp->v_data;
+
+	ASSERT(zp == NULL || zp->z_vnode == NULL || zp->z_vnode == vp);
+	return (zp);
+}
+#else
 #define	ZTOV(ZP)	((ZP)->z_vnode)
 #define	VTOZ(VP)	((znode_t *)(VP)->v_data)
+#endif
 
 /*
  * ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c	Wed Jan  6 16:05:33 2010	(r201650)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c	Wed Jan  6 16:09:58 2010	(r201651)
@@ -1841,7 +1841,7 @@ zfs_perm_init(znode_t *zp, znode_t *pare
 				fgid = zfs_fuid_create_cred(zfsvfs,
 				    ZFS_GROUP, tx, cr, fuidp);
 #ifdef __FreeBSD__
-				gid = parent->z_phys->zp_gid;
+				gid = fgid = parent->z_phys->zp_gid;
 #else
 				gid = crgetgid(cr);
 #endif

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c	Wed Jan  6 16:05:33 2010	(r201650)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c	Wed Jan  6 16:09:58 2010	(r201651)
@@ -879,20 +879,20 @@ domount:
 	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
 	(void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
 	    dvp->v_vfsp->mnt_stat.f_mntonname, nm);
-	err = domount(curthread, *vpp, "zfs", mountpoint, snapname, 0);
+	err = mount_snapshot(curthread, vpp, "zfs", mountpoint, snapname, 0);
 	kmem_free(mountpoint, mountpoint_len);
-	/* FreeBSD: This line was moved from below to avoid a lock recursion. */
-	if (err == 0)
-		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, curthread);
-	mutex_exit(&sdp->sd_lock);
-	/*
-	 * If we had an error, drop our hold on the vnode and
-	 * zfsctl_snapshot_inactive() will clean up.
-	 */
-	if (err) {
-		VN_RELE(*vpp);
-		*vpp = NULL;
+	if (err == 0) {
+		/*
+		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
+		 *
+		 * This is where we lie about our v_vfsp in order to
+		 * make .zfs/snapshot/<snapname> accessible over NFS
+		 * without requiring manual mounts of <snapname>.
+		 */
+		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
+		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
 	}
+	mutex_exit(&sdp->sd_lock);
 	ZFS_EXIT(zfsvfs);
 	return (err);
 }

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Wed Jan  6 16:05:33 2010	(r201650)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Wed Jan  6 16:09:58 2010	(r201651)
@@ -97,6 +97,8 @@ static int zfs_root(vfs_t *vfsp, int fla
 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td);
 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
 static int zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td);
+static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
+    struct ucred **credanonp);
 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp);
 static void zfs_objset_close(zfsvfs_t *zfsvfs);
 static void zfs_freevfs(vfs_t *vfsp);
@@ -108,6 +110,7 @@ static struct vfsops zfs_vfsops = {
 	.vfs_statfs =		zfs_statfs,
 	.vfs_vget =		zfs_vget,
 	.vfs_sync =		zfs_sync,
+	.vfs_checkexp =		zfs_checkexp,
 	.vfs_fhtovp =		zfs_fhtovp,
 };
 
@@ -955,6 +958,18 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
 		zfsvfs->z_unmounted = B_TRUE;
 		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
+
+#ifdef __FreeBSD__
+		/*
+		 * Some znodes might not be fully reclaimed, wait for them.
+		 */
+		mutex_enter(&zfsvfs->z_znodes_lock);
+		while (list_head(&zfsvfs->z_all_znodes) != NULL) {
+			msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0,
+			    "zteardown", 0);
+		}
+		mutex_exit(&zfsvfs->z_znodes_lock);
+#endif
 	}
 
 	/*
@@ -1114,6 +1129,20 @@ zfs_vget(vfs_t *vfsp, ino_t ino, int fla
 	znode_t		*zp;
 	int 		err;
 
+	/*
+	 * XXXPJD: zfs_zget() can't operate on virtual entires like .zfs/ or
+	 * .zfs/snapshot/ directories, so for now just return EOPNOTSUPP.
+	 * This will make NFS to fall back to using READDIR instead of
+	 * READDIRPLUS.
+	 * Also snapshots are stored in AVL tree, but based on their names,
+	 * not inode numbers, so it will be very inefficient to iterate
+	 * over all snapshots to find the right one.
+	 * Note that OpenSolaris READDIRPLUS implementation does LOOKUP on
+	 * d_name, and not VGET on d_fileno as we do.
+	 */
+	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR)
+		return (EOPNOTSUPP);
+
 	ZFS_ENTER(zfsvfs);
 	err = zfs_zget(zfsvfs, ino, &zp);
 	if (err == 0 && zp->z_unlinked) {
@@ -1134,6 +1163,26 @@ CTASSERT(SHORT_FID_LEN <= sizeof(struct 
 CTASSERT(LONG_FID_LEN <= sizeof(struct fid));
 
 static int
+zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
+    struct ucred **credanonp)
+{
+	zfsvfs_t *zfsvfs = vfsp->vfs_data;
+
+	/*
+	 * If this is regular file system vfsp is the same as
+	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
+	 * zfsvfs->z_parent->z_vfs represents parent file system
+	 * which we have to use here, because only this file system
+	 * has mnt_export configured.
+	 */
+	vfsp = zfsvfs->z_parent->z_vfs;
+
+	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
+	    credanonp));
+}
+
+
+static int
 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
 {
 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
@@ -1148,7 +1197,11 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vno
 
 	ZFS_ENTER(zfsvfs);
 
-	if (fidp->fid_len == LONG_FID_LEN) {
+	/*
+	 * On FreeBSD we can get snapshot's mount point or its parent file
+	 * system mount point depending if snapshot is already mounted or not.
+	 */
+	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
 		uint64_t	objsetid = 0;
 		uint64_t	setgen = 0;

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Wed Jan  6 16:05:33 2010	(r201650)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Wed Jan  6 16:09:58 2010	(r201651)
@@ -4340,11 +4340,20 @@ zfs_reclaim_complete(void *arg, int pend
 	znode_t	*zp = arg;
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 
-	ZFS_LOG(1, "zp=%p", zp);
-	ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id);
-	zfs_znode_dmu_fini(zp);
-	ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
+	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
+	if (zp->z_dbuf != NULL) {
+		ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id);
+		zfs_znode_dmu_fini(zp);
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
+	}
 	zfs_znode_free(zp);
+	rw_exit(&zfsvfs->z_teardown_inactive_lock);
+	/*
+	 * If the file system is being unmounted, there is a process waiting
+	 * for us, wake it up.
+	 */
+	if (zfsvfs->z_unmounted)
+		wakeup_one(zfsvfs);
 }
 
 static int
@@ -4356,6 +4365,9 @@ zfs_freebsd_reclaim(ap)
 {
 	vnode_t	*vp = ap->a_vp;
 	znode_t	*zp = VTOZ(vp);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+
+	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
 
 	ASSERT(zp != NULL);
 
@@ -4366,7 +4378,7 @@ zfs_freebsd_reclaim(ap)
 
 	mutex_enter(&zp->z_lock);
 	ASSERT(zp->z_phys != NULL);
-	ZTOV(zp) = NULL;
+	zp->z_vnode = NULL;
 	mutex_exit(&zp->z_lock);
 
 	if (zp->z_unlinked)
@@ -4374,7 +4386,6 @@ zfs_freebsd_reclaim(ap)
 	else if (zp->z_dbuf == NULL)
 		zfs_znode_free(zp);
 	else /* if (!zp->z_unlinked && zp->z_dbuf != NULL) */ {
-		zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 		int locked;
 
 		locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 :
@@ -4397,6 +4408,7 @@ zfs_freebsd_reclaim(ap)
 	vp->v_data = NULL;
 	ASSERT(vp->v_holdcnt >= 1);
 	VI_UNLOCK(vp);
+	rw_exit(&zfsvfs->z_teardown_inactive_lock);
 	return (0);
 }
 

Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
==============================================================================
--- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c	Wed Jan  6 16:05:33 2010	(r201650)
+++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c	Wed Jan  6 16:09:58 2010	(r201651)
@@ -110,7 +110,7 @@ znode_evict_error(dmu_buf_t *dbuf, void 
 		mutex_exit(&zp->z_lock);
 		zfs_znode_free(zp);
 	} else if (vp->v_count == 0) {
-		ZTOV(zp) = NULL;
+		zp->z_vnode = NULL;
 		vhold(vp);
 		mutex_exit(&zp->z_lock);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
@@ -896,9 +896,25 @@ again:
 		if (zp->z_unlinked) {
 			err = ENOENT;
 		} else {
-			if (ZTOV(zp) != NULL)
-				VN_HOLD(ZTOV(zp));
+			int dying = 0;
+
+			vp = ZTOV(zp);
+			if (vp == NULL)
+				dying = 1;
 			else {
+				VN_HOLD(vp);
+				if ((vp->v_iflag & VI_DOOMED) != 0) {
+					dying = 1;
+					/*
+					 * Don't VN_RELE() vnode here, because
+					 * it can call vn_lock() which creates
+					 * LOR between vnode lock and znode
+					 * lock. We will VN_RELE() the vnode
+					 * after droping znode lock.
+					 */
+				}
+			}
+			if (dying) {
 				if (first) {
 					ZFS_LOG(1, "dying znode detected (zp=%p)", zp);
 					first = 0;
@@ -910,6 +926,8 @@ again:
 				dmu_buf_rele(db, NULL);
 				mutex_exit(&zp->z_lock);
 				ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+				if (vp != NULL)
+					VN_RELE(vp);
 				tsleep(zp, 0, "zcollide", 1);
 				goto again;
 			}
@@ -1531,7 +1549,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, 
 	ZTOV(rootzp)->v_data = NULL;
 	ZTOV(rootzp)->v_count = 0;
 	ZTOV(rootzp)->v_holdcnt = 0;
-	ZTOV(rootzp) = NULL;
+	rootzp->z_vnode = NULL;
 	VOP_UNLOCK(vp, 0, curthread);
 	vdestroy(vp);
 	dmu_buf_rele(rootzp->z_dbuf, NULL);