panic "Sleeping thread owns a non-sleepable lock" via cv_timedwait_signal, was "rsync over NFS"

Thu Oct 4 09:11:17 UTC 2012

On Thu, Oct 04, 2012 at 10:22:37AM +0200, Norbert Aschendorff wrote:
> Hehe, sure, if you assist me :)
> I'm not very experienced with SVN, I'm actually a git user and I think
> it's also better if I do /not/ express my opinion on SVN here ;)
> The only actions I'm currently able to do in SVN are checkout, update,
> commit and view log and status -- so any help is appreciated :P

I merged the changes for you, try the patch below.

Index: .
===================================================================

--- .	(revision 241191)
+++ .	(working copy)

Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /head/sys:r240283-240285
Index: fs
===================================================================
--- fs	(revision 241191)
+++ fs	(working copy)

Property changes on: fs
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /head/sys/fs:r240285
Index: fs/nullfs/null.h
===================================================================
--- fs/nullfs/null.h	(revision 241191)
+++ fs/nullfs/null.h	(working copy)
@@ -56,6 +56,7 @@
 int nullfs_init(struct vfsconf *vfsp);
 int nullfs_uninit(struct vfsconf *vfsp);
 int null_nodeget(struct mount *mp, struct vnode *target, struct vnode **vpp);
+struct vnode *null_hashget(struct mount *mp, struct vnode *lowervp);
 void null_hashrem(struct null_node *xp);
 int null_bypass(struct vop_generic_args *ap);
 
Index: fs/nullfs/null_subr.c
===================================================================
--- fs/nullfs/null_subr.c	(revision 241191)
+++ fs/nullfs/null_subr.c	(working copy)
@@ -67,7 +67,6 @@
 static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
 MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
 
-static struct vnode * null_hashget(struct mount *, struct vnode *);
 static struct vnode * null_hashins(struct mount *, struct null_node *);
 
 /*
@@ -98,7 +97,7 @@
  * Return a VREF'ed alias for lower vnode if already exists, else 0.
  * Lower vnode should be locked on entry and will be left locked on exit.
  */
-static struct vnode *
+struct vnode *
 null_hashget(mp, lowervp)
 	struct mount *mp;
 	struct vnode *lowervp;
@@ -209,14 +208,10 @@
 	struct vnode *vp;
 	int error;
 
-	/*
-	 * The insmntque1() call below requires the exclusive lock on
-	 * the nullfs vnode.
-	 */
-	ASSERT_VOP_ELOCKED(lowervp, "lowervp");
-	KASSERT(lowervp->v_usecount >= 1, ("Unreferenced vnode %p\n", lowervp));
+	ASSERT_VOP_LOCKED(lowervp, "lowervp");
+	KASSERT(lowervp->v_usecount >= 1, ("Unreferenced vnode %p", lowervp));
 
-	/* Lookup the hash firstly */
+	/* Lookup the hash firstly. */
 	*vpp = null_hashget(mp, lowervp);
 	if (*vpp != NULL) {
 		vrele(lowervp);
@@ -224,6 +219,19 @@
 	}
 
 	/*
+	 * The insmntque1() call below requires the exclusive lock on
+	 * the nullfs vnode.  Upgrade the lock now if hash failed to
+	 * provide ready to use vnode.
+	 */
+	if (VOP_ISLOCKED(lowervp) != LK_EXCLUSIVE) {
+		vn_lock(lowervp, LK_UPGRADE | LK_RETRY);
+		if ((lowervp->v_iflag & VI_DOOMED) != 0) {
+			vput(lowervp);
+			return (ENOENT);
+		}
+	}
+
+	/*
 	 * We do not serialize vnode creation, instead we will check for
 	 * duplicates later, when adding new vnode to hash.
 	 * Note that duplicate can only appear in hash if the lowervp is
@@ -233,8 +241,7 @@
 	 * might cause a bogus v_data pointer to get dereferenced
 	 * elsewhere if MALLOC should block.
 	 */
-	xp = malloc(sizeof(struct null_node),
-	    M_NULLFSNODE, M_WAITOK);
+	xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
 
 	error = getnewvnode("null", mp, &null_vnodeops, &vp);
 	if (error) {
Index: fs/nullfs/null_vfsops.c
===================================================================
--- fs/nullfs/null_vfsops.c	(revision 241191)
+++ fs/nullfs/null_vfsops.c	(working copy)
@@ -65,6 +65,7 @@
 static vfs_unmount_t	nullfs_unmount;
 static vfs_vget_t	nullfs_vget;
 static vfs_extattrctl_t	nullfs_extattrctl;
+static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp;
 
 /*
  * Mount null layer
@@ -121,8 +122,10 @@
 	 */
 	NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target, curthread);
 	error = namei(ndp);
+
 	/*
 	 * Re-lock vnode.
+	 * XXXKIB This is deadlock-prone as well.
 	 */
 	if (isvnunlocked)
 		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
@@ -146,7 +149,7 @@
 	}
 
 	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
-				M_NULLFSMNT, M_WAITOK);	/* XXX */
+	    M_NULLFSMNT, M_WAITOK);
 
 	/*
 	 * Save reference to underlying FS
@@ -186,10 +189,15 @@
 	}
 	MNT_ILOCK(mp);
 	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
-	    (MNTK_MPSAFE | MNTK_SHARED_WRITES);
+	    (MNTK_MPSAFE | MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
+	    MNTK_EXTENDED_SHARED);
+	mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT;
 	MNT_IUNLOCK(mp);
 	mp->mnt_data =  xmp;
 	vfs_getnewfsid(mp);
+	MNT_ILOCK(xmp->nullm_vfs);
+	TAILQ_INSERT_TAIL(&xmp->nullm_vfs->mnt_uppers, mp, mnt_upper_link);
+	MNT_IUNLOCK(xmp->nullm_vfs);
 
 	vfs_mountedfrom(mp, target);
 
@@ -206,14 +214,16 @@
 	struct mount *mp;
 	int mntflags;
 {
-	void *mntdata;
-	int error;
-	int flags = 0;
+	struct null_mount *mntdata;
+	struct mount *ump;
+	int error, flags;
 
 	NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
 
 	if (mntflags & MNT_FORCE)
-		flags |= FORCECLOSE;
+		flags = FORCECLOSE;
+	else
+		flags = 0;
 
 	/* There is 1 extra root vnode reference (nullm_rootvp). */
 	error = vflush(mp, 1, flags, curthread);
@@ -224,9 +234,17 @@
 	 * Finally, throw away the null_mount structure
 	 */
 	mntdata = mp->mnt_data;
-	mp->mnt_data = 0;
+	ump = mntdata->nullm_vfs;
+	MNT_ILOCK(ump);
+	while ((ump->mnt_kern_flag & MNTK_VGONE_UPPER) != 0) {
+		ump->mnt_kern_flag |= MNTK_VGONE_WAITER;
+		msleep(&ump->mnt_uppers, &ump->mnt_mtx, 0, "vgnupw", 0);
+	}
+	TAILQ_REMOVE(&ump->mnt_uppers, mp, mnt_upper_link);
+	MNT_IUNLOCK(ump);
+	mp->mnt_data = NULL;
 	free(mntdata, M_NULLFSMNT);
-	return 0;
+	return (0);
 }
 
 static int
@@ -316,13 +334,10 @@
 
 	KASSERT((flags & LK_TYPE_MASK) != 0,
 	    ("nullfs_vget: no lock requested"));
-	flags &= ~LK_TYPE_MASK;
-	flags |= LK_EXCLUSIVE;
 
 	error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp);
-	if (error)
+	if (error != 0)
 		return (error);
-
 	return (null_nodeget(mp, *vpp, vpp));
 }
 
@@ -334,11 +349,11 @@
 	struct vnode **vpp;
 {
 	int error;
-	error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, LK_EXCLUSIVE,
+
+	error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, flags,
 	    vpp);
-	if (error)
+	if (error != 0)
 		return (error);
-
 	return (null_nodeget(mp, *vpp, vpp));
 }
 
@@ -350,11 +365,23 @@
 	int namespace;
 	const char *attrname;
 {
-	return VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, filename_vp,
-	    namespace, attrname);
+
+	return (VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd,
+	    filename_vp, namespace, attrname));
 }
 
+static void
+nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp)
+{
+	struct vnode *vp;
 
+	vp = null_hashget(mp, lowervp);
+	if (vp == NULL)
+		return;
+	vgone(vp);
+	vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
+}
+
 static struct vfsops null_vfsops = {
 	.vfs_extattrctl =	nullfs_extattrctl,
 	.vfs_fhtovp =		nullfs_fhtovp,
@@ -367,6 +394,7 @@
 	.vfs_uninit =		nullfs_uninit,
 	.vfs_unmount =		nullfs_unmount,
 	.vfs_vget =		nullfs_vget,
+	.vfs_reclaim_lowervp =	nullfs_reclaim_lowervp,
 };
 
 VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL);
Index: fs/nullfs/null_vnops.c
===================================================================
--- fs/nullfs/null_vnops.c	(revision 241191)
+++ fs/nullfs/null_vnops.c	(working copy)
@@ -665,34 +665,19 @@
 }
 
 /*
- * There is no way to tell that someone issued remove/rmdir operation
- * on the underlying filesystem. For now we just have to release lowervp
- * as soon as possible.
- *
- * Note, we can't release any resources nor remove vnode from hash before 
- * appropriate VXLOCK stuff is done because other process can find this
- * vnode in hash during inactivation and may be sitting in vget() and waiting
- * for null_inactive to unlock vnode. Thus we will do all those in VOP_RECLAIM.
+ * XXXKIB
  */
 static int
-null_inactive(struct vop_inactive_args *ap)
+null_inactive(struct vop_inactive_args *ap __unused)
 {
-	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 
-	vp->v_object = NULL;
-
-	/*
-	 * If this is the last reference, then free up the vnode
-	 * so as not to tie up the lower vnodes.
-	 */
-	vrecycle(vp, td);
-
 	return (0);
 }
 
 /*
- * Now, the VXLOCK is in force and we're free to destroy the null vnode.
+ * Now, the nullfs vnode and, due to the sharing lock, the lower
+ * vnode, are exclusively locked, and we shall destroy the null vnode.
  */
 static int
 null_reclaim(struct vop_reclaim_args *ap)
Index: kern/vfs_lookup.c
===================================================================
--- kern/vfs_lookup.c	(revision 241191)
+++ kern/vfs_lookup.c	(working copy)
@@ -396,11 +396,13 @@
 }
 
 static int
-compute_cn_lkflags(struct mount *mp, int lkflags)
+compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags)
 {
 
-	if (mp == NULL || 
-	    ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) {
+	if (mp == NULL || ((lkflags & LK_SHARED) &&
+	    (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) ||
+	    ((cnflags & ISDOTDOT) &&
+	    (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) {
 		lkflags &= ~LK_SHARED;
 		lkflags |= LK_EXCLUSIVE;
 	}
@@ -529,7 +531,8 @@
 	dp = ndp->ni_startdir;
 	ndp->ni_startdir = NULLVP;
 	vn_lock(dp,
-	    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY));
+	    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY,
+	    cnp->cn_flags));
 
 dirloop:
 	/*
@@ -686,7 +689,7 @@
 			VFS_UNLOCK_GIANT(tvfslocked);
 			vn_lock(dp,
 			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
-			    LK_RETRY));
+			    LK_RETRY, ISDOTDOT));
 		}
 	}
 
@@ -724,7 +727,8 @@
 	vprint("lookup in", dp);
 #endif
 	lkflags_save = cnp->cn_lkflags;
-	cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags);
+	cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags,
+	    cnp->cn_flags);
 	if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) {
 		cnp->cn_lkflags = lkflags_save;
 		KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
@@ -743,7 +747,7 @@
 			VFS_UNLOCK_GIANT(tvfslocked);
 			vn_lock(dp,
 			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
-			    LK_RETRY));
+			    LK_RETRY, cnp->cn_flags));
 			goto unionlookup;
 		}
 
@@ -815,8 +819,8 @@
 		dvfslocked = 0;
 		vref(vp_crossmp);
 		ndp->ni_dvp = vp_crossmp;
-		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags),
-		    &tdp);
+		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
+		    cnp->cn_flags), &tdp);
 		vfs_unbusy(mp);
 		if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
 			panic("vp_crossmp exclusively locked or reclaimed");
Index: kern/vfs_mount.c
===================================================================
--- kern/vfs_mount.c	(revision 241191)
+++ kern/vfs_mount.c	(working copy)
@@ -479,6 +479,7 @@
 	mac_mount_create(cred, mp);
 #endif
 	arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
+	TAILQ_INIT(&mp->mnt_uppers);
 	return (mp);
 }
 
@@ -512,6 +513,7 @@
 			vprint("", vp);
 		panic("unmount: dangling vnode");
 	}
+	KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
 	if (mp->mnt_nvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero nvnodelistsize");
 	if (mp->mnt_activevnodelistsize != 0)
@@ -1260,7 +1262,8 @@
 	}
 
 	MNT_ILOCK(mp);
-	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
+	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
+	    !TAILQ_EMPTY(&mp->mnt_uppers)) {
 		MNT_IUNLOCK(mp);
 		if (coveredvp)
 			VOP_UNLOCK(coveredvp, 0);
Index: kern/vfs_subr.c
===================================================================
--- kern/vfs_subr.c	(revision 241191)
+++ kern/vfs_subr.c	(working copy)
@@ -2689,7 +2689,59 @@
 	VI_UNLOCK(vp);
 }
 
+static void
+vgonel_reclaim_lowervp_vfs(struct mount *mp __unused,
+    struct vnode *lowervp __unused)
+{
+}
+
 /*
+ * Notify upper mounts about reclaimed vnode.
+ */
+static void
+vgonel_reclaim_lowervp(struct vnode *vp)
+{
+	static struct vfsops vgonel_vfsops = {
+		.vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs
+	};
+	struct mount *mp, *ump, *mmp;
+
+	mp = vp->v_mount;
+	if (mp == NULL)
+		return;
+
+	MNT_ILOCK(mp);
+	if (TAILQ_EMPTY(&mp->mnt_uppers))
+		goto unlock;
+	MNT_IUNLOCK(mp);
+	mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO);
+	mmp->mnt_op = &vgonel_vfsops;
+	mmp->mnt_kern_flag |= MNTK_MARKER;
+	MNT_ILOCK(mp);
+	mp->mnt_kern_flag |= MNTK_VGONE_UPPER;
+	for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) {
+		if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) {
+			ump = TAILQ_NEXT(ump, mnt_upper_link);
+			continue;
+		}
+		TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link);
+		MNT_IUNLOCK(mp);
+		VFS_RECLAIM_LOWERVP(ump, vp);
+		MNT_ILOCK(mp);
+		ump = TAILQ_NEXT(mmp, mnt_upper_link);
+		TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link);
+	}
+	free(mmp, M_TEMP);
+	mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER;
+	if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) {
+		mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER;
+		wakeup(&mp->mnt_uppers);
+	}
+unlock:
+	MNT_IUNLOCK(mp);
+}
+
+/*
  * vgone, with the vp interlock held.
  */
 void
@@ -2713,6 +2765,7 @@
 	if (vp->v_iflag & VI_DOOMED)
 		return;
 	vp->v_iflag |= VI_DOOMED;
+
 	/*
 	 * Check to see if the vnode is in use.  If so, we have to call
 	 * VOP_CLOSE() and VOP_INACTIVE().
@@ -2720,6 +2773,8 @@
 	active = vp->v_usecount;
 	oweinact = (vp->v_iflag & VI_OWEINACT);
 	VI_UNLOCK(vp);
+	vgonel_reclaim_lowervp(vp);
+
 	/*
 	 * Clean out any buffers associated with the vnode.
 	 * If the flush fails, just toss the buffers.
Index: sys/mount.h
===================================================================
--- sys/mount.h	(revision 241191)
+++ sys/mount.h	(working copy)
@@ -189,6 +189,8 @@
 #define	mnt_endzero	mnt_gjprovider
 	char		*mnt_gjprovider;	/* gjournal provider name */
 	struct lock	mnt_explock;		/* vfs_export walkers lock */
+	TAILQ_ENTRY(mount) mnt_upper_link;	/* (m) we in the all uppers */
+	TAILQ_HEAD(, mount) mnt_uppers;		/* (m) upper mounts over us*/
 };
 
 /*
@@ -374,6 +376,10 @@
 #define	MNTK_NO_IOPF	0x00000100	/* Disallow page faults during reads
 					   and writes. Filesystem shall properly
 					   handle i/o state on EFAULT. */
+#define	MNTK_VGONE_UPPER	0x00000200
+#define	MNTK_VGONE_WAITER	0x00000200
+#define	MNTK_MARKER		0x00000400
+#define	MNTK_LOOKUP_EXCL_DOTDOT	0x00000800
 #define MNTK_NOASYNC	0x00800000	/* disable async */
 #define MNTK_UNMOUNT	0x01000000	/* unmount in progress */
 #define	MNTK_MWAIT	0x02000000	/* waiting for unmount to finish */
@@ -629,6 +635,7 @@
 typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
 		    struct sysctl_req *req);
 typedef void vfs_susp_clean_t(struct mount *mp);
+typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode *lowervp);
 
 struct vfsops {
 	vfs_mount_t		*vfs_mount;
@@ -646,6 +653,7 @@
 	vfs_extattrctl_t	*vfs_extattrctl;
 	vfs_sysctl_t		*vfs_sysctl;
 	vfs_susp_clean_t	*vfs_susp_clean;
+	vfs_reclaim_lowervp_t	*vfs_reclaim_lowervp;
 };
 
 vfs_statfs_t	__vfs_statfs;
@@ -671,6 +679,9 @@
 #define	VFS_SUSP_CLEAN(MP) \
 	({if (*(MP)->mnt_op->vfs_susp_clean != NULL)		\
 	       (*(MP)->mnt_op->vfs_susp_clean)(MP); })
+#define	VFS_RECLAIM_LOWERVP(MP, VP)				\
+	({if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL)	\
+		(*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); })
 
 #define	VFS_NEEDSGIANT_(MP)						\
     ((MP) != NULL && ((MP)->mnt_kern_flag & MNTK_MPSAFE) == 0)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-stable/attachments/20121004/fbe38eec/attachment.pgp