Deadlock between nfsd and snapshots.

Kostik Belousov kostikbel at gmail.com
Wed Aug 23 11:08:17 UTC 2006


Attached is the prototype change. System booted from the modified kernel
survived cyclic snapshotting of the exported partition. The partition was
also mounted by loopback nfs, and the loop of extracting perl-5.8.8 from
archive, grepping tree for foobar and removing it run over nfs.

I have at least one questions:

> > Protecting the existing i_flag and the timestamps with the vnode
> > interlock when the current thread only has a shared vnode lock should
> > be sufficient to protect against the races, removing the need for #3,
> > #4 and #4 below.
Could you, please, explain this point ? I did not wrap all accesses to
i_flag and timestamps with vnode interlock, only ufs_itimes, ufs_lazyaccess
and ufs_getattr for now.

Index: ffs/ffs_snapshot.c
===================================================================
RCS file: /usr/local/arch/ncvs/src/sys/ufs/ffs/ffs_snapshot.c,v
retrieving revision 1.128
diff -u -r1.128 ffs_snapshot.c
--- ffs/ffs_snapshot.c	21 Aug 2006 17:20:19 -0000	1.128
+++ ffs/ffs_snapshot.c	23 Aug 2006 10:59:24 -0000
@@ -2322,6 +2322,8 @@
  loop:
 	MNT_VNODE_FOREACH(vp, mp, mvp) {
 		VI_LOCK(vp);
+		if (vp->v_type == VREG)
+			ufs_lazyaccess(vp);
 		if ((vp->v_iflag & (VI_DOOMED | VI_OWEINACT)) != VI_OWEINACT ||
 		    vp->v_usecount > 0 ||
 		    vp->v_type == VNON) {
Index: ufs/inode.h
===================================================================
RCS file: /usr/local/arch/ncvs/src/sys/ufs/ufs/inode.h,v
retrieving revision 1.49
diff -u -r1.49 inode.h
--- ufs/inode.h	14 Mar 2005 10:21:16 -0000	1.49
+++ ufs/inode.h	23 Aug 2006 10:59:24 -0000
@@ -119,6 +119,7 @@
 #define	IN_RENAME	0x0010		/* Inode is being renamed. */
 #define	IN_LAZYMOD	0x0040		/* Modified, but don't write yet. */
 #define	IN_SPACECOUNTED	0x0080		/* Blocks to be freed in free count. */
+#define	IN_LAZYACCESS	0x0100		/* Process IN_ACCESS after the suspension finished */
 
 #define i_devvp i_ump->um_devvp
 #define i_umbufobj i_ump->um_bo
Index: ufs/ufs_extern.h
===================================================================
RCS file: /usr/local/arch/ncvs/src/sys/ufs/ufs/ufs_extern.h,v
retrieving revision 1.55
diff -u -r1.55 ufs_extern.h
--- ufs/ufs_extern.h	14 Mar 2005 10:21:16 -0000	1.55
+++ ufs/ufs_extern.h	23 Aug 2006 10:59:24 -0000
@@ -74,6 +74,7 @@
 int	 ufs_inactive(struct vop_inactive_args *);
 int	 ufs_init(struct vfsconf *);
 void	 ufs_itimes(struct vnode *vp);
+void	 ufs_lazyaccess(struct vnode *vp);
 int	 ufs_lookup(struct vop_cachedlookup_args *);
 int	 ufs_readdir(struct vop_readdir_args *);
 int	 ufs_reclaim(struct vop_reclaim_args *);
Index: ufs/ufs_vnops.c
===================================================================
RCS file: /usr/local/arch/ncvs/src/sys/ufs/ufs/ufs_vnops.c,v
retrieving revision 1.277
diff -u -r1.277 ufs_vnops.c
--- ufs/ufs_vnops.c	31 May 2006 15:55:52 -0000	1.277
+++ ufs/ufs_vnops.c	23 Aug 2006 10:59:24 -0000
@@ -128,31 +128,70 @@
 {
 	struct inode *ip;
 	struct timespec ts;
+	int mnt_locked;
 
 	ip = VTOI(vp);
+	mnt_locked = 0;
+
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
+		VI_LOCK(vp);
+		goto out;
+	}
+
+	vfs_timestamp(&ts);
+
+	MNT_ILOCK(vp->v_mount);		/* For reading of mnt_kern_flags */
+	mnt_locked = 1;
+
+	VI_LOCK(vp);
 	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
-		return;
+		goto out_unl;
+
 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
 		ip->i_flag |= IN_LAZYMOD;
-	else
+	else if (((vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) == 0) ||
+		 ((ip->i_flag & (IN_CHANGE | IN_UPDATE)) != 0))
 		ip->i_flag |= IN_MODIFIED;
-	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
-		vfs_timestamp(&ts);
-		if (ip->i_flag & IN_ACCESS) {
-			DIP_SET(ip, i_atime, ts.tv_sec);
-			DIP_SET(ip, i_atimensec, ts.tv_nsec);
-		}
-		if (ip->i_flag & IN_UPDATE) {
-			DIP_SET(ip, i_mtime, ts.tv_sec);
-			DIP_SET(ip, i_mtimensec, ts.tv_nsec);
-			ip->i_modrev++;
-		}
-		if (ip->i_flag & IN_CHANGE) {
-			DIP_SET(ip, i_ctime, ts.tv_sec);
-			DIP_SET(ip, i_ctimensec, ts.tv_nsec);
-		}
+	else if ((ip->i_flag & IN_ACCESS) != 0)
+		ip->i_flag |= IN_LAZYACCESS;
+	
+	if (ip->i_flag & IN_ACCESS) {
+		DIP_SET(ip, i_atime, ts.tv_sec);
+		DIP_SET(ip, i_atimensec, ts.tv_nsec);
+	}
+	if (ip->i_flag & IN_UPDATE) {
+		DIP_SET(ip, i_mtime, ts.tv_sec);
+		DIP_SET(ip, i_mtimensec, ts.tv_nsec);
+		ip->i_modrev++;
+	}
+	if (ip->i_flag & IN_CHANGE) {
+		DIP_SET(ip, i_ctime, ts.tv_sec);
+		DIP_SET(ip, i_ctimensec, ts.tv_nsec);
 	}
+
+ out:
 	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
+ out_unl:
+	VI_UNLOCK(vp);
+	if (mnt_locked)
+		MNT_IUNLOCK(vp->v_mount);
+}
+
+/*
+ * Clear the IN_LAZYACCESS i_flag. vnode shall be interlocked.
+ */
+
+void
+ufs_lazyaccess(vp)
+	struct vnode *vp;
+{
+	struct inode *ip;
+
+	ip = VTOI(vp);
+	if ((ip->i_flag & IN_LAZYACCESS) != 0) {
+		ip->i_flag &= ~IN_LAZYACCESS;
+		ip->i_flag |= IN_MODIFIED;
+	}
 }
 
 /*
@@ -266,11 +305,15 @@
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
+	int usecount;
 
 	VI_LOCK(vp);
-	if (vp->v_usecount > 1)
-		ufs_itimes(vp);
+	usecount = vp->v_usecount;
 	VI_UNLOCK(vp);
+
+	if (usecount > 1)
+		ufs_itimes(vp);
+
 	return (0);
 }
 
@@ -378,8 +421,10 @@
 	if (ip->i_ump->um_fstype == UFS1) {
 		vap->va_rdev = ip->i_din1->di_rdev;
 		vap->va_size = ip->i_din1->di_size;
+		VI_LOCK(vp);
 		vap->va_atime.tv_sec = ip->i_din1->di_atime;
 		vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
+		VI_UNLOCK(vp);
 		vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
 		vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
 		vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
@@ -390,8 +435,10 @@
 	} else {
 		vap->va_rdev = ip->i_din2->di_rdev;
 		vap->va_size = ip->i_din2->di_size;
+		VI_LOCK(vp);
 		vap->va_atime.tv_sec = ip->i_din2->di_atime;
 		vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
+		VI_UNLOCK(vp);
 		vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
 		vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
 		vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
@@ -400,7 +447,9 @@
 		vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec;
 		vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks);
 	}
+	VI_LOCK(vp);
 	vap->va_flags = ip->i_flags;
+	VI_UNLOCK(vp);
 	vap->va_gen = ip->i_gen;
 	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
 	vap->va_type = IFTOVT(ip->i_mode);
@@ -1992,11 +2041,15 @@
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
+	int usecount;
 
 	VI_LOCK(vp);
-	if (vp->v_usecount > 1)
-		ufs_itimes(vp);
+	usecount = vp->v_usecount;
 	VI_UNLOCK(vp);
+
+	if (usecount > 1)
+		ufs_itimes(vp);
+
 	return (fifo_specops.vop_close(ap));
 }
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 187 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-fs/attachments/20060823/f69decd7/attachment.pgp


More information about the freebsd-fs mailing list