svn commit: r207106 - in projects/suj/head/sys: sys ufs/ffs

Jeff Roberson jeff at FreeBSD.org
Fri Apr 23 09:09:39 UTC 2010


Author: jeff
Date: Fri Apr 23 09:09:39 2010
New Revision: 207106
URL: http://svn.freebsd.org/changeset/base/207106

Log:
  Fix snapshots on SUJ protected filesystems.
   - Expunge the journal file from the snapshot filesystem.  This leaves it
     present as a zero-byte file.
   - Unfortunately expunging is insufficient to prevent a deadlock between
     the snaplk and jwait.  Implement a B_NOCOPY flag that causes a buf write
     to skip ffs_copyonwrite altogether.
   - When removing a snapshot, sync the re-written indirect blocks before
     calling truncate.  This is a long-standing snapshot bug that applies to
     any softdep protected ffs filesystem.  The dirtied indirect blocks have
     no dependencies so softdep_setup_freeblocks will discard their contents
     and use the physical disk block addresses in indir_trunc() when removing
     the file.  This can lead to indir_trunc discovering block pointers with
     BLK_NOCOPY and BLK_SNAP values.

Modified:
  projects/suj/head/sys/sys/buf.h
  projects/suj/head/sys/ufs/ffs/ffs_extern.h
  projects/suj/head/sys/ufs/ffs/ffs_snapshot.c
  projects/suj/head/sys/ufs/ffs/ffs_softdep.c
  projects/suj/head/sys/ufs/ffs/ffs_vfsops.c

Modified: projects/suj/head/sys/sys/buf.h
==============================================================================
--- projects/suj/head/sys/sys/buf.h	Fri Apr 23 08:49:38 2010	(r207105)
+++ projects/suj/head/sys/sys/buf.h	Fri Apr 23 09:09:39 2010	(r207106)
@@ -215,7 +215,7 @@ struct buf {
 #define	B_DIRTY		0x00200000	/* Needs writing later (in EXT2FS). */
 #define	B_RELBUF	0x00400000	/* Release VMIO buffer. */
 #define	B_00800000	0x00800000	/* Available flag. */
-#define	B_01000000	0x01000000	/* Available flag. */
+#define	B_NOCOPY	0x01000000	/* Don't copy-on-write this buf. */
 #define	B_NEEDSGIANT	0x02000000	/* Buffer's vnode needs giant. */
 #define	B_PAGING	0x04000000	/* volatile paging I/O -- bypass VMIO */
 #define B_MANAGED	0x08000000	/* Managed by FS. */

Modified: projects/suj/head/sys/ufs/ffs/ffs_extern.h
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_extern.h	Fri Apr 23 08:49:38 2010	(r207105)
+++ projects/suj/head/sys/ufs/ffs/ffs_extern.h	Fri Apr 23 09:09:39 2010	(r207106)
@@ -145,6 +145,8 @@ int     softdep_process_worklist(struct 
 int     softdep_fsync(struct vnode *);
 int	softdep_waitidle(struct mount *);
 int	softdep_prealloc(struct vnode *, int);
+int	softdep_journal_lookup(struct mount *, struct vnode **);
+
 
 int	ffs_rdonly(struct inode *);
 

Modified: projects/suj/head/sys/ufs/ffs/ffs_snapshot.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_snapshot.c	Fri Apr 23 08:49:38 2010	(r207105)
+++ projects/suj/head/sys/ufs/ffs/ffs_snapshot.c	Fri Apr 23 09:09:39 2010	(r207106)
@@ -142,7 +142,7 @@ MTX_SYSINIT(ffs_snapfree, &snapfree_lock
 static int cgaccount(int, struct vnode *, struct buf *, int);
 static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
     int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
-    ufs_lbn_t, int), int);
+    ufs_lbn_t, int), int, int);
 static int indiracct_ufs1(struct vnode *, struct vnode *, int,
     ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
     int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
@@ -155,7 +155,7 @@ static int mapacct_ufs1(struct vnode *, 
     struct fs *, ufs_lbn_t, int);
 static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
     int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
-    ufs_lbn_t, int), int);
+    ufs_lbn_t, int), int, int);
 static int indiracct_ufs2(struct vnode *, struct vnode *, int,
     ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
     int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
@@ -591,10 +591,10 @@ loop:
 		snaplistsize += 1;
 		if (xp->i_ump->um_fstype == UFS1)
 			error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
-			    BLK_NOCOPY);
+			    BLK_NOCOPY, 1);
 		else
 			error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
-			    BLK_NOCOPY);
+			    BLK_NOCOPY, 1);
 		if (blkno)
 			DIP_SET(xp, i_db[loc], blkno);
 		if (!error)
@@ -613,6 +613,26 @@ loop:
 	}
 	MNT_IUNLOCK(mp);
 	/*
+	 * Erase the journal file from the snapshot.
+	 */
+	if (fs->fs_flags & FS_SUJ) {
+		error = softdep_journal_lookup(mp, &xvp);
+		if (error) {
+			free(copy_fs->fs_csp, M_UFSMNT);
+			bawrite(sbp);
+			sbp = NULL;
+			goto out1;
+		}
+		xp = VTOI(xvp);
+		if (xp->i_ump->um_fstype == UFS1)
+			error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
+			    BLK_NOCOPY, 0);
+		else
+			error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
+			    BLK_NOCOPY, 0);
+		vput(xvp);
+	}
+	/*
 	 * Acquire a lock on the snapdata structure, creating it if necessary.
 	 */
 	sn = ffs_snapdata_acquire(devvp);
@@ -692,10 +712,10 @@ out1:
 			break;
 		if (xp->i_ump->um_fstype == UFS1)
 			error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
-			    BLK_SNAP);
+			    BLK_SNAP, 0);
 		else
 			error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
-			    BLK_SNAP);
+			    BLK_SNAP, 0);
 		if (error == 0 && xp->i_effnlink == 0) {
 			error = ffs_freefile(ump,
 					     copy_fs,
@@ -720,9 +740,11 @@ out1:
 	 * the list of allocated blocks in i_snapblklist.
 	 */
 	if (ip->i_ump->um_fstype == UFS1)
-		error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP);
+		error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1,
+		    BLK_SNAP, 0);
 	else
-		error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP);
+		error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2,
+		    BLK_SNAP, 0);
 	if (error) {
 		fs->fs_snapinum[snaploc] = 0;
 		free(snapblklist, M_UFSMNT);
@@ -955,13 +977,14 @@ cgaccount(cg, vp, nbp, passno)
  * is reproduced once each for UFS1 and UFS2.
  */
 static int
-expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype)
+expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
 	struct vnode *snapvp;
 	struct inode *cancelip;
 	struct fs *fs;
 	int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
 	    struct fs *, ufs_lbn_t, int);
 	int expungetype;
+	int clearmode;
 {
 	int i, error, indiroff;
 	ufs_lbn_t lbn, rlbn;
@@ -1006,7 +1029,7 @@ expunge_ufs1(snapvp, cancelip, fs, acctf
 	 */
 	dip = (struct ufs1_dinode *)bp->b_data +
 	    ino_to_fsbo(fs, cancelip->i_number);
-	if (expungetype == BLK_NOCOPY || cancelip->i_effnlink == 0)
+	if (clearmode || cancelip->i_effnlink == 0)
 		dip->di_mode = 0;
 	dip->di_size = 0;
 	dip->di_blocks = 0;
@@ -1235,13 +1258,14 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, 
  * is reproduced once each for UFS1 and UFS2.
  */
 static int
-expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype)
+expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
 	struct vnode *snapvp;
 	struct inode *cancelip;
 	struct fs *fs;
 	int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
 	    struct fs *, ufs_lbn_t, int);
 	int expungetype;
+	int clearmode;
 {
 	int i, error, indiroff;
 	ufs_lbn_t lbn, rlbn;
@@ -1286,7 +1310,7 @@ expunge_ufs2(snapvp, cancelip, fs, acctf
 	 */
 	dip = (struct ufs2_dinode *)bp->b_data +
 	    ino_to_fsbo(fs, cancelip->i_number);
-	if (expungetype == BLK_NOCOPY)
+	if (clearmode || cancelip->i_effnlink == 0)
 		dip->di_mode = 0;
 	dip->di_size = 0;
 	dip->di_blocks = 0;
@@ -1658,6 +1682,13 @@ ffs_snapremove(vp)
 	ip->i_flags &= ~SF_SNAPSHOT;
 	DIP_SET(ip, i_flags, ip->i_flags);
 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	/*
+	 * The dirtied indirects must be written out before
+	 * softdep_setup_freeblocks() is called.  Otherwise indir_trunc()
+	 * may find indirect pointers using the magic BLK_* values.
+	 */
+	if (DOINGSOFTDEP(vp))
+		ffs_syncvnode(vp, MNT_WAIT);
 #ifdef QUOTA
 	/*
 	 * Reenable disk quotas for ex-snapshot file.

Modified: projects/suj/head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_softdep.c	Fri Apr 23 08:49:38 2010	(r207105)
+++ projects/suj/head/sys/ufs/ffs/ffs_softdep.c	Fri Apr 23 09:09:39 2010	(r207106)
@@ -2052,27 +2052,16 @@ jblocks_add(jblocks, daddr, blocks)
 	return;
 }
 
-/*
- * Open and verify the journal file.
- */
-static int
-journal_mount(mp, fs, cred)
+int
+softdep_journal_lookup(mp, vpp)
 	struct mount *mp;
-	struct fs *fs;
-	struct ucred *cred;
+	struct vnode **vpp;
 {
 	struct componentname cnp;
-	struct jblocks *jblocks;
 	struct vnode *dvp;
-	struct vnode *vp;
-	struct inode *ip;
-	ufs2_daddr_t blkno;
 	ino_t sujournal;
-	int bcount;
 	int error;
-	int i;
 
-	mp->mnt_kern_flag |= MNTK_SUJ;
 	error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp);
 	if (error)
 		return (error);
@@ -2086,13 +2075,35 @@ journal_mount(mp, fs, cred)
 	cnp.cn_namelen = strlen(SUJ_FILE);
 	error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal);
 	vput(dvp);
+	if (error != 0)
+		return (error);
+	error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, vpp);
+	return (error);
+}
+
+/*
+ * Open and verify the journal file.
+ */
+static int
+journal_mount(mp, fs, cred)
+	struct mount *mp;
+	struct fs *fs;
+	struct ucred *cred;
+{
+	struct jblocks *jblocks;
+	struct vnode *vp;
+	struct inode *ip;
+	ufs2_daddr_t blkno;
+	int bcount;
+	int error;
+	int i;
+
+	mp->mnt_kern_flag |= MNTK_SUJ;
+	error = softdep_journal_lookup(mp, &vp);
 	if (error != 0) {
 		printf("Failed to find journal.  Use tunefs to create one\n");
 		return (error);
 	}
-	error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, &vp);
-	if (error)
-		return (error);
 	ip = VTOI(vp);
 	if (ip->i_size < SUJ_MIN) {
 		error = ENOSPC;
@@ -2588,6 +2599,7 @@ softdep_process_journal(mp, flags)
 		bp->b_bcount = size;
 		bp->b_bufobj = &ump->um_devvp->v_bufobj;
 		bp->b_flags &= ~B_INVAL;
+		bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY;
 		/*
 		 * Initialize our jseg with cnt records.  Assign the next
 		 * sequence number to it and link it in-order.

Modified: projects/suj/head/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_vfsops.c	Fri Apr 23 08:49:38 2010	(r207105)
+++ projects/suj/head/sys/ufs/ffs/ffs_vfsops.c	Fri Apr 23 09:09:39 2010	(r207106)
@@ -1948,6 +1948,7 @@ ffs_geom_strategy(struct bufobj *bo, str
 	struct vnode *vp;
 	int error;
 	struct buf *tbp;
+	int nocopy;
 
 	vp = bo->__bo_vnode;
 	if (bp->b_iocmd == BIO_WRITE) {
@@ -1955,8 +1956,9 @@ ffs_geom_strategy(struct bufobj *bo, str
 		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
 		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
 			panic("ffs_geom_strategy: bad I/O");
-		bp->b_flags &= ~B_VALIDSUSPWRT;
-		if ((vp->v_vflag & VV_COPYONWRITE) &&
+		nocopy = bp->b_flags & B_NOCOPY;
+		bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
+		if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
 		    vp->v_rdev->si_snapdata != NULL) {
 			if ((bp->b_flags & B_CLUSTER) != 0) {
 				runningbufwakeup(bp);


More information about the svn-src-projects mailing list