svn commit: r223020 - in head/sys/ufs: ffs ufs

Kirk McKusick mckusick at FreeBSD.org
Sun Jun 12 19:27:06 UTC 2011


Author: mckusick
Date: Sun Jun 12 19:27:05 2011
New Revision: 223020
URL: http://svn.freebsd.org/changeset/base/223020

Log:
  Update to soft updates journaling to properly track freed blocks
  that get claimed by snapshots.
  
  Submitted by:	Jeff Roberson
  Tested by:	Peter Holm

Modified:
  head/sys/ufs/ffs/ffs_alloc.c
  head/sys/ufs/ffs/ffs_extern.h
  head/sys/ufs/ffs/ffs_snapshot.c
  head/sys/ufs/ffs/ffs_softdep.c
  head/sys/ufs/ufs/ufs_vnops.c

Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c	Sun Jun 12 18:52:39 2011	(r223019)
+++ head/sys/ufs/ffs/ffs_alloc.c	Sun Jun 12 19:27:05 2011	(r223020)
@@ -2035,7 +2035,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, i
 	 */
 	if (devvp->v_type != VREG &&
 	    (devvp->v_vflag & VV_COPYONWRITE) &&
-	    ffs_snapblkfree(fs, devvp, bno, size, inum)) {
+	    ffs_snapblkfree(fs, devvp, bno, size, inum, dephd)) {
 		return;
 	}
 	if (!ump->um_candelete) {

Modified: head/sys/ufs/ffs/ffs_extern.h
==============================================================================
--- head/sys/ufs/ffs/ffs_extern.h	Sun Jun 12 18:52:39 2011	(r223019)
+++ head/sys/ufs/ffs/ffs_extern.h	Sun Jun 12 19:27:05 2011	(r223020)
@@ -80,12 +80,14 @@ int	ffs_realloccg(struct inode *, ufs2_d
 	    ufs2_daddr_t, int, int, int, struct ucred *, struct buf **);
 int	ffs_sbupdate(struct ufsmount *, int, int);
 void	ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
-int	ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t);
+int	ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t,
+	    struct workhead *);
 void	ffs_snapremove(struct vnode *vp);
 int	ffs_snapshot(struct mount *mp, char *snapfile);
 void	ffs_snapshot_mount(struct mount *mp);
 void	ffs_snapshot_unmount(struct mount *mp);
 void	process_deferred_inactive(struct mount *mp);
+void	ffs_sync_snap(struct mount *, int);
 int	ffs_syncvnode(struct vnode *vp, int waitfor);
 int	ffs_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);
 int	ffs_update(struct vnode *, int);
@@ -149,6 +151,9 @@ int	softdep_prealloc(struct vnode *, int
 int	softdep_journal_lookup(struct mount *, struct vnode **);
 void	softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int);
 void	softdep_journal_fsync(struct inode *);
+void	softdep_buf_append(struct buf *, struct workhead *);
+void	softdep_inode_append(struct inode *, struct ucred *, struct workhead *);
+void	softdep_freework(struct workhead *);
 
 
 /*
@@ -161,4 +166,14 @@ void	softdep_journal_fsync(struct inode 
 
 int	ffs_rdonly(struct inode *);
 
+TAILQ_HEAD(snaphead, inode);
+
+struct snapdata {
+	LIST_ENTRY(snapdata) sn_link;
+	struct snaphead sn_head;
+	daddr_t sn_listsize;
+	daddr_t *sn_blklist;
+	struct lock sn_lock;
+};
+
 #endif /* !_UFS_FFS_EXTERN_H */

Modified: head/sys/ufs/ffs/ffs_snapshot.c
==============================================================================
--- head/sys/ufs/ffs/ffs_snapshot.c	Sun Jun 12 18:52:39 2011	(r223019)
+++ head/sys/ufs/ffs/ffs_snapshot.c	Sun Jun 12 19:27:05 2011	(r223020)
@@ -81,12 +81,13 @@ ffs_snapshot(mp, snapfile)
 }
 
 int
-ffs_snapblkfree(fs, devvp, bno, size, inum)
+ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd)
 	struct fs *fs;
 	struct vnode *devvp;
 	ufs2_daddr_t bno;
 	long size;
 	ino_t inum;
+	struct workhead *wkhd;
 {
 	return (EINVAL);
 }
@@ -123,19 +124,16 @@ ffs_copyonwrite(devvp, bp)
 	return (EINVAL);
 }
 
+void
+ffs_sync_snap(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+}
+
 #else
 FEATURE(ffs_snapshot, "FFS snapshot support");
 
-TAILQ_HEAD(snaphead, inode);
-
-struct snapdata {
-	LIST_ENTRY(snapdata) sn_link;
-	struct snaphead sn_head;
-	daddr_t sn_listsize;
-	daddr_t *sn_blklist;
-	struct lock sn_lock;
-};
-
 LIST_HEAD(, snapdata) snapfree;
 static struct mtx snapfree_lock;
 MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF);
@@ -1635,7 +1633,7 @@ ffs_snapremove(vp)
 			DIP_SET(ip, i_db[blkno], 0);
 		else if ((dblk == blkstofrags(fs, blkno) &&
 		     ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
-		     ip->i_number))) {
+		     ip->i_number, NULL))) {
 			DIP_SET(ip, i_blocks, DIP(ip, i_blocks) -
 			    btodb(fs->fs_bsize));
 			DIP_SET(ip, i_db[blkno], 0);
@@ -1660,7 +1658,7 @@ ffs_snapremove(vp)
 					((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
 				else if ((dblk == blkstofrags(fs, blkno) &&
 				     ffs_snapblkfree(fs, ip->i_devvp, dblk,
-				     fs->fs_bsize, ip->i_number))) {
+				     fs->fs_bsize, ip->i_number, NULL))) {
 					ip->i_din1->di_blocks -=
 					    btodb(fs->fs_bsize);
 					((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
@@ -1674,7 +1672,7 @@ ffs_snapremove(vp)
 				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
 			else if ((dblk == blkstofrags(fs, blkno) &&
 			     ffs_snapblkfree(fs, ip->i_devvp, dblk,
-			     fs->fs_bsize, ip->i_number))) {
+			     fs->fs_bsize, ip->i_number, NULL))) {
 				ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
 				((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
 			}
@@ -1722,12 +1720,13 @@ ffs_snapremove(vp)
  * must always have been allocated from a BLK_NOCOPY location.
  */
 int
-ffs_snapblkfree(fs, devvp, bno, size, inum)
+ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd)
 	struct fs *fs;
 	struct vnode *devvp;
 	ufs2_daddr_t bno;
 	long size;
 	ino_t inum;
+	struct workhead *wkhd;
 {
 	struct buf *ibp, *cbp, *savedcbp = 0;
 	struct thread *td = curthread;
@@ -1825,6 +1824,17 @@ retry:
 				    "Grabonremove: snapino", ip->i_number,
 				    (intmax_t)lbn, inum);
 #endif
+			/*
+			 * If journaling is tracking this write we must add
+			 * the work to the inode or indirect being written.
+			 */
+			if (wkhd != NULL) {
+				if (lbn < NDADDR)
+					softdep_inode_append(ip,
+					    curthread->td_ucred, wkhd);
+				else
+					softdep_buf_append(ibp, wkhd);
+			}
 			if (lbn < NDADDR) {
 				DIP_SET(ip, i_db[lbn], bno);
 			} else if (ip->i_ump->um_fstype == UFS1) {
@@ -1902,6 +1912,8 @@ retry:
 	 * not be freed. Although space will be lost, the snapshot
 	 * will stay consistent.
 	 */
+	if (error != 0 && wkhd != NULL)
+		softdep_freework(wkhd);
 	lockmgr(vp->v_vnlock, LK_RELEASE, NULL);
 	return (error);
 }
@@ -2400,6 +2412,42 @@ ffs_copyonwrite(devvp, bp)
 }
 
 /*
+ * Sync snapshots so that freework records waiting for a snapshot to claim
+ * their blocks can be freed.
+ */
+void
+ffs_sync_snap(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+	struct snapdata *sn;
+	struct vnode *devvp;
+	struct vnode *vp;
+	struct inode *ip;
+
+	devvp = VFSTOUFS(mp)->um_devvp;
+	if ((devvp->v_vflag & VV_COPYONWRITE) == 0)
+		return;
+	for (;;) {
+		VI_LOCK(devvp);
+		sn = devvp->v_rdev->si_snapdata;
+		if (sn == NULL) {
+			VI_UNLOCK(devvp);
+			return;
+		}
+		if (lockmgr(&sn->sn_lock,
+		    LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
+		    VI_MTX(devvp)) == 0)
+			break;
+	}
+	TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
+		vp = ITOV(ip);
+		ffs_syncvnode(vp, waitfor);
+	}
+	lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
+}
+
+/*
  * Read the specified block into the given buffer.
  * Much of this boiler-plate comes from bwrite().
  */

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c	Sun Jun 12 18:52:39 2011	(r223019)
+++ head/sys/ufs/ffs/ffs_softdep.c	Sun Jun 12 19:27:05 2011	(r223020)
@@ -584,6 +584,33 @@ softdep_get_depcounts(struct mount *mp,
 	*softdepactiveaccp = 0;
 }
 
+void
+softdep_buf_append(bp, wkhd)
+	struct buf *bp;
+	struct workhead *wkhd;
+{
+
+	panic("softdep_buf_appendwork called");
+}
+
+void
+softdep_inode_append(ip, cred, wkhd)
+	struct inode *ip;
+	struct ucred *cred;
+	struct workhead *wkhd;
+{
+
+	panic("softdep_inode_appendwork called");
+}
+
+void
+softdep_freework(wkhd)
+	struct workhead *wkhd;
+{
+
+	panic("softdep_freework called");
+}
+
 #else
 
 FEATURE(softupdates, "FFS soft-updates support");
@@ -867,7 +894,7 @@ static	void freework_enqueue(struct free
 static	int handle_workitem_freeblocks(struct freeblks *, int);
 static	int handle_complete_freeblocks(struct freeblks *, int);
 static	void handle_workitem_indirblk(struct freework *);
-static	void handle_written_freework(struct freework *, int);
+static	void handle_written_freework(struct freework *);
 static	void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
 static	struct worklist *jnewblk_merge(struct worklist *, struct worklist *,
 	    struct workhead *);
@@ -1632,6 +1659,7 @@ process_truncates(vp)
 		if (cgwait) {
 			FREE_LOCK(&lk);
 			sync_cgs(mp, MNT_WAIT);
+			ffs_sync_snap(mp, MNT_WAIT);
 			ACQUIRE_LOCK(&lk);
 			continue;
 		}
@@ -5922,7 +5950,7 @@ complete_trunc_indir(freework)
 	 */
 	if (bp == NULL)  {
 		if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jblkdephd))
-			handle_written_freework(freework, 0);
+			handle_written_freework(freework);
 		else
 			WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd,
 			   &freework->fw_list);
@@ -5974,7 +6002,7 @@ out:
 	 */
 	if (totblks > datablocks)
 		return (0);
-	return (totblks - datablocks);
+	return (datablocks - totblks);
 }
 
 /*
@@ -7228,6 +7256,7 @@ freework_freeblock(freework)
 		cancel_jnewblk(jnewblk, &wkhd);
 		needj = 0;
 	} else if (needj) {
+		freework->fw_state |= DELAYEDFREE;
 		freeblks->fb_cgwait++;
 		WORKLIST_INSERT(&wkhd, &freework->fw_list);
 	}
@@ -7241,7 +7270,7 @@ freework_freeblock(freework)
 	 * made it to disk.  We can immediately free the freeblk.
 	 */
 	if (needj == 0)
-		handle_written_freework(freework, 0);
+		handle_written_freework(freework);
 }
 
 /*
@@ -7256,7 +7285,8 @@ freework_enqueue(freework)
 	struct freeblks *freeblks;
 
 	freeblks = freework->fw_freeblks;
-	WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
+	if ((freework->fw_state & INPROGRESS) == 0)
+		WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
 	if ((freeblks->fb_state &
 	    (ONWORKLIST | INPROGRESS | ALLCOMPLETE)) == ALLCOMPLETE &&
 	    LIST_EMPTY(&freeblks->fb_jblkdephd))
@@ -7282,13 +7312,14 @@ handle_workitem_indirblk(freework)
 	ump = VFSTOUFS(freeblks->fb_list.wk_mp);
 	fs = ump->um_fs;
 	if (freework->fw_state & DEPCOMPLETE) {
-		handle_written_freework(freework, 0);
+		handle_written_freework(freework);
 		return;
 	}
 	if (freework->fw_off == NINDIR(fs)) {
 		freework_freeblock(freework);
 		return;
 	}
+	freework->fw_state |= INPROGRESS;
 	FREE_LOCK(&lk);
 	indir_trunc(freework, fsbtodb(fs, freework->fw_blkno),
 	    freework->fw_lbn);
@@ -7301,16 +7332,16 @@ handle_workitem_indirblk(freework)
  * the freeblks is added back to the worklist if there is more work to do.
  */
 static void
-handle_written_freework(freework, cgwrite)
+handle_written_freework(freework)
 	struct freework *freework;
-	int cgwrite;
 {
 	struct freeblks *freeblks;
 	struct freework *parent;
 
 	freeblks = freework->fw_freeblks;
 	parent = freework->fw_parent;
-	freeblks->fb_cgwait -= cgwrite;
+	if (freework->fw_state & DELAYEDFREE)
+		freeblks->fb_cgwait--;
 	freework->fw_state |= COMPLETE;
 	if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
 		WORKITEM_FREE(freework, D_FREEWORK);
@@ -7552,6 +7583,8 @@ indir_trunc(freework, dbn, lbn)
 		return;
 	}
 	ACQUIRE_LOCK(&lk);
+	/* Protects against a race with complete_trunc_indir(). */
+	freework->fw_state &= ~INPROGRESS;
 	/*
 	 * If we have an indirdep we need to enforce the truncation order
 	 * and discard it when it is complete.
@@ -7675,7 +7708,7 @@ indir_trunc(freework, dbn, lbn)
 	if (freework->fw_blkno == dbn) {
 		freework->fw_state |= ALLCOMPLETE;
 		ACQUIRE_LOCK(&lk);
-		handle_written_freework(freework, 0);
+		handle_written_freework(freework);
 		FREE_LOCK(&lk);
 	}
 	return;
@@ -10368,8 +10401,7 @@ softdep_disk_write_complete(bp)
 			continue;
 
 		case D_FREEWORK:
-			/* Freework on an indirect block, not bmsafemap. */
-			handle_written_freework(WK_FREEWORK(wk), 0);
+			handle_written_freework(WK_FREEWORK(wk));
 			break;
 
 		case D_JSEGDEP:
@@ -10540,7 +10572,7 @@ handle_jwork(wkhd)
 			free_freedep(WK_FREEDEP(wk));
 			continue;
 		case D_FREEWORK:
-			handle_written_freework(WK_FREEWORK(wk), 1);
+			handle_written_freework(WK_FREEWORK(wk));
 			continue;
 		default:
 			panic("handle_jwork: Unknown type %s\n",
@@ -12738,6 +12770,53 @@ clear_inodedeps(td)
 	}
 }
 
+void
+softdep_buf_append(bp, wkhd)
+	struct buf *bp;
+	struct workhead *wkhd;
+{
+	struct worklist *wk;
+
+	ACQUIRE_LOCK(&lk);
+	while ((wk = LIST_FIRST(wkhd)) != NULL) {
+		WORKLIST_REMOVE(wk);
+		WORKLIST_INSERT(&bp->b_dep, wk);
+	}
+	FREE_LOCK(&lk);
+
+}
+
+void
+softdep_inode_append(ip, cred, wkhd)
+	struct inode *ip;
+	struct ucred *cred;
+	struct workhead *wkhd;
+{
+	struct buf *bp;
+	struct fs *fs;
+	int error;
+
+	fs = ip->i_fs;
+	error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+	    (int)fs->fs_bsize, cred, &bp);
+	if (error) {
+		softdep_freework(wkhd);
+		return;
+	}
+	softdep_buf_append(bp, wkhd);
+	bqrelse(bp);
+}
+
+void
+softdep_freework(wkhd)
+	struct workhead *wkhd;
+{
+
+	ACQUIRE_LOCK(&lk);
+	handle_jwork(wkhd);
+	FREE_LOCK(&lk);
+}
+
 /*
  * Function to determine if the buffer has outstanding dependencies
  * that will cause a roll-back if the buffer is written. If wantcount

Modified: head/sys/ufs/ufs/ufs_vnops.c
==============================================================================
--- head/sys/ufs/ufs/ufs_vnops.c	Sun Jun 12 18:52:39 2011	(r223019)
+++ head/sys/ufs/ufs/ufs_vnops.c	Sun Jun 12 19:27:05 2011	(r223020)
@@ -1838,6 +1838,8 @@ ufs_mkdir(ap)
 #ifdef QUOTA
 		if ((error = getinoquota(ip)) ||
 	    	    (error = chkiq(ip, 1, ucp, 0))) {
+			if (DOINGSOFTDEP(tvp))
+				softdep_revert_link(dp, ip);
 			UFS_VFREE(tvp, ip->i_number, dmode);
 			vput(tvp);
 			return (error);
@@ -1850,6 +1852,8 @@ ufs_mkdir(ap)
 #ifdef QUOTA
 	if ((error = getinoquota(ip)) ||
 	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
+		if (DOINGSOFTDEP(tvp))
+			softdep_revert_link(dp, ip);
 		UFS_VFREE(tvp, ip->i_number, dmode);
 		vput(tvp);
 		return (error);
@@ -2608,6 +2612,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
 #ifdef QUOTA
 		if ((error = getinoquota(ip)) ||
 	    	    (error = chkiq(ip, 1, ucp, 0))) {
+			if (DOINGSOFTDEP(tvp))
+				softdep_revert_link(pdir, ip);
 			UFS_VFREE(tvp, ip->i_number, mode);
 			vput(tvp);
 			return (error);
@@ -2620,6 +2626,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
 #ifdef QUOTA
 	if ((error = getinoquota(ip)) ||
 	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
+		if (DOINGSOFTDEP(tvp))
+			softdep_revert_link(pdir, ip);
 		UFS_VFREE(tvp, ip->i_number, mode);
 		vput(tvp);
 		return (error);


More information about the svn-src-all mailing list