svn commit: r207106 - in projects/suj/head/sys: sys ufs/ffs
Jeff Roberson
jeff at FreeBSD.org
Fri Apr 23 09:09:39 UTC 2010
Author: jeff
Date: Fri Apr 23 09:09:39 2010
New Revision: 207106
URL: http://svn.freebsd.org/changeset/base/207106
Log:
Fix snapshots on SUJ protected filesystems.
- Expunge the journal file from the snapshot filesystem. This leaves it
present as a zero-byte file.
- Unfortunately, expunging is insufficient to prevent a deadlock between
the snaplk and jwait. Implement a B_NOCOPY flag that causes a buf write
to skip ffs_copyonwrite altogether.
- When removing a snapshot, sync the rewritten indirect blocks before
calling truncate. This is a long-standing snapshot bug that applies to
any softdep protected ffs filesystem. The dirtied indirect blocks have
no dependencies so softdep_setup_freeblocks will discard their contents
and use the physical disk block addresses in indir_trunc() when removing
the file. This can lead to indir_trunc discovering block pointers with
BLK_NOCOPY and BLK_SNAP values.
Modified:
projects/suj/head/sys/sys/buf.h
projects/suj/head/sys/ufs/ffs/ffs_extern.h
projects/suj/head/sys/ufs/ffs/ffs_snapshot.c
projects/suj/head/sys/ufs/ffs/ffs_softdep.c
projects/suj/head/sys/ufs/ffs/ffs_vfsops.c
Modified: projects/suj/head/sys/sys/buf.h
==============================================================================
--- projects/suj/head/sys/sys/buf.h Fri Apr 23 08:49:38 2010 (r207105)
+++ projects/suj/head/sys/sys/buf.h Fri Apr 23 09:09:39 2010 (r207106)
@@ -215,7 +215,7 @@ struct buf {
#define B_DIRTY 0x00200000 /* Needs writing later (in EXT2FS). */
#define B_RELBUF 0x00400000 /* Release VMIO buffer. */
#define B_00800000 0x00800000 /* Available flag. */
-#define B_01000000 0x01000000 /* Available flag. */
+#define B_NOCOPY 0x01000000 /* Don't copy-on-write this buf. */
#define B_NEEDSGIANT 0x02000000 /* Buffer's vnode needs giant. */
#define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */
#define B_MANAGED 0x08000000 /* Managed by FS. */
Modified: projects/suj/head/sys/ufs/ffs/ffs_extern.h
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_extern.h Fri Apr 23 08:49:38 2010 (r207105)
+++ projects/suj/head/sys/ufs/ffs/ffs_extern.h Fri Apr 23 09:09:39 2010 (r207106)
@@ -145,6 +145,8 @@ int softdep_process_worklist(struct
int softdep_fsync(struct vnode *);
int softdep_waitidle(struct mount *);
int softdep_prealloc(struct vnode *, int);
+int softdep_journal_lookup(struct mount *, struct vnode **);
+
int ffs_rdonly(struct inode *);
Modified: projects/suj/head/sys/ufs/ffs/ffs_snapshot.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_snapshot.c Fri Apr 23 08:49:38 2010 (r207105)
+++ projects/suj/head/sys/ufs/ffs/ffs_snapshot.c Fri Apr 23 09:09:39 2010 (r207106)
@@ -142,7 +142,7 @@ MTX_SYSINIT(ffs_snapfree, &snapfree_lock
static int cgaccount(int, struct vnode *, struct buf *, int);
static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
- ufs_lbn_t, int), int);
+ ufs_lbn_t, int), int, int);
static int indiracct_ufs1(struct vnode *, struct vnode *, int,
ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
@@ -155,7 +155,7 @@ static int mapacct_ufs1(struct vnode *,
struct fs *, ufs_lbn_t, int);
static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
- ufs_lbn_t, int), int);
+ ufs_lbn_t, int), int, int);
static int indiracct_ufs2(struct vnode *, struct vnode *, int,
ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
@@ -591,10 +591,10 @@ loop:
snaplistsize += 1;
if (xp->i_ump->um_fstype == UFS1)
error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
- BLK_NOCOPY);
+ BLK_NOCOPY, 1);
else
error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
- BLK_NOCOPY);
+ BLK_NOCOPY, 1);
if (blkno)
DIP_SET(xp, i_db[loc], blkno);
if (!error)
@@ -613,6 +613,26 @@ loop:
}
MNT_IUNLOCK(mp);
/*
+ * Erase the journal file from the snapshot.
+ */
+ if (fs->fs_flags & FS_SUJ) {
+ error = softdep_journal_lookup(mp, &xvp);
+ if (error) {
+ free(copy_fs->fs_csp, M_UFSMNT);
+ bawrite(sbp);
+ sbp = NULL;
+ goto out1;
+ }
+ xp = VTOI(xvp);
+ if (xp->i_ump->um_fstype == UFS1)
+ error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
+ BLK_NOCOPY, 0);
+ else
+ error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
+ BLK_NOCOPY, 0);
+ vput(xvp);
+ }
+ /*
* Acquire a lock on the snapdata structure, creating it if necessary.
*/
sn = ffs_snapdata_acquire(devvp);
@@ -692,10 +712,10 @@ out1:
break;
if (xp->i_ump->um_fstype == UFS1)
error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
- BLK_SNAP);
+ BLK_SNAP, 0);
else
error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
- BLK_SNAP);
+ BLK_SNAP, 0);
if (error == 0 && xp->i_effnlink == 0) {
error = ffs_freefile(ump,
copy_fs,
@@ -720,9 +740,11 @@ out1:
* the list of allocated blocks in i_snapblklist.
*/
if (ip->i_ump->um_fstype == UFS1)
- error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP);
+ error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1,
+ BLK_SNAP, 0);
else
- error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP);
+ error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2,
+ BLK_SNAP, 0);
if (error) {
fs->fs_snapinum[snaploc] = 0;
free(snapblklist, M_UFSMNT);
@@ -955,13 +977,14 @@ cgaccount(cg, vp, nbp, passno)
* is reproduced once each for UFS1 and UFS2.
*/
static int
-expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype)
+expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
struct vnode *snapvp;
struct inode *cancelip;
struct fs *fs;
int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
struct fs *, ufs_lbn_t, int);
int expungetype;
+ int clearmode;
{
int i, error, indiroff;
ufs_lbn_t lbn, rlbn;
@@ -1006,7 +1029,7 @@ expunge_ufs1(snapvp, cancelip, fs, acctf
*/
dip = (struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, cancelip->i_number);
- if (expungetype == BLK_NOCOPY || cancelip->i_effnlink == 0)
+ if (clearmode || cancelip->i_effnlink == 0)
dip->di_mode = 0;
dip->di_size = 0;
dip->di_blocks = 0;
@@ -1235,13 +1258,14 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs,
* is reproduced once each for UFS1 and UFS2.
*/
static int
-expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype)
+expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
struct vnode *snapvp;
struct inode *cancelip;
struct fs *fs;
int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
struct fs *, ufs_lbn_t, int);
int expungetype;
+ int clearmode;
{
int i, error, indiroff;
ufs_lbn_t lbn, rlbn;
@@ -1286,7 +1310,7 @@ expunge_ufs2(snapvp, cancelip, fs, acctf
*/
dip = (struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, cancelip->i_number);
- if (expungetype == BLK_NOCOPY)
+ if (clearmode || cancelip->i_effnlink == 0)
dip->di_mode = 0;
dip->di_size = 0;
dip->di_blocks = 0;
@@ -1658,6 +1682,13 @@ ffs_snapremove(vp)
ip->i_flags &= ~SF_SNAPSHOT;
DIP_SET(ip, i_flags, ip->i_flags);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ /*
+ * The dirtied indirects must be written out before
+ * softdep_setup_freeblocks() is called. Otherwise indir_trunc()
+ * may find indirect pointers using the magic BLK_* values.
+ */
+ if (DOINGSOFTDEP(vp))
+ ffs_syncvnode(vp, MNT_WAIT);
#ifdef QUOTA
/*
* Reenable disk quotas for ex-snapshot file.
Modified: projects/suj/head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_softdep.c Fri Apr 23 08:49:38 2010 (r207105)
+++ projects/suj/head/sys/ufs/ffs/ffs_softdep.c Fri Apr 23 09:09:39 2010 (r207106)
@@ -2052,27 +2052,16 @@ jblocks_add(jblocks, daddr, blocks)
return;
}
-/*
- * Open and verify the journal file.
- */
-static int
-journal_mount(mp, fs, cred)
+int
+softdep_journal_lookup(mp, vpp)
struct mount *mp;
- struct fs *fs;
- struct ucred *cred;
+ struct vnode **vpp;
{
struct componentname cnp;
- struct jblocks *jblocks;
struct vnode *dvp;
- struct vnode *vp;
- struct inode *ip;
- ufs2_daddr_t blkno;
ino_t sujournal;
- int bcount;
int error;
- int i;
- mp->mnt_kern_flag |= MNTK_SUJ;
error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp);
if (error)
return (error);
@@ -2086,13 +2075,35 @@ journal_mount(mp, fs, cred)
cnp.cn_namelen = strlen(SUJ_FILE);
error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal);
vput(dvp);
+ if (error != 0)
+ return (error);
+ error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, vpp);
+ return (error);
+}
+
+/*
+ * Open and verify the journal file.
+ */
+static int
+journal_mount(mp, fs, cred)
+ struct mount *mp;
+ struct fs *fs;
+ struct ucred *cred;
+{
+ struct jblocks *jblocks;
+ struct vnode *vp;
+ struct inode *ip;
+ ufs2_daddr_t blkno;
+ int bcount;
+ int error;
+ int i;
+
+ mp->mnt_kern_flag |= MNTK_SUJ;
+ error = softdep_journal_lookup(mp, &vp);
if (error != 0) {
printf("Failed to find journal. Use tunefs to create one\n");
return (error);
}
- error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, &vp);
- if (error)
- return (error);
ip = VTOI(vp);
if (ip->i_size < SUJ_MIN) {
error = ENOSPC;
@@ -2588,6 +2599,7 @@ softdep_process_journal(mp, flags)
bp->b_bcount = size;
bp->b_bufobj = &ump->um_devvp->v_bufobj;
bp->b_flags &= ~B_INVAL;
+ bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY;
/*
* Initialize our jseg with cnt records. Assign the next
* sequence number to it and link it in-order.
Modified: projects/suj/head/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_vfsops.c Fri Apr 23 08:49:38 2010 (r207105)
+++ projects/suj/head/sys/ufs/ffs/ffs_vfsops.c Fri Apr 23 09:09:39 2010 (r207106)
@@ -1948,6 +1948,7 @@ ffs_geom_strategy(struct bufobj *bo, str
struct vnode *vp;
int error;
struct buf *tbp;
+ int nocopy;
vp = bo->__bo_vnode;
if (bp->b_iocmd == BIO_WRITE) {
@@ -1955,8 +1956,9 @@ ffs_geom_strategy(struct bufobj *bo, str
bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
(bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
panic("ffs_geom_strategy: bad I/O");
- bp->b_flags &= ~B_VALIDSUSPWRT;
- if ((vp->v_vflag & VV_COPYONWRITE) &&
+ nocopy = bp->b_flags & B_NOCOPY;
+ bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
+ if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
vp->v_rdev->si_snapdata != NULL) {
if ((bp->b_flags & B_CLUSTER) != 0) {
runningbufwakeup(bp);
More information about the svn-src-projects
mailing list