git: d4a8f5bf1339 - main - Handle UFS/FFS file deletion from cylinder groups with check-hash failure.

From: Kirk McKusick <mckusick_at_FreeBSD.org>
Date: Mon, 07 Aug 2023 23:28:39 UTC
The branch main has been updated by mckusick:

URL: https://cgit.FreeBSD.org/src/commit/?id=d4a8f5bf133956e71c05edff6fa20b156e5f1bbf

commit d4a8f5bf133956e71c05edff6fa20b156e5f1bbf
Author:     Kirk McKusick <mckusick@FreeBSD.org>
AuthorDate: 2023-08-07 23:27:39 +0000
Commit:     Kirk McKusick <mckusick@FreeBSD.org>
CommitDate: 2023-08-07 23:28:11 +0000

    Handle UFS/FFS file deletion from cylinder groups with check-hash failure.
    
    When a file is deleted, its blocks need to be put back in the free
    block list and its inode needs to be put back in the inode free list.
    These lists reside in cylinder-group maps. If either some of its blocks
    or its inode reside in a cylinder-group map with a bad check hash
    it is not possible to free the associated resource. Since the cylinder
    group cannot be repaired until the filesystem is unmounted these
    resources cannot be freed. They simply accumulate in memory. And
    any attempt to unmount the filesystem loops forever trying to flush them.
    
    With this change, the resource update claims to succeed so that the
    file deletion can successfully complete. The filesystem is marked as
    requiring an fsck so that before the next time that the filesystem is
    mounted, the offending cylinder groups are reconstructed causing the
    lost resources to be reclaimed.
    
    A better solution would be to downgrade the filesystem to read-only,
    but that capability is not currently implemented.
    
    Reported-by:  Peter Holm
    Tested-by:    Peter Holm
    MFC-after:    1 week
    Sponsored-by: The FreeBSD Foundation
---
 sys/ufs/ffs/ffs_alloc.c   | 27 ++++++++++++++------
 sys/ufs/ffs/ffs_extern.h  |  4 +--
 sys/ufs/ffs/ffs_softdep.c | 65 +++++++++++++++++++++++++----------------------
 3 files changed, 55 insertions(+), 41 deletions(-)

diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index c5e2a706a128..e173253720c6 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -2295,9 +2295,14 @@ ffs_blkfree_cg(struct ufsmount *ump,
 		return;
 	}
 	if ((error = ffs_getcg(fs, devvp, cg, GB_CVTENXIO, &bp, &cgp)) != 0) {
-		if (!ffs_fsfail_cleanup(ump, error) ||
-		    !MOUNTEDSOFTDEP(UFSTOVFS(ump)) || devvp->v_type != VCHR)
+		if (!MOUNTEDSOFTDEP(UFSTOVFS(ump)) || devvp->v_type != VCHR)
 			return;
+		/*
+		 * Would like to just downgrade to read-only. Until that
+		 * capability is available, just toss the cylinder group
+		 * update and mark the filesystem as needing to run fsck.
+		 */
+		fs->fs_flags |= FS_NEEDSFSCK;
 		if (devvp->v_type == VREG)
 			dbn = fragstoblks(fs, cgtod(fs, cg));
 		else
@@ -2305,7 +2310,7 @@ ffs_blkfree_cg(struct ufsmount *ump,
 		error = getblkx(devvp, dbn, dbn, fs->fs_cgsize, 0, 0, 0, &bp);
 		KASSERT(error == 0, ("getblkx failed"));
 		softdep_setup_blkfree(UFSTOVFS(ump), bp, bno,
-		    numfrags(fs, size), dephd);
+		    numfrags(fs, size), dephd, true);
 		bp->b_flags |= B_RELBUF | B_NOCACHE;
 		bp->b_flags &= ~B_CACHE;
 		bawrite(bp);
@@ -2380,7 +2385,7 @@ ffs_blkfree_cg(struct ufsmount *ump,
 	mp = UFSTOVFS(ump);
 	if (MOUNTEDSOFTDEP(mp) && devvp->v_type == VCHR)
 		softdep_setup_blkfree(UFSTOVFS(ump), bp, bno,
-		    numfrags(fs, size), dephd);
+		    numfrags(fs, size), dephd, false);
 	bdwrite(bp);
 }
 
@@ -2841,16 +2846,21 @@ ffs_freefile(struct ufsmount *ump,
 		panic("ffs_freefile: range: dev = %s, ino = %ju, fs = %s",
 		    devtoname(dev), (uintmax_t)ino, fs->fs_fsmnt);
 	if ((error = ffs_getcg(fs, devvp, cg, GB_CVTENXIO, &bp, &cgp)) != 0) {
-		if (!ffs_fsfail_cleanup(ump, error) ||
-		    !MOUNTEDSOFTDEP(UFSTOVFS(ump)) || devvp->v_type != VCHR)
+		if (!MOUNTEDSOFTDEP(UFSTOVFS(ump)) || devvp->v_type != VCHR)
 			return (error);
+		/*
+		 * Would like to just downgrade to read-only. Until that
+		 * capability is available, just toss the cylinder group
+		 * update and mark the filesystem as needing to run fsck.
+		 */
+		fs->fs_flags |= FS_NEEDSFSCK;
 		if (devvp->v_type == VREG)
 			dbn = fragstoblks(fs, cgtod(fs, cg));
 		else
 			dbn = fsbtodb(fs, cgtod(fs, cg));
 		error = getblkx(devvp, dbn, dbn, fs->fs_cgsize, 0, 0, 0, &bp);
 		KASSERT(error == 0, ("getblkx failed"));
-		softdep_setup_inofree(UFSTOVFS(ump), bp, ino, wkhd);
+		softdep_setup_inofree(UFSTOVFS(ump), bp, ino, wkhd, true);
 		bp->b_flags |= B_RELBUF | B_NOCACHE;
 		bp->b_flags &= ~B_CACHE;
 		bawrite(bp);
@@ -2880,7 +2890,7 @@ ffs_freefile(struct ufsmount *ump,
 	ACTIVECLEAR(fs, cg);
 	UFS_UNLOCK(ump);
 	if (MOUNTEDSOFTDEP(UFSTOVFS(ump)) && devvp->v_type == VCHR)
-		softdep_setup_inofree(UFSTOVFS(ump), bp, ino, wkhd);
+		softdep_setup_inofree(UFSTOVFS(ump), bp, ino, wkhd, false);
 	bdwrite(bp);
 	return (0);
 }
@@ -2888,6 +2898,7 @@ ffs_freefile(struct ufsmount *ump,
 /*
  * Check to see if a file is free.
  * Used to check for allocated files in snapshots.
+ * Return 1 if file is free.
  */
 int
 ffs_checkfreefile(struct fs *fs,
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index 46f1a71ed585..97213bc20d7c 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -194,9 +194,9 @@ void	softdep_setup_allocindir_meta(struct buf *, struct inode *,
 void	softdep_setup_allocindir_page(struct inode *, ufs_lbn_t,
 	    struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *);
 void	softdep_setup_blkfree(struct mount *, struct buf *, ufs2_daddr_t, int,
-	    struct workhead *);
+	    struct workhead *, bool);
 void	softdep_setup_inofree(struct mount *, struct buf *, ino_t,
-	    struct workhead *);
+	    struct workhead *, bool);
 void	softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *);
 void	softdep_fsync_mountdev(struct vnode *);
 int	softdep_sync_metadata(struct vnode *);
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 2606c17f7295..f671f529a04b 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -300,7 +300,8 @@ softdep_setup_blkfree(struct mount *mp,
 	struct buf *bp,
 	ufs2_daddr_t blkno,
 	int frags,
-	struct workhead *wkhd)
+	struct workhead *wkhd,
+	bool doingrecovery)
 {
 
 	panic("%s called", __FUNCTION__);
@@ -310,7 +311,8 @@ void
 softdep_setup_inofree(struct mount *mp,
 	struct buf *bp,
 	ino_t ino,
-	struct workhead *wkhd)
+	struct workhead *wkhd,
+	bool doingrecovery)
 {
 
 	panic("%s called", __FUNCTION__);
@@ -10926,30 +10928,26 @@ void
 softdep_setup_inofree(struct mount *mp,
 	struct buf *bp,
 	ino_t ino,
-	struct workhead *wkhd)
+	struct workhead *wkhd,
+	bool doingrecovery)
 {
 	struct worklist *wk, *wkn;
-	struct inodedep *inodedep;
 	struct ufsmount *ump;
-	uint8_t *inosused;
-	struct cg *cgp;
-	struct fs *fs;
+#ifdef INVARIANTS
+	struct inodedep *inodedep;
+#endif
 
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_setup_inofree called on non-softdep filesystem"));
 	ump = VFSTOUFS(mp);
 	ACQUIRE_LOCK(ump);
-	if (!ffs_fsfail_cleanup(ump, 0)) {
-		fs = ump->um_fs;
-		cgp = (struct cg *)bp->b_data;
-		inosused = cg_inosused(cgp);
-		if (isset(inosused, ino % fs->fs_ipg))
-			panic("softdep_setup_inofree: inode %ju not freed.",
-			    (uintmax_t)ino);
-	}
-	if (inodedep_lookup(mp, ino, 0, &inodedep))
-		panic("softdep_setup_inofree: ino %ju has existing inodedep %p",
-		    (uintmax_t)ino, inodedep);
+	KASSERT(doingrecovery || ffs_fsfail_cleanup(ump, 0) ||
+	    isclr(cg_inosused((struct cg *)bp->b_data),
+	    ino % ump->um_fs->fs_ipg),
+	    ("softdep_setup_inofree: inode %ju not freed.", (uintmax_t)ino));
+	KASSERT(inodedep_lookup(mp, ino, 0, &inodedep) == 0,
+	    ("softdep_setup_inofree: ino %ju has existing inodedep %p",
+	    (uintmax_t)ino, inodedep));
 	if (wkhd) {
 		LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) {
 			if (wk->wk_type != D_JADDREF)
@@ -10980,7 +10978,8 @@ softdep_setup_blkfree(
 	struct buf *bp,
 	ufs2_daddr_t blkno,
 	int frags,
-	struct workhead *wkhd)
+	struct workhead *wkhd,
+	bool doingrecovery)
 {
 	struct bmsafemap *bmsafemap;
 	struct jnewblk *jnewblk;
@@ -11027,18 +11026,22 @@ softdep_setup_blkfree(
 			KASSERT(jnewblk->jn_state & GOINGAWAY,
 			    ("softdep_setup_blkfree: jnewblk not canceled."));
 #ifdef INVARIANTS
-			/*
-			 * Assert that this block is free in the bitmap
-			 * before we discard the jnewblk.
-			 */
-			cgp = (struct cg *)bp->b_data;
-			blksfree = cg_blksfree(cgp);
-			bno = dtogd(fs, jnewblk->jn_blkno);
-			for (i = jnewblk->jn_oldfrags;
-			    i < jnewblk->jn_frags; i++) {
-				if (isset(blksfree, bno + i))
-					continue;
-				panic("softdep_setup_blkfree: not free");
+			if (!doingrecovery && !ffs_fsfail_cleanup(ump, 0)) {
+				/*
+				 * Assert that this block is free in the
+				 * bitmap before we discard the jnewblk.
+				 */
+				cgp = (struct cg *)bp->b_data;
+				blksfree = cg_blksfree(cgp);
+				bno = dtogd(fs, jnewblk->jn_blkno);
+				for (i = jnewblk->jn_oldfrags;
+				    i < jnewblk->jn_frags; i++) {
+					if (isset(blksfree, bno + i))
+						continue;
+					panic("softdep_setup_blkfree: block "
+					    "%ju not freed.",
+					    (uintmax_t)jnewblk->jn_blkno);
+				}
 			}
 #endif
 			/*