git: 345bfec10905 - main - Provide cache coherency between getnextinode() and ginode()

From: Kirk McKusick <mckusick_at_FreeBSD.org>
Date: Wed, 24 Aug 2022 06:50:07 UTC
The branch main has been updated by mckusick:

URL: https://cgit.FreeBSD.org/src/commit/?id=345bfec1090571ea219263c99eed29de27e143c8

commit 345bfec1090571ea219263c99eed29de27e143c8
Author:     Kirk McKusick <mckusick@FreeBSD.org>
AuthorDate: 2022-08-24 06:48:40 +0000
Commit:     Kirk McKusick <mckusick@FreeBSD.org>
CommitDate: 2022-08-24 06:48:40 +0000

    Provide cache coherency between getnextinode() and ginode()
    
    The fsck_ffs(8) utility has two subsystems for reading and writing
    inodes. The getnextinode() interface is used in Pass 1 (and Pass
    1b if needed) to sequentially walk through all the inodes in the
    filesystem. The ginode() interface is used to read and write
    individual inodes. Pass 1 uses a mix of both interfaces. This
    change ensures that ginode() returns a pointer to the inode in the
    cache maintained by getnextinode() when that interface holds the
    requested inode so that all modifications to the inode are made in
    a single place and are all written to the disk together.
    
    Reported by:  Peter Holm
    Tested by:    Peter Holm
    Sponsored by: The FreeBSD Foundation
---
 sbin/fsck_ffs/inode.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c
index 8a93ad092a4a..ae7124784415 100644
--- a/sbin/fsck_ffs/inode.c
+++ b/sbin/fsck_ffs/inode.c
@@ -415,21 +415,35 @@ chkrange(ufs2_daddr_t blk, int cnt)
 
 /*
  * General purpose interface for reading inodes.
+ *
+ * firstinum and lastinum track contents of getnextino() cache (below).
  */
+static ino_t firstinum, lastinum;
+static struct bufarea inobuf;
+
 void
 ginode(ino_t inumber, struct inode *ip)
 {
 	ufs2_daddr_t iblk;
+	ino_t numinodes;
 
 	if (inumber < UFS_ROOTINO || inumber > maxino)
 		errx(EEXIT, "bad inode number %ju to ginode",
 		    (uintmax_t)inumber);
 	ip->i_number = inumber;
-	if (icachebp != NULL &&
+	if (inumber >= firstinum && inumber < lastinum) {
+		/* contents in getnextino() cache */
+		ip->i_bp = &inobuf;
+		inobuf.b_refcnt++;
+		inobuf.b_index = firstinum;
+		numinodes = lastinum - firstinum;
+	} else if (icachebp != NULL &&
 	    inumber >= icachebp->b_index &&
 	    inumber < icachebp->b_index + INOPB(&sblock)) {
 		/* take an additional reference for the returned inode */
 		icachebp->b_refcnt++;
+		ip->i_bp = icachebp;
+		numinodes = INOPB(&sblock);
 	} else {
 		iblk = ino_to_fsba(&sblock, inumber);
 		/* release our cache-hold reference on old icachebp */
@@ -445,15 +459,16 @@ ginode(ino_t inumber, struct inode *ip)
 		/* take a cache-hold reference on new icachebp */
 		icachebp->b_refcnt++;
 		icachebp->b_index = rounddown(inumber, INOPB(&sblock));
+		ip->i_bp = icachebp;
+		numinodes = INOPB(&sblock);
 	}
-	ip->i_bp = icachebp;
 	if (sblock.fs_magic == FS_UFS1_MAGIC) {
 		ip->i_dp = (union dinode *)
-		    &icachebp->b_un.b_dinode1[inumber % INOPB(&sblock)];
+		    &ip->i_bp->b_un.b_dinode1[inumber % numinodes];
 		return;
 	}
 	ip->i_dp = (union dinode *)
-	    &icachebp->b_un.b_dinode2[inumber % INOPB(&sblock)];
+	    &ip->i_bp->b_un.b_dinode2[inumber % numinodes];
 	if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) {
 		pwarn("INODE CHECK-HASH FAILED");
 		prtinode(ip);
@@ -487,9 +502,8 @@ irelse(struct inode *ip)
  * Special purpose version of ginode used to optimize first pass
  * over all the inodes in numerical order.
  */
-static ino_t nextino, lastinum, lastvalidinum;
+static ino_t nextinum, lastvalidinum;
 static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize;
-static struct bufarea inobuf;
 
 union dinode *
 getnextinode(ino_t inumber, int rebuildcg)
@@ -502,11 +516,12 @@ getnextinode(ino_t inumber, int rebuildcg)
 	struct inode ip;
 	static caddr_t nextinop;
 
-	if (inumber != nextino++ || inumber > lastvalidinum)
+	if (inumber != nextinum++ || inumber > lastvalidinum)
 		errx(EEXIT, "bad inode number %ju to nextinode",
 		    (uintmax_t)inumber);
 	if (inumber >= lastinum) {
 		readcount++;
+		firstinum = lastinum;
 		blk = ino_to_fsba(&sblock, lastinum);
 		if (readcount % readpercg == 0) {
 			size = partialsize;
@@ -520,6 +535,9 @@ getnextinode(ino_t inumber, int rebuildcg)
 		 * If getblk encounters an error, it will already have zeroed
 		 * out the buffer, so we do not need to do so here.
 		 */
+		if (inobuf.b_refcnt != 0)
+			pfatal("Non-zero getnextinode() ref count %d\n",
+			    inobuf.b_refcnt);
 		flush(fswritefd, &inobuf);
 		getblk(&inobuf, blk, size);
 		nextinop = inobuf.b_un.b_buf;
@@ -604,7 +622,7 @@ setinodebuf(int cg, ino_t inosused)
 
 	inum = cg * sblock.fs_ipg;
 	lastvalidinum = inum + inosused - 1;
-	nextino = inum;
+	nextinum = inum;
 	lastinum = inum;
 	readcount = 0;
 	/* Flush old contents in case they have been updated */
@@ -674,6 +692,7 @@ freeinodebuf(void)
 	if (inobuf.b_un.b_buf != NULL)
 		free((char *)inobuf.b_un.b_buf);
 	inobuf.b_un.b_buf = NULL;
+	firstinum = lastinum = 0;
 }
 
 /*