git: 35a301555bff - main - Increase UFS/FFS maximum link count from 32767 to 65530.

From: Kirk McKusick <mckusick_at_FreeBSD.org>
Date: Sun, 03 Dec 2023 20:40:33 UTC
The branch main has been updated by mckusick:

URL: https://cgit.FreeBSD.org/src/commit/?id=35a301555bff2ac27a727c10641b7efb3f162988

commit 35a301555bff2ac27a727c10641b7efb3f162988
Author:     Kirk McKusick <mckusick@FreeBSD.org>
AuthorDate: 2023-12-03 20:36:42 +0000
Commit:     Kirk McKusick <mckusick@FreeBSD.org>
CommitDate: 2023-12-03 20:40:29 +0000

    Increase UFS/FFS maximum link count from 32767 to 65530.
    
    The link count for a UFS/FFS inode is stored in a signed 16-bit
    integer. Thus the maximum link count has been 32767.
    
    This limit has been recently hit by the poudriere build system when
    doing a ports build as it needs one directory per port and the
    number of ports recently passed 32767.
    
    A long-term solution would be to use one of the spare 32-bit fields
    in the inode to store the link count. However, the UFS1 format does
    not have a spare and adding the spare in UFS2 would make it hard
    to make it compatible when running on older kernels that use the
    original link count field. So this patch uses the much simpler
    approach of changing the existing link count field from a signed
    16-bit value to an unsigned 16-bit value. It has the fewest lines
    of code changes. The only thing that changes is the type in the
    dinode and inode structures and the definition of UFS_LINK_MAX (set
    to 65500 in this patch, leaving a few values spare for special use). It
    has the added benefit that it works with both UFS1 and UFS2.
    
    It allows easy backward compatibility. Indeed it is backward
    compatibility that is the primary reason to go with this approach.
    If a filesystem with the new organization is mounted on an older
    kernel, it still needs to work. Thus if we move the new link count
    to a new field, we still need to maintain the old link count as
    best as possible even when running on a kernel that knows about the
    larger link counts. And we would have to carry this overhead for
    the indefinite future.
    
    If we have a new link-count field, we will have to add a new
    filesystem flag to indicate that we are running with larger link
    counts. We will also need to add one of the new-feature flags
    to say that we have larger link counts. Older kernels clear the
    new-feature flags that they do not know about, so when a filesystem
    is used on an older kernel and then moved back to a newer one, the
    newer one will know that the new link counts have not been maintained
    and that it will be necessary to run a full fsck on the filesystem
    to correct the link counts before it can be mounted.
    
    With this change, older kernels will generally work with the bigger
    counts. While an older kernel will not itself allow the link count to
    exceed 32767, it will have no problem working with inodes that have a link
    count greater than 32767. Since it tests that i_nlink <= UFS_LINK_MAX,
    counts that are bigger than 32767 will appear negative, so will
    still pass the test. Of course, if they ever drop below 32767, they
    will no longer be able to exceed 32767. The one issue is that if the
    link count ever exceeds 65535, it will wrap to zero and the
    older kernel will be none the wiser. But this corner case is likely
    to be very rare since these kernels and the applications running
    on them do not expect to be able to get link counts over 32767. And
    over time, the use of new filesystems on older kernels will become
    rarer and rarer.
    
    Reported-by: Mark Millard running poudriere on the ports tree
    Reviewed-by: kib, olce.freebsd_certner.fr
    Tested-by:   Peter Holm, Mark Millard
    MFC-after:   2 weeks
    Differential Revision: https://reviews.freebsd.org/D42767
---
 sys/ufs/ffs/ffs_alloc.c   |  2 +-
 sys/ufs/ffs/ffs_softdep.c |  4 ++--
 sys/ufs/ufs/dinode.h      |  6 +++---
 sys/ufs/ufs/inode.h       | 10 ++++++++--
 sys/ufs/ufs/ufs_lookup.c  |  8 ++++----
 sys/ufs/ufs/ufs_vnops.c   | 32 ++++++++++++++++----------------
 6 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 38e6d6a41ec0..713dcf1ca97a 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -3330,7 +3330,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 			break;
 		ip = VTOI(vp);
 		ip->i_nlink += cmd.size;
-		DIP_SET(ip, i_nlink, ip->i_nlink);
+		DIP_SET_NLINK(ip, ip->i_nlink);
 		ip->i_effnlink += cmd.size;
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_MODIFIED);
 		error = ffs_update(vp, 1);
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 2afafb9380ba..5c8e2b6cde81 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -10046,7 +10046,7 @@ handle_workitem_remove(struct dirrem *dirrem, int flags)
 		KASSERT(ip->i_nlink >= 0, ("handle_workitem_remove: file ino "
 		    "%ju negative i_nlink %d", (intmax_t)ip->i_number,
 		    ip->i_nlink));
-		DIP_SET(ip, i_nlink, ip->i_nlink);
+		DIP_SET_NLINK(ip, ip->i_nlink);
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 		if (ip->i_nlink < ip->i_effnlink)
 			panic("handle_workitem_remove: bad file delta");
@@ -10069,7 +10069,7 @@ handle_workitem_remove(struct dirrem *dirrem, int flags)
 	ip->i_nlink -= 2;
 	KASSERT(ip->i_nlink >= 0, ("handle_workitem_remove: directory ino "
 	    "%ju negative i_nlink %d", (intmax_t)ip->i_number, ip->i_nlink));
-	DIP_SET(ip, i_nlink, ip->i_nlink);
+	DIP_SET_NLINK(ip, ip->i_nlink);
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	if (ip->i_nlink < ip->i_effnlink)
 		panic("handle_workitem_remove: bad dir delta");
diff --git a/sys/ufs/ufs/dinode.h b/sys/ufs/ufs/dinode.h
index 0819362b1def..673e6f2555f1 100644
--- a/sys/ufs/ufs/dinode.h
+++ b/sys/ufs/ufs/dinode.h
@@ -123,7 +123,7 @@ typedef int64_t ufs_time_t;
 
 struct ufs2_dinode {
 	uint16_t	di_mode;	/*   0: IFMT, permissions; see below. */
-	int16_t		di_nlink;	/*   2: File link count. */
+	uint16_t	di_nlink;	/*   2: File link count. */
 	uint32_t	di_uid;		/*   4: File owner. */
 	uint32_t	di_gid;		/*   8: File group. */
 	uint32_t	di_blksize;	/*  12: Inode blocksize. */
@@ -178,7 +178,7 @@ struct ufs2_dinode {
  */
 struct ufs1_dinode {
 	uint16_t	di_mode;	/*   0: IFMT, permissions; see below. */
-	int16_t		di_nlink;	/*   2: File link count. */
+	uint16_t	di_nlink;	/*   2: File link count. */
 	union {
 		uint32_t di_freelink;	/*   4: SUJ: Next unlinked inode. */
 		uint32_t di_dirdepth;	/*   4: IFDIR: depth from root dir */
@@ -208,6 +208,6 @@ struct ufs1_dinode {
 	uint64_t	di_modrev;	/* 120: i_modrev for NFSv4 */
 };
 
-#define	UFS_LINK_MAX	32767
+#define	UFS_LINK_MAX	65500	/* leave a few spare for special values */
 
 #endif /* _UFS_UFS_DINODE_H_ */
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 4f456d319ad0..85d3c4898318 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -95,7 +95,7 @@ struct inode {
 
 	ino_t	  i_number;	/* The identity of the inode. */
 	uint32_t  i_flag;	/* flags, see below */
-	int	  i_effnlink;	/* i_nlink when I/O completes */
+	int32_t	  i_effnlink;	/* i_nlink when I/O completes */
 
 	/*
 	 * Side effects; used during directory lookup.
@@ -131,7 +131,7 @@ struct inode {
 	uint32_t i_flags;	/* Status flags (chflags). */
 	uint32_t i_uid;		/* File owner. */
 	uint32_t i_gid;		/* File group. */
-	int16_t  i_nlink;	/* File link count. */
+	int32_t  i_nlink;	/* File link count. */
 	uint16_t i_mode;	/* IFMT, permissions; see below. */
 };
 /*
@@ -242,6 +242,12 @@ I_IS_UFS2(const struct inode *ip)
 	else							\
 		(ip)->i_din2->d##field = (val); 		\
 	} while (0)
+#define	DIP_SET_NLINK(ip, val) do {					\
+	KASSERT(ip->i_nlink >= 0, ("%s:%d %s(): setting negative "	\
+	    "nlink value %d for inode %jd\n", __FILE__, __LINE__,	\
+	    __FUNCTION__, (ip)->i_nlink, (ip)->i_number));		\
+	DIP_SET(ip, i_nlink, val);					\
+	} while (0)
 
 #define	IS_SNAPSHOT(ip)		((ip)->i_flags & SF_SNAPSHOT)
 #define	IS_UFS(vp)		((vp)->v_data != NULL)
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 68955488ff0e..2d6c79970c96 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -1121,7 +1121,7 @@ ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir)
 			softdep_setup_unlink(dp, ip);
 		} else {
 			ip->i_nlink--;
-			DIP_SET(ip, i_nlink, ip->i_nlink);
+			DIP_SET_NLINK(ip, ip->i_nlink);
 			UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 		}
 	}
@@ -1137,7 +1137,7 @@ ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir)
 				softdep_change_linkcnt(ip);
 			} else {
 				ip->i_nlink++;
-				DIP_SET(ip, i_nlink, ip->i_nlink);
+				DIP_SET_NLINK(ip, ip->i_nlink);
 				UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 			}
 		}
@@ -1241,7 +1241,7 @@ ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype,
 		softdep_setup_unlink(dp, oip);
 	} else {
 		oip->i_nlink--;
-		DIP_SET(oip, i_nlink, oip->i_nlink);
+		DIP_SET_NLINK(oip, oip->i_nlink);
 		UFS_INODE_SET_FLAG(oip, IN_CHANGE);
 	}
 
@@ -1258,7 +1258,7 @@ ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype,
 			softdep_change_linkcnt(oip);
 		} else {
 			oip->i_nlink++;
-			DIP_SET(oip, i_nlink, oip->i_nlink);
+			DIP_SET_NLINK(oip, oip->i_nlink);
 			UFS_INODE_SET_FLAG(oip, IN_CHANGE);
 		}
 		return (error);
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 88772131a0ab..3bfa2019739a 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1131,7 +1131,7 @@ ufs_link(
 
 	ip->i_effnlink++;
 	ip->i_nlink++;
-	DIP_SET(ip, i_nlink, ip->i_nlink);
+	DIP_SET_NLINK(ip, ip->i_nlink);
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	if (DOINGSOFTDEP(vp))
 		softdep_setup_link(VTOI(tdvp), ip);
@@ -1144,7 +1144,7 @@ ufs_link(
 	if (error) {
 		ip->i_effnlink--;
 		ip->i_nlink--;
-		DIP_SET(ip, i_nlink, ip->i_nlink);
+		DIP_SET_NLINK(ip, ip->i_nlink);
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 		if (DOINGSOFTDEP(vp))
 			softdep_revert_link(VTOI(tdvp), ip);
@@ -1526,7 +1526,7 @@ relock:
 	 */
 	fip->i_effnlink++;
 	fip->i_nlink++;
-	DIP_SET(fip, i_nlink, fip->i_nlink);
+	DIP_SET_NLINK(fip, fip->i_nlink);
 	UFS_INODE_SET_FLAG(fip, IN_CHANGE);
 	if (DOINGSOFTDEP(fvp))
 		softdep_setup_link(tdp, fip);
@@ -1555,7 +1555,7 @@ relock:
 			if (tdp->i_nlink >= UFS_LINK_MAX) {
 				fip->i_effnlink--;
 				fip->i_nlink--;
-				DIP_SET(fip, i_nlink, fip->i_nlink);
+				DIP_SET_NLINK(fip, fip->i_nlink);
 				UFS_INODE_SET_FLAG(fip, IN_CHANGE);
 				if (DOINGSOFTDEP(fvp))
 					softdep_revert_link(tdp, fip);
@@ -1678,11 +1678,11 @@ relock:
 			 */
 			if (!newparent) {
 				tdp->i_nlink--;
-				DIP_SET(tdp, i_nlink, tdp->i_nlink);
+				DIP_SET_NLINK(tdp, tdp->i_nlink);
 				UFS_INODE_SET_FLAG(tdp, IN_CHANGE);
 			}
 			tip->i_nlink--;
-			DIP_SET(tip, i_nlink, tip->i_nlink);
+			DIP_SET_NLINK(tip, tip->i_nlink);
 			UFS_INODE_SET_FLAG(tip, IN_CHANGE);
 		}
 	}
@@ -1717,7 +1717,7 @@ relock:
 		if (tip == NULL) {
 			tdp->i_effnlink++;
 			tdp->i_nlink++;
-			DIP_SET(tdp, i_nlink, tdp->i_nlink);
+			DIP_SET_NLINK(tdp, tdp->i_nlink);
 			UFS_INODE_SET_FLAG(tdp, IN_CHANGE);
 			if (DOINGSOFTDEP(tdvp))
 				softdep_setup_dotdot_link(tdp, fip);
@@ -1780,7 +1780,7 @@ unlockout:
 bad:
 	fip->i_effnlink--;
 	fip->i_nlink--;
-	DIP_SET(fip, i_nlink, fip->i_nlink);
+	DIP_SET_NLINK(fip, fip->i_nlink);
 	UFS_INODE_SET_FLAG(fip, IN_CHANGE);
 	if (DOINGSOFTDEP(fvp))
 		softdep_revert_link(tdp, fip);
@@ -2120,7 +2120,7 @@ ufs_mkdir(
 	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
 	ip->i_effnlink = 2;
 	ip->i_nlink = 2;
-	DIP_SET(ip, i_nlink, 2);
+	DIP_SET_NLINK(ip, 2);
 	DIP_SET(ip, i_dirdepth, DIP(dp,i_dirdepth) + 1);
 
 	if (cnp->cn_flags & ISWHITEOUT) {
@@ -2135,7 +2135,7 @@ ufs_mkdir(
 	 */
 	dp->i_effnlink++;
 	dp->i_nlink++;
-	DIP_SET(dp, i_nlink, dp->i_nlink);
+	DIP_SET_NLINK(dp, dp->i_nlink);
 	UFS_INODE_SET_FLAG(dp, IN_CHANGE);
 	if (DOINGSOFTDEP(dvp))
 		softdep_setup_mkdir(dp, ip);
@@ -2226,7 +2226,7 @@ bad:
 	} else {
 		dp->i_effnlink--;
 		dp->i_nlink--;
-		DIP_SET(dp, i_nlink, dp->i_nlink);
+		DIP_SET_NLINK(dp, dp->i_nlink);
 		UFS_INODE_SET_FLAG(dp, IN_CHANGE);
 		/*
 		 * No need to do an explicit VOP_TRUNCATE here, vrele will
@@ -2234,7 +2234,7 @@ bad:
 		 */
 		ip->i_effnlink = 0;
 		ip->i_nlink = 0;
-		DIP_SET(ip, i_nlink, 0);
+		DIP_SET_NLINK(ip, 0);
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 		if (DOINGSOFTDEP(tvp))
 			softdep_revert_mkdir(dp, ip);
@@ -2331,11 +2331,11 @@ ufs_rmdir(
 	 */
 	if (!DOINGSOFTDEP(vp)) {
 		dp->i_nlink--;
-		DIP_SET(dp, i_nlink, dp->i_nlink);
+		DIP_SET_NLINK(dp, dp->i_nlink);
 		UFS_INODE_SET_FLAG(dp, IN_CHANGE);
 		error = UFS_UPDATE(dvp, 0);
 		ip->i_nlink--;
-		DIP_SET(ip, i_nlink, ip->i_nlink);
+		DIP_SET_NLINK(ip, ip->i_nlink);
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	}
 	cache_vop_rmdir(dvp, vp);
@@ -2872,7 +2872,7 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
 	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
 	ip->i_effnlink = 1;
 	ip->i_nlink = 1;
-	DIP_SET(ip, i_nlink, 1);
+	DIP_SET_NLINK(ip, 1);
 	if (DOINGSOFTDEP(tvp))
 		softdep_setup_create(VTOI(dvp), ip);
 	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
@@ -2928,7 +2928,7 @@ bad:
 	 */
 	ip->i_effnlink = 0;
 	ip->i_nlink = 0;
-	DIP_SET(ip, i_nlink, 0);
+	DIP_SET_NLINK(ip, 0);
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	if (DOINGSOFTDEP(tvp))
 		softdep_revert_create(VTOI(dvp), ip);