A way to quick fix of "leaking lots of unreferenced inodes"
Ireneusz Pluta
ipluta at wp.pl
Sun Dec 7 10:17:18 UTC 2014
Hello,
I need to fix a server running the:
9.1-RELEASE FreeBSD 9.1-RELEASE #0 r243825: Tue Dec 4 09:23:10 UTC 2012
which suffers a lot from the bug fixed with this commit:
http://lists.freebsd.org/pipermail/svn-src-releng/2013-September/000114.html
The main and practically only purpose of this machine is running a PostgreSQL server with several
multi-hundred GB databases.
And yes, I currently use the "procedure" of restarting postgres, with umount -f
/mountpoint/of/pgsql/data, every few weeks, so that the leak does not accumulate too much over longer uptimes.
To avoid the risk of possible trouble with freebsd-update, and thus having the machine down for
maybe longer than the expected half hour to one hour, I am thinking of making the fix quicker just by applying the
- VOP_UNLOCK(nvp, 0);
+ vput(nvp);
patch to /usr/src/sys/ufs/ufs/ufs_vnops.c and rebuilding the kernel. But I want to make sure that
applying only this patch to 9.1-RELEASE, and ignoring all other changes within sys/ufs/ufs
made before this commit, is enough and safe for the filesystem.
Browsing the commit log of my local git clone of the FreeBSD repo between release/9.1.0 and this fix, I
can see there was one more, earlier change to ufs_vnops.c itself:
$ git log -U0 --oneline release/9.1.0..releng/9.2 sys/ufs/ufs/ufs_vnops.c
0ec41e1 MFS of 255104: MFC of 253998:
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 487477c..b70166d 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1274 +1274 @@ relock:
- VOP_UNLOCK(nvp, 0);
+ vput(nvp);
3d32639 MFC r248422: Remove negative name cache entry pointing to the target name, which could be
instantiated while tdvp was unlocked.
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 03c8bb0..487477c 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1564,0 +1565 @@ relock:
+ cache_purge_negative(tdvp);
and several more changes under the sys/ufs/ufs path, as listed at the end.
So, again, this is my question: can I just apply to my /usr/src/sys/ufs/ufs/ufs_vnops.c the change made
in http://lists.freebsd.org/pipermail/svn-src-releng/2013-September/000114.html, recompile, and not
worry about all the other stuff?
Thanks
Irek.
$ git log -U0 --oneline release/9.1.0..releng/9.2 sys/ufs/ufs | tee
0ec41e1 MFS of 255104: MFC of 253998:
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 487477c..b70166d 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1274 +1274 @@ relock:
- VOP_UNLOCK(nvp, 0);
+ vput(nvp);
a89175a Merge the second part of the unmapped I/O changes. This enables the infrastructure in the
block layer and UFS filesystem as well as a few drivers. The list of MFC revisions is long, so I
won't quote changelogs.
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index c590748..31a2ba8 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -123,0 +124 @@ void softdep_revert_rmdir(struct inode *, struct inode *);
+#define BA_UNMAPPED 0x00040000 /* Do not mmap resulted buffer. */
63c193a MFC of 248561:
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 35fe8fd..8d11e24 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -1388 +1388,2 @@ static int
-ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino)
+ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino,
+ struct vnode **dd_vp)
@@ -1390,0 +1392 @@ ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino)
+ struct vnode *ddvp;
@@ -1392,0 +1395 @@ ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino)
+ ASSERT_VOP_LOCKED(vp, "ufs_dir_dd_ino");
@@ -1394,0 +1398,13 @@ ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino)
+ /*
+ * First check to see if we have it in the name cache.
+ */
+ if ((ddvp = vn_dir_dd_ino(vp)) != NULL) {
+ KASSERT(ddvp->v_mount == vp->v_mount,
+ ("ufs_dir_dd_ino: Unexpected mount point crossing"));
+ *dd_ino = VTOI(ddvp)->i_number;
+ *dd_vp = ddvp;
+ return (0);
+ }
+ /*
+ * Have to read the directory.
+ */
@@ -1411,0 +1428 @@ ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino)
+ *dd_vp = NULL;
@@ -1436 +1453 @@ ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct u
- error = ufs_dir_dd_ino(vp, cred, &dd_ino);
+ error = ufs_dir_dd_ino(vp, cred, &dd_ino, &vp1);
@@ -1447,15 +1464,7 @@ ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct u
- error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT, &vp1);
- if (error != 0) {
- *wait_ino = dd_ino;
- break;
- }
- /* Recheck that ".." still points to vp1 after relock of vp */
- error = ufs_dir_dd_ino(vp, cred, &dd_ino);
- if (error != 0) {
- vput(vp1);
- break;
- }
- /* Redo the check of ".." if directory was reparented */
- if (dd_ino != VTOI(vp1)->i_number) {
- vput(vp1);
- continue;
+ if (vp1 == NULL) {
+ error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT,
+ &vp1);
+ if (error != 0) {
+ *wait_ino = dd_ino;
+ break;
+ }
@@ -1462,0 +1472,2 @@ ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct u
+ KASSERT(dd_ino == VTOI(vp1)->i_number,
+ ("directory %d reparented\n", VTOI(vp1)->i_number));
@@ -1469,0 +1481,2 @@ ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct u
+ if (vp1 != NULL)
+ vput(vp1);
3d32639 MFC r248422: Remove negative name cache entry pointing to the target name, which could be
instantiated while tdvp was unlocked.
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 03c8bb0..487477c 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1564,0 +1565 @@ relock:
+ cache_purge_negative(tdvp);
b89ace2 MFC r247388: Work around the hold of references to the struct dquot by the freeblk workitems
for some time at unmount.
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index c3789c3..88437c9 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -83 +83 @@ static int dqsync(struct vnode *, struct dquot *);
-static void dqflush(struct vnode *);
+static int dqflush(struct vnode *);
@@ -683,2 +683,6 @@ again:
- dqflush(qvp);
- /* Clear um_quotas before closing the quota vnode to prevent
+ error = dqflush(qvp);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Clear um_quotas before closing the quota vnode to prevent
@@ -1618 +1622 @@ out:
-static void
+static int
@@ -1622,0 +1627 @@ dqflush(struct vnode *vp)
+ int error;
@@ -1628,0 +1634 @@ dqflush(struct vnode *vp)
+ error = 0;
@@ -1636,3 +1642,5 @@ dqflush(struct vnode *vp)
- panic("dqflush: stray dquot");
- LIST_REMOVE(dq, dq_hash);
- dq->dq_ump = (struct ufsmount *)0;
+ error = EBUSY;
+ else {
+ LIST_REMOVE(dq, dq_hash);
+ dq->dq_ump = NULL;
+ }
@@ -1641,0 +1650 @@ dqflush(struct vnode *vp)
+ return (error);
3436e90 MFC r246562:
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 51f0197..25142dd 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -154,4 +153,0 @@ struct inode {
-#define MAXSYMLINKLEN(ip) \
- ((ip)->i_ump->um_fstype == UFS1) ? \
- ((NDADDR + NIADDR) * sizeof(ufs1_daddr_t)) : \
- ((NDADDR + NIADDR) * sizeof(ufs2_daddr_t))
1572df8 MFC r239359:
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 2b02000..51f0197 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -170 +169,0 @@ struct indir {
- int in_exists; /* Flag if the block exists. */
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index e0fb307..22887c8 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -215 +214,0 @@ ufs_bmaparray(vp, bn, bnp, nbp, runp, runb)
- ap->in_exists = 1;
@@ -360 +358,0 @@ ufs_getlbns(vp, bn, ap, nump)
- ap->in_exists = 0;
@@ -373 +370,0 @@ ufs_getlbns(vp, bn, ap, nump)
- ap->in_exists = 0;
a53e5a7 MFC r246299;
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 56ca058..35fe8fd 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -1435 +1434,0 @@ ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct u
- error = 0;
75f830b MFC r243245:
diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h
index 6447dce..b55d958 100644
--- a/sys/ufs/ufs/ufsmount.h
+++ b/sys/ufs/ufs/ufsmount.h
@@ -100,0 +101 @@ struct ufsmount {
+ int um_writesuspended; /* suspension in progress */
bb61831 MFC r242476: The r241025 fixed the case when a binary, executed from nullfs mount, was still
possible to open for write from the lower filesystem. There is a symmetric situation where the
binary could already has file descriptors opened for write, but it can be executed from the nullfs
overlay.
diff --git a/sys/ufs/ufs/ufs_extattr.c b/sys/ufs/ufs/ufs_extattr.c
index 777f385..51bef86 100644
--- a/sys/ufs/ufs/ufs_extattr.c
+++ b/sys/ufs/ufs/ufs_extattr.c
@@ -337 +337 @@ ufs_extattr_enable_with_open(struct ufsmount *ump, struct vnode *vp,
- vp->v_writecount++;
+ VOP_ADD_WRITECOUNT(vp, 1);
521315f MFC r244239: Fix a typo, resulting in the NULL pointer dereference.
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index d353167..c3789c3 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -1055 +1055 @@ again:
- MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
+ MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp);
More information about the freebsd-fs
mailing list