Nullfs leaks i-nodes
Konstantin Belousov
kostikbel at gmail.com
Wed May 8 09:13:28 UTC 2013
On Tue, May 07, 2013 at 08:30:06AM +0200, G??ran L??wkrantz wrote:
> I created a PR, kern/178238, on this but would like to know if anyone has
> any ideas or patches?
>
> Have updated the system where I see this to FreeBSD 9.1-STABLE #0 r250229
> and still have the problem.
The patch below should fix the issue for you, at least it did so in my
limited testing.
What is does:
1. When inactivating a nullfs vnode, check if the lower vnode is
unlinked, and reclaim upper vnode if so. [This fixes your case].
2. Besides a callback to the upper filesystems for the lower vnode
reclaimation, it also calls the upper fs for lower vnode unlink.
This allows nullfs to purge cached vnodes for the unlinked lower.
[This fixes an opposite case, when the vnode is removed from the
lower mount, but upper aliases prevent the vnode from being
recycled].
3. Fix a wart which existed from the introduction of the nullfs caching,
do not unlock lower vnode in the nullfs_reclaim_lowervp(). It should
be completely innocent, but now it is also formally safe.
4. Fix vnode reference leak in nullfs_reclaim_lowervp().
Please note that the patch is basically not tested, I only verified your
scenario and a mirror of it as described in item 2.
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
index 4f37020..a624be6 100644
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -53,8 +53,11 @@ struct null_node {
LIST_ENTRY(null_node) null_hash; /* Hash list */
struct vnode *null_lowervp; /* VREFed once */
struct vnode *null_vnode; /* Back pointer */
+ u_int null_flags;
};
+#define NULLV_NOUNLOCK 0x0001
+
#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
#define NULLTOV(xp) ((xp)->null_vnode)
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index 02932bd..544c358 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -65,7 +65,6 @@ static vfs_statfs_t nullfs_statfs;
static vfs_unmount_t nullfs_unmount;
static vfs_vget_t nullfs_vget;
static vfs_extattrctl_t nullfs_extattrctl;
-static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp;
/*
* Mount null layer
@@ -391,8 +390,22 @@ nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp)
vp = null_hashget(mp, lowervp);
if (vp == NULL)
return;
+ VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
vgone(vp);
- vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
+ vput(vp);
+}
+
+static void
+nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
+{
+ struct vnode *vp;
+
+ vp = null_hashget(mp, lowervp);
+ if (vp == NULL || vp->v_usecount > 1)
+ return;
+ VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
+ vgone(vp);
+ vput(vp);
}
static struct vfsops null_vfsops = {
@@ -408,6 +421,7 @@ static struct vfsops null_vfsops = {
.vfs_unmount = nullfs_unmount,
.vfs_vget = nullfs_vget,
.vfs_reclaim_lowervp = nullfs_reclaim_lowervp,
+ .vfs_unlink_lowervp = nullfs_unlink_lowervp,
};
VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL);
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index f59865f..3c41124 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -692,18 +692,21 @@ null_unlock(struct vop_unlock_args *ap)
static int
null_inactive(struct vop_inactive_args *ap __unused)
{
- struct vnode *vp;
+ struct vnode *vp, *lvp;
struct mount *mp;
struct null_mount *xmp;
vp = ap->a_vp;
+ lvp = NULLVPTOLOWERVP(vp);
mp = vp->v_mount;
xmp = MOUNTTONULLMOUNT(mp);
- if ((xmp->nullm_flags & NULLM_CACHE) == 0) {
+ if ((xmp->nullm_flags & NULLM_CACHE) == 0 ||
+ (lvp->v_vflag & VV_NOSYNC) != 0) {
/*
* If this is the last reference and caching of the
- * nullfs vnodes is not enabled, then free up the
- * vnode so as not to tie up the lower vnodes.
+ * nullfs vnodes is not enabled, or the lower vnode is
+ * deleted, then free up the vnode so as not to tie up
+ * the lower vnodes.
*/
vp->v_object = NULL;
vrecycle(vp);
@@ -748,7 +751,10 @@ null_reclaim(struct vop_reclaim_args *ap)
*/
if (vp->v_writecount > 0)
VOP_ADD_WRITECOUNT(lowervp, -1);
- vput(lowervp);
+ if ((xp->null_flags & NULLV_NOUNLOCK) != 0)
+ vunref(lowervp);
+ else
+ vput(lowervp);
free(xp, M_NULLFSNODE);
return (0);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index de118f7..988a114 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2700,19 +2700,20 @@ vgone(struct vnode *vp)
}
static void
-vgonel_reclaim_lowervp_vfs(struct mount *mp __unused,
+notify_lowervp_vfs_dummy(struct mount *mp __unused,
struct vnode *lowervp __unused)
{
}
/*
- * Notify upper mounts about reclaimed vnode.
+ * Notify upper mounts about reclaimed or unlinked vnode.
*/
-static void
-vgonel_reclaim_lowervp(struct vnode *vp)
+void
+vfs_notify_upper(struct vnode *vp, int event)
{
static struct vfsops vgonel_vfsops = {
- .vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs
+ .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy,
+ .vfs_unlink_lowervp = notify_lowervp_vfs_dummy,
};
struct mount *mp, *ump, *mmp;
@@ -2736,7 +2737,17 @@ vgonel_reclaim_lowervp(struct vnode *vp)
}
TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link);
MNT_IUNLOCK(mp);
- VFS_RECLAIM_LOWERVP(ump, vp);
+ switch (event) {
+ case VFS_NOTIFY_UPPER_RECLAIM:
+ VFS_RECLAIM_LOWERVP(ump, vp);
+ break;
+ case VFS_NOTIFY_UPPER_UNLINK:
+ VFS_UNLINK_LOWERVP(ump, vp);
+ break;
+ default:
+ KASSERT(0, ("invalid event %d", event));
+ break;
+ }
MNT_ILOCK(mp);
ump = TAILQ_NEXT(mmp, mnt_upper_link);
TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link);
@@ -2783,7 +2794,7 @@ vgonel(struct vnode *vp)
active = vp->v_usecount;
oweinact = (vp->v_iflag & VI_OWEINACT);
VI_UNLOCK(vp);
- vgonel_reclaim_lowervp(vp);
+ vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM);
/*
* Clean out any buffers associated with the vnode.
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 29cb7bd..a004ea0 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1846,6 +1846,7 @@ restart:
if (error)
goto out;
#endif
+ vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
@@ -3825,6 +3826,7 @@ restart:
return (error);
goto restart;
}
+ vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
vn_finished_write(mp);
out:
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index a9c86f6..a953dae 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -608,7 +608,7 @@ typedef int vfs_mount_t(struct mount *mp);
typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
struct sysctl_req *req);
typedef void vfs_susp_clean_t(struct mount *mp);
-typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode *lowervp);
+typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp);
struct vfsops {
vfs_mount_t *vfs_mount;
@@ -626,7 +626,8 @@ struct vfsops {
vfs_extattrctl_t *vfs_extattrctl;
vfs_sysctl_t *vfs_sysctl;
vfs_susp_clean_t *vfs_susp_clean;
- vfs_reclaim_lowervp_t *vfs_reclaim_lowervp;
+ vfs_notify_lowervp_t *vfs_reclaim_lowervp;
+ vfs_notify_lowervp_t *vfs_unlink_lowervp;
};
vfs_statfs_t __vfs_statfs;
@@ -747,6 +748,14 @@ vfs_statfs_t __vfs_statfs;
} \
} while (0)
+#define VFS_UNLINK_LOWERVP(MP, VP) do { \
+ if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \
+ VFS_PROLOGUE(MP); \
+ (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \
+ VFS_EPILOGUE(MP); \
+ } \
+} while (0)
+
#define VFS_KNOTE_LOCKED(vp, hint) do \
{ \
if (((vp)->v_vflag & VV_NOKNOTE) == 0) \
@@ -759,6 +768,9 @@ vfs_statfs_t __vfs_statfs;
VN_KNOTE((vp), (hint), 0); \
} while (0)
+#define VFS_NOTIFY_UPPER_RECLAIM 1
+#define VFS_NOTIFY_UPPER_UNLINK 2
+
#include <sys/module.h>
/*
@@ -840,6 +852,7 @@ int vfs_modevent(module_t, int, void *);
void vfs_mount_error(struct mount *, const char *, ...);
void vfs_mountroot(void); /* mount our root filesystem */
void vfs_mountedfrom(struct mount *, const char *from);
+void vfs_notify_upper(struct vnode *, int);
void vfs_oexport_conv(const struct oexport_args *oexp,
struct export_args *exp);
void vfs_ref(struct mount *);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 834 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/freebsd-stable/attachments/20130508/90510c6a/attachment.sig>
More information about the freebsd-stable
mailing list