Nullfs leaks i-nodes

Goran Lowkrantz goran.lowkrantz at ismobile.com
Thu May 9 12:11:55 UTC 2013


--On Wednesday, 08 May, 2013 12:13 PM +0300 Konstantin Belousov 
<kostikbel at gmail.com> wrote:

> On Tue, May 07, 2013 at 08:30:06AM +0200, G??ran L??wkrantz wrote:
>> I created a PR, kern/178238, on this but would like to know if anyone
>> has  any ideas or patches?
>>
>> Have updated the system where I see this to FreeBSD 9.1-STABLE #0
>> r250229  and still have the problem.
>
> The patch below should fix the issue for you, at least it did so in my
> limited testing.
>
> What is does:
> 1. When inactivating a nullfs vnode, check if the lower vnode is
>    unlinked, and reclaim upper vnode if so. [This fixes your case].
>
> 2. Besides a callback to the upper filesystems for the lower vnode
>    reclaimation, it also calls the upper fs for lower vnode unlink.
>    This allows nullfs to purge cached vnodes for the unlinked lower.
>    [This fixes an opposite case, when the vnode is removed from the
>    lower mount, but upper aliases prevent the vnode from being
>    recycled].
>
> 3. Fix a wart which existed from the introduction of the nullfs caching,
>    do not unlock lower vnode in the nullfs_reclaim_lowervp().  It should
>    be completely innocent, but now it is also formally safe.
>
> 4. Fix vnode reference leak in nullfs_reclaim_lowervp().
>
> Please note that the patch is basically not tested, I only verified your
> scenario and a mirror of it as described in item 2.
>
> diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
> index 4f37020..a624be6 100644
> --- a/sys/fs/nullfs/null.h
> +++ b/sys/fs/nullfs/null.h
> @@ -53,8 +53,11 @@ struct null_node {
>  	LIST_ENTRY(null_node)	null_hash;	/* Hash list */
>  	struct vnode	        *null_lowervp;	/* VREFed once */
>  	struct vnode		*null_vnode;	/* Back pointer */
> +	u_int			null_flags;
>  };
>
> +#define	NULLV_NOUNLOCK	0x0001
> +
>  #define	MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
>  #define	VTONULL(vp) ((struct null_node *)(vp)->v_data)
>  #define	NULLTOV(xp) ((xp)->null_vnode)
> diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
> index 02932bd..544c358 100644
> --- a/sys/fs/nullfs/null_vfsops.c
> +++ b/sys/fs/nullfs/null_vfsops.c
> @@ -65,7 +65,6 @@ static vfs_statfs_t	nullfs_statfs;
>  static vfs_unmount_t	nullfs_unmount;
>  static vfs_vget_t	nullfs_vget;
>  static vfs_extattrctl_t	nullfs_extattrctl;
> -static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp;
>
>  /*
>   * Mount null layer
> @@ -391,8 +390,22 @@ nullfs_reclaim_lowervp(struct mount *mp, struct
> vnode *lowervp)  	vp = null_hashget(mp, lowervp);
>  	if (vp == NULL)
>  		return;
> +	VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
>  	vgone(vp);
> -	vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
> +	vput(vp);
> +}
> +
> +static void
> +nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
> +{
> +	struct vnode *vp;
> +
> +	vp = null_hashget(mp, lowervp);
> +	if (vp == NULL || vp->v_usecount > 1)
> +		return;
> +	VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
> +	vgone(vp);
> +	vput(vp);
>  }
>
>  static struct vfsops null_vfsops = {
> @@ -408,6 +421,7 @@ static struct vfsops null_vfsops = {
>  	.vfs_unmount =		nullfs_unmount,
>  	.vfs_vget =		nullfs_vget,
>  	.vfs_reclaim_lowervp =	nullfs_reclaim_lowervp,
> +	.vfs_unlink_lowervp =	nullfs_unlink_lowervp,
>  };
>
>  VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL);
> diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
> index f59865f..3c41124 100644
> --- a/sys/fs/nullfs/null_vnops.c
> +++ b/sys/fs/nullfs/null_vnops.c
> @@ -692,18 +692,21 @@ null_unlock(struct vop_unlock_args *ap)
>  static int
>  null_inactive(struct vop_inactive_args *ap __unused)
>  {
> -	struct vnode *vp;
> +	struct vnode *vp, *lvp;
>  	struct mount *mp;
>  	struct null_mount *xmp;
>
>  	vp = ap->a_vp;
> +	lvp = NULLVPTOLOWERVP(vp);
>  	mp = vp->v_mount;
>  	xmp = MOUNTTONULLMOUNT(mp);
> -	if ((xmp->nullm_flags & NULLM_CACHE) == 0) {
> +	if ((xmp->nullm_flags & NULLM_CACHE) == 0 ||
> +	    (lvp->v_vflag & VV_NOSYNC) != 0) {
>  		/*
>  		 * If this is the last reference and caching of the
> -		 * nullfs vnodes is not enabled, then free up the
> -		 * vnode so as not to tie up the lower vnodes.
> +		 * nullfs vnodes is not enabled, or the lower vnode is
> +		 * deleted, then free up the vnode so as not to tie up
> +		 * the lower vnodes.
>  		 */
>  		vp->v_object = NULL;
>  		vrecycle(vp);
> @@ -748,7 +751,10 @@ null_reclaim(struct vop_reclaim_args *ap)
>  	 */
>  	if (vp->v_writecount > 0)
>  		VOP_ADD_WRITECOUNT(lowervp, -1);
> -	vput(lowervp);
> +	if ((xp->null_flags & NULLV_NOUNLOCK) != 0)
> +		vunref(lowervp);
> +	else
> +		vput(lowervp);
>  	free(xp, M_NULLFSNODE);
>
>  	return (0);
> diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
> index de118f7..988a114 100644
> --- a/sys/kern/vfs_subr.c
> +++ b/sys/kern/vfs_subr.c
> @@ -2700,19 +2700,20 @@ vgone(struct vnode *vp)
>  }
>
>  static void
> -vgonel_reclaim_lowervp_vfs(struct mount *mp __unused,
> +notify_lowervp_vfs_dummy(struct mount *mp __unused,
>      struct vnode *lowervp __unused)
>  {
>  }
>
>  /*
> - * Notify upper mounts about reclaimed vnode.
> + * Notify upper mounts about reclaimed or unlinked vnode.
>   */
> -static void
> -vgonel_reclaim_lowervp(struct vnode *vp)
> +void
> +vfs_notify_upper(struct vnode *vp, int event)
>  {
>  	static struct vfsops vgonel_vfsops = {
> -		.vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs
> +		.vfs_reclaim_lowervp = notify_lowervp_vfs_dummy,
> +		.vfs_unlink_lowervp = notify_lowervp_vfs_dummy,
>  	};
>  	struct mount *mp, *ump, *mmp;
>
> @@ -2736,7 +2737,17 @@ vgonel_reclaim_lowervp(struct vnode *vp)
>  		}
>  		TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link);
>  		MNT_IUNLOCK(mp);
> -		VFS_RECLAIM_LOWERVP(ump, vp);
> +		switch (event) {
> +		case VFS_NOTIFY_UPPER_RECLAIM:
> +			VFS_RECLAIM_LOWERVP(ump, vp);
> +			break;
> +		case VFS_NOTIFY_UPPER_UNLINK:
> +			VFS_UNLINK_LOWERVP(ump, vp);
> +			break;
> +		default:
> +			KASSERT(0, ("invalid event %d", event));
> +			break;
> +		}
>  		MNT_ILOCK(mp);
>  		ump = TAILQ_NEXT(mmp, mnt_upper_link);
>  		TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link);
> @@ -2783,7 +2794,7 @@ vgonel(struct vnode *vp)
>  	active = vp->v_usecount;
>  	oweinact = (vp->v_iflag & VI_OWEINACT);
>  	VI_UNLOCK(vp);
> -	vgonel_reclaim_lowervp(vp);
> +	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM);
>
>  	/*
>  	 * Clean out any buffers associated with the vnode.
> diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
> index 29cb7bd..a004ea0 100644
> --- a/sys/kern/vfs_syscalls.c
> +++ b/sys/kern/vfs_syscalls.c
> @@ -1846,6 +1846,7 @@ restart:
>  		if (error)
>  			goto out;
>  #endif
> +		vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
>  		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
>  #ifdef MAC
>  out:
> @@ -3825,6 +3826,7 @@ restart:
>  			return (error);
>  		goto restart;
>  	}
> +	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
>  	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
>  	vn_finished_write(mp);
>  out:
> diff --git a/sys/sys/mount.h b/sys/sys/mount.h
> index a9c86f6..a953dae 100644
> --- a/sys/sys/mount.h
> +++ b/sys/sys/mount.h
> @@ -608,7 +608,7 @@ typedef	int vfs_mount_t(struct mount *mp);
>  typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
>  		    struct sysctl_req *req);
>  typedef void vfs_susp_clean_t(struct mount *mp);
> -typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode
> *lowervp); +typedef void vfs_notify_lowervp_t(struct mount *mp, struct
> vnode *lowervp);
>  struct vfsops {
>  	vfs_mount_t		*vfs_mount;
> @@ -626,7 +626,8 @@ struct vfsops {
>  	vfs_extattrctl_t	*vfs_extattrctl;
>  	vfs_sysctl_t		*vfs_sysctl;
>  	vfs_susp_clean_t	*vfs_susp_clean;
> -	vfs_reclaim_lowervp_t	*vfs_reclaim_lowervp;
> +	vfs_notify_lowervp_t	*vfs_reclaim_lowervp;
> +	vfs_notify_lowervp_t	*vfs_unlink_lowervp;
>  };
>
>  vfs_statfs_t	__vfs_statfs;
> @@ -747,6 +748,14 @@ vfs_statfs_t	__vfs_statfs;
>  	}								\
>  } while (0)
>
> +#define	VFS_UNLINK_LOWERVP(MP, VP) do {					\
> +	if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) {		\
> +		VFS_PROLOGUE(MP);					\
> +		(*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP));	\
> +		VFS_EPILOGUE(MP);					\
> +	}								\
> +} while (0)
> +
>  #define VFS_KNOTE_LOCKED(vp, hint) do					\
>  {									\
>  	if (((vp)->v_vflag & VV_NOKNOTE) == 0)				\
> @@ -759,6 +768,9 @@ vfs_statfs_t	__vfs_statfs;
>  		VN_KNOTE((vp), (hint), 0);				\
>  } while (0)
>
> +#define	VFS_NOTIFY_UPPER_RECLAIM	1
> +#define	VFS_NOTIFY_UPPER_UNLINK		2
> +
>  #include <sys/module.h>
>
>  /*
> @@ -840,6 +852,7 @@ int	vfs_modevent(module_t, int, void *);
>  void	vfs_mount_error(struct mount *, const char *, ...);
>  void	vfs_mountroot(void);			/* mount our root filesystem */
>  void	vfs_mountedfrom(struct mount *, const char *from);
> +void	vfs_notify_upper(struct vnode *, int);
>  void	vfs_oexport_conv(const struct oexport_args *oexp,
>  	    struct export_args *exp);
>  void	vfs_ref(struct mount *);
I assume this is CURRENT? Tried on STABLE but got this:
cc1: warnings being treated as errors
/usr/src/sys/kern/vfs_subr.c: In function 'vfs_notify_upper':
/usr/src/sys/kern/vfs_subr.c:2801: warning: implicit declaration of 
function 'VFS_PROLOGUE'
/usr/src/sys/kern/vfs_subr.c:2801: warning: nested extern declaration of 
'VFS_PROLOGUE' [-Wnested-externs]
/usr/src/sys/kern/vfs_subr.c:2801: warning: implicit declaration of 
function 'VFS_EPILOGUE'
/usr/src/sys/kern/vfs_subr.c:2801: warning: nested extern declaration of 
'VFS_EPILOGUE' [-Wnested-externs]
*** [vfs_subr.o] Error code 1

/glz





More information about the freebsd-stable mailing list