svn commit: r268852 - in vendor-sys/illumos/dist/uts/common/fs/zfs: . sys

Robert Watson rwatson at FreeBSD.org
Sat Jul 19 10:42:38 UTC 2014


On Fri, 18 Jul 2014, Xin LI wrote:

> Log:
>  5008 lock contention (rrw_exit) while running a read only load
>  Reviewed by: Matthew Ahrens <matthew.ahrens at delphix.com>
>  Reviewed by: George Wilson <george.wilson at delphix.com>
>  Reviewed by: Alex Reece <alex.reece at delphix.com>
>  Reviewed by: Christopher Siden <christopher.siden at delphix.com>
>  Reviewed by: Richard Yao <ryao at gentoo.org>
>  Reviewed by: Saso Kiselkov <skiselkov.ml at gmail.com>
>  Approved by: Garrett D'Amore <garrett at damore.org>
>
>  illumos/illumos-gate at c9030f6c93613fe30ee0c16f92b96da7816ac052

Is there an opportunity to use our own read-mostly lock implementation here? 
It should be substantially more scalable, has integration with WITNESS, proper 
priority propagation, etc.

Robert


>
> Modified:
>  vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c
>  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h
>  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h
>  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h
>  vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c
>  vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c
>  vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c	Fri Jul 18 18:05:09 2014	(r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c	Fri Jul 18 18:09:20 2014	(r268852)
> @@ -286,3 +286,91 @@ rrw_tsd_destroy(void *arg)
> 		    (void *)curthread, (void *)rn->rn_rrl);
> 	}
> }
> +
> +/*
> + * A reader-mostly lock implementation, tuning above reader-writer locks
> + * for hightly parallel read acquisitions, while pessimizing writes.
> + *
> + * The idea is to split single busy lock into array of locks, so that
> + * each reader can lock only one of them for read, depending on result
> + * of simple hash function.  That proportionally reduces lock congestion.
> + * Writer same time has to sequentially aquire write on all the locks.
> + * That makes write aquisition proportionally slower, but in places where
> + * it is used (filesystem unmount) performance is not critical.
> + *
> + * All the functions below are direct wrappers around functions above.
> + */
> +void
> +rrm_init(rrmlock_t *rrl, boolean_t track_all)
> +{
> +	int i;
> +
> +	for (i = 0; i < RRM_NUM_LOCKS; i++)
> +		rrw_init(&rrl->locks[i], track_all);
> +}
> +
> +void
> +rrm_destroy(rrmlock_t *rrl)
> +{
> +	int i;
> +
> +	for (i = 0; i < RRM_NUM_LOCKS; i++)
> +		rrw_destroy(&rrl->locks[i]);
> +}
> +
> +void
> +rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag)
> +{
> +	if (rw == RW_READER)
> +		rrm_enter_read(rrl, tag);
> +	else
> +		rrm_enter_write(rrl);
> +}
> +
> +/*
> + * This maps the current thread to a specific lock.  Note that the lock
> + * must be released by the same thread that acquired it.  We do this
> + * mapping by taking the thread pointer mod a prime number.  We examine
> + * only the low 32 bits of the thread pointer, because 32-bit division
> + * is faster than 64-bit division, and the high 32 bits have little
> + * entropy anyway.
> + */
> +#define	RRM_TD_LOCK()	(((uint32_t)(uintptr_t)(curthread)) % RRM_NUM_LOCKS)
> +
> +void
> +rrm_enter_read(rrmlock_t *rrl, void *tag)
> +{
> +	rrw_enter_read(&rrl->locks[RRM_TD_LOCK()], tag);
> +}
> +
> +void
> +rrm_enter_write(rrmlock_t *rrl)
> +{
> +	int i;
> +
> +	for (i = 0; i < RRM_NUM_LOCKS; i++)
> +		rrw_enter_write(&rrl->locks[i]);
> +}
> +
> +void
> +rrm_exit(rrmlock_t *rrl, void *tag)
> +{
> +	int i;
> +
> +	if (rrl->locks[0].rr_writer == curthread) {
> +		for (i = 0; i < RRM_NUM_LOCKS; i++)
> +			rrw_exit(&rrl->locks[i], tag);
> +	} else {
> +		rrw_exit(&rrl->locks[RRM_TD_LOCK()], tag);
> +	}
> +}
> +
> +boolean_t
> +rrm_held(rrmlock_t *rrl, krw_t rw)
> +{
> +	if (rw == RW_WRITER) {
> +		return (rrw_held(&rrl->locks[0], rw));
> +	} else {
> +		return (rrw_held(&rrl->locks[RRM_TD_LOCK()], rw));
> +	}
> +}
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h	Fri Jul 18 18:05:09 2014	(r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h	Fri Jul 18 18:09:20 2014	(r268852)
> @@ -80,6 +80,31 @@ void rrw_tsd_destroy(void *arg);
> #define	RRW_LOCK_HELD(x) \
> 	(rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
>
> +/*
> + * A reader-mostly lock implementation, tuning above reader-writer locks
> + * for hightly parallel read acquisitions, pessimizing write acquisitions.
> + *
> + * This should be a prime number.  See comment in rrwlock.c near
> + * RRM_TD_LOCK() for details.
> + */
> +#define	RRM_NUM_LOCKS		17
> +typedef struct rrmlock {
> +	rrwlock_t	locks[RRM_NUM_LOCKS];
> +} rrmlock_t;
> +
> +void rrm_init(rrmlock_t *rrl, boolean_t track_all);
> +void rrm_destroy(rrmlock_t *rrl);
> +void rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag);
> +void rrm_enter_read(rrmlock_t *rrl, void *tag);
> +void rrm_enter_write(rrmlock_t *rrl);
> +void rrm_exit(rrmlock_t *rrl, void *tag);
> +boolean_t rrm_held(rrmlock_t *rrl, krw_t rw);
> +
> +#define	RRM_READ_HELD(x)	rrm_held(x, RW_READER)
> +#define	RRM_WRITE_HELD(x)	rrm_held(x, RW_WRITER)
> +#define	RRM_LOCK_HELD(x) \
> +	(rrm_held(x, RW_WRITER) || rrm_held(x, RW_READER))
> +
> #ifdef	__cplusplus
> }
> #endif
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h	Fri Jul 18 18:05:09 2014	(r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h	Fri Jul 18 18:09:20 2014	(r268852)
> @@ -64,7 +64,7 @@ struct zfsvfs {
> 	int		z_norm;		/* normalization flags */
> 	boolean_t	z_atime;	/* enable atimes mount option */
> 	boolean_t	z_unmounted;	/* unmounted */
> -	rrwlock_t	z_teardown_lock;
> +	rrmlock_t	z_teardown_lock;
> 	krwlock_t	z_teardown_inactive_lock;
> 	list_t		z_all_znodes;	/* all vnodes in the fs */
> 	kmutex_t	z_znodes_lock;	/* lock for z_all_znodes */
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h	Fri Jul 18 18:05:09 2014	(r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h	Fri Jul 18 18:09:20 2014	(r268852)
> @@ -238,7 +238,7 @@ typedef struct znode {
> /* Called on entry to each ZFS vnode and vfs operation  */
> #define	ZFS_ENTER(zfsvfs) \
> 	{ \
> -		rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
> +		rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
> 		if ((zfsvfs)->z_unmounted) { \
> 			ZFS_EXIT(zfsvfs); \
> 			return (EIO); \
> @@ -246,7 +246,7 @@ typedef struct znode {
> 	}
>
> /* Must be called before exiting the vop */
> -#define	ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
> +#define	ZFS_EXIT(zfsvfs) rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)
>
> /* Verifies the znode is valid */
> #define	ZFS_VERIFY_ZP(zp) \
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c	Fri Jul 18 18:05:09 2014	(r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c	Fri Jul 18 18:09:20 2014	(r268852)
> @@ -1420,7 +1420,7 @@ zfsvfs_hold(const char *name, void *tag,
> 	if (getzfsvfs(name, zfvp) != 0)
> 		error = zfsvfs_create(name, zfvp);
> 	if (error == 0) {
> -		rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
> +		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
> 		    RW_READER, tag);
> 		if ((*zfvp)->z_unmounted) {
> 			/*
> @@ -1428,7 +1428,7 @@ zfsvfs_hold(const char *name, void *tag,
> 			 * thread should be just about to disassociate the
> 			 * objset from the zfsvfs.
> 			 */
> -			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
> +			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
> 			return (SET_ERROR(EBUSY));
> 		}
> 	}
> @@ -1438,7 +1438,7 @@ zfsvfs_hold(const char *name, void *tag,
> static void
> zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
> {
> -	rrw_exit(&zfsvfs->z_teardown_lock, tag);
> +	rrm_exit(&zfsvfs->z_teardown_lock, tag);
>
> 	if (zfsvfs->z_vfs) {
> 		VFS_RELE(zfsvfs->z_vfs);
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c	Fri Jul 18 18:05:09 2014	(r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c	Fri Jul 18 18:09:20 2014	(r268852)
> @@ -1004,7 +1004,7 @@ zfsvfs_create(const char *osname, zfsvfs
> 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
> 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
> 	    offsetof(znode_t, z_link_node));
> -	rrw_init(&zfsvfs->z_teardown_lock, B_FALSE);
> +	rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
> 	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
> 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
> 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
> @@ -1119,7 +1119,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
> 	mutex_destroy(&zfsvfs->z_znodes_lock);
> 	mutex_destroy(&zfsvfs->z_lock);
> 	list_destroy(&zfsvfs->z_all_znodes);
> -	rrw_destroy(&zfsvfs->z_teardown_lock);
> +	rrm_destroy(&zfsvfs->z_teardown_lock);
> 	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
> 	rw_destroy(&zfsvfs->z_fuid_lock);
> 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
> @@ -1784,7 +1784,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
> {
> 	znode_t	*zp;
>
> -	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
> +	rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
>
> 	if (!unmounting) {
> 		/*
> @@ -1814,7 +1814,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
> 	 */
> 	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
> 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
> -		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
> +		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
> 		return (SET_ERROR(EIO));
> 	}
>
> @@ -1841,7 +1841,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
> 	 */
> 	if (unmounting) {
> 		zfsvfs->z_unmounted = B_TRUE;
> -		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
> +		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
> 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
> 	}
>
> @@ -2073,7 +2073,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const ch
> 	znode_t *zp;
> 	uint64_t sa_obj = 0;
>
> -	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
> +	ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
> 	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
>
> 	/*
> @@ -2129,7 +2129,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const ch
> bail:
> 	/* release the VOPs */
> 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
> -	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
> +	rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
>
> 	if (err) {
> 		/*
>
> Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c
> ==============================================================================
> --- vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c	Fri Jul 18 18:05:09 2014	(r268851)
> +++ vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c	Fri Jul 18 18:09:20 2014	(r268852)
> @@ -276,7 +276,7 @@ zfs_znode_move(void *buf, void *newbuf,
> 	 * can safely ensure that the filesystem is not and will not be
> 	 * unmounted. The next statement is equivalent to ZFS_ENTER().
> 	 */
> -	rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
> +	rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
> 	if (zfsvfs->z_unmounted) {
> 		ZFS_EXIT(zfsvfs);
> 		rw_exit(&zfsvfs_lock);
>
>


More information about the svn-src-vendor mailing list