Re: git: ff1ae3b3639d - main - rangelocks: restore caching of the single rl entry in the struct thread

From: Mark Johnston <markj@freebsd.org>
Date: Thu, 29 Aug 2024 20:04:10 UTC
On Tue, Aug 06, 2024 at 04:06:39AM +0000, Konstantin Belousov wrote:
> The branch main has been updated by kib:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=ff1ae3b3639d39a6485cfc655bf565cd04b9caa6
> 
> commit ff1ae3b3639d39a6485cfc655bf565cd04b9caa6
> Author:     Konstantin Belousov <kib@FreeBSD.org>
> AuthorDate: 2023-08-23 23:29:50 +0000
> Commit:     Konstantin Belousov <kib@FreeBSD.org>
> CommitDate: 2024-08-06 04:05:58 +0000
> 
>     rangelocks: restore caching of the single rl entry in the struct thread
>     
>     Reviewed by:    markj, Olivier Certner <olce.freebsd@certner.fr>
>     Tested by:      pho
>     Sponsored by:   The FreeBSD Foundation
>     Differential revision:  https://reviews.freebsd.org/D41787
> ---
>  sys/kern/kern_rangelock.c | 33 +++++++++++++++++++++++++++------
>  sys/kern/kern_thread.c    |  1 +
>  sys/sys/rangelock.h       |  1 +
>  3 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/sys/kern/kern_rangelock.c b/sys/kern/kern_rangelock.c
> index 355b2125dd9b..2f16569b19aa 100644
> --- a/sys/kern/kern_rangelock.c
> +++ b/sys/kern/kern_rangelock.c
> @@ -82,8 +82,15 @@ static struct rl_q_entry *
>  rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
>  {
>  	struct rl_q_entry *e;
> -
> -	e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
> +	struct thread *td;
> +
> +	td = curthread;
> +	if (td->td_rlqe != NULL) {
> +		e = td->td_rlqe;
> +		td->td_rlqe = NULL;
> +	} else {
> +		e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
> +	}
>  	e->rl_q_next = NULL;
>  	e->rl_q_free = NULL;
>  	e->rl_q_start = start;
> @@ -95,6 +102,12 @@ rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
>  	return (e);
>  }
>  
> +void
> +rangelock_entry_free(struct rl_q_entry *e)
> +{
> +	uma_zfree_smr(rl_entry_zone, e);
> +}
> +
>  void
>  rangelock_init(struct rangelock *lock)
>  {
> @@ -106,6 +119,7 @@ void
>  rangelock_destroy(struct rangelock *lock)
>  {
>  	struct rl_q_entry *e, *ep;
> +	struct thread *td;
>  
>  	MPASS(!lock->sleepers);
>  	for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head);
> @@ -386,8 +400,10 @@ rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
>      vm_ooffset_t end, int locktype)
>  {
>  	struct rl_q_entry *e, *free, *x, *xp;
> +	struct thread *td;
>  	enum RL_INSERT_RES res;
>  
> +	td = curthread;
>  	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
>  		free = NULL;
>  		e = rlqentry_alloc(start, end, locktype);
> @@ -401,10 +417,15 @@ rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
>  			e = NULL;
>  		}
>  		for (x = free; x != NULL; x = xp) {
> -		  MPASS(!rl_e_is_marked(x));
> -		  xp = x->rl_q_free;
> -		  MPASS(!rl_e_is_marked(xp));
> -		  uma_zfree_smr(rl_entry_zone, x);
> +			MPASS(!rl_e_is_marked(x));
> +			xp = x->rl_q_free;
> +			MPASS(!rl_e_is_marked(xp));
> +			if (td->td_rlqe == NULL) {
> +				smr_synchronize(rl_smr);

I had the impression that this was a rare case, but empirically it is
not.  As far as I can see, this smr_synchronize() call happens every
time an entry is freed, which could be very frequent.  smr_synchronize()
bumps the global sequence counter and blocks until all CPUs have had a
chance to observe the new value, so it is quite expensive.
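
If I'm reading sys/sys/smr.h right, smr_synchronize() amounts to
roughly the following (paraphrasing from memory, not the literal
implementation):

	smr_wait(smr, smr_advance(smr));

i.e. a global write-sequence bump followed by a wait for every CPU's
read sequence to catch up, so the cost grows with the number of CPUs.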

I haven't tried benchmarking this yet, but pwrite3 from will-it-scale
should be a good candidate.

Why do we maintain this per-thread cache at all?  IMO it would make
more sense to unconditionally free the entry here.  Or perhaps I'm
missing something.
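
Concretely, I mean restoring the unconditional free in this loop, as it
was before this commit (untested sketch):

	for (x = free; x != NULL; x = xp) {
		MPASS(!rl_e_is_marked(x));
		xp = x->rl_q_free;
		MPASS(!rl_e_is_marked(xp));
		/* Hand the entry straight back to the SMR zone. */
		uma_zfree_smr(rl_entry_zone, x);
	}

That keeps smr_synchronize() out of the free path entirely and lets the
SMR zone defer reuse of the entry.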

> +				td->td_rlqe = x;
> +			} else {
> +				uma_zfree_smr(rl_entry_zone, x);
> +			}
>  		}
>  	}
>  	return (e);
> diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
> index c951e7297c89..9c3694feb945 100644
> --- a/sys/kern/kern_thread.c
> +++ b/sys/kern/kern_thread.c
> @@ -480,6 +480,7 @@ thread_fini(void *mem, int size)
>  	EVENTHANDLER_DIRECT_INVOKE(thread_fini, td);
>  	turnstile_free(td->td_turnstile);
>  	sleepq_free(td->td_sleepqueue);
> +	rangelock_entry_free(td->td_rlqe);
>  	umtx_thread_fini(td);
>  	MPASS(td->td_sel == NULL);
>  }
> diff --git a/sys/sys/rangelock.h b/sys/sys/rangelock.h
> index 310371bef879..60f394b67677 100644
> --- a/sys/sys/rangelock.h
> +++ b/sys/sys/rangelock.h
> @@ -65,6 +65,7 @@ void	*rangelock_wlock(struct rangelock *lock, vm_ooffset_t start,
>      vm_ooffset_t end);
>  void	*rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start,
>      vm_ooffset_t end);
> +void	 rangelock_entry_free(struct rl_q_entry *e);
>  #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
>  void	_rangelock_cookie_assert(void *cookie, int what, const char *file,
>      int line);