svn commit: r356119 - in head: share/man/man9 sys/kern sys/sys
Mateusz Guzik
mjg at FreeBSD.org
Fri Dec 27 11:19:58 UTC 2019
Author: mjg
Date: Fri Dec 27 11:19:57 2019
New Revision: 356119
URL: https://svnweb.freebsd.org/changeset/base/356119
Log:
Add read-mostly sleepable locks
To be used like rmlocks, except when sleeping for readers needs to be
allowed. See the manpage for more information.
Reviewed by: kib (previous version)
Differential Revision: https://reviews.freebsd.org/D22823
Modified:
head/share/man/man9/rmlock.9
head/sys/kern/kern_rmlock.c
head/sys/sys/_rmlock.h
head/sys/sys/rmlock.h
Modified: head/share/man/man9/rmlock.9
==============================================================================
--- head/share/man/man9/rmlock.9 Fri Dec 27 05:01:13 2019 (r356118)
+++ head/share/man/man9/rmlock.9 Fri Dec 27 11:19:57 2019 (r356119)
@@ -26,7 +26,7 @@
.\" $FreeBSD$
.\"
.\" Based on rwlock.9 man page
-.Dd November 11, 2017
+.Dd December 27, 2019
.Dt RMLOCK 9
.Os
.Sh NAME
@@ -43,7 +43,13 @@
.Nm rm_sleep ,
.Nm rm_assert ,
.Nm RM_SYSINIT ,
-.Nm RM_SYSINIT_FLAGS
+.Nm RM_SYSINIT_FLAGS ,
+.Nm rms_init ,
+.Nm rms_destroy ,
+.Nm rms_rlock ,
+.Nm rms_wlock ,
+.Nm rms_runlock ,
+.Nm rms_wunlock
.Nd kernel reader/writer lock optimized for read-mostly access patterns
.Sh SYNOPSIS
.In sys/param.h
@@ -77,6 +83,18 @@
.In sys/kernel.h
.Fn RM_SYSINIT "name" "struct rmlock *rm" "const char *desc"
.Fn RM_SYSINIT_FLAGS "name" "struct rmlock *rm" "const char *desc" "int flags"
+.Ft void
+.Fn rms_init "struct rmslock *rms" "const char *name"
+.Ft void
+.Fn rms_destroy "struct rmslock *rms"
+.Ft void
+.Fn rms_rlock "struct rmslock *rms"
+.Ft void
+.Fn rms_wlock "struct rmslock *rms"
+.Ft void
+.Fn rms_runlock "struct rmslock *rms"
+.Ft void
+.Fn rms_wunlock "struct rmslock *rms"
.Sh DESCRIPTION
Read-mostly locks allow shared access to protected data by multiple threads,
or exclusive access by a single thread.
@@ -113,22 +131,22 @@ Readers can recurse if the lock is initialized with th
option;
however, writers are never allowed to recurse.
.Pp
-Sleepable read-mostly locks are created by passing
+Sleeping for writers can be allowed by passing
.Dv RM_SLEEPABLE
to
.Fn rm_init_flags .
-Unlike normal read-mostly locks,
-sleepable read-mostly locks follow the same lock ordering rules as
+It changes lock ordering rules to the same as for
.Xr sx 9
locks.
-Sleepable read-mostly locks do not propagate priority to writers,
-but they do propagate priority to readers.
-Writers are permitted to sleep while holding a read-mostly lock,
-but readers are not.
-Unlike other sleepable locks such as
+They do not propagate priority to writers, but they do propagate priority to
+readers. Note that readers are not permitted to sleep regardless of the flag.
+.Pp
+Sleepable read-mostly locks (created with
+.Fn rms_init )
+allow sleeping for both readers and writers, but don't do priority propagation
+for either. They follow
.Xr sx 9
-locks,
-readers must use try operations on other sleepable locks to avoid sleeping.
+lock ordering.
.Ss Macros and Functions
.Bl -tag -width indent
.It Fn rm_init "struct rmlock *rm" "const char *name"
@@ -286,6 +304,43 @@ Assert that the current thread does not hold a recursi
.Fa rm .
.El
.El
+.Bl -tag -width indent
+.It Fn rms_init "struct rmslock *rms" "const char *name"
+Initialize the sleepable read-mostly lock
+.Fa rms .
+The
+.Fa name
+description is used as
+.Fa wmesg
+parameter to the
+.Xr msleep 9
+routine.
+This function must be called before any other operations on the lock.
+.It Fn rms_rlock "struct rmslock *rms"
+Lock
+.Fa rms
+as a reader.
+If any thread holds this lock exclusively, the current thread blocks.
+.It Fn rms_wlock "struct rmslock *rms"
+Lock
+.Fa rms
+as a writer.
+If the lock is already taken, the current thread blocks.
+The
+.Fn rms_wlock
+function cannot be called recursively.
+.It Fn rms_runlock "struct rmslock *rms"
+This function releases a shared lock previously acquired by
+.Fn rms_rlock .
+.It Fn rms_wunlock "struct rmslock *rms"
+This function releases an exclusive lock previously acquired by
+.Fn rms_wlock .
+.It Fn rms_destroy "struct rmslock *rms"
+This function destroys a lock previously initialized with
+.Fn rms_init .
+The
+.Fa rms
+lock must be unlocked.
+.El
.Sh SEE ALSO
.Xr locking 9 ,
.Xr mutex 9 ,
Modified: head/sys/kern/kern_rmlock.c
==============================================================================
--- head/sys/kern/kern_rmlock.c Fri Dec 27 05:01:13 2019 (r356118)
+++ head/sys/kern/kern_rmlock.c Fri Dec 27 11:19:57 2019 (r356119)
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/turnstile.h>
#include <sys/lock_profile.h>
#include <machine/cpu.h>
+#include <vm/uma.h>
#ifdef DDB
#include <ddb/ddb.h>
@@ -853,3 +854,241 @@ db_show_rm(const struct lock_object *lock)
lc->lc_ddb_show(&rm->rm_wlock_object);
}
#endif
+
+/*
+ * Read-mostly sleepable locks.
+ *
+ * These primitives allow both readers and writers to sleep. However, neither
+ * readers nor writers are tracked and subsequently there is no priority
+ * propagation.
+ *
+ * They are intended to be only used when write-locking is almost never needed
+ * (e.g., they can guard against unloading a kernel module) while read-locking
+ * happens all the time.
+ *
+ * Concurrent writers take turns taking the lock while going off cpu. If this is
+ * of concern for your usecase, this is not the right primitive.
+ *
+ * Neither rms_rlock nor rms_runlock use fences. Instead compiler barriers are
+ * inserted to prevent reordering of generated code. Execution ordering is
+ * provided with the use of an IPI handler.
+ */
+
+void
+rms_init(struct rmslock *rms, const char *name)
+{
+
+ rms->writers = 0;
+ rms->readers = 0;
+ mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW);
+ rms->readers_pcpu = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
+ rms->readers_influx = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
+}
+
+void
+rms_destroy(struct rmslock *rms)
+{
+
+ MPASS(rms->writers == 0);
+ MPASS(rms->readers == 0);
+ mtx_destroy(&rms->mtx);
+ uma_zfree_pcpu(pcpu_zone_int, rms->readers_pcpu);
+ uma_zfree_pcpu(pcpu_zone_int, rms->readers_influx);
+}
+
+static void __noinline
+rms_rlock_fallback(struct rmslock *rms)
+{
+
+ (*zpcpu_get(rms->readers_influx)) = 0;
+ critical_exit();
+
+ mtx_lock(&rms->mtx);
+ MPASS(*zpcpu_get(rms->readers_pcpu) == 0);
+ while (rms->writers > 0)
+ msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0);
+ (*zpcpu_get(rms->readers_pcpu))++;
+ mtx_unlock(&rms->mtx);
+}
+
+void
+rms_rlock(struct rmslock *rms)
+{
+ int *influx;
+
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
+
+ critical_enter();
+ influx = zpcpu_get(rms->readers_influx);
+ __compiler_membar();
+ *influx = 1;
+ __compiler_membar();
+ if (__predict_false(rms->writers > 0)) {
+ rms_rlock_fallback(rms);
+ return;
+ }
+ __compiler_membar();
+ (*zpcpu_get(rms->readers_pcpu))++;
+ __compiler_membar();
+ *influx = 0;
+ critical_exit();
+}
+
+static void __noinline
+rms_runlock_fallback(struct rmslock *rms)
+{
+
+ (*zpcpu_get(rms->readers_influx)) = 0;
+ critical_exit();
+
+ mtx_lock(&rms->mtx);
+ MPASS(*zpcpu_get(rms->readers_pcpu) == 0);
+ MPASS(rms->writers > 0);
+ MPASS(rms->readers > 0);
+ rms->readers--;
+ if (rms->readers == 0)
+ wakeup_one(&rms->writers);
+ mtx_unlock(&rms->mtx);
+}
+
+void
+rms_runlock(struct rmslock *rms)
+{
+ int *influx;
+
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
+
+ critical_enter();
+ influx = zpcpu_get(rms->readers_influx);
+ __compiler_membar();
+ *influx = 1;
+ __compiler_membar();
+ if (__predict_false(rms->writers > 0)) {
+ rms_runlock_fallback(rms);
+ return;
+ }
+ __compiler_membar();
+ (*zpcpu_get(rms->readers_pcpu))--;
+ __compiler_membar();
+ *influx = 0;
+ critical_exit();
+}
+
+struct rmslock_ipi {
+ struct rmslock *rms;
+ cpuset_t signal;
+};
+
+static void
+rms_wlock_IPI(void *arg)
+{
+ struct rmslock_ipi *rmsipi;
+ struct rmslock *rms;
+ int readers;
+
+ rmsipi = arg;
+ rms = rmsipi->rms;
+
+ if (*zpcpu_get(rms->readers_influx))
+ return;
+ readers = zpcpu_replace(rms->readers_pcpu, 0);
+ if (readers != 0)
+ atomic_add_int(&rms->readers, readers);
+ CPU_CLR_ATOMIC(curcpu, &rmsipi->signal);
+}
+
+static void
+rms_wlock_switch(struct rmslock *rms)
+{
+ struct rmslock_ipi rmsipi;
+ int *in_op;
+ int cpu;
+
+ MPASS(rms->readers == 0);
+ MPASS(rms->writers == 1);
+
+ rmsipi.rms = rms;
+
+ /*
+ * Publishes rms->writers. rlock and runlock will get this ordered
+ * via IPI in the worst case.
+ */
+ atomic_thread_fence_rel();
+
+ /*
+ * Collect reader counts from all CPUs using an IPI. The handler can
+ * find itself running while the interrupted CPU was doing either
+ * rlock or runlock in which case it will fail.
+ *
+ * Successful attempts clear the cpu id in the bitmap.
+ *
+ * In case of failure we observe all failing CPUs not executing there to
+ * determine when to make the next attempt. Note that threads having
+ * the var set have preemption disabled. Setting of readers_influx
+ * only uses compiler barriers making these loads unreliable, which is
+ * fine -- the IPI handler will always see the correct result.
+ *
+ * We retry until all counts are collected. Forward progress is
+ * guaranteed by that fact that the total number of threads which can
+ * be caught like this is finite and they all are going to block on
+ * their own.
+ */
+ CPU_COPY(&all_cpus, &rmsipi.signal);
+ for (;;) {
+ smp_rendezvous_cpus(
+ rmsipi.signal,
+ smp_no_rendezvous_barrier,
+ rms_wlock_IPI,
+ smp_no_rendezvous_barrier,
+ &rmsipi);
+
+ if (CPU_EMPTY(&rmsipi.signal))
+ break;
+
+ CPU_FOREACH(cpu) {
+ if (!CPU_ISSET(cpu, &rmsipi.signal))
+ continue;
+ in_op = zpcpu_get_cpu(rms->readers_influx, cpu);
+ while (atomic_load_int(in_op))
+ cpu_spinwait();
+ }
+ }
+}
+
+void
+rms_wlock(struct rmslock *rms)
+{
+
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
+
+ mtx_lock(&rms->mtx);
+ rms->writers++;
+ if (rms->writers > 1) {
+ msleep(&rms->writers, &rms->mtx, PUSER - 1 | PDROP, mtx_name(&rms->mtx), 0);
+ MPASS(rms->readers == 0);
+ return;
+ }
+
+ rms_wlock_switch(rms);
+
+ if (rms->readers > 0)
+ msleep(&rms->writers, &rms->mtx, PUSER - 1 | PDROP, mtx_name(&rms->mtx), 0);
+ else
+ mtx_unlock(&rms->mtx);
+ MPASS(rms->readers == 0);
+}
+
+void
+rms_wunlock(struct rmslock *rms)
+{
+
+ mtx_lock(&rms->mtx);
+ MPASS(rms->writers >= 1);
+ MPASS(rms->readers == 0);
+ rms->writers--;
+ if (rms->writers > 0)
+ wakeup_one(&rms->writers);
+ else
+ wakeup(&rms->readers);
+ mtx_unlock(&rms->mtx);
+}
Modified: head/sys/sys/_rmlock.h
==============================================================================
--- head/sys/sys/_rmlock.h Fri Dec 27 05:01:13 2019 (r356118)
+++ head/sys/sys/_rmlock.h Fri Dec 27 11:19:57 2019 (r356119)
@@ -68,4 +68,14 @@ struct rm_priotracker {
LIST_ENTRY(rm_priotracker) rmp_qentry;
};
+#include <sys/_mutex.h>
+
+struct rmslock {
+ struct mtx mtx;
+ int writers;
+ int readers;
+ int *readers_pcpu;
+ int *readers_influx;
+};
+
#endif /* !_SYS__RMLOCK_H_ */
Modified: head/sys/sys/rmlock.h
==============================================================================
--- head/sys/sys/rmlock.h Fri Dec 27 05:01:13 2019 (r356118)
+++ head/sys/sys/rmlock.h Fri Dec 27 11:19:57 2019 (r356119)
@@ -133,5 +133,12 @@ struct rm_args {
#define rm_assert(rm, what)
#endif
+void rms_init(struct rmslock *rms, const char *name);
+void rms_destroy(struct rmslock *rms);
+void rms_rlock(struct rmslock *rms);
+void rms_runlock(struct rmslock *rms);
+void rms_wlock(struct rmslock *rms);
+void rms_wunlock(struct rmslock *rms);
+
#endif /* _KERNEL */
#endif /* !_SYS_RMLOCK_H_ */
More information about the svn-src-all
mailing list