git: 5badbeeaf061 - main - Re-implement rangelocks part 2

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Tue, 06 Aug 2024 04:06:36 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=5badbeeaf0614568d37b2466d0f52676ff08a049

commit 5badbeeaf0614568d37b2466d0f52676ff08a049
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2023-08-18 17:32:01 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2024-08-06 04:05:58 +0000

    Re-implement rangelocks part 2
    
    Allow read locks to overlap.
    
    Reviewed by:    markj, Olivier Certner <olce.freebsd@certner.fr>
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    Differential revision:  https://reviews.freebsd.org/D41787
---
 sys/kern/kern_rangelock.c | 232 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 169 insertions(+), 63 deletions(-)
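
A minimal sketch (not part of the commit) of the semantics the change
permits, assuming a rangelock rl initialized elsewhere with
rangelock_init():

	void *c1, *c2;

	c1 = rangelock_rlock(&rl, 0, 4096);
	c2 = rangelock_rlock(&rl, 2048, 8192);	/* overlaps c1, but both
						   are readers, so this no
						   longer blocks */
	rangelock_unlock(&rl, c2);
	rangelock_unlock(&rl, c1);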

diff --git a/sys/kern/kern_rangelock.c b/sys/kern/kern_rangelock.c
index 2ed26db49f19..9c6b7fb871f9 100644
--- a/sys/kern/kern_rangelock.c
+++ b/sys/kern/kern_rangelock.c
@@ -121,11 +121,28 @@ rl_e_is_marked(const struct rl_q_entry *e)
 	return (((uintptr_t)e & 1) != 0);
 }
 
+static struct rl_q_entry *
+rl_e_unmark_unchecked(const struct rl_q_entry *e)
+{
+	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
+}
+
 static struct rl_q_entry *
 rl_e_unmark(const struct rl_q_entry *e)
 {
 	MPASS(rl_e_is_marked(e));
-	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
+	return (rl_e_unmark_unchecked(e));
+}
+
+static void
+rl_e_mark(struct rl_q_entry *e)
+{
+#if defined(INVARIANTS) && defined(__LP64__)
+	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
+	MPASS(r == 0);
+#else
+	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
+#endif
 }
 
 static struct rl_q_entry *
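
The helpers above formalize the low-bit marking scheme: rl_q_entry is
pointer-aligned, so bit 0 of rl_q_next can flag an entry as logically
deleted without destroying the successor link.  A self-contained
user-space sketch (not part of the commit) of the round trip:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	struct node { struct node *next; };

	int
	main(void)
	{
		struct node n2 = { NULL };
		struct node n1 = { &n2 };
		uintptr_t *slot = (uintptr_t *)&n1.next;

		assert((*slot & 1) == 0);	/* aligned, bit 0 starts clear */
		*slot |= 1;			/* rl_e_mark (atomic in the kernel) */
		assert((*slot & 1) != 0);	/* rl_e_is_marked */
		assert((struct node *)(*slot & ~(uintptr_t)1) == &n2);
						/* rl_e_unmark recovers the link */
		printf("round trip ok\n");
		return (0);
	}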
@@ -140,42 +157,49 @@ rl_e_is_rlock(const struct rl_q_entry *e)
 	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
 }
 
-void
-rangelock_unlock(struct rangelock *lock, void *cookie)
+static void
+rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
 {
-	struct rl_q_entry *e;
-
-	e = cookie;
 	MPASS(lock != NULL && e != NULL);
 	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
 	MPASS(e->rl_q_owner == curthread);
 
-	sleepq_lock(&lock->sleepers);
-#ifdef INVARIANTS
-	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
-	MPASS(r == 0);
-#else
-	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
-#endif
+	rl_e_mark(e);
 	lock->sleepers = false;
 	sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
+}
+
+void
+rangelock_unlock(struct rangelock *lock, void *cookie)
+{
+	sleepq_lock(&lock->sleepers);
+	rangelock_unlock_int(lock, cookie);
 	sleepq_release(&lock->sleepers);
 }
 
 /*
- * result: -1 if e1 before e2
- *          1 if e1 after e2
- *          0 if e1 and e2 overlap
+ * result: -1 if e1 before e2, or both locks are readers and e1
+ *		starts before or at e2
+ *          1 if e1 after e2, or both locks are readers and e1
+ *		starts after e2
+ *          0 if e1 and e2 overlap and at least one lock is writer
  */
 static int
 rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
 {
+	bool rds;
+
 	if (e1 == NULL)
 		return (1);
-	if (e1->rl_q_start >= e2->rl_q_end)
-		return (1);
 	if (e2->rl_q_start >= e1->rl_q_end)
 		return (-1);
+	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
+	if (e2->rl_q_start >= e1->rl_q_start && rds)
+		return (-1);
+	if (e1->rl_q_start >= e2->rl_q_end)
+		return (1);
+	if (e1->rl_q_start >= e2->rl_q_start && rds)
+		return (1);
 	return (0);
 }
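
This comparison is the core of the relaxation: overlap now yields a
conflict (0) only when a writer is involved, while two readers are
simply ordered by their start offsets.  A user-space rendition (not
part of the commit) with hypothetical ranges:

	#include <stdbool.h>
	#include <stdio.h>

	struct ent { unsigned long start, end; bool read; };

	static int
	cmp(const struct ent *e1, const struct ent *e2)	/* mirrors rl_e_compare */
	{
		bool rds;

		if (e1 == NULL)
			return (1);
		if (e2->start >= e1->end)
			return (-1);
		rds = e1->read && e2->read;
		if (e2->start >= e1->start && rds)
			return (-1);
		if (e1->start >= e2->end)
			return (1);
		if (e1->start >= e2->start && rds)
			return (1);
		return (0);
	}

	int
	main(void)
	{
		struct ent r1 = { 0, 100, true }, r2 = { 50, 150, true };
		struct ent w = { 50, 150, false };

		printf("%d\n", cmp(&r1, &r2));	/* -1: overlapping readers coexist */
		printf("%d\n", cmp(&r1, &w));	/*  0: reader vs. writer conflicts */
		return (0);
	}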
 
@@ -199,7 +223,95 @@ rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
 	    (uintptr_t)new) != 0);
 }
 
-static bool
+enum RL_INSERT_RES {
+	RL_TRYLOCK_FAILED,
+	RL_LOCK_SUCCESS,
+	RL_LOCK_RETRY,
+};
+
+static enum RL_INSERT_RES
+rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
+    struct rl_q_entry **free)
+{
+	struct rl_q_entry *cur, *next, **prev;
+
+	prev = &e->rl_q_next;
+	cur = rl_q_load(prev);
+	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
+	for (;;) {
+		if (cur == NULL || cur->rl_q_start > e->rl_q_end)
+			return (RL_LOCK_SUCCESS);
+		next = rl_q_load(&cur->rl_q_next);
+		if (rl_e_is_marked(next)) {
+			next = rl_e_unmark(next);
+			if (rl_q_cas(prev, cur, next)) {
+				cur->rl_q_free = *free;
+				*free = cur;
+			}
+			cur = next;
+			continue;
+		}
+		if (rl_e_is_rlock(cur)) {
+			prev = &cur->rl_q_next;
+			cur = rl_e_unmark_unchecked(rl_q_load(prev));
+			continue;
+		}
+		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
+			sleepq_lock(&lock->sleepers);
+			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
+				sleepq_release(&lock->sleepers);
+				continue;
+			}
+			rangelock_unlock_int(lock, e);
+			if (trylock) {
+				sleepq_release(&lock->sleepers);
+				return (RL_TRYLOCK_FAILED);
+			}
+			rl_insert_sleep(lock);
+			return (RL_LOCK_RETRY);
+		}
+	}
+}
+
+static enum RL_INSERT_RES
+rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
+    bool trylock, struct rl_q_entry **free)
+{
+	struct rl_q_entry *cur, *next, **prev;
+
+	prev = &lock->head;
+	cur = rl_q_load(prev);
+	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
+	for (;;) {
+		if (cur == e)
+			return (RL_LOCK_SUCCESS);
+		next = rl_q_load(&cur->rl_q_next);
+		if (rl_e_is_marked(next)) {
+			next = rl_e_unmark(next);
+			if (rl_q_cas(prev, cur, next)) {
+				cur->rl_q_free = *free;
+				*free = cur;
+			}
+			cur = next;
+			continue;
+		}
+		if (cur->rl_q_end <= e->rl_q_start) {
+			prev = &cur->rl_q_next;
+			cur = rl_e_unmark_unchecked(rl_q_load(prev));
+			continue;
+		}
+		sleepq_lock(&lock->sleepers);
+		rangelock_unlock_int(lock, e);
+		if (trylock) {
+			sleepq_release(&lock->sleepers);
+			return (RL_TRYLOCK_FAILED);
+		}
+		rl_insert_sleep(lock);
+		return (RL_LOCK_RETRY);
+	}
+}
+
+static enum RL_INSERT_RES
 rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
     struct rl_q_entry **free)
 {
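
Both validators share the same helping step when they meet a marked
successor: splice it out with a CAS and stash it on a private free
list.  A simplified user-space sketch (not part of the commit), with
C11 atomics standing in for rl_q_load()/rl_q_cas():

	#include <stdatomic.h>
	#include <stdint.h>

	struct qent {
		_Atomic uintptr_t next;	/* low bit set: logically deleted */
		struct qent *free;	/* private chain for deferred freeing */
	};

	/* Try to unlink a logically deleted 'cur'; losing the CAS to
	 * another walker is fine, the entry is gone either way. */
	static struct qent *
	help_unlink(_Atomic uintptr_t *prev, struct qent *cur,
	    struct qent **freep)
	{
		uintptr_t nextv = atomic_load(&cur->next);
		struct qent *next = (struct qent *)(nextv & ~(uintptr_t)1);
		uintptr_t expect = (uintptr_t)cur;

		if ((nextv & 1) != 0 &&
		    atomic_compare_exchange_strong(prev, &expect,
		    (uintptr_t)next)) {
			cur->free = *freep;	/* walkers may still read cur */
			*freep = cur;
		}
		return (next);
	}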
@@ -208,14 +320,15 @@ rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
 
 again:
 	prev = &lock->head;
-	if (rl_q_load(prev) == NULL && rl_q_cas(prev, NULL, e))
-		return (true);
-
-	for (cur = rl_q_load(prev);;) {
-		if (rl_e_is_marked(cur))
-			goto again;
+	cur = rl_q_load(prev);
+	if (cur == NULL && rl_q_cas(prev, NULL, e))
+		return (RL_LOCK_SUCCESS);
 
+	for (;;) {
 		if (cur != NULL) {
+			if (rl_e_is_marked(cur))
+				goto again;
+
 			next = rl_q_load(&cur->rl_q_next);
 			if (rl_e_is_marked(next)) {
 				next = rl_e_unmark(next);
@@ -244,7 +357,7 @@ again:
 			}
 			if (trylock) {
 				sleepq_release(&lock->sleepers);
-				return (false);
+				return (RL_TRYLOCK_FAILED);
 			}
 			rl_insert_sleep(lock);
 			/* e is still valid */
@@ -253,7 +366,9 @@ again:
 			e->rl_q_next = cur;
 			if (rl_q_cas(prev, cur, e)) {
 				atomic_thread_fence_acq();
-				return (true);
+				return (rl_e_is_rlock(e) ?
+				    rl_r_validate(lock, e, trylock, free) :
+				    rl_w_validate(lock, e, trylock, free));
 			}
 			/* Reset rl_q_next in case we hit fast path. */
 			e->rl_q_next = NULL;
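
Publication by CAS only fixes the entry's position on the list; it
does not yet grant the lock.  The validation pass exists to catch
conflicting entries that raced in around the same time.  A sketch of
the interleaving it guards against, with hypothetical offsets:

	/* Thread A takes a read lock on [0, 100); thread B a write
	 * lock on [50, 150):
	 *
	 *   A: rl_q_cas() publishes rA at the head
	 *   B: rl_q_cas() publishes wB after rA (wB starts later)
	 *   A: rl_r_validate() scans past rA, meets the unmarked
	 *      writer wB overlapping [0, 100) -> drops rA, then
	 *      sleeps or fails the trylock
	 *   B: rl_w_validate() rescans from the head and backs off
	 *      the same way if anything before wB still overlaps
	 *      [50, 150)
	 */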
@@ -263,27 +378,30 @@ again:
 }
 
 static struct rl_q_entry *
-rangelock_lock_int(struct rangelock *lock, struct rl_q_entry *e,
-    bool trylock)
+rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
+    vm_ooffset_t end, int locktype)
 {
-	struct rl_q_entry *free, *x, *xp;
-	bool res;
-
-	free = NULL;
-	smr_enter(rl_smr);
-	res = rl_insert(lock, e, trylock, &free);
-	smr_exit(rl_smr);
-	MPASS(trylock || res);
-	if (!res) {
-		e->rl_q_free = free;
-		free = e;
-		e = NULL;
-	}
-	for (x = free; x != NULL; x = xp) {
-		MPASS(!rl_e_is_marked(x));
-		xp = x->rl_q_free;
-		MPASS(!rl_e_is_marked(xp));
-		uma_zfree_smr(rl_entry_zone, x);
+	struct rl_q_entry *e, *free, *x, *xp;
+	enum RL_INSERT_RES res;
+
+	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
+		free = NULL;
+		e = rlqentry_alloc(start, end, locktype);
+		smr_enter(rl_smr);
+		res = rl_insert(lock, e, trylock, &free);
+		smr_exit(rl_smr);
+		if (res == RL_TRYLOCK_FAILED) {
+			MPASS(trylock);
+			e->rl_q_free = free;
+			free = e;
+			e = NULL;
+		}
+		for (x = free; x != NULL; x = xp) {
+			MPASS(!rl_e_is_marked(x));
+			xp = x->rl_q_free;
+			MPASS(!rl_e_is_marked(xp));
+			uma_zfree_smr(rl_entry_zone, x);
+		}
 	}
 	return (e);
 }
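
The entries collected on 'free' are chained through rl_q_free and
drained only after the walk, because concurrent walkers inside
smr_enter()/smr_exit() sections may still hold pointers to
just-unlinked entries; uma_zfree_smr() defers reuse until those
sections drain.  A trivial user-space sketch (not part of the commit)
of the chain-then-drain pattern, with free() standing in for
uma_zfree_smr():

	#include <stdio.h>
	#include <stdlib.h>

	struct qent { struct qent *q_free; };

	int
	main(void)
	{
		struct qent *freel = NULL, *x, *xp;

		for (int i = 0; i < 3; i++) {	/* "unlink" three entries */
			x = malloc(sizeof(*x));
			x->q_free = freel;
			freel = x;
		}
		for (x = freel; x != NULL; x = xp) {	/* drain in one pass */
			xp = x->q_free;
			free(x);
		}
		printf("drained\n");
		return (0);
	}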
@@ -291,37 +409,25 @@ rangelock_lock_int(struct rangelock *lock, struct rl_q_entry *e,
 void *
 rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
 {
-	struct rl_q_entry *e;
-
-	e = rlqentry_alloc(start, end, RL_LOCK_READ);
-	return (rangelock_lock_int(lock, e, false));
+	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
 }
 
 void *
 rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
 {
-	struct rl_q_entry *e;
-
-	e = rlqentry_alloc(start, end, RL_LOCK_READ);
-	return (rangelock_lock_int(lock, e, true));
+	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
 }
 
 void *
 rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
 {
-	struct rl_q_entry *e;
-
-	e = rlqentry_alloc(start, end, RL_LOCK_WRITE);
-	return (rangelock_lock_int(lock, e, true));
+	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
 }
 
 void *
 rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
 {
-	struct rl_q_entry *e;
-
-	e = rlqentry_alloc(start, end, RL_LOCK_WRITE);
-	return (rangelock_lock_int(lock, e, true));
+	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
 }
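
Caller-visible semantics: the blocking variants return a cookie once
the range is held, and the try-variants return NULL instead of
sleeping.  A hypothetical caller sketch (names invented, not from the
commit):

	void *cookie;

	cookie = rangelock_tryrlock(&rl, off, off + len);
	if (cookie == NULL)
		return (EWOULDBLOCK);	/* would have had to sleep */
	/* ... access bytes [off, off + len) under the range lock ... */
	rangelock_unlock(&rl, cookie);
	return (0);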
 
 #ifdef INVARIANT_SUPPORT