git: a6c0d801ca59 - main - libpthread: allocate rwlocks and spinlocks in dedicated cachelines

From: Mateusz Guzik <mjg_at_FreeBSD.org>
Date: Tue, 27 Jun 2023 11:56:55 UTC
The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=a6c0d801ca5934bb9b9cca6870ea7406d5db0641

commit a6c0d801ca5934bb9b9cca6870ea7406d5db0641
Author:     Greg Becker <becker.greg@att.net>
AuthorDate: 2023-06-27 01:08:29 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2023-06-27 11:56:44 +0000

    libpthread: allocate rwlocks and spinlocks in dedicated cachelines
    
    Reduces severe performance degradation caused by false sharing. Note that
    this does not account for hardware that performs adjacent-cacheline
    prefetch.
    
    [mjg: massaged the commit message and changed the patch to use
    aligned_alloc instead of malloc]
    
    PR:     272238
    MFC after:      1 week
---
 lib/libthr/thread/thr_pspinlock.c | 3 ++-
 lib/libthr/thread/thr_rwlock.c    | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/libthr/thread/thr_pspinlock.c b/lib/libthr/thread/thr_pspinlock.c
index 089416a18a07..374fe498034b 100644
--- a/lib/libthr/thread/thr_pspinlock.c
+++ b/lib/libthr/thread/thr_pspinlock.c
@@ -60,7 +60,8 @@ _pthread_spin_init(pthread_spinlock_t *lock, int pshared)
 	if (lock == NULL)
 		return (EINVAL);
 	if (pshared == PTHREAD_PROCESS_PRIVATE) {
-		lck = malloc(sizeof(struct pthread_spinlock));
+		lck = aligned_alloc(CACHE_LINE_SIZE,
+		    roundup(sizeof(struct pthread_spinlock), CACHE_LINE_SIZE));
 		if (lck == NULL)
 			return (ENOMEM);
 		*lock = lck;
diff --git a/lib/libthr/thread/thr_rwlock.c b/lib/libthr/thread/thr_rwlock.c
index b7526376a9a3..3e8af974fa18 100644
--- a/lib/libthr/thread/thr_rwlock.c
+++ b/lib/libthr/thread/thr_rwlock.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <errno.h>
 #include <limits.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "namespace.h"
 #include <pthread.h>
@@ -102,9 +103,11 @@ rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr)
 
 	if (attr == NULL || *attr == NULL ||
 	    (*attr)->pshared == PTHREAD_PROCESS_PRIVATE) {
-		prwlock = calloc(1, sizeof(struct pthread_rwlock));
+		prwlock = aligned_alloc(CACHE_LINE_SIZE,
+		    roundup(sizeof(struct pthread_rwlock), CACHE_LINE_SIZE));
 		if (prwlock == NULL)
 			return (ENOMEM);
+		memset(prwlock, 0, sizeof(struct pthread_rwlock));
 		*rwlock = prwlock;
 	} else {
 		prwlock = __thr_pshared_offpage(rwlock, 1);