git: 9d1361ef0a31 - stable/13 - x86 atomics: use lock prefix unconditionally

From: Konstantin Belousov <kib@FreeBSD.org>
Date: Fri, 11 Feb 2022 11:13:53 UTC
The branch stable/13 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=9d1361ef0a3131d27c113893195642e53aea0043

commit 9d1361ef0a3131d27c113893195642e53aea0043
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2022-02-03 09:51:36 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2022-02-11 10:52:55 +0000

    x86 atomics: use lock prefix unconditionally
    
    (cherry picked from commit 9c0b759bf9b520537616d026f21a0a98d70acd11)
---
 sys/amd64/include/atomic.h | 67 +++++++++++-----------------------------------
 sys/i386/include/atomic.h  | 52 +++++++++++------------------------
 2 files changed, 32 insertions(+), 87 deletions(-)
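[Editor's note: a minimal standalone sketch, not the FreeBSD header itself, of the pattern the diff below settles on. The old MPLOCKED macro expanded to "lock ; " only for SMP kernels, userland, and KLD modules; after this change the lock prefix is emitted unconditionally. demo_atomic_add_int() is a hypothetical stand-in for one of the ATOMIC_ASM()-generated functions.]

    #include <stdio.h>

    static inline void
    demo_atomic_add_int(volatile unsigned int *p, unsigned int v)
    {
    	/* Was: __asm __volatile(MPLOCKED "addl %1,%0" ...) */
    	__asm__ __volatile__("lock; addl %1,%0"
    	    : "+m" (*p)
    	    : "ir" (v)
    	    : "cc");
    }

    int
    main(void)
    {
    	volatile unsigned int counter = 0;

    	demo_atomic_add_int(&counter, 5);
    	printf("%u\n", counter);	/* prints 5 */
    	return (0);
    }

The cost is one locked bus/cacheline operation even on a uniprocessor, which the new comment in the diff argues is acceptable today.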

diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h
index 1436c36daf2a..6caf4a050b4c 100644
--- a/sys/amd64/include/atomic.h
+++ b/sys/amd64/include/atomic.h
@@ -151,16 +151,10 @@ void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 #else /* !__GNUCLIKE_ASM */
 
 /*
- * For userland, always use lock prefixes so that the binaries will run
- * on both SMP and !SMP systems.
- */
-#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE)
-#define	MPLOCKED	"lock ; "
-#else
-#define	MPLOCKED
-#endif
-
-/*
+ * Always use lock prefixes.  The result is slightly less optimal for
+ * UP systems, but it matters less now, and sometimes UP is emulated
+ * over SMP.
+ *
  * The assembly is volatilized to avoid code chunk removal by the compiler.
  * GCC aggressively reorders operations and memory clobbering is necessary
  * in order to avoid that for memory barriers.
@@ -169,7 +163,7 @@ void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 static __inline void					\
 atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
-	__asm __volatile(MPLOCKED OP			\
+	__asm __volatile("lock; " OP			\
 	: "+m" (*p)					\
 	: CONS (V)					\
 	: "cc");					\
@@ -178,7 +172,7 @@ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 static __inline void					\
 atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
-	__asm __volatile(MPLOCKED OP			\
+	__asm __volatile("lock; " OP			\
 	: "+m" (*p)					\
 	: CONS (V)					\
 	: "memory", "cc");				\
@@ -207,8 +201,7 @@ atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
 	u_char res;					\
 							\
 	__asm __volatile(				\
-	"	" MPLOCKED "		"		\
-	"	cmpxchg %3,%1 ;	"			\
+	" lock; cmpxchg %3,%1 ;	"			\
 	"# atomic_cmpset_" #TYPE "	"		\
 	: "=@cce" (res),		/* 0 */		\
 	  "+m" (*dst),			/* 1 */		\
@@ -224,8 +217,7 @@ atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
 	u_char res;					\
 							\
 	__asm __volatile(				\
-	"	" MPLOCKED "		"		\
-	"	cmpxchg %3,%1 ;		"		\
+	" lock; cmpxchg %3,%1 ;		"		\
 	"# atomic_fcmpset_" #TYPE "	"		\
 	: "=@cce" (res),		/* 0 */		\
 	  "+m" (*dst),			/* 1 */		\
@@ -249,8 +241,7 @@ atomic_fetchadd_int(volatile u_int *p, u_int v)
 {
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	xaddl	%0,%1 ;		"
+	" lock; xaddl	%0,%1 ;		"
 	"# atomic_fetchadd_int"
 	: "+r" (v),			/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -267,8 +258,7 @@ atomic_fetchadd_long(volatile u_long *p, u_long v)
 {
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	xaddq	%0,%1 ;		"
+	" lock;	xaddq	%0,%1 ;		"
 	"# atomic_fetchadd_long"
 	: "+r" (v),			/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -282,8 +272,7 @@ atomic_testandset_int(volatile u_int *p, u_int v)
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btsl	%2,%1 ;		"
+	" lock;	btsl	%2,%1 ;		"
 	"# atomic_testandset_int"
 	: "=@ccc" (res),		/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -298,8 +287,7 @@ atomic_testandset_long(volatile u_long *p, u_int v)
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btsq	%2,%1 ;		"
+	" lock;	btsq	%2,%1 ;		"
 	"# atomic_testandset_long"
 	: "=@ccc" (res),		/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -314,8 +302,7 @@ atomic_testandclear_int(volatile u_int *p, u_int v)
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btrl	%2,%1 ;		"
+	" lock;	btrl	%2,%1 ;		"
 	"# atomic_testandclear_int"
 	: "=@ccc" (res),		/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -330,8 +317,7 @@ atomic_testandclear_long(volatile u_long *p, u_int v)
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btrq	%2,%1 ;		"
+	" lock;	btrq	%2,%1 ;		"
 	"# atomic_testandclear_long"
 	: "=@ccc" (res),		/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -352,39 +338,18 @@ atomic_testandclear_long(volatile u_long *p, u_int v)
  * special address for "mem".  In the kernel, we use a private per-cpu
  * cache line.  In user space, we use a word in the stack's red zone
  * (-8(%rsp)).
- *
- * For UP kernels, however, the memory of the single processor is
- * always consistent, so we only need to stop the compiler from
- * reordering accesses in a way that violates the semantics of acquire
- * and release.
  */
 
-#if defined(_KERNEL)
-
-#if defined(SMP) || defined(KLD_MODULE)
 static __inline void
 __storeload_barrier(void)
 {
-
+#if defined(_KERNEL)
 	__asm __volatile("lock; addl $0,%%gs:%0"
 	    : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc");
-}
-#else /* _KERNEL && UP */
-static __inline void
-__storeload_barrier(void)
-{
-
-	__compiler_membar();
-}
-#endif /* SMP */
 #else /* !_KERNEL */
-static __inline void
-__storeload_barrier(void)
-{
-
 	__asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc");
-}
 #endif /* _KERNEL*/
+}
 
 #define	ATOMIC_LOAD(TYPE)					\
 static __inline u_##TYPE					\
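[Editor's note: the hunk above collapses the three __storeload_barrier() variants (SMP kernel, UP kernel, userland) into a single locked-add form. Below is a standalone sketch of the idea, assuming amd64 userland and its System V red zone; the demo_* names are hypothetical. A locked add of zero to a dummy stack word acts as a full fence, ordering earlier stores before later loads, which plain x86 stores and loads do not guarantee on their own.]

    static inline void
    demo_storeload_barrier(void)
    {
    	__asm__ __volatile__("lock; addl $0,-8(%%rsp)" : : : "memory", "cc");
    }

    /* Dekker-style handshake where the StoreLoad barrier is required. */
    static volatile int flag_self, flag_other;

    static int
    demo_try_enter(void)
    {
    	flag_self = 1;			/* announce intent (store) */
    	demo_storeload_barrier();	/* keep the load below from
    					   passing the store above */
    	return (flag_other == 0);	/* enter only if the peer has
    					   not announced */
    }

The kernel branch in the diff does the same thing against a private per-CPU cache line (OFFSETOF_MONITORBUF) instead of the red zone.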
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
index c65a7ac83a93..2e72de935c23 100644
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -141,16 +141,10 @@ void		atomic_subtract_64(volatile uint64_t *, uint64_t);
 #else /* !__GNUCLIKE_ASM */
 
 /*
- * For userland, always use lock prefixes so that the binaries will run
- * on both SMP and !SMP systems.
- */
-#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE)
-#define	MPLOCKED	"lock ; "
-#else
-#define	MPLOCKED
-#endif
-
-/*
+ * Always use lock prefixes.  The result is slightly less optimal for
+ * UP systems, but it matters less now, and sometimes UP is emulated
+ * over SMP.
+ *
  * The assembly is volatilized to avoid code chunk removal by the compiler.
  * GCC aggressively reorders operations and memory clobbering is necessary
  * in order to avoid that for memory barriers.
@@ -159,7 +153,7 @@ void		atomic_subtract_64(volatile uint64_t *, uint64_t);
 static __inline void					\
 atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
-	__asm __volatile(MPLOCKED OP			\
+	__asm __volatile("lock; " OP			\
 	: "+m" (*p)					\
 	: CONS (V)					\
 	: "cc");					\
@@ -168,7 +162,7 @@ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 static __inline void					\
 atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
-	__asm __volatile(MPLOCKED OP			\
+	__asm __volatile("lock; " OP			\
 	: "+m" (*p)					\
 	: CONS (V)					\
 	: "memory", "cc");				\
@@ -197,8 +191,7 @@ atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
 	u_char res;					\
 							\
 	__asm __volatile(				\
-	"	" MPLOCKED "		"		\
-	"	cmpxchg	%3,%1 ;		"		\
+	"	lock; cmpxchg	%3,%1 ;	"		\
 	"	sete	%0 ;		"		\
 	"# atomic_cmpset_" #TYPE "	"		\
 	: "=q" (res),			/* 0 */		\
@@ -215,8 +208,7 @@ atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
 	u_char res;					\
 							\
 	__asm __volatile(				\
-	"	" MPLOCKED "		"		\
-	"	cmpxchg	%3,%1 ;		"		\
+	"	lock; cmpxchg	%3,%1 ;	"		\
 	"	sete	%0 ;		"		\
 	"# atomic_fcmpset_" #TYPE "	"		\
 	: "=q" (res),			/* 0 */		\
@@ -240,8 +232,7 @@ atomic_fetchadd_int(volatile u_int *p, u_int v)
 {
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	xaddl	%0,%1 ;		"
+	"	lock; xaddl	%0,%1 ;	"
 	"# atomic_fetchadd_int"
 	: "+r" (v),			/* 0 */
 	  "+m" (*p)			/* 1 */
@@ -255,8 +246,7 @@ atomic_testandset_int(volatile u_int *p, u_int v)
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btsl	%2,%1 ;		"
+	"	lock; btsl	%2,%1 ;	"
 	"	setc	%0 ;		"
 	"# atomic_testandset_int"
 	: "=q" (res),			/* 0 */
@@ -272,8 +262,7 @@ atomic_testandclear_int(volatile u_int *p, u_int v)
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btrl	%2,%1 ;		"
+	"	lock; btrl	%2,%1 ;	"
 	"	setc	%0 ;		"
 	"# atomic_testandclear_int"
 	: "=q" (res),			/* 0 */
@@ -303,11 +292,7 @@ atomic_testandclear_int(volatile u_int *p, u_int v)
  */
 
 #if defined(_KERNEL)
-#if defined(SMP) || defined(KLD_MODULE)
 #define	__storeload_barrier()	__mbk()
-#else /* _KERNEL && UP */
-#define	__storeload_barrier()	__compiler_membar()
-#endif /* SMP */
 #else /* !_KERNEL */
 #define	__storeload_barrier()	__mbu()
 #endif /* _KERNEL*/
@@ -484,8 +469,7 @@ atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	cmpxchg8b %1 ;		"
+	"	lock; cmpxchg8b %1 ;	"
 	"	sete	%0"
 	: "=q" (res),			/* 0 */
 	  "+m" (*dst),			/* 1 */
@@ -502,8 +486,7 @@ atomic_fcmpset_64_i586(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
 	u_char res;
 
 	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	cmpxchg8b %1 ;		"
+	"	lock; cmpxchg8b %1 ;	"
 	"	sete	%0"
 	: "=q" (res),			/* 0 */
 	  "+m" (*dst),			/* 1 */
@@ -522,8 +505,7 @@ atomic_load_acq_64_i586(volatile uint64_t *p)
 	__asm __volatile(
 	"	movl	%%ebx,%%eax ;	"
 	"	movl	%%ecx,%%edx ;	"
-	"	" MPLOCKED "		"
-	"	cmpxchg8b %1"
+	"	lock; cmpxchg8b %1"
 	: "=&A" (res),			/* 0 */
 	  "+m" (*p)			/* 1 */
 	: : "memory", "cc");
@@ -538,8 +520,7 @@ atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
 	"	movl	%%eax,%%ebx ;	"
 	"	movl	%%edx,%%ecx ;	"
 	"1:				"
-	"	" MPLOCKED "		"
-	"	cmpxchg8b %0 ;		"
+	"	lock; cmpxchg8b %0 ;	"
 	"	jne	1b"
 	: "+m" (*p),			/* 0 */
 	  "+A" (v)			/* 1 */
@@ -554,8 +535,7 @@ atomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
 	"	movl	%%eax,%%ebx ;	"
 	"	movl	%%edx,%%ecx ;	"
 	"1:				"
-	"	" MPLOCKED "		"
-	"	cmpxchg8b %0 ;		"
+	"	lock; cmpxchg8b %0 ;	"
 	"	jne	1b"
 	: "+m" (*p),			/* 0 */
 	  "+A" (v)			/* 1 */