svn commit: r357459 - head/sys/arm64/include
Mark Johnston
markj at FreeBSD.org
Mon Feb 3 18:23:35 UTC 2020
Author: markj
Date: Mon Feb 3 18:23:35 2020
New Revision: 357459
URL: https://svnweb.freebsd.org/changeset/base/357459
Log:
Add LSE-based atomic(9) implementations.
These make use of the cas*, ld* and swp instructions added by the ARMv8.1 Large System Extensions (LSE).
Testing shows them to be significantly more performant than LL/SC-based
implementations.
No functional change here since the wrappers still unconditionally
select the _llsc variants.
Reviewed by: andrew, kib
MFC after: 1 month
Submitted by: Ali Saidi <alisaidi at amazon.com> (original version)
Differential Revision: https://reviews.freebsd.org/D23324
Modified:
head/sys/arm64/include/atomic.h
Modified: head/sys/arm64/include/atomic.h
==============================================================================
--- head/sys/arm64/include/atomic.h Mon Feb 3 18:23:14 2020 (r357458)
+++ head/sys/arm64/include/atomic.h Mon Feb 3 18:23:35 2020 (r357459)
@@ -63,15 +63,16 @@
static __inline void \
atomic_##op##_##bar##t##flav(volatile uint##t##_t *p, uint##t##_t val)
-#define _ATOMIC_OP_IMPL(t, w, s, op, asm_op, bar, a, l) \
+#define _ATOMIC_OP_IMPL(t, w, s, op, llsc_asm_op, lse_asm_op, pre, bar, a, l) \
_ATOMIC_OP_PROTO(t, op, bar, _llsc) \
{ \
uint##t##_t tmp; \
int res; \
\
+ pre; \
__asm __volatile( \
"1: ld"#a"xr"#s" %"#w"0, [%2]\n" \
- " "#asm_op" %"#w"0, %"#w"0, %"#w"3\n" \
+ " "#llsc_asm_op" %"#w"0, %"#w"0, %"#w"3\n" \
" st"#l"xr"#s" %w1, %"#w"0, [%2]\n" \
" cbnz %w1, 1b\n" \
: "=&r"(tmp), "=&r"(res) \
@@ -80,26 +81,45 @@ _ATOMIC_OP_PROTO(t, op, bar, _llsc) \
); \
} \
\
+_ATOMIC_OP_PROTO(t, op, bar, _lse) \
+{ \
+ uint##t##_t tmp; \
+ \
+ pre; \
+ __asm __volatile( \
+ ".arch_extension lse\n" \
+ "ld"#lse_asm_op#a#l#s" %"#w"2, %"#w"0, [%1]\n" \
+ ".arch_extension nolse\n" \
+ : "=r" (tmp) \
+ : "r" (p), "r" (val) \
+ : "memory" \
+ ); \
+} \
+ \
_ATOMIC_OP_PROTO(t, op, bar, ) \
{ \
atomic_##op##_##bar##t##_llsc(p, val); \
}
-#define __ATOMIC_OP(op, asm_op, bar, a, l) \
- _ATOMIC_OP_IMPL(8, w, b, op, asm_op, bar, a, l) \
- _ATOMIC_OP_IMPL(16, w, h, op, asm_op, bar, a, l) \
- _ATOMIC_OP_IMPL(32, w, , op, asm_op, bar, a, l) \
- _ATOMIC_OP_IMPL(64, , , op, asm_op, bar, a, l)
+#define __ATOMIC_OP(op, llsc_asm_op, lse_asm_op, pre, bar, a, l) \
+ _ATOMIC_OP_IMPL(8, w, b, op, llsc_asm_op, lse_asm_op, pre, \
+ bar, a, l) \
+ _ATOMIC_OP_IMPL(16, w, h, op, llsc_asm_op, lse_asm_op, pre, \
+ bar, a, l) \
+ _ATOMIC_OP_IMPL(32, w, , op, llsc_asm_op, lse_asm_op, pre, \
+ bar, a, l) \
+ _ATOMIC_OP_IMPL(64, , , op, llsc_asm_op, lse_asm_op, pre, \
+ bar, a, l)
-#define _ATOMIC_OP(op, asm_op) \
- __ATOMIC_OP(op, asm_op, , , ) \
- __ATOMIC_OP(op, asm_op, acq_, a, ) \
- __ATOMIC_OP(op, asm_op, rel_, , l)
+#define _ATOMIC_OP(op, llsc_asm_op, lse_asm_op, pre) \
+ __ATOMIC_OP(op, llsc_asm_op, lse_asm_op, pre, , , ) \
+ __ATOMIC_OP(op, llsc_asm_op, lse_asm_op, pre, acq_, a, ) \
+ __ATOMIC_OP(op, llsc_asm_op, lse_asm_op, pre, rel_, , l)
-_ATOMIC_OP(add, add)
-_ATOMIC_OP(clear, bic)
-_ATOMIC_OP(set, orr)
-_ATOMIC_OP(subtract, sub)
+_ATOMIC_OP(add, add, add, )
+_ATOMIC_OP(clear, bic, clr, )
+_ATOMIC_OP(set, orr, set, )
+_ATOMIC_OP(subtract, add, add, val = -val)
#define _ATOMIC_CMPSET_PROTO(t, bar, flav) \
static __inline int \
@@ -133,6 +153,26 @@ _ATOMIC_CMPSET_PROTO(t, bar, _llsc) \
return (!res); \
} \
\
+_ATOMIC_CMPSET_PROTO(t, bar, _lse) \
+{ \
+ uint##t##_t oldval; \
+ int res; \
+ \
+ oldval = cmpval; \
+ __asm __volatile( \
+ ".arch_extension lse\n" \
+ "cas"#a#l#s" %"#w"1, %"#w"4, [%3]\n" \
+ "cmp %"#w"1, %"#w"2\n" \
+ "cset %w0, eq\n" \
+ ".arch_extension nolse\n" \
+ : "=r" (res), "+&r" (cmpval) \
+ : "r" (oldval), "r" (p), "r" (newval) \
+ : "cc", "memory" \
+ ); \
+ \
+ return (res); \
+} \
+ \
_ATOMIC_CMPSET_PROTO(t, bar, ) \
{ \
return (atomic_cmpset_##bar##t##_llsc(p, cmpval, newval)); \
@@ -160,6 +200,27 @@ _ATOMIC_FCMPSET_PROTO(t, bar, _llsc) \
return (!res); \
} \
\
+_ATOMIC_FCMPSET_PROTO(t, bar, _lse) \
+{ \
+ uint##t##_t _cmpval, tmp; \
+ int res; \
+ \
+ _cmpval = tmp = *cmpval; \
+ __asm __volatile( \
+ ".arch_extension lse\n" \
+ "cas"#a#l#s" %"#w"1, %"#w"4, [%3]\n" \
+ "cmp %"#w"1, %"#w"2\n" \
+ "cset %w0, eq\n" \
+ ".arch_extension nolse\n" \
+ : "=r" (res), "+&r" (tmp) \
+ : "r" (_cmpval), "r" (p), "r" (newval) \
+ : "cc", "memory" \
+ ); \
+ *cmpval = tmp; \
+ \
+ return (res); \
+} \
+ \
_ATOMIC_FCMPSET_PROTO(t, bar, ) \
{ \
return (atomic_fcmpset_##bar##t##_llsc(p, cmpval, newval)); \
@@ -182,7 +243,7 @@ atomic_fetchadd_##t##flav(volatile uint##t##_t *p, uin
#define _ATOMIC_FETCHADD_IMPL(t, w) \
_ATOMIC_FETCHADD_PROTO(t, _llsc) \
{ \
- uint##t##_t tmp, ret; \
+ uint##t##_t ret, tmp; \
int res; \
\
__asm __volatile( \
@@ -198,6 +259,22 @@ _ATOMIC_FETCHADD_PROTO(t, _llsc) \
return (ret); \
} \
\
+_ATOMIC_FETCHADD_PROTO(t, _lse) \
+{ \
+ uint##t##_t ret; \
+ \
+ __asm __volatile( \
+ ".arch_extension lse\n" \
+ "ldadd %"#w"2, %"#w"0, [%1]\n" \
+ ".arch_extension nolse\n" \
+ : "=r" (ret) \
+ : "r" (p), "r" (val) \
+ : "memory" \
+ ); \
+ \
+ return (ret); \
+} \
+ \
_ATOMIC_FETCHADD_PROTO(t, ) \
{ \
return (atomic_fetchadd_##t##_llsc(p, val)); \
@@ -232,6 +309,22 @@ _ATOMIC_SWAP_PROTO(t, _llsc) \
return (ret); \
} \
\
+_ATOMIC_SWAP_PROTO(t, _lse) \
+{ \
+ uint##t##_t ret; \
+ \
+ __asm __volatile( \
+ ".arch_extension lse\n" \
+ "swp %"#w"2, %"#w"0, [%1]\n" \
+ ".arch_extension nolse\n" \
+ : "=r" (ret) \
+ : "r" (p), "r" (val) \
+ : "memory" \
+ ); \
+ \
+ return (ret); \
+} \
+ \
_ATOMIC_SWAP_PROTO(t, ) \
{ \
return (atomic_swap_##t##_llsc(p, val)); \
@@ -254,6 +347,11 @@ _ATOMIC_READANDCLEAR_PROTO(t, _llsc) \
return (ret); \
} \
\
+_ATOMIC_READANDCLEAR_PROTO(t, _lse) \
+{ \
+ return (atomic_swap_##t##_lse(p, 0)); \
+} \
+ \
_ATOMIC_READANDCLEAR_PROTO(t, ) \
{ \
return (atomic_readandclear_##t##_llsc(p)); \
@@ -266,7 +364,7 @@ _ATOMIC_SWAP_IMPL(64, , xzr)
static __inline int \
atomic_testand##op##_##t##flav(volatile uint##t##_t *p, u_int val)
-#define _ATOMIC_TEST_OP_IMPL(t, w, op, asm_op) \
+#define _ATOMIC_TEST_OP_IMPL(t, w, op, llsc_asm_op, lse_asm_op) \
_ATOMIC_TEST_OP_PROTO(t, op, _llsc) \
{ \
uint##t##_t mask, old, tmp; \
@@ -275,7 +373,7 @@ _ATOMIC_TEST_OP_PROTO(t, op, _llsc) \
mask = 1u << (val & 0x1f); \
__asm __volatile( \
"1: ldxr %"#w"2, [%3]\n" \
- " "#asm_op" %"#w"0, %"#w"2, %"#w"4\n" \
+ " "#llsc_asm_op" %"#w"0, %"#w"2, %"#w"4\n" \
" stxr %w1, %"#w"0, [%3]\n" \
" cbnz %w1, 1b\n" \
: "=&r" (tmp), "=&r" (res), "=&r" (old) \
@@ -286,17 +384,34 @@ _ATOMIC_TEST_OP_PROTO(t, op, _llsc) \
return ((old & mask) != 0); \
} \
\
+_ATOMIC_TEST_OP_PROTO(t, op, _lse) \
+{ \
+ uint##t##_t mask, old; \
+ \
+ mask = 1u << (val & 0x1f); \
+ __asm __volatile( \
+ ".arch_extension lse\n" \
+ "ld"#lse_asm_op" %"#w"2, %"#w"0, [%1]\n" \
+ ".arch_extension nolse\n" \
+ : "=r" (old) \
+ : "r" (p), "r" (mask) \
+ : "memory" \
+ ); \
+ \
+ return ((old & mask) != 0); \
+} \
+ \
_ATOMIC_TEST_OP_PROTO(t, op, ) \
{ \
return (atomic_testand##op##_##t##_llsc(p, val)); \
}
-#define _ATOMIC_TEST_OP(op, asm_op) \
- _ATOMIC_TEST_OP_IMPL(32, w, op, asm_op) \
- _ATOMIC_TEST_OP_IMPL(64, , op, asm_op)
+#define _ATOMIC_TEST_OP(op, llsc_asm_op, lse_asm_op) \
+ _ATOMIC_TEST_OP_IMPL(32, w, op, llsc_asm_op, lse_asm_op) \
+ _ATOMIC_TEST_OP_IMPL(64, , op, llsc_asm_op, lse_asm_op)
-_ATOMIC_TEST_OP(clear, bic)
-_ATOMIC_TEST_OP(set, orr)
+_ATOMIC_TEST_OP(clear, bic, clr)
+_ATOMIC_TEST_OP(set, orr, set)
#define _ATOMIC_LOAD_ACQ_IMPL(t, w, s) \
static __inline uint##t##_t \
More information about the svn-src-all mailing list