git: 7652c49af66d - stable/13 - Merge commit 4bb2416d42eb from llvm-project (by Jessica Clarke):

From: Jessica Clarke <jrtc27_at_FreeBSD.org>
Date: Mon, 11 Sep 2023 21:47:40 UTC
The branch stable/13 has been updated by jrtc27:

URL: https://cgit.FreeBSD.org/src/commit/?id=7652c49af66de526522793b6614acde515f9e5af

commit 7652c49af66de526522793b6614acde515f9e5af
Author:     Jessica Clarke <jrtc27@FreeBSD.org>
AuthorDate: 2023-09-04 08:33:27 +0000
Commit:     Jessica Clarke <jrtc27@FreeBSD.org>
CommitDate: 2023-09-11 21:47:28 +0000

    Merge commit 4bb2416d42eb from llvm-project (by Jessica Clarke):
    
      [builtins][AArch64] Implement _sync out-of-line atomics
    
      Whilst Clang does not use these, recent GCC does, so these interfaces
      are needed on systems such as FreeBSD that use compiler-rt as the
      system runtime library but also wish to support building programs
      with GCC.
    
      This is a light adaptation of the code committed to GCC by Sebastian Pop
      <spop@amazon.com>, relicensed with permission for use in compiler-rt.
    
      Fixes https://github.com/llvm/llvm-project/issues/63483
    
      Reviewed By: sebpop, MaskRay
    
      Differential Revision: https://reviews.llvm.org/D158536
    
    Reviewed by:    dim
    MFC after:      1 week
    Differential Revision:  https://reviews.freebsd.org/D41716
    
    (cherry picked from commit 8524dc53fd4c6b79d75b82cb82f3ac72fc25e85f)
---
 .../compiler-rt/lib/builtins/aarch64/lse.S         | 40 ++++++++++++++++++++--
 lib/libcompiler_rt/Makefile                        |  2 +-
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/aarch64/lse.S b/contrib/llvm-project/compiler-rt/lib/builtins/aarch64/lse.S
index 5dc0d5320b5a..1fe18f4a4681 100644
--- a/contrib/llvm-project/compiler-rt/lib/builtins/aarch64/lse.S
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/aarch64/lse.S
@@ -7,7 +7,7 @@
 // Out-of-line LSE atomics helpers. Ported from libgcc library.
 // N = {1, 2, 4, 8}
 // M = {1, 2, 4, 8, 16}
-// ORDER = {'relax', 'acq', 'rel', 'acq_rel'}
+// ORDER = {'relax', 'acq', 'rel', 'acq_rel', 'sync'}
 // Routines implemented:
 //
 //  iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
@@ -35,8 +35,8 @@ HIDDEN(___aarch64_have_lse_atomics)
 #endif
 
 // Generate mnemonics for
-// L_cas:                                 SIZE: 1,2,4,8,16 MODEL: 1,2,3,4
-// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8    MODEL: 1,2,3,4
+// L_cas:                                 SIZE: 1,2,4,8,16 MODEL: 1,2,3,4,5
+// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8    MODEL: 1,2,3,4,5
 
 #if SIZE == 1
 #define S b
@@ -64,24 +64,44 @@ HIDDEN(___aarch64_have_lse_atomics)
 #define L
 #define M 0x000000
 #define N 0x000000
+#define BARRIER
 #elif MODEL == 2
 #define SUFF _acq
 #define A a
 #define L
 #define M 0x400000
 #define N 0x800000
+#define BARRIER
 #elif MODEL == 3
 #define SUFF _rel
 #define A
 #define L l
 #define M 0x008000
 #define N 0x400000
+#define BARRIER
 #elif MODEL == 4
 #define SUFF _acq_rel
 #define A a
 #define L l
 #define M 0x408000
 #define N 0xc00000
+#define BARRIER
+#elif MODEL == 5
+#define SUFF _sync
+#ifdef L_swp
+// swp has _acq semantics.
+#define A a
+#define L
+#define M 0x400000
+#define N 0x800000
+#else
+// All other _sync functions have _seq semantics.
+#define A a
+#define L l
+#define M 0x408000
+#define N 0xc00000
+#endif
+#define BARRIER dmb ish
 #else
 #error
 #endif // MODEL
@@ -96,7 +116,12 @@ HIDDEN(___aarch64_have_lse_atomics)
 #endif
 
 #define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF)
+#if MODEL == 5
+// Drop A for _sync functions.
+#define LDXR GLUE3(ld, xr, S)
+#else
 #define LDXR GLUE4(ld, A, xr, S)
+#endif
 #define STXR GLUE4(st, L, xr, S)
 
 // Define temporary registers.
@@ -136,9 +161,15 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
         STXR   w(tmp1), s(1), [x2]
         cbnz   w(tmp1), 0b
 1:
+        BARRIER
         ret
 #else
+#if MODEL == 5
+// Drop A for _sync functions.
+#define LDXP GLUE2(ld, xp)
+#else
 #define LDXP GLUE3(ld, A, xp)
+#endif
 #define STXP GLUE3(st, L, xp)
 #ifdef HAS_ASM_LSE
 #define CASP GLUE3(casp, A, L)  x0, x1, x2, x3, [x4]
@@ -159,6 +190,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
         STXP   w(tmp2), x2, x3, [x4]
         cbnz   w(tmp2), 0b
 1:
+        BARRIER
         ret
 #endif
 END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
@@ -180,6 +212,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
         LDXR   s(0), [x1]
         STXR   w(tmp1), s(tmp0), [x1]
         cbnz   w(tmp1), 0b
+        BARRIER
         ret
 END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
 #endif // L_swp
@@ -224,6 +257,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
         OP     s(tmp1), s(0), s(tmp0)
         STXR   w(tmp2), s(tmp1), [x1]
         cbnz   w(tmp2), 0b
+        BARRIER
         ret
 END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
 #endif // L_ldadd L_ldclr L_ldeor L_ldset
diff --git a/lib/libcompiler_rt/Makefile b/lib/libcompiler_rt/Makefile
index 1ba94bc5eec8..f894d68e6478 100644
--- a/lib/libcompiler_rt/Makefile
+++ b/lib/libcompiler_rt/Makefile
@@ -22,7 +22,7 @@ MK_WERROR.gcc=	no
 .if ${MACHINE_CPUARCH} == "aarch64"
 . for pat in cas swp ldadd ldclr ldeor ldset
 .  for size in 1 2 4 8 16
-.   for model in 1 2 3 4
+.   for model in 1 2 3 4 5
 .    if ${pat} == "cas" || ${size} != "16"
 # Use .for to define lse_name, to get a special loop-local variable
 .     for lse_name in outline_atomic_${pat}${size}_${model}.S