[Git][ghc/ghc][wip/T22012] rts/RtsSymbols: Add AArch64 outline atomic operations

Ben Gamari (@bgamari) gitlab at gitlab.haskell.org
Tue Aug 29 17:12:51 UTC 2023



Ben Gamari pushed to branch wip/T22012 at Glasgow Haskell Compiler / GHC


Commits:
d549417a by Ben Gamari at 2023-08-29T13:12:45-04:00
rts/RtsSymbols: Add AArch64 outline atomic operations

Fixes #22012 by adding the symbols described in
https://github.com/llvm/llvm-project/blob/main/llvm/docs/Atomics.rst#libcalls-atomic.

Ultimately this would be better addressed by #22011, but this is a first
step in the right direction and fixes the immediate symptom.

Generated via https://gitlab.haskell.org/ghc/ghc/-/snippets/5733

- - - - -


4 changed files:

- configure.ac
- + m4/fp_armv8_outline_atomics.m4
- + rts/ARMOutlineAtomicsSymbols.h
- rts/RtsSymbols.c


Changes:

=====================================
configure.ac
=====================================
@@ -1120,6 +1120,10 @@ AC_DEFINE_UNQUOTED([RTS_LINKER_USE_MMAP], [$RtsLinkerUseMmap],
 GHC_ADJUSTORS_METHOD([Target])
 AC_SUBST([UseLibffiForAdjustors])
 
+dnl ** ARM outline atomics
+dnl --------------------------------------------------------------
+FP_ARM_OUTLINE_ATOMICS
+
 dnl ** IPE data compression
 dnl --------------------------------------------------------------
 FP_FIND_LIBZSTD


=====================================
m4/fp_armv8_outline_atomics.m4
=====================================
@@ -0,0 +1,11 @@
+# FP_ARM_OUTLINE_ATOMICS
+# ----------
+# Sets HAVE_ARM_OUTLINE_ATOMICS depending upon whether the target compiler
+# provides ARMv8's outline atomics symbols. See #22012.
+AC_DEFUN([FP_ARM_OUTLINE_ATOMICS], [
+    AC_CHECK_FUNC(
+        [__aarch64_ldadd1_acq],
+        [AC_DEFINE([HAVE_ARM_OUTLINE_ATOMICS], [1], [Does the toolchain use ARMv8 outline atomics])]
+    )
+])
+


=====================================
rts/ARMOutlineAtomicsSymbols.h
=====================================
@@ -0,0 +1,259 @@
+/*
+ * Declarations and RTS symbol table entries for the outline atomics
+ * symbols provided by some ARMv8 compilers.
+ *
+ * See #22012.
+ */
+
+uint8_t __aarch64_cas1_relax(uint8_t, uint8_t, uint8_t*);
+uint8_t __aarch64_cas1_acq(uint8_t, uint8_t, uint8_t*);
+uint8_t __aarch64_cas1_rel(uint8_t, uint8_t, uint8_t*);
+uint8_t __aarch64_cas1_acq_rel(uint8_t, uint8_t, uint8_t*);
+uint8_t __aarch64_cas1_sync(uint8_t, uint8_t, uint8_t*);
+uint16_t __aarch64_cas2_relax(uint16_t, uint16_t, uint16_t*);
+uint16_t __aarch64_cas2_acq(uint16_t, uint16_t, uint16_t*);
+uint16_t __aarch64_cas2_rel(uint16_t, uint16_t, uint16_t*);
+uint16_t __aarch64_cas2_acq_rel(uint16_t, uint16_t, uint16_t*);
+uint16_t __aarch64_cas2_sync(uint16_t, uint16_t, uint16_t*);
+uint32_t __aarch64_cas4_relax(uint32_t, uint32_t, uint32_t*);
+uint32_t __aarch64_cas4_acq(uint32_t, uint32_t, uint32_t*);
+uint32_t __aarch64_cas4_rel(uint32_t, uint32_t, uint32_t*);
+uint32_t __aarch64_cas4_acq_rel(uint32_t, uint32_t, uint32_t*);
+uint32_t __aarch64_cas4_sync(uint32_t, uint32_t, uint32_t*);
+uint64_t __aarch64_cas8_relax(uint64_t, uint64_t, uint64_t*);
+uint64_t __aarch64_cas8_acq(uint64_t, uint64_t, uint64_t*);
+uint64_t __aarch64_cas8_rel(uint64_t, uint64_t, uint64_t*);
+uint64_t __aarch64_cas8_acq_rel(uint64_t, uint64_t, uint64_t*);
+uint64_t __aarch64_cas8_sync(uint64_t, uint64_t, uint64_t*);
+unsigned __int128 __aarch64_cas16_relax(unsigned __int128, unsigned __int128, unsigned __int128*); /* 16-byte CAS: C has no uint128_t, so use the GCC/Clang __int128 extension */
+unsigned __int128 __aarch64_cas16_acq(unsigned __int128, unsigned __int128, unsigned __int128*);
+unsigned __int128 __aarch64_cas16_rel(unsigned __int128, unsigned __int128, unsigned __int128*);
+unsigned __int128 __aarch64_cas16_acq_rel(unsigned __int128, unsigned __int128, unsigned __int128*);
+unsigned __int128 __aarch64_cas16_sync(unsigned __int128, unsigned __int128, unsigned __int128*);
+uint8_t __aarch64_swp1_relax(uint8_t, uint8_t*);
+uint8_t __aarch64_swp1_acq(uint8_t, uint8_t*);
+uint8_t __aarch64_swp1_rel(uint8_t, uint8_t*);
+uint8_t __aarch64_swp1_acq_rel(uint8_t, uint8_t*);
+uint8_t __aarch64_swp1_sync(uint8_t, uint8_t*);
+uint16_t __aarch64_swp2_relax(uint16_t, uint16_t*);
+uint16_t __aarch64_swp2_acq(uint16_t, uint16_t*);
+uint16_t __aarch64_swp2_rel(uint16_t, uint16_t*);
+uint16_t __aarch64_swp2_acq_rel(uint16_t, uint16_t*);
+uint16_t __aarch64_swp2_sync(uint16_t, uint16_t*);
+uint32_t __aarch64_swp4_relax(uint32_t, uint32_t*);
+uint32_t __aarch64_swp4_acq(uint32_t, uint32_t*);
+uint32_t __aarch64_swp4_rel(uint32_t, uint32_t*);
+uint32_t __aarch64_swp4_acq_rel(uint32_t, uint32_t*);
+uint32_t __aarch64_swp4_sync(uint32_t, uint32_t*);
+uint64_t __aarch64_swp8_relax(uint64_t, uint64_t*);
+uint64_t __aarch64_swp8_acq(uint64_t, uint64_t*);
+uint64_t __aarch64_swp8_rel(uint64_t, uint64_t*);
+uint64_t __aarch64_swp8_acq_rel(uint64_t, uint64_t*);
+uint64_t __aarch64_swp8_sync(uint64_t, uint64_t*);
+uint8_t __aarch64_ldadd1_relax(uint8_t, uint8_t*);
+uint8_t __aarch64_ldadd1_acq(uint8_t, uint8_t*);
+uint8_t __aarch64_ldadd1_rel(uint8_t, uint8_t*);
+uint8_t __aarch64_ldadd1_acq_rel(uint8_t, uint8_t*);
+uint8_t __aarch64_ldadd1_sync(uint8_t, uint8_t*);
+uint16_t __aarch64_ldadd2_relax(uint16_t, uint16_t*);
+uint16_t __aarch64_ldadd2_acq(uint16_t, uint16_t*);
+uint16_t __aarch64_ldadd2_rel(uint16_t, uint16_t*);
+uint16_t __aarch64_ldadd2_acq_rel(uint16_t, uint16_t*);
+uint16_t __aarch64_ldadd2_sync(uint16_t, uint16_t*);
+uint32_t __aarch64_ldadd4_relax(uint32_t, uint32_t*);
+uint32_t __aarch64_ldadd4_acq(uint32_t, uint32_t*);
+uint32_t __aarch64_ldadd4_rel(uint32_t, uint32_t*);
+uint32_t __aarch64_ldadd4_acq_rel(uint32_t, uint32_t*);
+uint32_t __aarch64_ldadd4_sync(uint32_t, uint32_t*);
+uint64_t __aarch64_ldadd8_relax(uint64_t, uint64_t*);
+uint64_t __aarch64_ldadd8_acq(uint64_t, uint64_t*);
+uint64_t __aarch64_ldadd8_rel(uint64_t, uint64_t*);
+uint64_t __aarch64_ldadd8_acq_rel(uint64_t, uint64_t*);
+uint64_t __aarch64_ldadd8_sync(uint64_t, uint64_t*);
+uint8_t __aarch64_ldclr1_relax(uint8_t, uint8_t*);
+uint8_t __aarch64_ldclr1_acq(uint8_t, uint8_t*);
+uint8_t __aarch64_ldclr1_rel(uint8_t, uint8_t*);
+uint8_t __aarch64_ldclr1_acq_rel(uint8_t, uint8_t*);
+uint8_t __aarch64_ldclr1_sync(uint8_t, uint8_t*);
+uint16_t __aarch64_ldclr2_relax(uint16_t, uint16_t*);
+uint16_t __aarch64_ldclr2_acq(uint16_t, uint16_t*);
+uint16_t __aarch64_ldclr2_rel(uint16_t, uint16_t*);
+uint16_t __aarch64_ldclr2_acq_rel(uint16_t, uint16_t*);
+uint16_t __aarch64_ldclr2_sync(uint16_t, uint16_t*);
+uint32_t __aarch64_ldclr4_relax(uint32_t, uint32_t*);
+uint32_t __aarch64_ldclr4_acq(uint32_t, uint32_t*);
+uint32_t __aarch64_ldclr4_rel(uint32_t, uint32_t*);
+uint32_t __aarch64_ldclr4_acq_rel(uint32_t, uint32_t*);
+uint32_t __aarch64_ldclr4_sync(uint32_t, uint32_t*);
+uint64_t __aarch64_ldclr8_relax(uint64_t, uint64_t*);
+uint64_t __aarch64_ldclr8_acq(uint64_t, uint64_t*);
+uint64_t __aarch64_ldclr8_rel(uint64_t, uint64_t*);
+uint64_t __aarch64_ldclr8_acq_rel(uint64_t, uint64_t*);
+uint64_t __aarch64_ldclr8_sync(uint64_t, uint64_t*);
+uint8_t __aarch64_ldeor1_relax(uint8_t, uint8_t*);
+uint8_t __aarch64_ldeor1_acq(uint8_t, uint8_t*);
+uint8_t __aarch64_ldeor1_rel(uint8_t, uint8_t*);
+uint8_t __aarch64_ldeor1_acq_rel(uint8_t, uint8_t*);
+uint8_t __aarch64_ldeor1_sync(uint8_t, uint8_t*);
+uint16_t __aarch64_ldeor2_relax(uint16_t, uint16_t*);
+uint16_t __aarch64_ldeor2_acq(uint16_t, uint16_t*);
+uint16_t __aarch64_ldeor2_rel(uint16_t, uint16_t*);
+uint16_t __aarch64_ldeor2_acq_rel(uint16_t, uint16_t*);
+uint16_t __aarch64_ldeor2_sync(uint16_t, uint16_t*);
+uint32_t __aarch64_ldeor4_relax(uint32_t, uint32_t*);
+uint32_t __aarch64_ldeor4_acq(uint32_t, uint32_t*);
+uint32_t __aarch64_ldeor4_rel(uint32_t, uint32_t*);
+uint32_t __aarch64_ldeor4_acq_rel(uint32_t, uint32_t*);
+uint32_t __aarch64_ldeor4_sync(uint32_t, uint32_t*);
+uint64_t __aarch64_ldeor8_relax(uint64_t, uint64_t*);
+uint64_t __aarch64_ldeor8_acq(uint64_t, uint64_t*);
+uint64_t __aarch64_ldeor8_rel(uint64_t, uint64_t*);
+uint64_t __aarch64_ldeor8_acq_rel(uint64_t, uint64_t*);
+uint64_t __aarch64_ldeor8_sync(uint64_t, uint64_t*);
+uint8_t __aarch64_ldset1_relax(uint8_t, uint8_t*);
+uint8_t __aarch64_ldset1_acq(uint8_t, uint8_t*);
+uint8_t __aarch64_ldset1_rel(uint8_t, uint8_t*);
+uint8_t __aarch64_ldset1_acq_rel(uint8_t, uint8_t*);
+uint8_t __aarch64_ldset1_sync(uint8_t, uint8_t*);
+uint16_t __aarch64_ldset2_relax(uint16_t, uint16_t*);
+uint16_t __aarch64_ldset2_acq(uint16_t, uint16_t*);
+uint16_t __aarch64_ldset2_rel(uint16_t, uint16_t*);
+uint16_t __aarch64_ldset2_acq_rel(uint16_t, uint16_t*);
+uint16_t __aarch64_ldset2_sync(uint16_t, uint16_t*);
+uint32_t __aarch64_ldset4_relax(uint32_t, uint32_t*);
+uint32_t __aarch64_ldset4_acq(uint32_t, uint32_t*);
+uint32_t __aarch64_ldset4_rel(uint32_t, uint32_t*);
+uint32_t __aarch64_ldset4_acq_rel(uint32_t, uint32_t*);
+uint32_t __aarch64_ldset4_sync(uint32_t, uint32_t*);
+uint64_t __aarch64_ldset8_relax(uint64_t, uint64_t*);
+uint64_t __aarch64_ldset8_acq(uint64_t, uint64_t*);
+uint64_t __aarch64_ldset8_rel(uint64_t, uint64_t*);
+uint64_t __aarch64_ldset8_acq_rel(uint64_t, uint64_t*);
+uint64_t __aarch64_ldset8_sync(uint64_t, uint64_t*);
+
+#define RTS_ARM_OUTLINE_ATOMICS_SYMBOLS \
+    SymE_HasProto(__aarch64_cas1_relax) \
+    SymE_HasProto(__aarch64_cas1_acq) \
+    SymE_HasProto(__aarch64_cas1_rel) \
+    SymE_HasProto(__aarch64_cas1_acq_rel) \
+    SymE_HasProto(__aarch64_cas1_sync) \
+    SymE_HasProto(__aarch64_cas2_relax) \
+    SymE_HasProto(__aarch64_cas2_acq) \
+    SymE_HasProto(__aarch64_cas2_rel) \
+    SymE_HasProto(__aarch64_cas2_acq_rel) \
+    SymE_HasProto(__aarch64_cas2_sync) \
+    SymE_HasProto(__aarch64_cas4_relax) \
+    SymE_HasProto(__aarch64_cas4_acq) \
+    SymE_HasProto(__aarch64_cas4_rel) \
+    SymE_HasProto(__aarch64_cas4_acq_rel) \
+    SymE_HasProto(__aarch64_cas4_sync) \
+    SymE_HasProto(__aarch64_cas8_relax) \
+    SymE_HasProto(__aarch64_cas8_acq) \
+    SymE_HasProto(__aarch64_cas8_rel) \
+    SymE_HasProto(__aarch64_cas8_acq_rel) \
+    SymE_HasProto(__aarch64_cas8_sync) \
+    SymE_HasProto(__aarch64_cas16_relax) \
+    SymE_HasProto(__aarch64_cas16_acq) \
+    SymE_HasProto(__aarch64_cas16_rel) \
+    SymE_HasProto(__aarch64_cas16_acq_rel) \
+    SymE_HasProto(__aarch64_cas16_sync) \
+    SymE_HasProto(__aarch64_swp1_relax) \
+    SymE_HasProto(__aarch64_swp1_acq) \
+    SymE_HasProto(__aarch64_swp1_rel) \
+    SymE_HasProto(__aarch64_swp1_acq_rel) \
+    SymE_HasProto(__aarch64_swp1_sync) \
+    SymE_HasProto(__aarch64_swp2_relax) \
+    SymE_HasProto(__aarch64_swp2_acq) \
+    SymE_HasProto(__aarch64_swp2_rel) \
+    SymE_HasProto(__aarch64_swp2_acq_rel) \
+    SymE_HasProto(__aarch64_swp2_sync) \
+    SymE_HasProto(__aarch64_swp4_relax) \
+    SymE_HasProto(__aarch64_swp4_acq) \
+    SymE_HasProto(__aarch64_swp4_rel) \
+    SymE_HasProto(__aarch64_swp4_acq_rel) \
+    SymE_HasProto(__aarch64_swp4_sync) \
+    SymE_HasProto(__aarch64_swp8_relax) \
+    SymE_HasProto(__aarch64_swp8_acq) \
+    SymE_HasProto(__aarch64_swp8_rel) \
+    SymE_HasProto(__aarch64_swp8_acq_rel) \
+    SymE_HasProto(__aarch64_swp8_sync) \
+    SymE_HasProto(__aarch64_ldadd1_relax) \
+    SymE_HasProto(__aarch64_ldadd1_acq) \
+    SymE_HasProto(__aarch64_ldadd1_rel) \
+    SymE_HasProto(__aarch64_ldadd1_acq_rel) \
+    SymE_HasProto(__aarch64_ldadd1_sync) \
+    SymE_HasProto(__aarch64_ldadd2_relax) \
+    SymE_HasProto(__aarch64_ldadd2_acq) \
+    SymE_HasProto(__aarch64_ldadd2_rel) \
+    SymE_HasProto(__aarch64_ldadd2_acq_rel) \
+    SymE_HasProto(__aarch64_ldadd2_sync) \
+    SymE_HasProto(__aarch64_ldadd4_relax) \
+    SymE_HasProto(__aarch64_ldadd4_acq) \
+    SymE_HasProto(__aarch64_ldadd4_rel) \
+    SymE_HasProto(__aarch64_ldadd4_acq_rel) \
+    SymE_HasProto(__aarch64_ldadd4_sync) \
+    SymE_HasProto(__aarch64_ldadd8_relax) \
+    SymE_HasProto(__aarch64_ldadd8_acq) \
+    SymE_HasProto(__aarch64_ldadd8_rel) \
+    SymE_HasProto(__aarch64_ldadd8_acq_rel) \
+    SymE_HasProto(__aarch64_ldadd8_sync) \
+    SymE_HasProto(__aarch64_ldclr1_relax) \
+    SymE_HasProto(__aarch64_ldclr1_acq) \
+    SymE_HasProto(__aarch64_ldclr1_rel) \
+    SymE_HasProto(__aarch64_ldclr1_acq_rel) \
+    SymE_HasProto(__aarch64_ldclr1_sync) \
+    SymE_HasProto(__aarch64_ldclr2_relax) \
+    SymE_HasProto(__aarch64_ldclr2_acq) \
+    SymE_HasProto(__aarch64_ldclr2_rel) \
+    SymE_HasProto(__aarch64_ldclr2_acq_rel) \
+    SymE_HasProto(__aarch64_ldclr2_sync) \
+    SymE_HasProto(__aarch64_ldclr4_relax) \
+    SymE_HasProto(__aarch64_ldclr4_acq) \
+    SymE_HasProto(__aarch64_ldclr4_rel) \
+    SymE_HasProto(__aarch64_ldclr4_acq_rel) \
+    SymE_HasProto(__aarch64_ldclr4_sync) \
+    SymE_HasProto(__aarch64_ldclr8_relax) \
+    SymE_HasProto(__aarch64_ldclr8_acq) \
+    SymE_HasProto(__aarch64_ldclr8_rel) \
+    SymE_HasProto(__aarch64_ldclr8_acq_rel) \
+    SymE_HasProto(__aarch64_ldclr8_sync) \
+    SymE_HasProto(__aarch64_ldeor1_relax) \
+    SymE_HasProto(__aarch64_ldeor1_acq) \
+    SymE_HasProto(__aarch64_ldeor1_rel) \
+    SymE_HasProto(__aarch64_ldeor1_acq_rel) \
+    SymE_HasProto(__aarch64_ldeor1_sync) \
+    SymE_HasProto(__aarch64_ldeor2_relax) \
+    SymE_HasProto(__aarch64_ldeor2_acq) \
+    SymE_HasProto(__aarch64_ldeor2_rel) \
+    SymE_HasProto(__aarch64_ldeor2_acq_rel) \
+    SymE_HasProto(__aarch64_ldeor2_sync) \
+    SymE_HasProto(__aarch64_ldeor4_relax) \
+    SymE_HasProto(__aarch64_ldeor4_acq) \
+    SymE_HasProto(__aarch64_ldeor4_rel) \
+    SymE_HasProto(__aarch64_ldeor4_acq_rel) \
+    SymE_HasProto(__aarch64_ldeor4_sync) \
+    SymE_HasProto(__aarch64_ldeor8_relax) \
+    SymE_HasProto(__aarch64_ldeor8_acq) \
+    SymE_HasProto(__aarch64_ldeor8_rel) \
+    SymE_HasProto(__aarch64_ldeor8_acq_rel) \
+    SymE_HasProto(__aarch64_ldeor8_sync) \
+    SymE_HasProto(__aarch64_ldset1_relax) \
+    SymE_HasProto(__aarch64_ldset1_acq) \
+    SymE_HasProto(__aarch64_ldset1_rel) \
+    SymE_HasProto(__aarch64_ldset1_acq_rel) \
+    SymE_HasProto(__aarch64_ldset1_sync) \
+    SymE_HasProto(__aarch64_ldset2_relax) \
+    SymE_HasProto(__aarch64_ldset2_acq) \
+    SymE_HasProto(__aarch64_ldset2_rel) \
+    SymE_HasProto(__aarch64_ldset2_acq_rel) \
+    SymE_HasProto(__aarch64_ldset2_sync) \
+    SymE_HasProto(__aarch64_ldset4_relax) \
+    SymE_HasProto(__aarch64_ldset4_acq) \
+    SymE_HasProto(__aarch64_ldset4_rel) \
+    SymE_HasProto(__aarch64_ldset4_acq_rel) \
+    SymE_HasProto(__aarch64_ldset4_sync) \
+    SymE_HasProto(__aarch64_ldset8_relax) \
+    SymE_HasProto(__aarch64_ldset8_acq) \
+    SymE_HasProto(__aarch64_ldset8_rel) \
+    SymE_HasProto(__aarch64_ldset8_acq_rel) \
+    SymE_HasProto(__aarch64_ldset8_sync)


=====================================
rts/RtsSymbols.c
=====================================
@@ -970,6 +970,13 @@ extern char **environ;
 #define RTS_LIBGCC_SYMBOLS
 #endif
 
+// Symbols defined by libgcc/compiler-rt for AArch64's outline atomics.
+#if defined(HAVE_ARM_OUTLINE_ATOMICS)
+#include "ARMOutlineAtomicsSymbols.h"
+#else
+#define RTS_ARM_OUTLINE_ATOMICS_SYMBOLS
+#endif
+
 // Symbols defined by libc
 #define RTS_LIBC_SYMBOLS                               \
       SymI_HasProto_redirect(atexit, atexit, STRENGTH_STRONG, CODE_TYPE_CODE) /* See Note [Strong symbols] */ \
@@ -1017,6 +1024,7 @@ RTS_LIBC_SYMBOLS
 RTS_LIBGCC_SYMBOLS
 RTS_FINI_ARRAY_SYMBOLS
 RTS_LIBFFI_SYMBOLS
+RTS_ARM_OUTLINE_ATOMICS_SYMBOLS
 
 #undef SymI_NeedsProto
 #undef SymI_NeedsDataProto
@@ -1058,6 +1066,7 @@ RtsSymbolVal rtsSyms[] = {
       RTS_LIBGCC_SYMBOLS
       RTS_FINI_ARRAY_SYMBOLS
       RTS_LIBFFI_SYMBOLS
+      RTS_ARM_OUTLINE_ATOMICS_SYMBOLS
       SymI_HasDataProto(nonmoving_write_barrier_enabled)
 #if defined(darwin_HOST_OS) && defined(i386_HOST_ARCH)
       // dyld stub code contains references to this,



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d549417a32e8becb47e85137d14b83729f9db817

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d549417a32e8becb47e85137d14b83729f9db817
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230829/153b2a2c/attachment-0001.html>


More information about the ghc-commits mailing list