[Git][ghc/ghc][master] 9 commits: compiler: Introduce MO_{ACQUIRE,RELEASE}_FENCE
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Thu Jun 29 01:09:59 UTC 2023
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
30525b00 by Ben Gamari at 2023-06-28T21:09:30-04:00
compiler: Introduce MO_{ACQUIRE,RELEASE}_FENCE
- - - - -
b787e259 by Ben Gamari at 2023-06-28T21:09:30-04:00
compiler: Drop MO_WriteBarrier
rts: Drop write_barrier
- - - - -
7550b4a5 by Ben Gamari at 2023-06-28T21:09:30-04:00
rts: Drop load_store_barrier()
This is no longer used.
- - - - -
d5f2875e by Ben Gamari at 2023-06-28T21:09:31-04:00
rts: Drop last instances of prim_{write,read}_barrier
- - - - -
965ac2ba by Ben Gamari at 2023-06-28T21:09:31-04:00
rts: Eliminate remaining uses of load_load_barrier
- - - - -
0fc5cb97 by Sven Tennie at 2023-06-28T21:09:31-04:00
compiler: Drop MO_ReadBarrier
- - - - -
7a7d326c by Ben Gamari at 2023-06-28T21:09:31-04:00
rts: Drop load_load_barrier
This is no longer used.
- - - - -
9f63da66 by Sven Tennie at 2023-06-28T21:09:31-04:00
Delete write_barrier function
- - - - -
bb0ed354 by Ben Gamari at 2023-06-28T21:09:31-04:00
rts: Make collectFreshWeakPtrs definition a prototype
x86-64/Darwin's toolchain inexplicably warns that collectFreshWeakPtrs
needs to be a prototype.
- - - - -
26 changed files:
- compiler/GHC/Cmm/MachOp.hs
- compiler/GHC/Cmm/Parser.y
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
- compiler/GHC/CmmToAsm/PPC/CodeGen.hs
- compiler/GHC/CmmToAsm/Wasm/FromCmm.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToC.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/StgToCmm/Prim.hs
- rts/CloneStack.c
- rts/PrimOps.cmm
- rts/RaiseAsync.c
- rts/RtsSymbols.c
- rts/Sparks.c
- rts/TopHandler.c
- rts/include/Cmm.h
- rts/include/Stg.h
- rts/include/stg/SMP.h
- rts/sm/Evac.c
- rts/sm/GC.c
- rts/sm/GCAux.c
- rts/sm/MarkWeak.c
- rts/sm/Sanity.c
- testsuite/tests/rts/testwsdeque.c
Changes:
=====================================
compiler/GHC/Cmm/MachOp.hs
=====================================
@@ -670,8 +670,6 @@ data CallishMachOp
| MO_SubIntC Width
| MO_U_Mul2 Width
- | MO_ReadBarrier
- | MO_WriteBarrier
| MO_Touch -- Keep variables live (when using interior pointers)
-- Prefetch
@@ -701,6 +699,9 @@ data CallishMachOp
| MO_BSwap Width
| MO_BRev Width
+ | MO_AcquireFence
+ | MO_ReleaseFence
+
-- | Atomic read-modify-write. Arguments are @[dest, n]@.
| MO_AtomicRMW Width AtomicMachOp
-- | Atomic read. Arguments are @[addr]@.
=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -1117,8 +1117,11 @@ callishMachOps platform = listToUFM $
( "fabs32f", (MO_F32_Fabs,) ),
( "sqrt32f", (MO_F32_Sqrt,) ),
- ( "read_barrier", (MO_ReadBarrier,)),
- ( "write_barrier", (MO_WriteBarrier,)),
+ -- TODO: It would be nice to rename the following operations to
+ -- acquire_fence and release_fence. Be aware that there'll be issues
+ -- with an overlapping token ('acquire') in the lexer.
+ ( "fence_acquire", (MO_AcquireFence,)),
+ ( "fence_release", (MO_ReleaseFence,)),
( "memcpy", memcpyLikeTweakArgs MO_Memcpy ),
( "memset", memcpyLikeTweakArgs MO_Memset ),
( "memmove", memcpyLikeTweakArgs MO_Memmove ),
=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -1558,9 +1558,8 @@ genCCall target dest_regs arg_regs bid = do
MO_U_Mul2 _w -> unsupported mop
-- Memory Ordering
- -- TODO DMBSY is probably *way* too much!
- MO_ReadBarrier -> return (unitOL DMBSY, Nothing)
- MO_WriteBarrier -> return (unitOL DMBSY, Nothing)
+ MO_AcquireFence -> return (unitOL DMBISH, Nothing)
+ MO_ReleaseFence -> return (unitOL DMBISH, Nothing)
MO_Touch -> return (nilOL, Nothing) -- Keep variables live (when using interior pointers)
-- Prefetch
MO_Prefetch_Data _n -> return (nilOL, Nothing) -- Prefetch hint.
=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -136,6 +136,7 @@ regUsageOfInstr platform instr = case instr of
-- 8. Synchronization Instructions -------------------------------------------
DMBSY -> usage ([], [])
+ DMBISH -> usage ([], [])
-- 9. Floating Point Instructions --------------------------------------------
FCVT dst src -> usage (regOp src, regOp dst)
@@ -276,6 +277,7 @@ patchRegsOfInstr instr env = case instr of
-- 8. Synchronization Instructions -----------------------------------------
DMBSY -> DMBSY
+ DMBISH -> DMBISH
-- 9. Floating Point Instructions ------------------------------------------
FCVT o1 o2 -> FCVT (patchOp o1) (patchOp o2)
@@ -645,6 +647,7 @@ data Instr
-- 8. Synchronization Instructions -----------------------------------------
| DMBSY
+ | DMBISH
-- 9. Floating Point Instructions
-- Float ConVerT
| FCVT Operand Operand
@@ -724,6 +727,7 @@ instrCon i =
BL{} -> "BL"
BCOND{} -> "BCOND"
DMBSY{} -> "DMBSY"
+ DMBISH{} -> "DMBISH"
FCVT{} -> "FCVT"
SCVTF{} -> "SCVTF"
FCVTZS{} -> "FCVTZS"
=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -530,6 +530,7 @@ pprInstr platform instr = case instr of
-- 8. Synchronization Instructions -------------------------------------------
DMBSY -> line $ text "\tdmb sy"
+ DMBISH -> line $ text "\tdmb ish"
-- 9. Floating Point Instructions --------------------------------------------
FCVT o1 o2 -> op2 (text "\tfcvt") o1 o2
SCVTF o1 o2 -> op2 (text "\tscvtf") o1 o2
=====================================
compiler/GHC/CmmToAsm/PPC/CodeGen.hs
=====================================
@@ -1126,9 +1126,9 @@ genCCall :: ForeignTarget -- function to call
-> [CmmFormal] -- where to put the result
-> [CmmActual] -- arguments (of mixed type)
-> NatM InstrBlock
-genCCall (PrimTarget MO_ReadBarrier) _ _
+genCCall (PrimTarget MO_AcquireFence) _ _
= return $ unitOL LWSYNC
-genCCall (PrimTarget MO_WriteBarrier) _ _
+genCCall (PrimTarget MO_ReleaseFence) _ _
= return $ unitOL LWSYNC
genCCall (PrimTarget MO_Touch) _ _
@@ -2094,8 +2094,8 @@ genCCall' config gcp target dest_regs args
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
MO_U_Mul2 {} -> unsupported
- MO_ReadBarrier -> unsupported
- MO_WriteBarrier -> unsupported
+ MO_AcquireFence -> unsupported
+ MO_ReleaseFence -> unsupported
MO_Touch -> unsupported
MO_Prefetch_Data _ -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
=====================================
compiler/GHC/CmmToAsm/Wasm/FromCmm.hs
=====================================
@@ -1186,8 +1186,8 @@ lower_CallishMachOp lbl MO_F32_ExpM1 rs xs =
lower_CallishMachOp lbl MO_F32_Fabs rs xs = lower_CMO_Un_Homo lbl "fabsf" rs xs
lower_CallishMachOp lbl MO_F32_Sqrt rs xs = lower_CMO_Un_Homo lbl "sqrtf" rs xs
lower_CallishMachOp lbl (MO_UF_Conv w0) rs xs = lower_MO_UF_Conv lbl w0 rs xs
-lower_CallishMachOp _ MO_ReadBarrier _ _ = pure $ WasmStatements WasmNop
-lower_CallishMachOp _ MO_WriteBarrier _ _ = pure $ WasmStatements WasmNop
+lower_CallishMachOp _ MO_AcquireFence _ _ = pure $ WasmStatements WasmNop
+lower_CallishMachOp _ MO_ReleaseFence _ _ = pure $ WasmStatements WasmNop
lower_CallishMachOp _ MO_Touch _ _ = pure $ WasmStatements WasmNop
lower_CallishMachOp _ (MO_Prefetch_Data {}) _ _ = pure $ WasmStatements WasmNop
lower_CallishMachOp lbl (MO_Memcpy {}) [] xs = do
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -2160,8 +2160,8 @@ genSimplePrim bid (MO_Memcpy align) [] [dst,src,n] = genMemCpy bid a
genSimplePrim bid (MO_Memmove align) [] [dst,src,n] = genMemMove bid align dst src n
genSimplePrim bid (MO_Memcmp align) [res] [dst,src,n] = genMemCmp bid align res dst src n
genSimplePrim bid (MO_Memset align) [] [dst,c,n] = genMemSet bid align dst c n
-genSimplePrim _ MO_ReadBarrier [] [] = return nilOL -- barriers compile to no code on x86/x86-64;
-genSimplePrim _ MO_WriteBarrier [] [] = return nilOL -- we keep it this long in order to prevent earlier optimisations.
+genSimplePrim _ MO_AcquireFence [] [] = return nilOL -- barriers compile to no code on x86/x86-64;
+genSimplePrim _ MO_ReleaseFence [] [] = return nilOL -- we keep it this long in order to prevent earlier optimisations.
genSimplePrim _ MO_Touch [] [_] = return nilOL
genSimplePrim _ (MO_Prefetch_Data n) [] [src] = genPrefetchData n src
genSimplePrim _ (MO_BSwap width) [dst] [src] = genByteSwap width dst src
=====================================
compiler/GHC/CmmToC.hs
=====================================
@@ -261,6 +261,11 @@ pprStmt platform stmt =
CmmUnsafeForeignCall (PrimTarget MO_Touch) _results _args -> empty
CmmUnsafeForeignCall (PrimTarget (MO_Prefetch_Data _)) _results _args -> empty
+ CmmUnsafeForeignCall (PrimTarget MO_ReleaseFence) [] [] ->
+ text "__atomic_thread_fence(__ATOMIC_RELEASE);"
+ CmmUnsafeForeignCall (PrimTarget MO_AcquireFence) [] [] ->
+ text "__atomic_thread_fence(__ATOMIC_ACQUIRE);"
+
CmmUnsafeForeignCall target@(PrimTarget op) results args ->
fn_call
where
@@ -944,8 +949,8 @@ pprCallishMachOp_for_C mop
MO_F32_ExpM1 -> text "expm1f"
MO_F32_Sqrt -> text "sqrtf"
MO_F32_Fabs -> text "fabsf"
- MO_ReadBarrier -> text "load_load_barrier"
- MO_WriteBarrier -> text "write_barrier"
+ MO_AcquireFence -> unsupported
+ MO_ReleaseFence -> unsupported
MO_Memcpy _ -> text "__builtin_memcpy"
MO_Memset _ -> text "__builtin_memset"
MO_Memmove _ -> text "__builtin_memmove"
=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -171,34 +171,15 @@ getInstrinct fname retTy parTys =
fty = LMFunction funSig
in getInstrinct2 fname fty
--- | Memory barrier instruction for LLVM >= 3.0
-barrier :: LlvmM StmtData
-barrier = do
- let s = Fence False SyncSeqCst
- return (unitOL s, [])
-
--- | Insert a 'barrier', unless the target platform is in the provided list of
--- exceptions (where no code will be emitted instead).
-barrierUnless :: [Arch] -> LlvmM StmtData
-barrierUnless exs = do
- platform <- getPlatform
- if platformArch platform `elem` exs
- then return (nilOL, [])
- else barrier
-
-- | Foreign Calls
genCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> LlvmM StmtData
-- Barriers need to be handled specially as they are implemented as LLVM
-- intrinsic functions.
-genCall (PrimTarget MO_ReadBarrier) _ _ =
- barrierUnless [ArchX86, ArchX86_64]
-
-genCall (PrimTarget MO_WriteBarrier) _ _ =
- barrierUnless [ArchX86, ArchX86_64]
-
-genCall (PrimTarget MO_Touch) _ _ =
- return (nilOL, [])
+genCall (PrimTarget MO_AcquireFence) _ _ = runStmtsDecls $
+ statement $ Fence False SyncAcquire
+genCall (PrimTarget MO_ReleaseFence) _ _ = runStmtsDecls $
+ statement $ Fence False SyncRelease
genCall (PrimTarget (MO_UF_Conv w)) [dst] [e] = runStmtsDecls $ do
dstV <- getCmmRegW (CmmLocal dst)
@@ -1008,8 +989,8 @@ cmmPrimOpFunctions mop = do
-- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the
-- appropriate case of genCall.
MO_U_Mul2 {} -> unsupported
- MO_ReadBarrier -> unsupported
- MO_WriteBarrier -> unsupported
+ MO_ReleaseFence -> unsupported
+ MO_AcquireFence -> unsupported
MO_Touch -> unsupported
MO_UF_Conv _ -> unsupported
=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -2177,7 +2177,7 @@ doWritePtrArrayOp addr idx val
-- This write barrier is to ensure that the heap writes to the object
-- referred to by val have happened before we write val into the array.
-- See #12469 for details.
- emitPrimCall [] MO_WriteBarrier []
+ emitPrimCall [] MO_ReleaseFence []
mkBasicIndexedWrite hdr_size addr ty idx val
emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
@@ -3048,7 +3048,7 @@ doWriteSmallPtrArrayOp addr idx val = do
mkBasicIndexedRead NaturallyAligned (smallArrPtrsHdrSize profile) Nothing ty tmp addr ty idx
whenUpdRemSetEnabled $ emitUpdRemSetPush (CmmReg (CmmLocal tmp))
- emitPrimCall [] MO_WriteBarrier [] -- #12469
+ emitPrimCall [] MO_ReleaseFence [] -- #12469
mkBasicIndexedWrite (smallArrPtrsHdrSize profile) addr ty idx val
emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
=====================================
rts/CloneStack.c
=====================================
@@ -74,9 +74,7 @@ void sendCloneStackMessage(StgTSO *tso, HsStablePtr mvar) {
msg = (MessageCloneStack *)allocate(srcCapability, sizeofW(MessageCloneStack));
msg->tso = tso;
msg->result = (StgMVar*)deRefStablePtr(mvar);
- SET_HDR(msg, &stg_MSG_CLONE_STACK_info, CCS_SYSTEM);
- // Ensure that writes constructing Message are committed before sending.
- write_barrier();
+ SET_HDR_RELEASE(msg, &stg_MSG_CLONE_STACK_info, CCS_SYSTEM);
sendMessage(srcCapability, tso->cap, (Message *)msg);
}
=====================================
rts/PrimOps.cmm
=====================================
@@ -2505,8 +2505,8 @@ stg_unpackClosurezh ( P_ closure )
{
W_ info, ptrs, nptrs, p, ptrs_arr, dat_arr;
MAYBE_GC_P(stg_unpackClosurezh, closure);
- info = %GET_STD_INFO(UNTAG(closure));
- prim_read_barrier;
+ info = GET_INFO_ACQUIRE(UNTAG(closure));
+ info = %STD_INFO(info);
ptrs = TO_W_(%INFO_PTRS(info));
nptrs = TO_W_(%INFO_NPTRS(info));
@@ -2820,8 +2820,7 @@ stg_noDuplicatezh /* no arg list: explicit stack layout */
stg_getApStackValzh ( P_ ap_stack, W_ offset )
{
W_ ap_stackinfo;
- ap_stackinfo = %INFO_PTR(UNTAG(ap_stack));
- prim_read_barrier;
+ ap_stackinfo = GET_INFO_ACQUIRE(UNTAG(ap_stack));
if (ap_stackinfo == stg_AP_STACK_info) {
return (1,StgAP_STACK_payload(ap_stack,offset));
} else {
=====================================
rts/RaiseAsync.c
=====================================
@@ -238,7 +238,7 @@ throwToMsg (Capability *cap, MessageThrowTo *msg)
goto check_target;
retry:
- write_barrier();
+ RELEASE_FENCE();
debugTrace(DEBUG_sched, "throwTo: retrying...");
check_target:
@@ -874,9 +874,10 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
ap->payload[i] = (StgClosure *)*sp++;
}
- write_barrier(); // XXX: Necessary?
SET_HDR(ap,&stg_AP_STACK_info,
((StgClosure *)frame)->header.prof.ccs /* ToDo */);
+ // N.B. This will be made visible by updateThunk below, which
+ // implies a release memory barrier.
TICK_ALLOC_UP_THK(AP_STACK_sizeW(words),0);
//IF_DEBUG(scheduler,
=====================================
rts/RtsSymbols.c
=====================================
@@ -929,9 +929,6 @@ extern char **environ;
SymI_HasProto(hs_spt_remove) \
SymI_HasProto(hs_spt_keys) \
SymI_HasProto(hs_spt_key_count) \
- SymI_HasProto(write_barrier) \
- SymI_HasProto(store_load_barrier) \
- SymI_HasProto(load_load_barrier) \
SymI_HasProto(cas) \
SymI_HasProto(_assertFail) \
SymI_HasProto(keepCAFs) \
=====================================
rts/Sparks.c
=====================================
@@ -209,8 +209,7 @@ pruneSparkQueue (bool nonmovingMarkFinished, Capability *cap)
cap->spark_stats.fizzled++;
traceEventSparkFizzle(cap);
} else {
- info = RELAXED_LOAD(&spark->header.info);
- load_load_barrier();
+ info = ACQUIRE_LOAD(&spark->header.info);
if (IS_FORWARDING_PTR(info)) {
tmp = (StgClosure*)UN_FORWARDING_PTR(info);
/* if valuable work: shift inside the pool */
=====================================
rts/TopHandler.c
=====================================
@@ -32,8 +32,7 @@ StgTSO *getTopHandlerThread(void) {
// topHandlerPtr was never initialised
return NULL;
}
- const StgInfoTable *info = weak->header.info;
- load_load_barrier();
+ const StgInfoTable *info = ACQUIRE_LOAD(&weak->header.info);
if (info == &stg_WEAK_info) {
StgClosure *key = ((StgWeak*)weak)->key;
=====================================
rts/include/Cmm.h
=====================================
@@ -677,21 +677,18 @@
* explicit ordered accesses to make ordering apparent to TSAN.
*/
-// Memory barriers.
+// Memory barriers
// For discussion of how these are used to fence heap object
// accesses see Note [Heap memory barriers] in SMP.h.
#if defined(THREADED_RTS)
-#define prim_read_barrier prim %read_barrier()
#define prim_write_barrier prim %write_barrier()
// See Note [ThreadSanitizer and fences]
-#define RELEASE_FENCE prim %write_barrier()
-#define ACQUIRE_FENCE prim %read_barrier()
+#define RELEASE_FENCE prim %fence_release();
+#define ACQUIRE_FENCE prim %fence_acquire();
#else
-#define prim_read_barrier /* nothing */
-#define prim_write_barrier /* nothing */
#define RELEASE_FENCE /* nothing */
#define ACQUIRE_FENCE /* nothing */
#endif /* THREADED_RTS */
=====================================
rts/include/Stg.h
=====================================
@@ -392,7 +392,7 @@ external prototype return neither of these types to workaround #11395.
#endif
#include "stg/Prim.h" /* ghc-prim fallbacks */
-#include "stg/SMP.h" // write_barrier() inline is required
+#include "stg/SMP.h"
/* -----------------------------------------------------------------------------
Moving Floats and Doubles
=====================================
rts/include/stg/SMP.h
=====================================
@@ -44,11 +44,6 @@ void arm_atomic_spin_unlock(void);
------------------------------------------------------------------------- */
#if !IN_STG_CODE || IN_STGCRUN
-// We only want the barriers, e.g. write_barrier(), declared in .hc
-// files. Defining the other inline functions here causes type
-// mismatch errors from gcc, because the generated C code is assuming
-// that there are no prototypes in scope.
-
/*
* The atomic exchange operation: xchg(p,w) exchanges the value
* pointed to by p with the value w, returning the old value.
@@ -105,24 +100,6 @@ EXTERN_INLINE void busy_wait_nop(void);
#endif // !IN_STG_CODE
-/*
- * Various kinds of memory barrier.
- * write_barrier: prevents future stores occurring before preceding stores.
- * store_load_barrier: prevents future loads occurring before preceding stores.
- * load_load_barrier: prevents future loads occurring before earlier loads.
- *
- * Reference for these: "The JSR-133 Cookbook for Compiler Writers"
- * http://gee.cs.oswego.edu/dl/jmm/cookbook.html
- *
- * To check whether you got these right, try the test in
- * testsuite/tests/rts/testwsdeque.c
- * This tests the work-stealing deque implementation, which relies on
- * properly working store_load and load_load memory barriers.
- */
-EXTERN_INLINE void write_barrier(void);
-EXTERN_INLINE void store_load_barrier(void);
-EXTERN_INLINE void load_load_barrier(void);
-
/*
* Note [Heap memory barriers]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -354,7 +331,7 @@ EXTERN_INLINE void load_load_barrier(void);
* Exchange the value pointed to by p with w and return the former. This
* function is used to acquire a lock. An acquire memory barrier is sufficient
* for a lock operation because corresponding unlock operation issues a
- * store-store barrier (write_barrier()) immediately before releasing the lock.
+ * store-store barrier (release-store) immediately before releasing the lock.
*/
EXTERN_INLINE StgWord
xchg(StgPtr p, StgWord w)
@@ -463,91 +440,6 @@ busy_wait_nop(void)
#endif // !IN_STG_CODE
-/*
- * We need to tell both the compiler AND the CPU about the barriers.
- * It's no good preventing the CPU from reordering the operations if
- * the compiler has already done so - hence the "memory" restriction
- * on each of the barriers below.
- */
-EXTERN_INLINE void
-write_barrier(void) {
-#if defined(NOSMP)
- return;
-#elif defined(TSAN_ENABLED)
- // RELEASE is a bit stronger than the store-store barrier provided by
- // write_barrier, consequently we only use this case as a conservative
- // approximation when using ThreadSanitizer. See Note [ThreadSanitizer].
- __atomic_thread_fence(__ATOMIC_RELEASE);
-#elif defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
- __asm__ __volatile__ ("" : : : "memory");
-#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
- || defined(powerpc64le_HOST_ARCH)
- __asm__ __volatile__ ("lwsync" : : : "memory");
-#elif defined(s390x_HOST_ARCH)
- __asm__ __volatile__ ("" : : : "memory");
-#elif defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
- __asm__ __volatile__ ("dmb st" : : : "memory");
-#elif defined(riscv64_HOST_ARCH)
- __asm__ __volatile__ ("fence w,w" : : : "memory");
-#elif defined(loongarch64_HOST_ARCH)
- __asm__ __volatile__ ("dbar 0" : : : "memory");
-#else
-#error memory barriers unimplemented on this architecture
-#endif
-}
-
-EXTERN_INLINE void
-store_load_barrier(void) {
-#if defined(NOSMP)
- return;
-#elif defined(i386_HOST_ARCH)
- __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
-#elif defined(x86_64_HOST_ARCH)
- __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
-#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
- || defined(powerpc64le_HOST_ARCH)
- __asm__ __volatile__ ("sync" : : : "memory");
-#elif defined(s390x_HOST_ARCH)
- __asm__ __volatile__ ("bcr 14,0" : : : "memory");
-#elif defined(arm_HOST_ARCH)
- __asm__ __volatile__ ("dmb" : : : "memory");
-#elif defined(aarch64_HOST_ARCH)
- __asm__ __volatile__ ("dmb sy" : : : "memory");
-#elif defined(riscv64_HOST_ARCH)
- __asm__ __volatile__ ("fence w,r" : : : "memory");
-#elif defined(loongarch64_HOST_ARCH)
- __asm__ __volatile__ ("dbar 0" : : : "memory");
-#else
-#error memory barriers unimplemented on this architecture
-#endif
-}
-
-EXTERN_INLINE void
-load_load_barrier(void) {
-#if defined(NOSMP)
- return;
-#elif defined(i386_HOST_ARCH)
- __asm__ __volatile__ ("" : : : "memory");
-#elif defined(x86_64_HOST_ARCH)
- __asm__ __volatile__ ("" : : : "memory");
-#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
- || defined(powerpc64le_HOST_ARCH)
- __asm__ __volatile__ ("lwsync" : : : "memory");
-#elif defined(s390x_HOST_ARCH)
- __asm__ __volatile__ ("" : : : "memory");
-#elif defined(arm_HOST_ARCH)
- __asm__ __volatile__ ("dmb" : : : "memory");
-#elif defined(aarch64_HOST_ARCH)
- __asm__ __volatile__ ("dmb ld" : : : "memory");
-#elif defined(riscv64_HOST_ARCH)
- __asm__ __volatile__ ("fence r,r" : : : "memory");
-#elif defined(loongarch64_HOST_ARCH)
- __asm__ __volatile__ ("dbar 0" : : : "memory");
-#else
-#error memory barriers unimplemented on this architecture
-#endif
-}
-
// Load a pointer from a memory location that might be being modified
// concurrently. This prevents the compiler from optimising away
// multiple loads of the memory location, as it might otherwise do in
@@ -586,13 +478,6 @@ load_load_barrier(void) {
/* ---------------------------------------------------------------------- */
#else /* !THREADED_RTS */
-EXTERN_INLINE void write_barrier(void);
-EXTERN_INLINE void store_load_barrier(void);
-EXTERN_INLINE void load_load_barrier(void);
-EXTERN_INLINE void write_barrier (void) {} /* nothing */
-EXTERN_INLINE void store_load_barrier(void) {} /* nothing */
-EXTERN_INLINE void load_load_barrier (void) {} /* nothing */
-
// Relaxed atomic operations
#define RELAXED_LOAD(ptr) *ptr
#define RELAXED_STORE(ptr,val) *ptr = val
=====================================
rts/sm/Evac.c
=====================================
@@ -1394,8 +1394,7 @@ selector_loop:
// the same selector thunk.
SET_INFO((StgClosure*)p, (StgInfoTable *)info_ptr);
OVERWRITING_CLOSURE((StgClosure*)p);
- SET_INFO((StgClosure*)p, &stg_WHITEHOLE_info);
- write_barrier();
+ SET_INFO_RELEASE((StgClosure*)p, &stg_WHITEHOLE_info);
#if defined(PARALLEL_GC)
abort(); // LDV is incompatible with parallel GC
#endif
=====================================
rts/sm/GC.c
=====================================
@@ -1508,7 +1508,6 @@ waitForGcThreads (Capability *cap, bool idle_cap[])
if (i == me || idle_cap[i]) { continue; }
if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
prodCapability(getCapability(i), cap->running_task);
- write_barrier();
interruptCapability(getCapability(i));
}
}
=====================================
rts/sm/GCAux.c
=====================================
@@ -91,8 +91,8 @@ isAlive(StgClosure *p)
return TAG_CLOSURE(tag,(StgClosure*)UN_FORWARDING_PTR(info));
}
+ info = ACQUIRE_LOAD(&q->header.info);
info = INFO_PTR_TO_STRUCT(info);
- load_load_barrier();
switch (info->type) {
=====================================
rts/sm/MarkWeak.c
=====================================
@@ -457,7 +457,7 @@ static void checkWeakPtrSanity(StgWeak *hd, StgWeak *tl)
* Traverse the capabilities' local new-weak-pointer lists at the beginning of
* GC and move them to the nursery's weak_ptr_list.
*/
-void collectFreshWeakPtrs()
+void collectFreshWeakPtrs( void )
{
uint32_t i;
// move recently allocated weak_ptr_list to the old list as well
=====================================
rts/sm/Sanity.c
=====================================
@@ -355,8 +355,7 @@ checkClosure( const StgClosure* p )
p = UNTAG_CONST_CLOSURE(p);
- info = p->header.info;
- load_load_barrier();
+ info = ACQUIRE_LOAD(&p->header.info);
if (IS_FORWARDING_PTR(info)) {
barf("checkClosure: found EVACUATED closure %d", info->type);
@@ -367,7 +366,6 @@ checkClosure( const StgClosure* p )
#endif
info = INFO_PTR_TO_STRUCT(info);
- load_load_barrier();
switch (info->type) {
@@ -772,8 +770,7 @@ checkSTACK (StgStack *stack)
void
checkTSO(StgTSO *tso)
{
- const StgInfoTable *info = (const StgInfoTable*) tso->_link->header.info;
- load_load_barrier();
+ const StgInfoTable *info = (const StgInfoTable*) ACQUIRE_LOAD(&tso->_link)->header.info;
ASSERT(tso->_link == END_TSO_QUEUE ||
info == &stg_MVAR_TSO_QUEUE_info ||
=====================================
testsuite/tests/rts/testwsdeque.c
=====================================
@@ -34,47 +34,25 @@ void *
myStealWSDeque_ (WSDeque *q, uint32_t n)
{
void * stolen;
- StgWord b,t;
// Can't do this on someone else's spark pool:
// ASSERT_WSDEQUE_INVARIANTS(q);
// NB. these loads must be ordered, otherwise there is a race
// between steal and pop.
- t = q->top;
- load_load_barrier();
- b = q->bottom;
+ StgWord t = ACQUIRE_LOAD(&q->top);
+ SEQ_CST_FENCE();
+ StgWord b = ACQUIRE_LOAD(&q->bottom);
- // NB. b and t are unsigned; we need a signed value for the test
- // below, because it is possible that t > b during a
- // concurrent popWSQueue() operation.
- if ((long)b - (long)t <= 0 ) {
- return NULL; /* already looks empty, abort */
+ void *result = NULL;
+ if (t < b) {
+ /* Non-empty queue */
+ result = RELAXED_LOAD(&q->elements[t % q->size]);
+ if (!cas_top(q, t, t+1)) {
+ return NULL;
+ }
}
- // NB. the load of q->bottom must be ordered before the load of
- // q->elements[t & q-> moduloSize]. See comment "KG:..." below
- // and Ticket #13633.
- load_load_barrier();
- /* now access array, see pushBottom() */
- stolen = q->elements[t & q->moduloSize];
-
- /* now decide whether we have won */
- if ( !(CASTOP(&(q->top),t,t+1)) ) {
- /* lost the race, someone else has changed top in the meantime */
- return NULL;
- } /* else: OK, top has been incremented by the cas call */
-
- // debugBelch("stealWSDeque_: t=%d b=%d\n", t, b);
-
-// Can't do this on someone else's spark pool:
-// ASSERT_WSDEQUE_INVARIANTS(q);
-
- bufs[n] ++;
- if (bufs[n] == BUF) { bufs[n] = 0; }
- last_b[n][bufs[n]] = b;
- last_t[n][bufs[n]] = t;
- last_v[n][bufs[n]] = (StgWord)stolen;
- return stolen;
+ return result;
}
void *
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/b3e1436f968c0c36a27ea0339ee2554970b329fe...bb0ed354b9b05c0774c1e9379823bceb785987ce
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/b3e1436f968c0c36a27ea0339ee2554970b329fe...bb0ed354b9b05c0774c1e9379823bceb785987ce
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230628/e8e3a61c/attachment-0001.html>
More information about the ghc-commits mailing list