[Git][ghc/ghc][wip/tsan/no-barriers] 7 commits: compiler: Introduce MO_{ACQUIRE,RELEASE}_FENCE

Sven Tennie (@supersven) gitlab at gitlab.haskell.org
Sun Jun 11 10:44:20 UTC 2023



Sven Tennie pushed to branch wip/tsan/no-barriers at Glasgow Haskell Compiler / GHC


Commits:
245ef4b7 by Ben Gamari at 2023-06-11T10:43:44+00:00
compiler: Introduce MO_{ACQUIRE,RELEASE}_FENCE

- - - - -
29edbfce by Ben Gamari at 2023-06-11T10:43:59+00:00
compiler: Drop MO_WriteBarrier

rts: Drop write_barrier

This is no longer used.

- - - - -
4daba583 by Ben Gamari at 2023-06-11T10:43:59+00:00
rts: Drop load_store_barrier()

This is no longer used.

- - - - -
5b1a7fa5 by Ben Gamari at 2023-06-11T10:43:59+00:00
rts: Drop last instances of prim_{write,read}_barrier

- - - - -
335c0afe by Ben Gamari at 2023-06-11T10:43:59+00:00
rts: Eliminate remaining uses of load_load_barrier

- - - - -
d4d74e13 by Ben Gamari at 2023-06-11T10:43:59+00:00
compiler: Drop MO_ReadBarrier

- - - - -
c7795e1f by Ben Gamari at 2023-06-11T10:43:59+00:00
rts: Drop load_load_barrier

This is no longer used.

- - - - -


24 changed files:

- compiler/GHC/Cmm/MachOp.hs
- compiler/GHC/Cmm/Parser.y
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
- compiler/GHC/CmmToAsm/PPC/CodeGen.hs
- compiler/GHC/CmmToAsm/Wasm/FromCmm.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToC.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- rts/CloneStack.c
- rts/PrimOps.cmm
- rts/RaiseAsync.c
- rts/RtsSymbols.c
- rts/Sparks.c
- rts/TopHandler.c
- rts/include/Cmm.h
- rts/include/Stg.h
- rts/include/stg/SMP.h
- rts/sm/Evac.c
- rts/sm/GC.c
- rts/sm/GCAux.c
- rts/sm/Sanity.c
- testsuite/tests/rts/testwsdeque.c


Changes:

=====================================
compiler/GHC/Cmm/MachOp.hs
=====================================
@@ -670,7 +670,6 @@ data CallishMachOp
   | MO_SubIntC   Width
   | MO_U_Mul2    Width
 
-  | MO_ReadBarrier
   | MO_WriteBarrier
   | MO_Touch         -- Keep variables live (when using interior pointers)
 
@@ -701,6 +700,9 @@ data CallishMachOp
   | MO_BSwap Width
   | MO_BRev Width
 
+  | MO_AcquireFence
+  | MO_ReleaseFence
+
   -- | Atomic read-modify-write. Arguments are @[dest, n]@.
   | MO_AtomicRMW Width AtomicMachOp
   -- | Atomic read. Arguments are @[addr]@.


=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -1117,7 +1117,8 @@ callishMachOps platform = listToUFM $
         ( "fabs32f", (MO_F32_Fabs,) ),
         ( "sqrt32f", (MO_F32_Sqrt,) ),
 
-        ( "read_barrier", (MO_ReadBarrier,)),
+        ( "acquire_fence", (MO_AcquireFence,)),
+        ( "release_fence", (MO_ReleaseFence,)),
         ( "write_barrier", (MO_WriteBarrier,)),
         ( "memcpy", memcpyLikeTweakArgs MO_Memcpy ),
         ( "memset", memcpyLikeTweakArgs MO_Memset ),


=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -1558,8 +1558,9 @@ genCCall target dest_regs arg_regs bid = do
         MO_U_Mul2     _w -> unsupported mop
 
         -- Memory Ordering
+        MO_AcquireFence     ->  return (unitOL DMBISH, Nothing)
+        MO_ReleaseFence     ->  return (unitOL DMBISH, Nothing)
         -- TODO DMBSY is probably *way* too much!
-        MO_ReadBarrier      ->  return (unitOL DMBSY, Nothing)
         MO_WriteBarrier     ->  return (unitOL DMBSY, Nothing)
         MO_Touch            ->  return (nilOL, Nothing) -- Keep variables live (when using interior pointers)
         -- Prefetch


=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -136,6 +136,7 @@ regUsageOfInstr platform instr = case instr of
 
   -- 8. Synchronization Instructions -------------------------------------------
   DMBSY                    -> usage ([], [])
+  DMBISH                   -> usage ([], [])
 
   -- 9. Floating Point Instructions --------------------------------------------
   FCVT dst src             -> usage (regOp src, regOp dst)
@@ -276,6 +277,7 @@ patchRegsOfInstr instr env = case instr of
 
     -- 8. Synchronization Instructions -----------------------------------------
     DMBSY          -> DMBSY
+    DMBISH         -> DMBISH
 
     -- 9. Floating Point Instructions ------------------------------------------
     FCVT o1 o2     -> FCVT (patchOp o1) (patchOp o2)
@@ -645,6 +647,7 @@ data Instr
 
     -- 8. Synchronization Instructions -----------------------------------------
     | DMBSY
+    | DMBISH
     -- 9. Floating Point Instructions
     -- Float ConVerT
     | FCVT Operand Operand
@@ -724,6 +727,7 @@ instrCon i =
       BL{} -> "BL"
       BCOND{} -> "BCOND"
       DMBSY{} -> "DMBSY"
+      DMBISH{} -> "DMBISH"
       FCVT{} -> "FCVT"
       SCVTF{} -> "SCVTF"
       FCVTZS{} -> "FCVTZS"


=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -530,6 +530,7 @@ pprInstr platform instr = case instr of
 
   -- 8. Synchronization Instructions -------------------------------------------
   DMBSY -> line $ text "\tdmb sy"
+  DMBISH -> line $ text "\tdmb ish"
   -- 9. Floating Point Instructions --------------------------------------------
   FCVT o1 o2 -> op2 (text "\tfcvt") o1 o2
   SCVTF o1 o2 -> op2 (text "\tscvtf") o1 o2


=====================================
compiler/GHC/CmmToAsm/PPC/CodeGen.hs
=====================================
@@ -1126,8 +1126,11 @@ genCCall :: ForeignTarget      -- function to call
          -> [CmmFormal]        -- where to put the result
          -> [CmmActual]        -- arguments (of mixed type)
          -> NatM InstrBlock
-genCCall (PrimTarget MO_ReadBarrier) _ _
+genCCall (PrimTarget MO_AcquireFence) _ _
  = return $ unitOL LWSYNC
+genCCall (PrimTarget MO_ReleaseFence) _ _
+ = return $ unitOL LWSYNC
+
 genCCall (PrimTarget MO_WriteBarrier) _ _
  = return $ unitOL LWSYNC
 
@@ -2094,8 +2097,9 @@ genCCall' config gcp target dest_regs args
                     MO_AddIntC {}    -> unsupported
                     MO_SubIntC {}    -> unsupported
                     MO_U_Mul2 {}     -> unsupported
-                    MO_ReadBarrier   -> unsupported
                     MO_WriteBarrier  -> unsupported
+                    MO_AcquireFence  -> unsupported
+                    MO_ReleaseFence  -> unsupported
                     MO_Touch         -> unsupported
                     MO_Prefetch_Data _ -> unsupported
                 unsupported = panic ("outOfLineCmmOp: " ++ show mop


=====================================
compiler/GHC/CmmToAsm/Wasm/FromCmm.hs
=====================================
@@ -1186,7 +1186,8 @@ lower_CallishMachOp lbl MO_F32_ExpM1 rs xs =
 lower_CallishMachOp lbl MO_F32_Fabs rs xs = lower_CMO_Un_Homo lbl "fabsf" rs xs
 lower_CallishMachOp lbl MO_F32_Sqrt rs xs = lower_CMO_Un_Homo lbl "sqrtf" rs xs
 lower_CallishMachOp lbl (MO_UF_Conv w0) rs xs = lower_MO_UF_Conv lbl w0 rs xs
-lower_CallishMachOp _ MO_ReadBarrier _ _ = pure $ WasmStatements WasmNop
+lower_CallishMachOp _ MO_AcquireFence _ _ = pure $ WasmStatements WasmNop
+lower_CallishMachOp _ MO_ReleaseFence _ _ = pure $ WasmStatements WasmNop
 lower_CallishMachOp _ MO_WriteBarrier _ _ = pure $ WasmStatements WasmNop
 lower_CallishMachOp _ MO_Touch _ _ = pure $ WasmStatements WasmNop
 lower_CallishMachOp _ (MO_Prefetch_Data {}) _ _ = pure $ WasmStatements WasmNop


=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -2160,7 +2160,8 @@ genSimplePrim bid (MO_Memcpy align)    []      [dst,src,n]    = genMemCpy  bid a
 genSimplePrim bid (MO_Memmove align)   []      [dst,src,n]    = genMemMove bid align dst src n
 genSimplePrim bid (MO_Memcmp align)    [res]   [dst,src,n]    = genMemCmp  bid align res dst src n
 genSimplePrim bid (MO_Memset align)    []      [dst,c,n]      = genMemSet  bid align dst c n
-genSimplePrim _   MO_ReadBarrier       []      []             = return nilOL -- barriers compile to no code on x86/x86-64;
+genSimplePrim _   MO_AcquireFence      []      []             = return nilOL -- barriers compile to no code on x86/x86-64;
+genSimplePrim _   MO_ReleaseFence      []      []             = return nilOL -- we keep it this long in order to prevent earlier optimisations.
 genSimplePrim _   MO_WriteBarrier      []      []             = return nilOL -- we keep it this long in order to prevent earlier optimisations.
 genSimplePrim _   MO_Touch             []      [_]            = return nilOL
 genSimplePrim _   (MO_Prefetch_Data n) []      [src]          = genPrefetchData n src


=====================================
compiler/GHC/CmmToC.hs
=====================================
@@ -261,6 +261,11 @@ pprStmt platform stmt =
     CmmUnsafeForeignCall (PrimTarget MO_Touch) _results _args -> empty
     CmmUnsafeForeignCall (PrimTarget (MO_Prefetch_Data _)) _results _args -> empty
 
+    CmmUnsafeForeignCall (PrimTarget MO_ReleaseFence) [] [] ->
+        text "__atomic_thread_fence(__ATOMIC_RELEASE);"
+    CmmUnsafeForeignCall (PrimTarget MO_AcquireFence) [] [] ->
+        text "__atomic_thread_fence(__ATOMIC_ACQUIRE);"
+
     CmmUnsafeForeignCall target@(PrimTarget op) results args ->
         fn_call
       where
@@ -944,7 +949,8 @@ pprCallishMachOp_for_C mop
         MO_F32_ExpM1    -> text "expm1f"
         MO_F32_Sqrt     -> text "sqrtf"
         MO_F32_Fabs     -> text "fabsf"
-        MO_ReadBarrier  -> text "load_load_barrier"
+        MO_AcquireFence -> unsupported
+        MO_ReleaseFence -> unsupported
         MO_WriteBarrier -> text "write_barrier"
         MO_Memcpy _     -> text "__builtin_memcpy"
         MO_Memset _     -> text "__builtin_memset"


=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -191,8 +191,10 @@ genCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> LlvmM StmtData
 
 -- Barriers need to be handled specially as they are implemented as LLVM
 -- intrinsic functions.
-genCall (PrimTarget MO_ReadBarrier) _ _ =
-    barrierUnless [ArchX86, ArchX86_64]
+genCall (PrimTarget MO_AcquireFence) _ _ = runStmtsDecls $
+    statement $ Fence False SyncAcquire
+genCall (PrimTarget MO_ReleaseFence) _ _ = runStmtsDecls $
+    statement $ Fence False SyncRelease
 
 genCall (PrimTarget MO_WriteBarrier) _ _ =
     barrierUnless [ArchX86, ArchX86_64]
@@ -1008,8 +1010,9 @@ cmmPrimOpFunctions mop = do
     -- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the
     -- appropriate case of genCall.
     MO_U_Mul2 {}     -> unsupported
-    MO_ReadBarrier   -> unsupported
     MO_WriteBarrier  -> unsupported
+    MO_ReleaseFence  -> unsupported
+    MO_AcquireFence  -> unsupported
     MO_Touch         -> unsupported
     MO_UF_Conv _     -> unsupported
 


=====================================
rts/CloneStack.c
=====================================
@@ -74,9 +74,7 @@ void sendCloneStackMessage(StgTSO *tso, HsStablePtr mvar) {
   msg = (MessageCloneStack *)allocate(srcCapability, sizeofW(MessageCloneStack));
   msg->tso = tso;
   msg->result = (StgMVar*)deRefStablePtr(mvar);
-  SET_HDR(msg, &stg_MSG_CLONE_STACK_info, CCS_SYSTEM);
-  // Ensure that writes constructing Message are committed before sending.
-  write_barrier();
+  SET_HDR_RELEASE(msg, &stg_MSG_CLONE_STACK_info, CCS_SYSTEM);
 
   sendMessage(srcCapability, tso->cap, (Message *)msg);
 }


=====================================
rts/PrimOps.cmm
=====================================
@@ -2505,8 +2505,8 @@ stg_unpackClosurezh ( P_ closure )
 {
     W_ info, ptrs, nptrs, p, ptrs_arr, dat_arr;
     MAYBE_GC_P(stg_unpackClosurezh, closure);
-    info  = %GET_STD_INFO(UNTAG(closure));
-    prim_read_barrier;
+    info  = GET_INFO_ACQUIRE(UNTAG(closure));
+    info  = INFO_PTR_TO_INFO_TABLE(info);
 
     ptrs  = TO_W_(%INFO_PTRS(info));
     nptrs = TO_W_(%INFO_NPTRS(info));
@@ -2820,8 +2820,7 @@ stg_noDuplicatezh /* no arg list: explicit stack layout */
 stg_getApStackValzh ( P_ ap_stack, W_ offset )
 {
    W_ ap_stackinfo;
-   ap_stackinfo = %INFO_PTR(UNTAG(ap_stack));
-   prim_read_barrier;
+   ap_stackinfo = GET_INFO_ACQUIRE(UNTAG(ap_stack));
    if (ap_stackinfo == stg_AP_STACK_info) {
        return (1,StgAP_STACK_payload(ap_stack,offset));
    } else {


=====================================
rts/RaiseAsync.c
=====================================
@@ -238,7 +238,7 @@ throwToMsg (Capability *cap, MessageThrowTo *msg)
     goto check_target;
 
 retry:
-    write_barrier();
+    RELEASE_FENCE(); // TODO: is this necessary?
     debugTrace(DEBUG_sched, "throwTo: retrying...");
 
 check_target:
@@ -874,9 +874,10 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
                 ap->payload[i] = (StgClosure *)*sp++;
             }
 
-            write_barrier(); // XXX: Necessary?
             SET_HDR(ap,&stg_AP_STACK_info,
                     ((StgClosure *)frame)->header.prof.ccs /* ToDo */);
+            // N.B. This will be made visible by updateThunk below, which
+            // implies a release memory barrier.
             TICK_ALLOC_UP_THK(AP_STACK_sizeW(words),0);
 
             //IF_DEBUG(scheduler,


=====================================
rts/RtsSymbols.c
=====================================
@@ -910,8 +910,6 @@ extern char **environ;
       SymI_HasProto(hs_spt_keys)                                        \
       SymI_HasProto(hs_spt_key_count)                                   \
       SymI_HasProto(write_barrier)                                      \
-      SymI_HasProto(store_load_barrier)                                 \
-      SymI_HasProto(load_load_barrier)                                  \
       SymI_HasProto(cas)                                                \
       SymI_HasProto(_assertFail)                                        \
       SymI_HasProto(keepCAFs)                                           \


=====================================
rts/Sparks.c
=====================================
@@ -209,8 +209,7 @@ pruneSparkQueue (bool nonmovingMarkFinished, Capability *cap)
           cap->spark_stats.fizzled++;
           traceEventSparkFizzle(cap);
       } else {
-          info = RELAXED_LOAD(&spark->header.info);
-          load_load_barrier();
+          info = ACQUIRE_LOAD(&spark->header.info);
           if (IS_FORWARDING_PTR(info)) {
               tmp = (StgClosure*)UN_FORWARDING_PTR(info);
               /* if valuable work: shift inside the pool */


=====================================
rts/TopHandler.c
=====================================
@@ -32,8 +32,7 @@ StgTSO *getTopHandlerThread(void) {
         // topHandlerPtr was never initialised
         return NULL;
     }
-    const StgInfoTable *info = weak->header.info;
-    load_load_barrier();
+    const StgInfoTable *info = ACQUIRE_LOAD(&weak->header.info);
     if (info == &stg_WEAK_info) {
         StgClosure *key = ((StgWeak*)weak)->key;
 


=====================================
rts/include/Cmm.h
=====================================
@@ -681,17 +681,14 @@
 // For discussion of how these are used to fence heap object
 // accesses see Note [Heap memory barriers] in SMP.h.
 #if defined(THREADED_RTS)
-#define prim_read_barrier prim %read_barrier()
 #define prim_write_barrier prim %write_barrier()
 
 // See Note [ThreadSanitizer and fences]
-#define RELEASE_FENCE prim %write_barrier()
-#define ACQUIRE_FENCE prim %read_barrier()
+#define RELEASE_FENCE prim %release_fence()
+#define ACQUIRE_FENCE prim %acquire_fence()
 
 #else
 
-#define prim_read_barrier /* nothing */
-#define prim_write_barrier /* nothing */
 #define RELEASE_FENCE /* nothing */
 #define ACQUIRE_FENCE /* nothing */
 #endif /* THREADED_RTS */


=====================================
rts/include/Stg.h
=====================================
@@ -392,7 +392,7 @@ external prototype return neither of these types to workaround #11395.
 #endif
 
 #include "stg/Prim.h" /* ghc-prim fallbacks */
-#include "stg/SMP.h" // write_barrier() inline is required
+#include "stg/SMP.h"
 
 /* -----------------------------------------------------------------------------
    Moving Floats and Doubles


=====================================
rts/include/stg/SMP.h
=====================================
@@ -108,20 +108,14 @@ EXTERN_INLINE void busy_wait_nop(void);
 /*
  * Various kinds of memory barrier.
  *  write_barrier: prevents future stores occurring before preceding stores.
- *  store_load_barrier: prevents future loads occurring before preceding stores.
- *  load_load_barrier: prevents future loads occurring before earlier loads.
  *
  * Reference for these: "The JSR-133 Cookbook for Compiler Writers"
  * http://gee.cs.oswego.edu/dl/jmm/cookbook.html
  *
  * To check whether you got these right, try the test in
  *   testsuite/tests/rts/testwsdeque.c
- * This tests the work-stealing deque implementation, which relies on
- * properly working store_load and load_load memory barriers.
  */
 EXTERN_INLINE void write_barrier(void);
-EXTERN_INLINE void store_load_barrier(void);
-EXTERN_INLINE void load_load_barrier(void);
 
 /*
  * Note [Heap memory barriers]
@@ -354,7 +348,7 @@ EXTERN_INLINE void load_load_barrier(void);
  * Exchange the value pointed to by p with w and return the former. This
  * function is used to acquire a lock. An acquire memory barrier is sufficient
  * for a lock operation because corresponding unlock operation issues a
- * store-store barrier (write_barrier()) immediately before releasing the lock.
+ * store-store barrier (release-store) immediately before releasing the lock.
  */
 EXTERN_INLINE StgWord
 xchg(StgPtr p, StgWord w)
@@ -496,58 +490,6 @@ write_barrier(void) {
 #endif
 }
 
-EXTERN_INLINE void
-store_load_barrier(void) {
-#if defined(NOSMP)
-    return;
-#elif defined(i386_HOST_ARCH)
-    __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
-#elif defined(x86_64_HOST_ARCH)
-    __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
-#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
-    || defined(powerpc64le_HOST_ARCH)
-    __asm__ __volatile__ ("sync" : : : "memory");
-#elif defined(s390x_HOST_ARCH)
-    __asm__ __volatile__ ("bcr 14,0" : : : "memory");
-#elif defined(arm_HOST_ARCH)
-    __asm__ __volatile__ ("dmb" : : : "memory");
-#elif defined(aarch64_HOST_ARCH)
-    __asm__ __volatile__ ("dmb sy" : : : "memory");
-#elif defined(riscv64_HOST_ARCH)
-    __asm__ __volatile__ ("fence w,r" : : : "memory");
-#elif defined(loongarch64_HOST_ARCH)
-    __asm__ __volatile__ ("dbar 0" : : : "memory");
-#else
-#error memory barriers unimplemented on this architecture
-#endif
-}
-
-EXTERN_INLINE void
-load_load_barrier(void) {
-#if defined(NOSMP)
-    return;
-#elif defined(i386_HOST_ARCH)
-    __asm__ __volatile__ ("" : : : "memory");
-#elif defined(x86_64_HOST_ARCH)
-    __asm__ __volatile__ ("" : : : "memory");
-#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
-    || defined(powerpc64le_HOST_ARCH)
-    __asm__ __volatile__ ("lwsync" : : : "memory");
-#elif defined(s390x_HOST_ARCH)
-    __asm__ __volatile__ ("" : : : "memory");
-#elif defined(arm_HOST_ARCH)
-    __asm__ __volatile__ ("dmb" : : : "memory");
-#elif defined(aarch64_HOST_ARCH)
-    __asm__ __volatile__ ("dmb ld" : : : "memory");
-#elif defined(riscv64_HOST_ARCH)
-    __asm__ __volatile__ ("fence r,r" : : : "memory");
-#elif defined(loongarch64_HOST_ARCH)
-    __asm__ __volatile__ ("dbar 0" : : : "memory");
-#else
-#error memory barriers unimplemented on this architecture
-#endif
-}
-
 // Load a pointer from a memory location that might be being modified
 // concurrently.  This prevents the compiler from optimising away
 // multiple loads of the memory location, as it might otherwise do in
@@ -587,11 +529,7 @@ load_load_barrier(void) {
 #else /* !THREADED_RTS */
 
 EXTERN_INLINE void write_barrier(void);
-EXTERN_INLINE void store_load_barrier(void);
-EXTERN_INLINE void load_load_barrier(void);
 EXTERN_INLINE void write_barrier     () {} /* nothing */
-EXTERN_INLINE void store_load_barrier() {} /* nothing */
-EXTERN_INLINE void load_load_barrier () {} /* nothing */
 
 // Relaxed atomic operations
 #define RELAXED_LOAD(ptr) *ptr


=====================================
rts/sm/Evac.c
=====================================
@@ -1394,8 +1394,7 @@ selector_loop:
                   // the same selector thunk.
                   SET_INFO((StgClosure*)p, (StgInfoTable *)info_ptr);
                   OVERWRITING_CLOSURE((StgClosure*)p);
-                  SET_INFO((StgClosure*)p, &stg_WHITEHOLE_info);
-                  write_barrier();
+                  SET_INFO_RELEASE((StgClosure*)p, &stg_WHITEHOLE_info);
 #if defined(PARALLEL_GC)
                   abort();  // LDV is incompatible with parallel GC
 #endif


=====================================
rts/sm/GC.c
=====================================
@@ -1508,7 +1508,6 @@ waitForGcThreads (Capability *cap, bool idle_cap[])
             if (i == me || idle_cap[i]) { continue; }
             if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
                 prodCapability(getCapability(i), cap->running_task);
-                write_barrier();
                 interruptCapability(getCapability(i));
             }
         }


=====================================
rts/sm/GCAux.c
=====================================
@@ -91,8 +91,8 @@ isAlive(StgClosure *p)
         return TAG_CLOSURE(tag,(StgClosure*)UN_FORWARDING_PTR(info));
     }
 
+    ACQUIRE_FENCE_ON(&q->header.info);
     info = INFO_PTR_TO_STRUCT(info);
-    load_load_barrier();
 
     switch (info->type) {
 


=====================================
rts/sm/Sanity.c
=====================================
@@ -355,8 +355,7 @@ checkClosure( const StgClosure* p )
 
     p = UNTAG_CONST_CLOSURE(p);
 
-    info = p->header.info;
-    load_load_barrier();
+    info = ACQUIRE_LOAD(&p->header.info);
 
     if (IS_FORWARDING_PTR(info)) {
         barf("checkClosure: found EVACUATED closure %d", info->type);
@@ -367,7 +366,6 @@ checkClosure( const StgClosure* p )
 #endif
 
     info = INFO_PTR_TO_STRUCT(info);
-    load_load_barrier();
 
     switch (info->type) {
 
@@ -772,8 +770,7 @@ checkSTACK (StgStack *stack)
 void
 checkTSO(StgTSO *tso)
 {
-    const StgInfoTable *info = (const StgInfoTable*) tso->_link->header.info;
-    load_load_barrier();
+    const StgInfoTable *info = (const StgInfoTable*) ACQUIRE_LOAD(&tso->_link)->header.info;
 
     ASSERT(tso->_link == END_TSO_QUEUE ||
            info == &stg_MVAR_TSO_QUEUE_info ||


=====================================
testsuite/tests/rts/testwsdeque.c
=====================================
@@ -34,47 +34,25 @@ void *
 myStealWSDeque_ (WSDeque *q, uint32_t n)
 {
     void * stolen;
-    StgWord b,t; 
     
 // Can't do this on someone else's spark pool:
 // ASSERT_WSDEQUE_INVARIANTS(q); 
     
     // NB. these loads must be ordered, otherwise there is a race
     // between steal and pop.
-    t = q->top;
-    load_load_barrier();
-    b = q->bottom;
+    StgWord t = ACQUIRE_LOAD(&q->top);
+    SEQ_CST_FENCE();
+    StgWord b = ACQUIRE_LOAD(&q->bottom);
     
-    // NB. b and t are unsigned; we need a signed value for the test
-    // below, because it is possible that t > b during a
-    // concurrent popWSQueue() operation.
-    if ((long)b - (long)t <= 0 ) { 
-        return NULL; /* already looks empty, abort */
+    void *result = NULL;
+    if (t < b) {
+        /* Non-empty queue */
+        result = RELAXED_LOAD(&q->elements[t % q->size]);
+        if (!cas_top(q, t, t+1)) {
+            return NULL;
+        }
     }
-    // NB. the load of q->bottom must be ordered before the load of
-    // q->elements[t & q-> moduloSize]. See comment "KG:..." below
-    // and Ticket #13633.
-    load_load_barrier();
-    /* now access array, see pushBottom() */
-    stolen = q->elements[t & q->moduloSize];
-    
-    /* now decide whether we have won */
-    if ( !(CASTOP(&(q->top),t,t+1)) ) {
-        /* lost the race, someone else has changed top in the meantime */
-        return NULL;
-    }  /* else: OK, top has been incremented by the cas call */
-
-    // debugBelch("stealWSDeque_: t=%d b=%d\n", t, b);
-
-// Can't do this on someone else's spark pool:
-// ASSERT_WSDEQUE_INVARIANTS(q); 
-
-    bufs[n] ++;
-    if (bufs[n] == BUF) { bufs[n] = 0; }
-    last_b[n][bufs[n]] = b;
-    last_t[n][bufs[n]] = t;
-    last_v[n][bufs[n]] = (StgWord)stolen;
-    return stolen;
+    return result;
 }
 
 void *



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f96a528ffe6242771ce4725b7979a750b6b38279...c7795e1f7b1329fc82d731a48cddd0b6f0900c6b

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f96a528ffe6242771ce4725b7979a750b6b38279...c7795e1f7b1329fc82d731a48cddd0b6f0900c6b
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230611/b58db897/attachment-0001.html>


More information about the ghc-commits mailing list