[commit: ghc] wip/T15449: rts: Ensure thunk updates are safe on non-TSO platforms (df9aa86)

git at git.haskell.org
Tue Feb 12 07:00:59 UTC 2019


Repository : ssh://git@git.haskell.org/ghc

On branch  : wip/T15449
Link       : http://ghc.haskell.org/trac/ghc/changeset/df9aa8651b054bcf869fd7d7d5dfe76c5c5b031f/ghc

>---------------------------------------------------------------

commit df9aa8651b054bcf869fd7d7d5dfe76c5c5b031f
Author: Ben Gamari <ben at smart-cactus.org>
Date:   Sun Feb 10 12:49:51 2019 -0500

    rts: Ensure thunk updates are safe on non-TSO platforms
    
    Previously we wouldn't bother to initialize the `indirectee` field of a thunk
    during construction. However, on architectures with weak memory ordering this
    can result in unsoundness unless we add an expensive second write barrier to
    `updateWithIndirection`.
    
    To see how this happens, consider a thunk X and two threads. Say thread 1
    evaluates X. When thread 1 finishes evaluation it will call
    `updateWithIndirection` to replace X with an indirection to the result, Y.
    To a first approximation, `updateWithIndirection` does the following:
    
        void updateWithIndirection (Capability *cap, StgClosure *p1, StgClosure *p2)
        {
            write_barrier();
            ((StgInd *)p1)->indirectee = p2;
            SET_INFO(p1, &stg_BLACKHOLE_info);
        }
    
    The write barrier ensures that the writes constructing the result Y are made
    visible to other cores before Y is introduced as the indirectee. We then set
    the `indirectee` field, followed by the info table pointer, but we impose no
    ordering relationship on these two writes. On a machine with a weak memory
    model, another core may therefore observe an indirection with
    `p->info == &stg_BLACKHOLE_info` before the write to `indirectee` is
    visible, and consequently read a garbage indirectee.
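
    To make this concrete, consider the following interleaving (an
    illustration only, not code from this patch):

        // Thread 1 (updater)                // Thread 2 (consumer)
        write_barrier();
        ((StgInd *)p1)->indirectee = p2;     // (A)
        SET_INFO(p1, &stg_BLACKHOLE_info);   // (B)
                                             // nothing orders (A) before (B),
                                             // so (B) may become visible first:
                                             info = GET_INFO(p1);
                                             // info == &stg_BLACKHOLE_info,
                                             // yet p1->indirectee may still
                                             // hold its uninitialized value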
    
    One solution to this would be to add another `write_barrier` between these two
    writes. However, write barriers are expensive.
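
    Concretely, that rejected variant would look something like this sketch:

        void updateWithIndirection (Capability *cap, StgClosure *p1, StgClosure *p2)
        {
            write_barrier();                    // writes building Y before (A)
            ((StgInd *)p1)->indirectee = p2;    // (A)
            write_barrier();                    // order (A) before (B): the
                                                // extra cost we want to avoid
            SET_INFO(p1, &stg_BLACKHOLE_info);  // (B)
        }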
    
    Instead of adding more write barriers, we take care to initialize the
    `indirectee` field with a known value (a non-enterable closure,
    `stg_NO_INDIRECTEE_closure`) on architectures that don't have total store
    ordering. The indirection entry code can then check for this value and loop
    until the true indirectee becomes visible.
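
    In C-like pseudocode the scheme is roughly the following (a sketch only;
    the helper name `enter_indirection` is hypothetical, standing in for the
    Cmm entry code in this patch):

        // At thunk allocation (non-TSO platforms only): mark the
        // indirectee as "not written yet".
        ((StgInd *)thunk)->indirectee = (StgClosure *)&stg_NO_INDIRECTEE_closure;

        // In the indirection entry code: spin until the updater's
        // indirectee write becomes visible.
        StgClosure *enter_indirection (StgInd *p)
        {
            StgClosure *q;
            do {
                q = UNTAG_CLOSURE(p->indirectee);
            } while (q == (StgClosure *)&stg_NO_INDIRECTEE_closure);
            return q;
        }

    The loop can only spin transiently: the updater has already issued the
    store of the real indirectee, so the loop exits as soon as that store
    propagates to the reading core.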
    
    This incurs two costs:
    
     * an additional write during thunk allocation. However, given that we have
       to touch the cache line anyway, this should have negligible performance
       impact: the write goes straight to the store buffer.
    
     * an additional branch in the indirection closures' entry code. However,
       indirections are eventually short-circuited out of existence by the
       garbage collector anyway, so we should be able to avoid this cost much
       of the time.


>---------------------------------------------------------------

df9aa8651b054bcf869fd7d7d5dfe76c5c5b031f
 compiler/cmm/CLabel.hs         |  2 ++
 compiler/codeGen/StgCmmHeap.hs |  7 +++++--
 includes/stg/SMP.h             |  7 +++++++
 rts/StgMiscClosures.cmm        | 43 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs
index 40b4e70..ef3a567 100644
--- a/compiler/cmm/CLabel.hs
+++ b/compiler/cmm/CLabel.hs
@@ -54,6 +54,7 @@ module CLabel (
         mkBadAlignmentLabel,
         mkArrWords_infoLabel,
         mkSRTInfoLabel,
+        mkNO_INDIRECTEELabel,
 
         mkTopTickyCtrLabel,
         mkCAFBlackHoleInfoTableLabel,
@@ -511,6 +512,7 @@ mkSMAP_FROZEN_CLEAN_infoLabel   = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_P
 mkSMAP_FROZEN_DIRTY_infoLabel   = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo
 mkSMAP_DIRTY_infoLabel          = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo
 mkBadAlignmentLabel             = CmmLabel rtsUnitId (fsLit "stg_badAlignment")      CmmEntry
+mkNO_INDIRECTEELabel            = CmmLabel rtsUnitId (fsLit "stg_NO_INDIRECTEE")   CmmClosure
 
 mkSRTInfoLabel :: Int -> CLabel
 mkSRTInfoLabel n = CmmLabel rtsUnitId lbl CmmInfo
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 3b170eb..2ea0831 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -144,9 +144,12 @@ emitSetDynHdr base info_ptr ccs
        hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
   where
     header :: DynFlags -> [CmmExpr]
-    header dflags = [info_ptr] ++ dynProfHdr dflags ccs
-        -- ToDo: Parallel stuff
+    header dflags = [info_ptr] ++ dynProfHdr dflags ccs ++ indirectee
         -- No ticky header
+    indirectee
+      -- TODO: only do this on platforms lacking total store ordering
+      | True = [CmmLit (CmmLabel mkNO_INDIRECTEELabel)]
+      | otherwise = []
 
 -- Store the item (expr,off) in base[off]
 hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
diff --git a/includes/stg/SMP.h b/includes/stg/SMP.h
index 4020aef..9fc5389 100644
--- a/includes/stg/SMP.h
+++ b/includes/stg/SMP.h
@@ -18,6 +18,13 @@ void arm_atomic_spin_lock(void);
 void arm_atomic_spin_unlock(void);
 #endif
 
+/* Do one core's stores become visible to other cores in program order? */
+#if !defined(THREADED_RTS) || defined(x86_64_HOST_ARCH) || defined(i386_HOST_ARCH)
+#define ARCH_TOTAL_STORE_ORDER 1
+#else
+#define ARCH_TOTAL_STORE_ORDER 0
+#endif
+
 #if defined(THREADED_RTS)
 
 /* ----------------------------------------------------------------------------
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index fdd9f15..44746c5 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -242,7 +242,14 @@ INFO_TABLE(stg_IND,1,0,IND,"IND","IND")
     (P_ node)
 {
     TICK_ENT_DYN_IND(); /* tick */
+    P_ q; q = node;
+retry:
     node = UNTAG(StgInd_indirectee(node));
+#if !ARCH_TOTAL_STORE_ORDER
+    // See Note [Write barrier on thunk updates]: the updater's indirectee
+    // write may not be visible yet; restore the original pointer and spin.
+    if (node == stg_NO_INDIRECTEE_closure) { node = q; goto retry; }
+#endif
     TICK_ENT_VIA_NODE();
     jump %GET_ENTRY(node) (node);
 }
@@ -250,7 +257,14 @@ INFO_TABLE(stg_IND,1,0,IND,"IND","IND")
     /* explicit stack */
 {
     TICK_ENT_DYN_IND(); /* tick */
+    P_ q; q = R1;
+retry:
     R1 = UNTAG(StgInd_indirectee(R1));
+#if !ARCH_TOTAL_STORE_ORDER
+    // See Note [Write barrier on thunk updates]: the updater's indirectee
+    // write may not be visible yet; restore the original pointer and spin.
+    if (R1 == stg_NO_INDIRECTEE_closure) { R1 = q; goto retry; }
+#endif
     TICK_ENT_VIA_NODE();
     jump %GET_ENTRY(R1) [R1];
 }
@@ -260,7 +274,14 @@ INFO_TABLE(stg_IND_direct,1,0,IND,"IND","IND")
     (P_ node)
 {
     TICK_ENT_DYN_IND(); /* tick */
+    P_ q; q = node;
+retry:
     node = StgInd_indirectee(node);
+#if !ARCH_TOTAL_STORE_ORDER
+    // See Note [Write barrier on thunk updates]: the updater's indirectee
+    // write may not be visible yet; restore the original pointer and spin.
+    if (node == stg_NO_INDIRECTEE_closure) { node = q; goto retry; }
+#endif
     TICK_ENT_VIA_NODE();
     jump %ENTRY_CODE(Sp(0)) (node);
 }
@@ -269,7 +290,14 @@ INFO_TABLE(stg_IND_STATIC,1,0,IND_STATIC,"IND_STATIC","IND_STATIC")
     /* explicit stack */
 {
     TICK_ENT_STATIC_IND(); /* tick */
+    P_ q; q = R1;
+retry:
     R1 = UNTAG(StgInd_indirectee(R1));
+#if !ARCH_TOTAL_STORE_ORDER
+    // See Note [Write barrier on thunk updates]: the updater's indirectee
+    // write may not be visible yet; restore the original pointer and spin.
+    if (R1 == stg_NO_INDIRECTEE_closure) { R1 = q; goto retry; }
+#endif
     TICK_ENT_VIA_NODE();
     jump %GET_ENTRY(R1) [R1];
 }
@@ -297,7 +325,15 @@ retry:
         return (p);
     }
 
+#if !ARCH_TOTAL_STORE_ORDER
+    if (p == stg_NO_INDIRECTEE_closure) {
+        // See Note [Write barrier on thunk updates]
+        goto retry;
+    }
+#endif
+
     info = StgHeader_info(p);
+
     if (info == stg_IND_info) {
         // This could happen, if e.g. we got a BLOCKING_QUEUE that has
         // just been replaced with an IND by another thread in
@@ -360,6 +396,13 @@ INFO_TABLE(stg_BLOCKING_QUEUE_CLEAN,4,0,BLOCKING_QUEUE,"BLOCKING_QUEUE","BLOCKIN
 INFO_TABLE(stg_BLOCKING_QUEUE_DIRTY,4,0,BLOCKING_QUEUE,"BLOCKING_QUEUE","BLOCKING_QUEUE")
 { foreign "C" barf("BLOCKING_QUEUE_DIRTY object (%p) entered!", R1) never returns; }
 
+// On platforms without total store ordering we initialize the `indirectee`
+// field of a thunk to this value to avoid a second write barrier on update.
+INFO_TABLE_CONSTR(stg_NO_INDIRECTEE,0,0,0,CONSTR_NOCAF,"NO_INDIRECTEE","NO_INDIRECTEE")
+{ foreign "C" barf("NO_INDIRECTEE object (%p) entered!", R1) never returns; }
+
+CLOSURE(stg_NO_INDIRECTEE_closure,stg_NO_INDIRECTEE);
+
 
 /* ----------------------------------------------------------------------------
    Whiteholes are used for the "locked" state of a closure (see lockClosure())


