[Git][ghc/ghc][wip/tsan/codegen] 6 commits: nonmoving: Refactor update remembered set initialization

Ben Gamari (@bgamari) gitlab at gitlab.haskell.org
Wed Nov 16 23:51:37 UTC 2022



Ben Gamari pushed to branch wip/tsan/codegen at Glasgow Haskell Compiler / GHC


Commits:
03ab2abe by Ben Gamari at 2022-11-16T18:51:28-05:00
nonmoving: Refactor update remembered set initialization

This avoids a lock inversion between the storage manager mutex and
the stable pointer table mutex by not dropping the SM_MUTEX in
nonmovingCollect. This requires quite a bit of rejiggering but it
does seem like a better strategy.
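
(A minimal C sketch of the kind of lock inversion the refactoring avoids, assuming pthreads and hypothetical stand-ins for SM_MUTEX and the stable pointer table mutex; illustrative only, not the RTS code:)

    #include <pthread.h>

    static pthread_mutex_t sm_mutex  = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t spt_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Thread A acquires SM then SPT... */
    static void *thread_a(void *arg)
    {
        (void) arg;
        pthread_mutex_lock(&sm_mutex);
        pthread_mutex_lock(&spt_mutex);   /* order: SM -> SPT */
        pthread_mutex_unlock(&spt_mutex);
        pthread_mutex_unlock(&sm_mutex);
        return NULL;
    }

    /* ...while thread B acquires SPT then SM. If both get their first
     * lock, each waits forever on the other's: a classic inversion.
     * Holding SM_MUTEX throughout nonmovingCollect rules this out by
     * keeping the acquisition order consistent. */
    static void *thread_b(void *arg)
    {
        (void) arg;
        pthread_mutex_lock(&spt_mutex);
        pthread_mutex_lock(&sm_mutex);    /* order: SPT -> SM */
        pthread_mutex_unlock(&sm_mutex);
        pthread_mutex_unlock(&spt_mutex);
        return NULL;
    }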

- - - - -
5720bb58 by Ben Gamari at 2022-11-16T18:51:28-05:00
nonmoving: Ensure that we aren't holding locks when closing them

TSAN complains about this sort of thing.
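
(A hedged sketch of the hazard, with a hypothetical pthread mutex standing in for the RTS lock macros; destroying a still-held mutex is undefined behaviour, which is what TSAN reports:)

    #include <pthread.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

    void shutdown_wrong(void)
    {
        pthread_mutex_lock(&m);
        pthread_mutex_destroy(&m);  /* undefined: mutex is still locked */
    }

    void shutdown_right(void)
    {
        /* Briefly take and release the lock so we know no other thread
         * still holds it, then destroy it unheld. This mirrors the
         * ACQUIRE_LOCK/RELEASE_LOCK pair added to nonmovingExit in the
         * diff below. */
        pthread_mutex_lock(&m);
        pthread_mutex_unlock(&m);
        pthread_mutex_destroy(&m);
    }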

- - - - -
39e9e430 by Ben Gamari at 2022-11-16T18:51:28-05:00
nonmoving: Make bitmap accesses atomic

This is a benign race on any sensible hardware since these are byte
accesses. Nevertheless, atomic accesses are necessary to satisfy
TSAN.
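
(A sketch of the same pattern in C11 atomics; the RTS uses its own RELAXED_LOAD/RELAXED_STORE macros instead, and the names below are hypothetical:)

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdint.h>

    static _Atomic uint8_t bitmap[64];

    /* Marker: relaxed atomic store of the current epoch byte. */
    static void set_mark(size_t i, uint8_t epoch)
    {
        atomic_store_explicit(&bitmap[i], epoch, memory_order_relaxed);
    }

    /* Reader: relaxed atomic load. On common hardware a plain byte
     * access would behave the same, but the atomic form tells TSAN
     * (and the compiler) that the race is intentional. */
    static uint8_t get_mark(size_t i)
    {
        return atomic_load_explicit(&bitmap[i], memory_order_relaxed);
    }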

- - - - -
35505cdf by Ben Gamari at 2022-11-16T18:51:28-05:00
nonmoving: Fix benign race in update remembered set check

Relaxed load is fine here since we will take the lock before looking at
the list.
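
(A sketch of the check-then-lock pattern in C11 atomics, with hypothetical names; the relaxed load only decides whether to take the lock, and the lock acquisition itself orders the real accesses to the list:)

    #include <pthread.h>
    #include <stdatomic.h>

    struct block { struct block *link; };

    static _Atomic(struct block *) list_head;
    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    void drain_if_nonempty(void)
    {
        /* Benign race: a stale answer at worst skips or retries a
         * drain; correctness comes from re-reading under the lock. */
        if (atomic_load_explicit(&list_head, memory_order_relaxed) != NULL) {
            pthread_mutex_lock(&list_lock);
            struct block *b =
                atomic_load_explicit(&list_head, memory_order_relaxed);
            /* ... consume the list starting at b ... */
            (void) b;
            pthread_mutex_unlock(&list_lock);
        }
    }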

- - - - -
e0cbbbb9 by Ben Gamari at 2022-11-16T18:51:28-05:00
nonmoving: Fix race in shortcutting

We must use an acquire load to read the info table pointer since if we
find an indirection we must be certain that we see the indirectee.
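
(A sketch of the release/acquire pairing in C11 atomics, with hypothetical types and names; the writer publishes the indirectee before release-storing the info pointer, so a reader whose acquire load observes the indirection also observes the indirectee:)

    #include <stdatomic.h>

    struct info_table { int type; };
    static const struct info_table ind_info = { 1 /* stands in for IND */ };

    struct closure {
        _Atomic(const struct info_table *) info;
        struct closure *indirectee;
    };

    /* Writer (cf. updateWithIndirection): payload first, then publish. */
    void update_with_indirection(struct closure *c, struct closure *target)
    {
        c->indirectee = target;
        atomic_store_explicit(&c->info, &ind_info, memory_order_release);
    }

    /* Reader (cf. get_itbl_acquire): the acquire load synchronizes with
     * the release store above. */
    const struct info_table *read_info_acquire(const struct closure *c)
    {
        return atomic_load_explicit(&c->info, memory_order_acquire);
    }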

- - - - -
fc09360d by Ben Gamari at 2022-11-16T18:51:28-05:00
nonmoving: Make free list counter accesses atomic

These accesses may race with the allocator(s).
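
(A sketch in C11 atomics with a hypothetical counter; the RTS wraps the equivalent operations in its own macros. Both the allocator-side update and the collector-side read are atomic, so the race is declared rather than undefined:)

    #include <stdatomic.h>

    static atomic_uint n_free;

    /* Allocator side: atomically bump the free-segment count. */
    void note_free_segment(void)
    {
        atomic_fetch_add_explicit(&n_free, 1, memory_order_relaxed);
    }

    /* Collector side: a possibly-stale but well-defined read, enough
     * for a heuristic threshold check like n_free > NONMOVING_MAX_FREE. */
    unsigned read_free_count(void)
    {
        return atomic_load_explicit(&n_free, memory_order_relaxed);
    }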

- - - - -


8 changed files:

- rts/include/rts/storage/ClosureMacros.h
- rts/sm/GC.c
- rts/sm/NonMoving.c
- rts/sm/NonMoving.h
- rts/sm/NonMovingMark.c
- rts/sm/NonMovingMark.h
- rts/sm/NonMovingShortcut.c
- rts/sm/Storage.c


Changes:

=====================================
rts/include/rts/storage/ClosureMacros.h
=====================================
@@ -97,6 +97,12 @@ EXTERN_INLINE const StgInfoTable *get_itbl(const StgClosure *c)
     return INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
 }
 
+EXTERN_INLINE const StgInfoTable *get_itbl_acquire(const StgClosure *c);
+EXTERN_INLINE const StgInfoTable *get_itbl_acquire(const StgClosure *c)
+{
+    return INFO_PTR_TO_STRUCT(ACQUIRE_LOAD(&c->header.info));
+}
+
 EXTERN_INLINE const StgRetInfoTable *get_ret_itbl(const StgClosure *c);
 EXTERN_INLINE const StgRetInfoTable *get_ret_itbl(const StgClosure *c)
 {


=====================================
rts/sm/GC.c
=====================================
@@ -844,11 +844,9 @@ GarbageCollect (uint32_t collect_gen,
   // Flush the update remembered sets. See Note [Eager update remembered set
   // flushing] in NonMovingMark.c
   if (RtsFlags.GcFlags.useNonmoving) {
-      RELEASE_SM_LOCK;
       for (n = 0; n < getNumCapabilities(); n++) {
-          nonmovingAddUpdRemSetBlocks(&getCapability(n)->upd_rem_set.queue);
+          nonmovingAddUpdRemSetBlocks(&getCapability(n)->upd_rem_set);
       }
-      ACQUIRE_SM_LOCK;
   }
 
   // Mark and sweep the oldest generation.
@@ -869,8 +867,6 @@ GarbageCollect (uint32_t collect_gen,
       // old_weak_ptr_list should be empty.
       ASSERT(oldest_gen->old_weak_ptr_list == NULL);
 
-      // we may need to take the lock to allocate mark queue blocks
-      RELEASE_SM_LOCK;
       // dead_weak_ptr_list contains weak pointers with dead keys. Those need to
       // be kept alive because we'll use them in finalizeSchedulers(). Similarly
       // resurrected_threads are also going to be used in resurrectedThreads()
@@ -880,10 +876,9 @@ GarbageCollect (uint32_t collect_gen,
 #if !defined(THREADED_RTS)
       // In the non-threaded runtime this is the only time we push to the
       // upd_rem_set
-      nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
+      nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set);
 #endif
       nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
-      ACQUIRE_SM_LOCK;
   }
 
   // Update the max size of older generations after a major GC:


=====================================
rts/sm/NonMoving.c
=====================================
@@ -523,7 +523,7 @@ static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t log_block
 void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
 {
     // See Note [Live data accounting in nonmoving collector].
-    if (nonmovingHeap.n_free > NONMOVING_MAX_FREE) {
+    if (RELAXED_LOAD(&nonmovingHeap.n_free) > NONMOVING_MAX_FREE) {
         bdescr *bd = Bdescr((StgPtr) seg);
         ACQUIRE_SM_LOCK;
         ASSERT(oldest_gen->n_blocks >= bd->blocks);
@@ -754,7 +754,7 @@ void nonmovingStop(void)
                    "waiting for nonmoving collector thread to terminate");
         ACQUIRE_LOCK(&concurrent_coll_finished_lock);
         waitCondition(&concurrent_coll_finished, &concurrent_coll_finished_lock);
-        ACQUIRE_LOCK(&nonmoving_collection_mutex);
+        RELEASE_LOCK(&concurrent_coll_finished_lock);
     }
 #endif
 }
@@ -767,6 +767,9 @@ void nonmovingExit(void)
     nonmovingStop();
 
 #if defined(THREADED_RTS)
+    ACQUIRE_LOCK(&nonmoving_collection_mutex);
+    RELEASE_LOCK(&nonmoving_collection_mutex);
+
     closeMutex(&concurrent_coll_finished_lock);
     closeCondition(&concurrent_coll_finished);
     closeMutex(&nonmoving_collection_mutex);
@@ -900,7 +903,7 @@ static void nonmovingPrepareMark(void)
 static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_ptr_list)
 {
     for (StgWeak *w = oldest_gen->weak_ptr_list; w; w = w->link) {
-        markQueuePushClosure_(mark_queue, (StgClosure*)w);
+        markQueuePushClosureGC(mark_queue, (StgClosure*)w);
         // Do not mark finalizers and values here, those fields will be marked
         // in `nonmovingMarkDeadWeaks` (for dead weaks) or
         // `nonmovingTidyWeaks` (for live weaks)
@@ -919,8 +922,14 @@ static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_p
     // - So, to be able to traverse `dead_weak_ptr_list` and run finalizers we
     //   need to mark it.
     for (StgWeak *w = dead_weak_ptr_list; w; w = w->link) {
-        markQueuePushClosure_(mark_queue, (StgClosure*)w);
-        nonmovingMarkDeadWeak(mark_queue, w);
+        markQueuePushClosureGC(mark_queue, (StgClosure*)w);
+
+        // Mark the value and finalizer since they will be needed regardless of
+        // whether we find the weak is live.
+        if (w->cfinalizers != &stg_NO_FINALIZER_closure) {
+            markQueuePushClosureGC(mark_queue, w->value);
+        }
+        markQueuePushClosureGC(mark_queue, w->finalizer);
     }
 }
 
@@ -962,7 +971,7 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
 
     // Mark threads resurrected during moving heap scavenging
     for (StgTSO *tso = *resurrected_threads; tso != END_TSO_QUEUE; tso = tso->global_link) {
-        markQueuePushClosure_(mark_queue, (StgClosure*)tso);
+        markQueuePushClosureGC(mark_queue, (StgClosure*)tso);
     }
     trace(TRACE_nonmoving_gc, "Finished marking roots for nonmoving GC");
 


=====================================
rts/sm/NonMoving.h
=====================================
@@ -276,12 +276,12 @@ extern uint8_t nonmovingMarkEpoch;
 
 INLINE_HEADER void nonmovingSetMark(struct NonmovingSegment *seg, nonmoving_block_idx i)
 {
-    seg->bitmap[i] = nonmovingMarkEpoch;
+    RELAXED_STORE(&seg->bitmap[i], nonmovingMarkEpoch);
 }
 
 INLINE_HEADER uint8_t nonmovingGetMark(struct NonmovingSegment *seg, nonmoving_block_idx i)
 {
-    return seg->bitmap[i];
+    return RELAXED_LOAD(&seg->bitmap[i]);
 }
 
 INLINE_HEADER void nonmovingSetClosureMark(StgPtr p)


=====================================
rts/sm/NonMovingMark.c
=====================================
@@ -27,6 +27,8 @@
 #include "sm/Storage.h"
 #include "CNF.h"
 
+static void nonmovingResetUpdRemSetQueue (MarkQueue *rset);
+static void nonmovingResetUpdRemSet (UpdRemSet *rset);
 static bool check_in_nonmoving_heap(StgClosure *p);
 static void mark_closure (MarkQueue *queue, const StgClosure *p, StgClosure **origin);
 static void trace_tso (MarkQueue *queue, StgTSO *tso);
@@ -261,17 +263,9 @@ static uint32_t markQueueLength(MarkQueue *q);
 #endif
 static void init_mark_queue_(MarkQueue *queue);
 
-/* Transfers the given capability's update-remembered set to the global
- * remembered set.
- *
- * Really the argument type should be UpdRemSet* but this would be rather
- * inconvenient without polymorphism.
- */
-void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
+static void nonmovingAddUpdRemSetBlocks_(MarkQueue *rset)
 {
-    if (markQueueIsEmpty(rset)) return;
-
-    // find the tail of the queue
+    // find the tail of the remembered set mark queue
     bdescr *start = rset->blocks;
     bdescr *end = start;
     while (end->link != NULL)
@@ -282,14 +276,45 @@ void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
     end->link = upd_rem_set_block_list;
     upd_rem_set_block_list = start;
     RELEASE_LOCK(&upd_rem_set_lock);
+}
+
+/*
+ * Transfers the given capability's update-remembered set to the global
+ * remembered set.
+ *
+ * Really the argument type should be UpdRemSet* but this would be rather
+ * inconvenient without polymorphism.
+ */
+static void nonmovingAddUpdRemSetBlocks_lock(MarkQueue *rset)
+{
+    if (markQueueIsEmpty(rset)) return;
 
-    // Reset remembered set
+    nonmovingAddUpdRemSetBlocks_(rset);
+    // Reset the state of the remembered set.
     ACQUIRE_SM_LOCK;
     init_mark_queue_(rset);
     rset->is_upd_rem_set = true;
     RELEASE_SM_LOCK;
 }
 
+/*
+ * Transfers the given capability's update-remembered set to the global
+ * remembered set.
+ *
+ * Really the argument type should be UpdRemSet* but this would be rather
+ * inconvenient without polymorphism.
+ *
+ * Caller must hold SM_LOCK.
+ */
+void nonmovingAddUpdRemSetBlocks(UpdRemSet *rset)
+{
+    if (markQueueIsEmpty(&rset->queue)) return;
+
+    nonmovingAddUpdRemSetBlocks_(&rset->queue);
+    init_mark_queue_(&rset->queue);
+    rset->queue.is_upd_rem_set = true;
+}
+
 #if defined(THREADED_RTS)
 /* Called by capabilities to flush their update remembered sets when
  * synchronising with the non-moving collector as it transitions from mark to
@@ -301,7 +326,7 @@ void nonmovingFlushCapUpdRemSetBlocks(Capability *cap)
                "Capability %d flushing update remembered set: %d",
                cap->no, markQueueLength(&cap->upd_rem_set.queue));
     traceConcUpdRemSetFlush(cap);
-    nonmovingAddUpdRemSetBlocks(&cap->upd_rem_set.queue);
+    nonmovingAddUpdRemSetBlocks_lock(&cap->upd_rem_set.queue);
     atomic_inc(&upd_rem_set_flush_count, 1);
     signalCondition(&upd_rem_set_flushed_cond);
     // After this mutation will remain suspended until nonmovingFinishFlush
@@ -399,7 +424,7 @@ void nonmovingFinishFlush(Task *task)
 {
     // See Note [Unintentional marking in resurrectThreads]
     for (uint32_t i = 0; i < getNumCapabilities(); i++) {
-        reset_upd_rem_set(&getCapability(i)->upd_rem_set);
+        nonmovingResetUpdRemSet(&getCapability(i)->upd_rem_set);
     }
     // Also reset upd_rem_set_block_list in case some of the UpdRemSets were
     // filled and we flushed them.
@@ -424,7 +449,8 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
     if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
         // Yes, this block is full.
         if (q->is_upd_rem_set) {
-            nonmovingAddUpdRemSetBlocks(q);
+            // Flush the block to the global update remembered set
+            nonmovingAddUpdRemSetBlocks_lock(q);
         } else {
             // allocate a fresh block.
             ACQUIRE_SM_LOCK;
@@ -780,7 +806,7 @@ void markQueuePushClosure (MarkQueue *q,
 /* TODO: Do we really never want to specify the origin here? */
 void markQueueAddRoot (MarkQueue* q, StgClosure** root)
 {
-    markQueuePushClosure(q, *root, NULL);
+    markQueuePushClosureGC(q, *root);
 }
 
 /* Push a closure to the mark queue without origin information */
@@ -908,18 +934,24 @@ void initMarkQueue (MarkQueue *queue)
 }
 
 /* Must hold sm_mutex. */
-void init_upd_rem_set (UpdRemSet *rset)
+void nonmovingInitUpdRemSet (UpdRemSet *rset)
 {
     init_mark_queue_(&rset->queue);
     rset->queue.is_upd_rem_set = true;
 }
 
-void reset_upd_rem_set (UpdRemSet *rset)
+static void nonmovingResetUpdRemSetQueue (MarkQueue *rset)
 {
     // UpdRemSets always have one block for the mark queue. This assertion is to
     // update this code if we change that.
-    ASSERT(rset->queue.blocks->link == NULL);
-    rset->queue.top->head = 0;
+    ASSERT(rset->is_upd_rem_set);
+    ASSERT(rset->blocks->link == NULL);
+    rset->top->head = 0;
+}
+
+void nonmovingResetUpdRemSet (UpdRemSet *rset)
+{
+    nonmovingResetUpdRemSetQueue(&rset->queue);
 }
 
 void freeMarkQueue (MarkQueue *queue)
@@ -1742,7 +1774,9 @@ nonmovingMark (MarkQueue *queue)
         }
         case NULL_ENTRY:
             // Perhaps the update remembered set has more to mark...
-            if (upd_rem_set_block_list) {
+            // N.B. This must be atomic since we have not yet taken
+            // upd_rem_set_lock.
+            if (RELAXED_LOAD(&upd_rem_set_block_list) != NULL) {
                 ACQUIRE_LOCK(&upd_rem_set_lock);
                 bdescr *old = queue->blocks;
                 queue->blocks = upd_rem_set_block_list;


=====================================
rts/sm/NonMovingMark.h
=====================================
@@ -137,8 +137,7 @@ extern bdescr *upd_rem_set_block_list;
 
 void nonmovingMarkInitUpdRemSet(void);
 
-void init_upd_rem_set(UpdRemSet *rset);
-void reset_upd_rem_set(UpdRemSet *rset);
+void nonmovingInitUpdRemSet(UpdRemSet *rset);
 void updateRemembSetPushClosure(Capability *cap, StgClosure *p);
 void updateRemembSetPushThunk(Capability *cap, StgThunk *p);
 void updateRemembSetPushTSO(Capability *cap, StgTSO *tso);
@@ -165,7 +164,7 @@ void nonmovingResurrectThreads(struct MarkQueue_ *queue, StgTSO **resurrected_th
 bool nonmovingIsAlive(StgClosure *p);
 void nonmovingMarkDeadWeak(struct MarkQueue_ *queue, StgWeak *w);
 void nonmovingMarkLiveWeak(struct MarkQueue_ *queue, StgWeak *w);
-void nonmovingAddUpdRemSetBlocks(struct MarkQueue_ *rset);
+void nonmovingAddUpdRemSetBlocks(UpdRemSet *rset);
 
 void markQueuePush(MarkQueue *q, const MarkQueueEnt *ent);
 void markQueuePushClosureGC(MarkQueue *q, StgClosure *p);


=====================================
rts/sm/NonMovingShortcut.c
=====================================
@@ -153,7 +153,8 @@ selectee_changed:
     // Selectee is a non-moving object, mark it.
     markQueuePushClosure(queue, selectee, NULL);
 
-    const StgInfoTable *selectee_info_tbl = get_itbl(selectee);
+    // This may synchronize with the release in updateWithIndirection.
+    const StgInfoTable *selectee_info_tbl = get_itbl_acquire(selectee);
     switch (selectee_info_tbl->type) {
         case WHITEHOLE: {
             // Probably a loop. Abort.


=====================================
rts/sm/Storage.c
=====================================
@@ -325,7 +325,7 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
     if (RtsFlags.GcFlags.useNonmoving) {
         nonmovingAddCapabilities(to);
         for (i = 0; i < to; ++i) {
-            init_upd_rem_set(&getCapability(i)->upd_rem_set);
+            nonmovingInitUpdRemSet(&getCapability(i)->upd_rem_set);
         }
     }
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/c1ec5e7c4d00ce4008a79a25cad5d2cf85f35271...fc09360da6ba7c10661ece37eeffa2531295e77d
