[Git][ghc/ghc][master] 13 commits: nonmoving: Fix race in marking of blackholes

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Sat Dec 24 00:09:50 UTC 2022



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
18d2acd2 by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Fix race in marking of blackholes

We must use an acquire-fence when marking to ensure that the indirectee
is visible.
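
A minimal sketch (not the RTS code itself) of the acquire/release pairing this
fix relies on, written with C11 atomics instead of the RTS's ACQUIRE_FENCE and
RELAXED_LOAD macros; all names here are illustrative stand-ins:

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct Closure {
        const void *_Atomic info;            // info table pointer
        struct Closure *_Atomic indirectee;
    } Closure;

    static const char blackhole_info_stub;   // stand-in for the BLACKHOLE info table

    // Mutator side: publish the result, then flip the info pointer with
    // release semantics, so any thread that observes the blackhole info
    // table also observes the indirectee written before it.
    void update_blackhole(Closure *bh, Closure *result) {
        atomic_store_explicit(&bh->indirectee, result, memory_order_relaxed);
        atomic_store_explicit(&bh->info, &blackhole_info_stub, memory_order_release);
    }

    // Marker side: the info pointer has already been read (relaxed) to decide
    // that this closure is a blackhole; the acquire fence upgrades that read,
    // so the subsequent relaxed load of the indirectee is guaranteed to see
    // the value published before the release store above.
    Closure *mark_blackhole_indirectee(Closure *bh) {
        const void *info = atomic_load_explicit(&bh->info, memory_order_relaxed);
        if (info != &blackhole_info_stub)
            return NULL;                      // not a blackhole in this sketch
        atomic_thread_fence(memory_order_acquire);
        return atomic_load_explicit(&bh->indirectee, memory_order_relaxed);
    }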

- - - - -
11241efa by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Fix segment list races

- - - - -
602455c9 by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Use atomic when looking at bd->gen

Since it may have been mutated by a moving GC.

- - - - -
9d63b160 by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Eliminate race in bump_static_flag

To ensure that we don't race with a mutator entering a new CAF, we take
the SM mutex before touching static_flag. The other option here would be
to instead modify newCAF to use a CAS, but the present approach is a bit
safer.
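
A hedged sketch of the two approaches the message contrasts, using pthreads
and C11 atomics; STATIC_BITS, static_flag and the function names mirror the
RTS identifiers but the surrounding code is hypothetical:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define STATIC_BITS 0x3u                  // illustrative mask only
    static uintptr_t static_flag = 0x1u;
    static pthread_mutex_t sm_mutex = PTHREAD_MUTEX_INITIALIZER;

    // Mutex-based variant (the approach taken here): easy to reason about
    // because CAF entry also runs under the same storage-manager lock.
    bool bump_static_flag_locked(uintptr_t *link_field) {
        pthread_mutex_lock(&sm_mutex);
        bool needs_marking = (*link_field & STATIC_BITS) != static_flag;
        if (needs_marking)
            *link_field = (*link_field & ~(uintptr_t)STATIC_BITS) | static_flag;
        pthread_mutex_unlock(&sm_mutex);
        return needs_marking;
    }

    // CAS-based alternative mentioned in the message: lock-free, but every
    // other writer of the field would also have to update it atomically.
    bool bump_static_flag_cas(_Atomic uintptr_t *link_field) {
        uintptr_t old = atomic_load_explicit(link_field, memory_order_relaxed);
        while ((old & STATIC_BITS) != static_flag) {
            uintptr_t desired = (old & ~(uintptr_t)STATIC_BITS) | static_flag;
            if (atomic_compare_exchange_weak(link_field, &old, desired))
                return true;                  // we flipped the flag
        }
        return false;                         // someone else already did
    }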

- - - - -
26837523 by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Ensure that mutable fields have acquire barrier

- - - - -
8093264a by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Fix races in collector status tracking

Mark a number of accesses involved in tracking the status of the
concurrent collection thread as atomic. There are no interesting races
here; the changes are merely necessary to satisfy TSAN.

- - - - -
387d4fcc by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Make segment state updates atomic

- - - - -
543cae00 by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Refactor update remembered set initialization

This avoids a lock inversion between the storage manager mutex and
the stable pointer table mutex by not dropping the SM_MUTEX in
nonmovingCollect. This requires quite a bit of rejiggering but it
does seem like a better strategy.
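
A hypothetical illustration of the lock-inversion hazard being avoided: if one
path drops and re-takes the storage manager lock around code that acquires the
stable pointer table lock, while another path nests the two locks the other way
round, the two threads can deadlock. Keeping the SM lock held imposes one
consistent order. The mutex names below are stand-ins, not the RTS symbols:

    #include <pthread.h>

    static pthread_mutex_t sm_mutex  = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t spt_mutex = PTHREAD_MUTEX_INITIALIZER;

    // Consistent ordering: always sm_mutex first, then spt_mutex.
    // Every other code path must nest the locks in the same order.
    void collect_with_consistent_order(void) {
        pthread_mutex_lock(&sm_mutex);
        pthread_mutex_lock(&spt_mutex);
        /* ... walk the stable pointer table as a set of GC roots ... */
        pthread_mutex_unlock(&spt_mutex);
        pthread_mutex_unlock(&sm_mutex);
    }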

- - - - -
c9936718 by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Ensure that we aren't holding locks when closing them

TSAN complains about this sort of thing.
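In the diff below this is done by briefly acquiring and releasing
nonmoving_collection_mutex in nonmovingExit before closing it. A minimal
pthreads sketch of the same pattern (the RTS uses its own ACQUIRE_LOCK and
closeMutex wrappers; these names are illustrative):

    #include <pthread.h>

    static pthread_mutex_t collection_mutex = PTHREAD_MUTEX_INITIALIZER;

    void shutdown_collector(void) {
        // The collector thread releases collection_mutex as its final action;
        // taking and dropping it here synchronizes with that release, so the
        // mutex is guaranteed unheld when it is destroyed.
        pthread_mutex_lock(&collection_mutex);
        pthread_mutex_unlock(&collection_mutex);
        pthread_mutex_destroy(&collection_mutex);
    }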

- - - - -
0cd31f7d by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Make bitmap accesses atomic

This is a benign race on any sensible hardware since these are byte
accesses. Nevertheless, atomic accesses are necessary to satisfy
TSAN.
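
A sketch of relaxed atomic accesses to a byte-granularity mark bitmap. A plain
load or store of a single byte is effectively atomic on common hardware, but
ThreadSanitizer only recognizes explicit atomic operations. The segment size
and names are illustrative:

    #include <stdatomic.h>
    #include <stdint.h>

    #define SEGMENT_BLOCKS 256                      // illustrative size
    static _Atomic uint8_t bitmap[SEGMENT_BLOCKS];  // one mark byte per block

    void set_mark(unsigned i, uint8_t epoch) {
        atomic_store_explicit(&bitmap[i], epoch, memory_order_relaxed);
    }

    uint8_t get_mark(unsigned i) {
        return atomic_load_explicit(&bitmap[i], memory_order_relaxed);
    }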

- - - - -
d3fe110a by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Fix benign race in update remembered set check

A relaxed load is fine here since we will take the lock before looking at
the list.
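
A sketch of the "cheap relaxed peek, authoritative read under the lock"
pattern: the relaxed load may observe a stale value, but that is harmless
because the list is only actually consumed while holding the lock. The names
here are stand-ins for upd_rem_set_block_list and upd_rem_set_lock:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct Block { struct Block *link; } Block;

    static Block *_Atomic pending_blocks;
    static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;

    Block *take_pending_blocks(void) {
        // Relaxed is sufficient: the result of this peek is never dereferenced.
        if (atomic_load_explicit(&pending_blocks, memory_order_relaxed) == NULL)
            return NULL;
        pthread_mutex_lock(&pending_lock);
        Block *blocks = atomic_load_explicit(&pending_blocks, memory_order_relaxed);
        atomic_store_explicit(&pending_blocks, NULL, memory_order_relaxed);
        pthread_mutex_unlock(&pending_lock);
        return blocks;
    }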

- - - - -
ab6cf893 by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Fix race in shortcutting

We must use an acquire load to read the info table pointer since if we
find an indirection we must be certain that we see the indirectee.
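
A sketch of the pattern behind get_itbl_acquire in the diff below: if the
acquire load observes the indirection info table (published by a release
store), acquire ordering guarantees that the indirectee written before that
store is also visible. The struct and info-table stub are illustrative:

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct Obj {
        const void *_Atomic info;
        struct Obj *indirectee;     // written before the release store of info
    } Obj;

    static const char ind_info_stub;   // stand-in for the indirection info table

    Obj *follow_if_indirection(Obj *p) {
        const void *info = atomic_load_explicit(&p->info, memory_order_acquire);
        if (info == &ind_info_stub)
            return p->indirectee;      // safe: ordered after the acquire load
        return p;
    }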

- - - - -
36c9f23c by Ben Gamari at 2022-12-23T19:09:30-05:00
nonmoving: Make free list counter accesses atomic

Since these may race with the allocator(s).
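
A sketch of a free-list length counter shared between the collector and the
allocators: relaxed atomic increments, decrements and loads keep the counter
coherent (and TSAN-clean) without imposing any ordering on surrounding code.
The names are illustrative stand-ins for the RTS's n_free accounting:

    #include <stdatomic.h>

    static _Atomic unsigned n_free;

    void count_free_push(void) { atomic_fetch_add_explicit(&n_free, 1, memory_order_relaxed); }
    void count_free_pop(void)  { atomic_fetch_sub_explicit(&n_free, 1, memory_order_relaxed); }
    unsigned free_count(void)  { return atomic_load_explicit(&n_free, memory_order_relaxed); }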

- - - - -


9 changed files:

- rts/include/rts/storage/ClosureMacros.h
- rts/include/stg/SMP.h
- rts/sm/GC.c
- rts/sm/NonMoving.c
- rts/sm/NonMoving.h
- rts/sm/NonMovingMark.c
- rts/sm/NonMovingMark.h
- rts/sm/NonMovingShortcut.c
- rts/sm/Storage.c


Changes:

=====================================
rts/include/rts/storage/ClosureMacros.h
=====================================
@@ -97,6 +97,12 @@ EXTERN_INLINE const StgInfoTable *get_itbl(const StgClosure *c)
     return INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
 }
 
+EXTERN_INLINE const StgInfoTable *get_itbl_acquire(const StgClosure *c);
+EXTERN_INLINE const StgInfoTable *get_itbl_acquire(const StgClosure *c)
+{
+    return INFO_PTR_TO_STRUCT(ACQUIRE_LOAD(&c->header.info));
+}
+
 EXTERN_INLINE const StgRetInfoTable *get_ret_itbl(const StgClosure *c);
 EXTERN_INLINE const StgRetInfoTable *get_ret_itbl(const StgClosure *c)
 {


=====================================
rts/include/stg/SMP.h
=====================================
@@ -580,6 +580,7 @@ load_load_barrier(void) {
 // These are typically necessary only in very specific cases (e.g. WSDeque)
 // where the ordered operations aren't expressive enough to capture the desired
 // ordering.
+#define ACQUIRE_FENCE() __atomic_thread_fence(__ATOMIC_ACQUIRE)
 #define RELEASE_FENCE() __atomic_thread_fence(__ATOMIC_RELEASE)
 #define SEQ_CST_FENCE() __atomic_thread_fence(__ATOMIC_SEQ_CST)
 
@@ -614,6 +615,7 @@ EXTERN_INLINE void load_load_barrier () {} /* nothing */
 #define SEQ_CST_RELAXED_CAS(p,o,n) cas(p,o,n)
 
 // Fences
+#define ACQUIRE_FENCE()
 #define RELEASE_FENCE()
 #define SEQ_CST_FENCE()
 


=====================================
rts/sm/GC.c
=====================================
@@ -355,7 +355,7 @@ GarbageCollect (uint32_t collect_gen,
   deadlock_detect_gc = deadlock_detect;
 
 #if defined(THREADED_RTS)
-  if (major_gc && RtsFlags.GcFlags.useNonmoving && concurrent_coll_running) {
+  if (major_gc && RtsFlags.GcFlags.useNonmoving && RELAXED_LOAD(&concurrent_coll_running)) {
       /* If there is already a concurrent major collection running then
        * there is no benefit to starting another.
        * TODO: Catch heap-size runaway.
@@ -842,11 +842,9 @@ GarbageCollect (uint32_t collect_gen,
   // Flush the update remembered sets. See Note [Eager update remembered set
   // flushing] in NonMovingMark.c
   if (RtsFlags.GcFlags.useNonmoving) {
-      RELEASE_SM_LOCK;
       for (n = 0; n < getNumCapabilities(); n++) {
-          nonmovingAddUpdRemSetBlocks(&getCapability(n)->upd_rem_set.queue);
+          nonmovingAddUpdRemSetBlocks(&getCapability(n)->upd_rem_set);
       }
-      ACQUIRE_SM_LOCK;
   }
 
   // Mark and sweep the oldest generation.
@@ -867,8 +865,6 @@ GarbageCollect (uint32_t collect_gen,
       // old_weak_ptr_list should be empty.
       ASSERT(oldest_gen->old_weak_ptr_list == NULL);
 
-      // we may need to take the lock to allocate mark queue blocks
-      RELEASE_SM_LOCK;
       // dead_weak_ptr_list contains weak pointers with dead keys. Those need to
       // be kept alive because we'll use them in finalizeSchedulers(). Similarly
       // resurrected_threads are also going to be used in resurrectedThreads()
@@ -878,10 +874,9 @@ GarbageCollect (uint32_t collect_gen,
 #if !defined(THREADED_RTS)
       // In the non-threaded runtime this is the only time we push to the
       // upd_rem_set
-      nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
+      nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set);
 #endif
       nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
-      ACQUIRE_SM_LOCK;
   }
 
   // Update the max size of older generations after a major GC:


=====================================
rts/sm/NonMoving.c
=====================================
@@ -85,6 +85,10 @@ Mutex concurrent_coll_finished_lock;
  *  - A set of *filled* segments, which contain no unallocated blocks and will
  *    be collected during the next major GC cycle
  *
+ * These sets are maintained as atomic singly-linked lists. This is not
+ * susceptible to the ABA problem since we are guaranteed to push a given
+ * segment to a list only once per garbage collection cycle.
+ *
  * Storage for segments is allocated using the block allocator using an aligned
  * group of NONMOVING_SEGMENT_BLOCKS blocks. This makes the task of locating
  * the segment header for a clone a simple matter of bit-masking (as
@@ -518,7 +522,7 @@ static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t log_block
 void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
 {
     // See Note [Live data accounting in nonmoving collector].
-    if (nonmovingHeap.n_free > NONMOVING_MAX_FREE) {
+    if (RELAXED_LOAD(&nonmovingHeap.n_free) > NONMOVING_MAX_FREE) {
         bdescr *bd = Bdescr((StgPtr) seg);
         ACQUIRE_SM_LOCK;
         ASSERT(oldest_gen->n_blocks >= bd->blocks);
@@ -543,7 +547,7 @@ void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
 static struct NonmovingSegment *nonmovingPopFreeSegment(void)
 {
     while (true) {
-        struct NonmovingSegment *seg = nonmovingHeap.free;
+        struct NonmovingSegment *seg = ACQUIRE_LOAD(&nonmovingHeap.free);
         if (seg == NULL) {
             return NULL;
         }
@@ -641,13 +645,15 @@ static bool advance_next_free(struct NonmovingSegment *seg, const unsigned int b
 static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *alloca)
 {
     while (true) {
-        struct NonmovingSegment *seg = alloca->active;
+        // Synchronizes with CAS in nonmovingPushActiveSegment
+        struct NonmovingSegment *seg = ACQUIRE_LOAD(&alloca->active);
         if (seg == NULL) {
             return NULL;
         }
+        struct NonmovingSegment *next = RELAXED_LOAD(&seg->link);
         if (cas((StgVolatilePtr) &alloca->active,
                 (StgWord) seg,
-                (StgWord) seg->link) == (StgWord) seg) {
+                (StgWord) next) == (StgWord) seg) {
             return seg;
         }
     }
@@ -742,11 +748,12 @@ void nonmovingStop(void)
 {
     if (! RtsFlags.GcFlags.useNonmoving) return;
 #if defined(THREADED_RTS)
-    if (mark_thread) {
+    if (RELAXED_LOAD(&mark_thread)) {
         debugTrace(DEBUG_nonmoving_gc,
                    "waiting for nonmoving collector thread to terminate");
         ACQUIRE_LOCK(&concurrent_coll_finished_lock);
         waitCondition(&concurrent_coll_finished, &concurrent_coll_finished_lock);
+        RELEASE_LOCK(&concurrent_coll_finished_lock);
     }
 #endif
 }
@@ -759,6 +766,9 @@ void nonmovingExit(void)
     nonmovingStop();
 
 #if defined(THREADED_RTS)
+    ACQUIRE_LOCK(&nonmoving_collection_mutex);
+    RELEASE_LOCK(&nonmoving_collection_mutex);
+
     closeMutex(&concurrent_coll_finished_lock);
     closeCondition(&concurrent_coll_finished);
     closeMutex(&nonmoving_collection_mutex);
@@ -892,7 +902,7 @@ static void nonmovingPrepareMark(void)
 static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_ptr_list)
 {
     for (StgWeak *w = oldest_gen->weak_ptr_list; w; w = w->link) {
-        markQueuePushClosure_(mark_queue, (StgClosure*)w);
+        markQueuePushClosureGC(mark_queue, (StgClosure*)w);
         // Do not mark finalizers and values here, those fields will be marked
         // in `nonmovingMarkDeadWeaks` (for dead weaks) or
         // `nonmovingTidyWeaks` (for live weaks)
@@ -911,8 +921,14 @@ static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_p
     // - So, to be able to traverse `dead_weak_ptr_list` and run finalizers we
     //   need to mark it.
     for (StgWeak *w = dead_weak_ptr_list; w; w = w->link) {
-        markQueuePushClosure_(mark_queue, (StgClosure*)w);
-        nonmovingMarkDeadWeak(mark_queue, w);
+        markQueuePushClosureGC(mark_queue, (StgClosure*)w);
+
+        // Mark the value and finalizer since they will be needed regardless of
+        // whether we find the weak is live.
+        if (w->cfinalizers != &stg_NO_FINALIZER_closure) {
+            markQueuePushClosureGC(mark_queue, w->value);
+        }
+        markQueuePushClosureGC(mark_queue, w->finalizer);
     }
 }
 
@@ -921,7 +937,7 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
 #if defined(THREADED_RTS)
     // We can't start a new collection until the old one has finished
     // We also don't run in final GC
-    if (concurrent_coll_running || getSchedState() > SCHED_RUNNING) {
+    if (RELAXED_LOAD(&concurrent_coll_running) || getSchedState() > SCHED_RUNNING) {
         return;
     }
 #endif
@@ -953,7 +969,7 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
 
     // Mark threads resurrected during moving heap scavenging
     for (StgTSO *tso = *resurrected_threads; tso != END_TSO_QUEUE; tso = tso->global_link) {
-        markQueuePushClosure_(mark_queue, (StgClosure*)tso);
+        markQueuePushClosureGC(mark_queue, (StgClosure*)tso);
     }
     trace(TRACE_nonmoving_gc, "Finished marking roots for nonmoving GC");
 
@@ -995,13 +1011,15 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
     // a major GC, because that's what we do when exiting scheduler (see
     // exitScheduler()).
     if (getSchedState() == SCHED_RUNNING) {
-        concurrent_coll_running = true;
+        RELAXED_STORE(&concurrent_coll_running, true);
         nonmoving_write_barrier_enabled = true;
         debugTrace(DEBUG_nonmoving_gc, "Starting concurrent mark thread");
-        if (createOSThread(&mark_thread, "non-moving mark thread",
+        OSThreadId thread;
+        if (createOSThread(&thread, "non-moving mark thread",
                            nonmovingConcurrentMark, mark_queue) != 0) {
             barf("nonmovingCollect: failed to spawn mark thread: %s", strerror(errno));
         }
+        RELAXED_STORE(&mark_thread, thread);
     } else {
         nonmovingConcurrentMark(mark_queue);
     }
@@ -1240,12 +1258,12 @@ finish:
     exitMyTask();
 
     // We are done...
-    mark_thread = 0;
+    RELAXED_STORE(&mark_thread, 0);
     stat_endNonmovingGc();
 
     // Signal that the concurrent collection is finished, allowing the next
     // non-moving collection to proceed
-    concurrent_coll_running = false;
+    RELAXED_STORE(&concurrent_coll_running, false);
     signalCondition(&concurrent_coll_finished);
     RELEASE_LOCK(&nonmoving_collection_mutex);
 #endif


=====================================
rts/sm/NonMoving.h
=====================================
@@ -44,7 +44,7 @@ enum NonmovingSegmentState {
     FREE, CURRENT, ACTIVE, FILLED, FILLED_SWEEPING
 };
 
-#define SET_SEGMENT_STATE(seg, st) (seg)->state = (st)
+#define SET_SEGMENT_STATE(seg, st) RELAXED_STORE(&(seg)->state, (st))
 #else
 #define SET_SEGMENT_STATE(_seg,_st)
 #endif
@@ -169,7 +169,7 @@ INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg)
         nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
     SET_SEGMENT_STATE(seg, ACTIVE);
     while (true) {
-        struct NonmovingSegment *current_active = (struct NonmovingSegment*)VOLATILE_LOAD(&alloc->active);
+        struct NonmovingSegment *current_active = RELAXED_LOAD(&alloc->active);
         seg->link = current_active;
         if (cas((StgVolatilePtr) &alloc->active, (StgWord) current_active, (StgWord) seg) == (StgWord) current_active) {
             break;
@@ -184,8 +184,8 @@ INLINE_HEADER void nonmovingPushFilledSegment(struct NonmovingSegment *seg)
         nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
     SET_SEGMENT_STATE(seg, FILLED);
     while (true) {
-        struct NonmovingSegment *current_filled = (struct NonmovingSegment*)VOLATILE_LOAD(&alloc->filled);
-        seg->link = current_filled;
+        struct NonmovingSegment *current_filled = (struct NonmovingSegment*) RELAXED_LOAD(&alloc->filled);
+        RELAXED_STORE(&seg->link, current_filled);
         if (cas((StgVolatilePtr) &alloc->filled, (StgWord) current_filled, (StgWord) seg) == (StgWord) current_filled) {
             break;
         }
@@ -276,12 +276,12 @@ extern uint8_t nonmovingMarkEpoch;
 
 INLINE_HEADER void nonmovingSetMark(struct NonmovingSegment *seg, nonmoving_block_idx i)
 {
-    seg->bitmap[i] = nonmovingMarkEpoch;
+    RELAXED_STORE(&seg->bitmap[i], nonmovingMarkEpoch);
 }
 
 INLINE_HEADER uint8_t nonmovingGetMark(struct NonmovingSegment *seg, nonmoving_block_idx i)
 {
-    return seg->bitmap[i];
+    return RELAXED_LOAD(&seg->bitmap[i]);
 }
 
 INLINE_HEADER void nonmovingSetClosureMark(StgPtr p)


=====================================
rts/sm/NonMovingMark.c
=====================================
@@ -27,6 +27,8 @@
 #include "sm/Storage.h"
 #include "CNF.h"
 
+static void nonmovingResetUpdRemSetQueue (MarkQueue *rset);
+static void nonmovingResetUpdRemSet (UpdRemSet *rset);
 static bool check_in_nonmoving_heap(StgClosure *p);
 static void mark_closure (MarkQueue *queue, const StgClosure *p, StgClosure **origin);
 static void trace_tso (MarkQueue *queue, StgTSO *tso);
@@ -261,17 +263,9 @@ static uint32_t markQueueLength(MarkQueue *q);
 #endif
 static void init_mark_queue_(MarkQueue *queue);
 
-/* Transfers the given capability's update-remembered set to the global
- * remembered set.
- *
- * Really the argument type should be UpdRemSet* but this would be rather
- * inconvenient without polymorphism.
- */
-void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
+static void nonmovingAddUpdRemSetBlocks_(MarkQueue *rset)
 {
-    if (markQueueIsEmpty(rset)) return;
-
-    // find the tail of the queue
+    // find the tail of the remembered set mark queue
     bdescr *start = rset->blocks;
     bdescr *end = start;
     while (end->link != NULL)
@@ -282,14 +276,45 @@ void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
     end->link = upd_rem_set_block_list;
     upd_rem_set_block_list = start;
     RELEASE_LOCK(&upd_rem_set_lock);
+}
+
+/*
+ * Transfers the given capability's update-remembered set to the global
+ * remembered set.
+ *
+ * Really the argument type should be UpdRemSet* but this would be rather
+ * inconvenient without polymorphism.
+ */
+static void nonmovingAddUpdRemSetBlocks_lock(MarkQueue *rset)
+{
+    if (markQueueIsEmpty(rset)) return;
 
-    // Reset remembered set
+    nonmovingAddUpdRemSetBlocks_(rset);
+    // Reset the state of the remembered set.
     ACQUIRE_SM_LOCK;
     init_mark_queue_(rset);
     rset->is_upd_rem_set = true;
     RELEASE_SM_LOCK;
 }
 
+/*
+ * Transfers the given capability's update-remembered set to the global
+ * remembered set.
+ *
+ * Really the argument type should be UpdRemSet* but this would be rather
+ * inconvenient without polymorphism.
+ *
+ * Caller must hold SM_LOCK.
+ */
+void nonmovingAddUpdRemSetBlocks(UpdRemSet *rset)
+{
+    if (markQueueIsEmpty(&rset->queue)) return;
+
+    nonmovingAddUpdRemSetBlocks_(&rset->queue);
+    init_mark_queue_(&rset->queue);
+    rset->queue.is_upd_rem_set = true;
+}
+
 #if defined(THREADED_RTS)
 /* Called by capabilities to flush their update remembered sets when
  * synchronising with the non-moving collector as it transitions from mark to
@@ -301,7 +326,7 @@ void nonmovingFlushCapUpdRemSetBlocks(Capability *cap)
                "Capability %d flushing update remembered set: %d",
                cap->no, markQueueLength(&cap->upd_rem_set.queue));
     traceConcUpdRemSetFlush(cap);
-    nonmovingAddUpdRemSetBlocks(&cap->upd_rem_set.queue);
+    nonmovingAddUpdRemSetBlocks_lock(&cap->upd_rem_set.queue);
     atomic_inc(&upd_rem_set_flush_count, 1);
     signalCondition(&upd_rem_set_flushed_cond);
     // After this mutation will remain suspended until nonmovingFinishFlush
@@ -399,7 +424,7 @@ void nonmovingFinishFlush(Task *task)
 {
     // See Note [Unintentional marking in resurrectThreads]
     for (uint32_t i = 0; i < getNumCapabilities(); i++) {
-        reset_upd_rem_set(&getCapability(i)->upd_rem_set);
+        nonmovingResetUpdRemSet(&getCapability(i)->upd_rem_set);
     }
     // Also reset upd_rem_set_block_list in case some of the UpdRemSets were
     // filled and we flushed them.
@@ -424,7 +449,8 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
     if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
         // Yes, this block is full.
         if (q->is_upd_rem_set) {
-            nonmovingAddUpdRemSetBlocks(q);
+            // Flush the block to the global update remembered set
+            nonmovingAddUpdRemSetBlocks_lock(q);
         } else {
             // allocate a fresh block.
             ACQUIRE_SM_LOCK;
@@ -780,7 +806,7 @@ void markQueuePushClosure (MarkQueue *q,
 /* TODO: Do we really never want to specify the origin here? */
 void markQueueAddRoot (MarkQueue* q, StgClosure** root)
 {
-    markQueuePushClosure(q, *root, NULL);
+    markQueuePushClosureGC(q, *root);
 }
 
 /* Push a closure to the mark queue without origin information */
@@ -908,18 +934,24 @@ void initMarkQueue (MarkQueue *queue)
 }
 
 /* Must hold sm_mutex. */
-void init_upd_rem_set (UpdRemSet *rset)
+void nonmovingInitUpdRemSet (UpdRemSet *rset)
 {
     init_mark_queue_(&rset->queue);
     rset->queue.is_upd_rem_set = true;
 }
 
-void reset_upd_rem_set (UpdRemSet *rset)
+static void nonmovingResetUpdRemSetQueue (MarkQueue *rset)
 {
     // UpdRemSets always have one block for the mark queue. This assertion is to
     // update this code if we change that.
-    ASSERT(rset->queue.blocks->link == NULL);
-    rset->queue.top->head = 0;
+    ASSERT(rset->is_upd_rem_set);
+    ASSERT(rset->blocks->link == NULL);
+    rset->top->head = 0;
+}
+
+void nonmovingResetUpdRemSet (UpdRemSet *rset)
+{
+    nonmovingResetUpdRemSetQueue(&rset->queue);
 }
 
 void freeMarkQueue (MarkQueue *queue)
@@ -1187,15 +1219,17 @@ trace_stack (MarkQueue *queue, StgStack *stack)
 static bool
 bump_static_flag(StgClosure **link_field, StgClosure *q STG_UNUSED)
 {
-    while (1) {
-        StgWord link = (StgWord) *link_field;
-        StgWord new = (link & ~STATIC_BITS) | static_flag;
-        if ((link & STATIC_BITS) == static_flag)
-            return false;
-        else if (cas((StgVolatilePtr) link_field, link, new) == link) {
-            return true;
-        }
+    ACQUIRE_SM_LOCK;
+    bool needs_marking;
+    StgWord link = (StgWord) *link_field;
+    if ((link & STATIC_BITS) == static_flag) {
+        needs_marking = false;
+    } else {
+        *link_field = (StgClosure *) ((link & ~STATIC_BITS) | static_flag);
+        needs_marking = true;
     }
+    RELEASE_SM_LOCK;
+    return needs_marking;
 }
 
 /* N.B. p0 may be tagged */
@@ -1211,10 +1245,16 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
     StgWord tag = GET_CLOSURE_TAG(p);
     p = UNTAG_CLOSURE(p);
 
+    // Push an immutable field to the mark queue.
 #   define PUSH_FIELD(obj, field)                                \
         markQueuePushClosure(queue,                              \
                                 (StgClosure *) (obj)->field,     \
                                 (StgClosure **) &(obj)->field)
+    // Push a mutable field to the mark queue.
+#   define PUSH_FIELD_MUT(obj, field)                            \
+        markQueuePushClosure(queue,                              \
+                                (StgClosure *) ACQUIRE_LOAD(&(obj)->field),     \
+                                (StgClosure **) &(obj)->field)
 
     if (!HEAP_ALLOCED_GC(p)) {
         const StgInfoTable *info = get_itbl(p);
@@ -1281,7 +1321,10 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
 
     bd = Bdescr((StgPtr) p);
 
-    if (bd->gen != oldest_gen) {
+    // This must be a relaxed load since the object may be a large object,
+    // in which case evacuation by the moving collector will result in
+    // mutation.
+    if (RELAXED_LOAD(&bd->gen) != oldest_gen) {
         // Here we have an object living outside of the non-moving heap. While
         // we likely evacuated nearly everything to the nonmoving heap during
         // preparation there are nevertheless a few ways in which we might trace
@@ -1386,16 +1429,16 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
     case MVAR_CLEAN:
     case MVAR_DIRTY: {
         StgMVar *mvar = (StgMVar *) p;
-        PUSH_FIELD(mvar, head);
-        PUSH_FIELD(mvar, tail);
-        PUSH_FIELD(mvar, value);
+        PUSH_FIELD_MUT(mvar, head);
+        PUSH_FIELD_MUT(mvar, tail);
+        PUSH_FIELD_MUT(mvar, value);
         break;
     }
 
     case TVAR: {
         StgTVar *tvar = ((StgTVar *)p);
-        PUSH_FIELD(tvar, current_value);
-        PUSH_FIELD(tvar, first_watch_queue_entry);
+        PUSH_FIELD_MUT(tvar, current_value);
+        PUSH_FIELD_MUT(tvar, first_watch_queue_entry);
         break;
     }
 
@@ -1510,8 +1553,12 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
     }
 
     case BLACKHOLE: {
-        PUSH_FIELD((StgInd *) p, indirectee);
-        StgClosure *indirectee = ((StgInd*)p)->indirectee;
+        // Synchronizes with the release-store in updateWithIndirection.
+        // See Note [Heap memory barriers] in SMP.h.
+        StgInd *ind = (StgInd *) p;
+        ACQUIRE_FENCE();
+        StgClosure *indirectee = RELAXED_LOAD(&ind->indirectee);
+        markQueuePushClosure(queue, indirectee, &ind->indirectee);
         if (GET_CLOSURE_TAG(indirectee) == 0 || origin == NULL) {
             // do nothing
         } else {
@@ -1522,7 +1569,7 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
 
     case MUT_VAR_CLEAN:
     case MUT_VAR_DIRTY:
-        PUSH_FIELD((StgMutVar *)p, var);
+        PUSH_FIELD_MUT((StgMutVar *)p, var);
         break;
 
     case BLOCKING_QUEUE: {
@@ -1577,7 +1624,7 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
         StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs *) p;
         for (StgWord i = 0; i < arr->ptrs; i++) {
             StgClosure **field = &arr->payload[i];
-            markQueuePushClosure(queue, *field, field);
+            markQueuePushClosure(queue, ACQUIRE_LOAD(field), field);
         }
         break;
     }
@@ -1639,6 +1686,7 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
     }
 
 #   undef PUSH_FIELD
+#   undef PUSH_FIELD_MUT
 
     /* Set the mark bit: it's important that we do this only after we actually push
      * the object's pointers since in the case of marking stacks there may be a
@@ -1719,13 +1767,16 @@ nonmovingMark (MarkQueue *queue)
                 end = arr->ptrs;
             }
             for (StgWord i = start; i < end; i++) {
-                markQueuePushClosure_(queue, arr->payload[i]);
+                StgClosure *c = ACQUIRE_LOAD(&arr->payload[i]);
+                markQueuePushClosure_(queue, c);
             }
             break;
         }
         case NULL_ENTRY:
             // Perhaps the update remembered set has more to mark...
-            if (upd_rem_set_block_list) {
+            // N.B. This must be atomic since we have not yet taken
+            // upd_rem_set_lock.
+            if (RELAXED_LOAD(&upd_rem_set_block_list) != NULL) {
                 ACQUIRE_LOCK(&upd_rem_set_lock);
                 bdescr *old = queue->blocks;
                 queue->blocks = upd_rem_set_block_list;


=====================================
rts/sm/NonMovingMark.h
=====================================
@@ -137,8 +137,7 @@ extern bdescr *upd_rem_set_block_list;
 
 void nonmovingMarkInitUpdRemSet(void);
 
-void init_upd_rem_set(UpdRemSet *rset);
-void reset_upd_rem_set(UpdRemSet *rset);
+void nonmovingInitUpdRemSet(UpdRemSet *rset);
 void updateRemembSetPushClosure(Capability *cap, StgClosure *p);
 void updateRemembSetPushThunk(Capability *cap, StgThunk *p);
 void updateRemembSetPushTSO(Capability *cap, StgTSO *tso);
@@ -165,7 +164,7 @@ void nonmovingResurrectThreads(struct MarkQueue_ *queue, StgTSO **resurrected_th
 bool nonmovingIsAlive(StgClosure *p);
 void nonmovingMarkDeadWeak(struct MarkQueue_ *queue, StgWeak *w);
 void nonmovingMarkLiveWeak(struct MarkQueue_ *queue, StgWeak *w);
-void nonmovingAddUpdRemSetBlocks(struct MarkQueue_ *rset);
+void nonmovingAddUpdRemSetBlocks(UpdRemSet *rset);
 
 void markQueuePush(MarkQueue *q, const MarkQueueEnt *ent);
 void markQueuePushClosureGC(MarkQueue *q, StgClosure *p);


=====================================
rts/sm/NonMovingShortcut.c
=====================================
@@ -153,7 +153,8 @@ selectee_changed:
     // Selectee is a non-moving object, mark it.
     markQueuePushClosure(queue, selectee, NULL);
 
-    const StgInfoTable *selectee_info_tbl = get_itbl(selectee);
+    // This may synchronize with the release in updateWithIndirection.
+    const StgInfoTable *selectee_info_tbl = get_itbl_acquire(selectee);
     switch (selectee_info_tbl->type) {
         case WHITEHOLE: {
             // Probably a loop. Abort.


=====================================
rts/sm/Storage.c
=====================================
@@ -325,7 +325,7 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
     if (RtsFlags.GcFlags.useNonmoving) {
         nonmovingAddCapabilities(to);
         for (i = 0; i < to; ++i) {
-            init_upd_rem_set(&getCapability(i)->upd_rem_set);
+            nonmovingInitUpdRemSet(&getCapability(i)->upd_rem_set);
         }
     }
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/16a1bcd1d72c7a4567671f6a7f610df3fc477519...36c9f23c9dc52928b5d2971f38f6e0b15e38528e
