[Git][ghc/ghc][wip/T22264] 5 commits: nonmoving: Move current segment array into Capability

Ben Gamari (@bgamari) gitlab at gitlab.haskell.org
Thu Dec 8 23:49:04 UTC 2022



Ben Gamari pushed to branch wip/T22264 at Glasgow Haskell Compiler / GHC


Commits:
15a16dd6 by Ben Gamari at 2022-12-06T15:55:05-05:00
nonmoving: Move current segment array into Capability

- - - - -
97de202f by Ben Gamari at 2022-12-06T15:55:05-05:00
rts: Fix typo

- - - - -
74eff8f3 by Ben Gamari at 2022-12-06T15:55:05-05:00
n_capabilities

- - - - -
b854c8fd by Ben Gamari at 2022-12-06T15:55:05-05:00
rts: Reenable assertion

- - - - -
3d7baa30 by Ben Gamari at 2022-12-08T17:28:44-05:00
nonmoving: Sync-phase mark budgeting

Here we significantly improve the bound on sync phase pause times by
imposing a limit on the amount of work that we can perform during the
sync. If we find that we have exceeded our marking budget then we allow
the mutators to resume, return to concurrent marking, and try
synchronizing again later.

- - - - -


14 changed files:

- rts/Capability.c
- rts/Capability.h
- rts/Schedule.c
- rts/Trace.c
- rts/Trace.h
- rts/sm/GC.c
- rts/sm/NonMoving.c
- rts/sm/NonMoving.h
- rts/sm/NonMovingCensus.c
- rts/sm/NonMovingCensus.h
- rts/sm/NonMovingMark.c
- rts/sm/NonMovingMark.h
- rts/sm/Sanity.c
- rts/sm/Storage.c


Changes:

=====================================
rts/Capability.c
=====================================
@@ -1257,6 +1257,9 @@ freeCapability (Capability *cap)
 {
     stgFree(cap->mut_lists);
     stgFree(cap->saved_mut_lists);
+    if (cap->current_segments) {
+        stgFree(cap->current_segments);
+    }
 #if defined(THREADED_RTS)
     freeSparkPool(cap->sparks);
 #endif


=====================================
rts/Capability.h
=====================================
@@ -89,6 +89,9 @@ struct Capability_ {
 
     // The update remembered set for the non-moving collector
     UpdRemSet upd_rem_set;
+    // Array of current segments for the non-moving collector.
+    // Of length NONMOVING_ALLOCA_CNT.
+    struct NonmovingSegment **current_segments;
 
     // block for allocating pinned objects into
     bdescr *pinned_object_block;
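
An aside for readers tracking the data-structure change: allocation now finds a capability's current segment by size class, via this array, rather than by capability index inside a shared allocator. A minimal standalone model of the lookup — struct names and constant values here are illustrative stand-ins, not the RTS definitions; compare the nonmovingAllocate hunk in rts/sm/NonMoving.c below:

    #define NONMOVING_ALLOCA0    3    /* illustrative: smallest class, 2^3-byte blocks */
    #define NONMOVING_ALLOCA_CNT 12   /* illustrative number of size classes */

    struct NonmovingSegment;          /* opaque in this sketch */

    struct Cap {                      /* stand-in for Capability */
        struct NonmovingSegment *current_segments[NONMOVING_ALLOCA_CNT];
    };

    /* Same index computation as nonmovingAllocate in this patch. */
    static struct NonmovingSegment *
    current_segment(struct Cap *cap, unsigned int log_block_size)
    {
        unsigned int alloca_idx = log_block_size - NONMOVING_ALLOCA0;
        return cap->current_segments[alloca_idx];
    }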


=====================================
rts/Schedule.c
=====================================
@@ -2329,7 +2329,7 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
             // must be done before calling moreCapabilities(), because that
             // will emit events about creating the new capabilities and adding
             // them to existing capsets.
-            tracingAddCapapilities(n_capabilities, new_n_capabilities);
+            tracingAddCapabilities(n_capabilities, new_n_capabilities);
 #endif
 
             // Resize the capabilities array
@@ -2344,7 +2344,8 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
 
     // update n_capabilities before things start running
     if (new_n_capabilities > n_capabilities) {
-        RELAXED_STORE(&n_capabilities, enabled_capabilities = new_n_capabilities);
+        RELAXED_STORE(&n_capabilities, new_n_capabilities);
+        RELAXED_STORE(&enabled_capabilities, new_n_capabilities);
     }
 
     // We're done: release the original Capabilities
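
The bug here is subtle: in the old line, `enabled_capabilities = new_n_capabilities` was evaluated as an argument of RELAXED_STORE, so enabled_capabilities received a plain, non-atomic write and only n_capabilities got the relaxed atomic store. A standalone illustration of the fixed pattern, where the macro is a simplified stand-in for the RTS's RELAXED_STORE (both use the GCC/Clang __atomic builtins):

    #include <stdint.h>

    /* Simplified stand-in for the RTS macro of the same name. */
    #define RELAXED_STORE(ptr, val) __atomic_store_n((ptr), (val), __ATOMIC_RELAXED)

    static uint32_t n_caps;    /* plain variables accessed through atomic */
    static uint32_t enabled;   /* builtins, as the RTS does               */

    void set_caps(uint32_t new_n)
    {
        /* Broken form: RELAXED_STORE(&n_caps, enabled = new_n);
         * the assignment to `enabled` is an ordinary racy store,
         * invisible to the atomics machinery.
         * Fixed form: one explicit relaxed store per variable. */
        RELAXED_STORE(&n_caps, new_n);
        RELAXED_STORE(&enabled, new_n);
    }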


=====================================
rts/Trace.c
=====================================
@@ -143,7 +143,7 @@ void flushTrace ()
     }
 }
 
-void tracingAddCapapilities (uint32_t from, uint32_t to)
+void tracingAddCapabilities (uint32_t from, uint32_t to)
 {
     if (eventlog_enabled) {
         moreCapEventBufs(from,to);


=====================================
rts/Trace.h
=====================================
@@ -28,7 +28,7 @@ void initTracing (void);
 void endTracing  (void);
 void freeTracing (void);
 void resetTracing (void);
-void tracingAddCapapilities (uint32_t from, uint32_t to);
+void tracingAddCapabilities (uint32_t from, uint32_t to);
 
 #endif /* TRACING */
 


=====================================
rts/sm/GC.c
=====================================
@@ -837,11 +837,13 @@ GarbageCollect (uint32_t collect_gen,
 
   // Flush the update remembered sets. See Note [Eager update remembered set
   // flushing] in NonMovingMark.c
+  ACQUIRE_SM_LOCK;
   if (RtsFlags.GcFlags.useNonmoving) {
       for (n = 0; n < getNumCapabilities(); n++) {
           nonmovingAddUpdRemSetBlocks(&getCapability(n)->upd_rem_set);
       }
   }
+  RELEASE_SM_LOCK;
 
   // Mark and sweep the oldest generation.
   // N.B. This can only happen after we've moved
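
This lock acquisition pairs with the NonMovingMark.c hunk below, where init_mark_queue_ switches from allocGroup_lock to allocGroup: block allocation now assumes the caller already holds the storage-manager lock, so the lock is taken once around the whole flush loop instead of once per allocation. A small runnable model of the pattern, with hypothetical names rather than RTS APIs:

    #include <pthread.h>

    static pthread_mutex_t sm_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Callee assumes sm_lock is already held, in the same way that
     * allocGroup (unlocked) relates to allocGroup_lock. */
    static void flush_one_cap(int cap_no)
    {
        (void)cap_no;   /* ... allocate blocks, hand off queue contents ... */
    }

    void flush_all_caps(int n_caps)
    {
        pthread_mutex_lock(&sm_lock);        /* one acquisition for the loop */
        for (int n = 0; n < n_caps; n++)
            flush_one_cap(n);
        pthread_mutex_unlock(&sm_lock);
    }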


=====================================
rts/sm/NonMoving.c
=====================================
@@ -253,6 +253,9 @@ Mutex concurrent_coll_finished_lock;
  *  - Note [Weak pointer processing and the non-moving GC] (MarkWeak.c) describes
  *    how weak pointers are handled when the non-moving GC is in use.
  *
+ *  - Note [Sync phase marking budget] describes how we avoid long mutator
+ *    pauses during the sync phase
+ *
  * [ueno 2016]:
  *   Katsuhiro Ueno and Atsushi Ohori. 2016. A fully concurrent garbage
  *   collector for functional programs on multicore processors. SIGPLAN Not. 51,
@@ -504,10 +507,44 @@ Mutex concurrent_coll_finished_lock;
  * remembered set during the preparatory GC. This allows us to safely skip the
  * non-moving write barrier without jeopardizing the snapshot invariant.
  *
+ *
+ * Note [Sync phase marking budget]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The non-moving collector is intended to provide reliably low collection
+ * latencies. These latencies are primarily due to two sources:
+ *
+ *  a. the preparatory moving collection at the beginning of the major GC cycle
+ *  b. the post-mark synchronization pause at the end
+ *
+ * While the cost of (a) is inherently bounded by the young generation size,
+ * (b) can in principle be unbounded since the mutator may hide large swathes
+ * of heap from the collector's concurrent mark phase via mutation. These will
+ * only become visible to the collector during the post-mark synchronization
+ * phase.
+ *
+ * Since we don't want to do unbounded marking work in the pause, we impose a
+ * limit (specifically, sync_phase_marking_budget) on the amount of work
+ * (namely, the number of marked closures) that we can do during the pause. If
+ * we deplete our marking budget during the pause then we allow the mutators to
+ * resume and return to concurrent marking (keeping the update remembered set
+ * write barrier enabled). After we have finished marking we will again
+ * attempt the post-mark synchronization.
+ *
+ * The choice of sync_phase_marking_budget was made empirically. On 2022
+ * hardware and a "typical" test program we tend to mark ~10^7 closures per
+ * second. Consequently, a sync_phase_marking_budget of 10^5 should produce
+ * ~10 ms pauses, which seems like a reasonable tradeoff.
+ *
+ * TODO: Perhaps sync_phase_marking_budget should be controllable via a
+ * command-line argument?
+ *
  */
 
 memcount nonmoving_live_words = 0;
 
+// See Note [Sync phase marking budget].
+MarkBudget sync_phase_marking_budget = 200000;
+
 #if defined(THREADED_RTS)
 static void* nonmovingConcurrentMark(void *mark_queue);
 #endif
@@ -674,10 +711,11 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
     // object and not moved) which is covered by allocator 9.
     ASSERT(log_block_size < NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT);
 
-    struct NonmovingAllocator *alloca = nonmovingHeap.allocators[log_block_size - NONMOVING_ALLOCA0];
+    unsigned int alloca_idx = log_block_size - NONMOVING_ALLOCA0;
+    struct NonmovingAllocator *alloca = &nonmovingHeap.allocators[alloca_idx];
 
     // Allocate into current segment
-    struct NonmovingSegment *current = alloca->current[cap->no];
+    struct NonmovingSegment *current = cap->current_segments[alloca_idx];
     ASSERT(current); // current is never NULL
     void *ret = nonmovingSegmentGetBlock_(current, log_block_size, current->next_free);
     ASSERT(GET_CLOSURE_TAG(ret) == 0); // check alignment
@@ -712,29 +750,12 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
         // make it current
         new_current->link = NULL;
         SET_SEGMENT_STATE(new_current, CURRENT);
-        alloca->current[cap->no] = new_current;
+        cap->current_segments[alloca_idx] = new_current;
     }
 
     return ret;
 }
 
-/* Allocate a nonmovingAllocator */
-static struct NonmovingAllocator *alloc_nonmoving_allocator(uint32_t n_caps)
-{
-    size_t allocator_sz =
-        sizeof(struct NonmovingAllocator) +
-        sizeof(void*) * n_caps; // current segment pointer for each capability
-    struct NonmovingAllocator *alloc =
-        stgMallocBytes(allocator_sz, "nonmovingInit");
-    memset(alloc, 0, allocator_sz);
-    return alloc;
-}
-
-static void free_nonmoving_allocator(struct NonmovingAllocator *alloc)
-{
-    stgFree(alloc);
-}
-
 void nonmovingInit(void)
 {
     if (! RtsFlags.GcFlags.useNonmoving) return;
@@ -743,10 +764,7 @@ void nonmovingInit(void)
     initCondition(&concurrent_coll_finished);
     initMutex(&concurrent_coll_finished_lock);
 #endif
-    for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
-        nonmovingHeap.allocators[i] = alloc_nonmoving_allocator(getNumCapabilities());
-    }
-    nonmovingMarkInitUpdRemSet();
+    nonmovingMarkInit();
 }
 
 // Stop any nonmoving collection in preparation for RTS shutdown.
@@ -779,44 +797,24 @@ void nonmovingExit(void)
     closeCondition(&concurrent_coll_finished);
     closeMutex(&nonmoving_collection_mutex);
 #endif
-
-    for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
-        free_nonmoving_allocator(nonmovingHeap.allocators[i]);
-    }
 }
 
-/*
- * Assumes that no garbage collector or mutator threads are running to safely
- * resize the nonmoving_allocators.
- */
-void nonmovingAddCapabilities(uint32_t new_n_caps)
+/* Initialize a new capability. Caller must hold SM_LOCK */
+void nonmovingInitCapability(Capability *cap)
 {
-    unsigned int old_n_caps = nonmovingHeap.n_caps;
-    struct NonmovingAllocator **allocs = nonmovingHeap.allocators;
-
-    ACQUIRE_SM_LOCK;
+    // Initialize current segment array
+    struct NonmovingSegment **segs =
+        stgMallocBytes(sizeof(struct NonmovingSegment*) * NONMOVING_ALLOCA_CNT, "current segment array");
     for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
-        struct NonmovingAllocator *old = allocs[i];
-        allocs[i] = alloc_nonmoving_allocator(new_n_caps);
-
-        // Copy the old state
-        allocs[i]->filled = old->filled;
-        allocs[i]->active = old->active;
-        for (unsigned int j = 0; j < old_n_caps; j++) {
-            allocs[i]->current[j] = old->current[j];
-        }
-        stgFree(old);
-
-        // Initialize current segments for the new capabilities
-        for (unsigned int j = old_n_caps; j < new_n_caps; j++) {
-            allocs[i]->current[j] = nonmovingAllocSegment(getCapability(j)->node);
-            nonmovingInitSegment(allocs[i]->current[j], NONMOVING_ALLOCA0 + i);
-            SET_SEGMENT_STATE(allocs[i]->current[j], CURRENT);
-            allocs[i]->current[j]->link = NULL;
-        }
+        segs[i] = nonmovingAllocSegment(cap->node);
+        nonmovingInitSegment(segs[i], NONMOVING_ALLOCA0 + i);
+        SET_SEGMENT_STATE(segs[i], CURRENT);
     }
-    nonmovingHeap.n_caps = new_n_caps;
-    RELEASE_SM_LOCK;
+    cap->current_segments = segs;
+
+    // Initialize update remembered set
+    cap->upd_rem_set.queue.blocks = NULL;
+    nonmovingInitUpdRemSet(&cap->upd_rem_set);
 }
 
 void nonmovingClearBitmap(struct NonmovingSegment *seg)
@@ -838,11 +836,12 @@ static void nonmovingPrepareMark(void)
 
     nonmovingBumpEpoch();
     for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
-        struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
+        struct NonmovingAllocator *alloca = &nonmovingHeap.allocators[alloca_idx];
 
         // Update current segments' snapshot pointers
         for (uint32_t cap_n = 0; cap_n < nonmovingHeap.n_caps; ++cap_n) {
-            struct NonmovingSegment *seg = alloca->current[cap_n];
+            Capability *cap = getCapability(cap_n);
+            struct NonmovingSegment *seg = cap->current_segments[alloca_idx];
             nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
         }
 
@@ -925,7 +924,9 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
 
     MarkQueue *mark_queue = stgMallocBytes(sizeof(MarkQueue), "mark queue");
     mark_queue->blocks = NULL;
+    ACQUIRE_SM_LOCK;
     initMarkQueue(mark_queue);
+    RELEASE_SM_LOCK;
     current_mark_queue = mark_queue;
 
     // Mark roots
@@ -1024,19 +1025,25 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
 }
 
 /* Mark queue, threads, and weak pointers until no more weaks have been
- * resuscitated
+ * resuscitated. If *budget is non-zero then we will mark no more than the
+ * indicated number of objects. Returns true if there is no more marking work
+ * to be done, false if we exceeded our marking budget.
  */
-static void nonmovingMarkThreadsWeaks(MarkQueue *mark_queue)
+static bool nonmovingMarkThreadsWeaks(MarkBudget *budget, MarkQueue *mark_queue)
 {
     while (true) {
         // Propagate marks
-        nonmovingMark(mark_queue);
+        nonmovingMark(budget, mark_queue);
+        if (*budget == 0) {
+            return false;
+        }
 
         // Tidy threads and weaks
         nonmovingTidyThreads();
 
-        if (! nonmovingTidyWeaks(mark_queue))
-            return;
+        if (! nonmovingTidyWeaks(mark_queue)) {
+            return true;
+        }
     }
 }
 
@@ -1069,7 +1076,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
     // Walk the list of filled segments that we collected during preparation,
     // updated their snapshot pointers and move them to the sweep list.
     for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
-        struct NonmovingSegment *filled = nonmovingHeap.allocators[alloca_idx]->saved_filled;
+        struct NonmovingSegment *filled = nonmovingHeap.allocators[alloca_idx].saved_filled;
         uint32_t n_filled = 0;
         if (filled) {
             struct NonmovingSegment *seg = filled;
@@ -1088,14 +1095,18 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
             seg->link = nonmovingHeap.sweep_list;
             nonmovingHeap.sweep_list = filled;
         }
-        nonmovingHeap.allocators[alloca_idx]->saved_filled = NULL;
+        nonmovingHeap.allocators[alloca_idx].saved_filled = NULL;
     }
 
     // Mark Weak#s
     nonmovingMarkWeakPtrList(mark_queue);
 
     // Do concurrent marking; most of the heap will get marked here.
-    nonmovingMarkThreadsWeaks(mark_queue);
+concurrent_marking:
+    {
+        MarkBudget budget = UNLIMITED_MARK_BUDGET;
+        nonmovingMarkThreadsWeaks(&budget, mark_queue);
+    }
 
 #if defined(THREADED_RTS)
     Task *task = newBoundTask();
@@ -1118,9 +1129,17 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
     nonmovingBeginFlush(task);
 
     bool all_caps_syncd;
+    MarkBudget sync_marking_budget = sync_phase_marking_budget;
     do {
         all_caps_syncd = nonmovingWaitForFlush();
-        nonmovingMarkThreadsWeaks(mark_queue);
+        if (nonmovingMarkThreadsWeaks(&sync_marking_budget, mark_queue) == false) {
+            // We ran out of budget for marking. Abort sync.
+            // See Note [Sync phase marking budget].
+            traceConcSyncEnd();
+            stat_endNonmovingGcSync();
+            releaseAllCapabilities(n_capabilities, NULL, task);
+            goto concurrent_marking;
+        }
     } while (!all_caps_syncd);
 #endif
 
@@ -1131,7 +1150,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
     // Do last marking of weak pointers
     while (true) {
         // Propagate marks
-        nonmovingMark(mark_queue);
+        nonmovingMarkUnlimitedBudget(mark_queue);
 
         if (!nonmovingTidyWeaks(mark_queue))
             break;
@@ -1140,7 +1159,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
     nonmovingMarkDeadWeaks(mark_queue, dead_weaks);
 
     // Propagate marks
-    nonmovingMark(mark_queue);
+    nonmovingMarkUnlimitedBudget(mark_queue);
 
     // Now remove all dead objects from the mut_list to ensure that a younger
     // generation collection doesn't attempt to look at them after we've swept.
@@ -1292,10 +1311,12 @@ void assert_in_nonmoving_heap(StgPtr p)
     }
 
     for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
-        struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
+        struct NonmovingAllocator *alloca = &nonmovingHeap.allocators[alloca_idx];
+
         // Search current segments
         for (uint32_t cap_idx = 0; cap_idx < nonmovingHeap.n_caps; ++cap_idx) {
-            struct NonmovingSegment *seg = alloca->current[cap_idx];
+            Capability *cap = getCapability(cap_idx);
+            struct NonmovingSegment *seg = cap->current_segments[alloca_idx];
             if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
                 return;
             }
@@ -1354,33 +1375,16 @@ void nonmovingPrintSegment(struct NonmovingSegment *seg)
     debugBelch("End of segment\n\n");
 }
 
-void nonmovingPrintAllocator(struct NonmovingAllocator *alloc)
-{
-    debugBelch("Allocator at %p\n", (void*)alloc);
-    debugBelch("Filled segments:\n");
-    for (struct NonmovingSegment *seg = alloc->filled; seg != NULL; seg = seg->link) {
-        debugBelch("%p ", (void*)seg);
-    }
-    debugBelch("\nActive segments:\n");
-    for (struct NonmovingSegment *seg = alloc->active; seg != NULL; seg = seg->link) {
-        debugBelch("%p ", (void*)seg);
-    }
-    debugBelch("\nCurrent segments:\n");
-    for (uint32_t i = 0; i < nonmovingHeap.n_caps; ++i) {
-        debugBelch("%p ", alloc->current[i]);
-    }
-    debugBelch("\n");
-}
-
 void locate_object(P_ obj)
 {
     // Search allocators
     for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
-        struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
-        for (uint32_t cap = 0; cap < nonmovingHeap.n_caps; ++cap) {
-            struct NonmovingSegment *seg = alloca->current[cap];
+        struct NonmovingAllocator *alloca = &nonmovingHeap.allocators[alloca_idx];
+        for (uint32_t cap_n = 0; cap_n < getNumCapabilities(); ++cap_n) {
+            Capability *cap = getCapability(cap_n);
+            struct NonmovingSegment *seg = cap->current_segments[alloca_idx];
             if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
-                debugBelch("%p is in current segment of capability %d of allocator %d at %p\n", obj, cap, alloca_idx, (void*)seg);
+                debugBelch("%p is in current segment of capability %d of allocator %d at %p\n", obj, cap_n, alloca_idx, (void*)seg);
                 return;
             }
         }
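
For orientation, here is a condensed standalone restatement of the budget-limited sync loop that the hunks above add to nonmovingMark_ (function names are hypothetical stand-ins; the real code also handles tracing, statistics, and capability bookkeeping):

    #include <stdbool.h>
    #include <stdint.h>

    typedef int64_t MarkBudget;
    #define UNLIMITED_MARK_BUDGET INT64_MIN

    /* Hypothetical stand-ins for the RTS routines used above. */
    bool wait_for_flush(void);                /* nonmovingWaitForFlush     */
    bool mark_threads_weaks(MarkBudget *b);   /* nonmovingMarkThreadsWeaks */
    void resume_mutators(void);               /* releaseAllCapabilities    */

    void mark_and_sync(MarkBudget sync_budget /* sync_phase_marking_budget */)
    {
    concurrent_marking:
        {
            /* Concurrent phase: mutators are running, so marking may be
             * unbounded without hurting pause times. */
            MarkBudget unlimited = UNLIMITED_MARK_BUDGET;
            mark_threads_weaks(&unlimited);
        }

        bool all_caps_syncd;
        MarkBudget budget = sync_budget;   /* reset on every sync attempt */
        do {
            all_caps_syncd = wait_for_flush();
            if (!mark_threads_weaks(&budget)) {
                /* Budget depleted during the pause: let the mutators
                 * resume and retry the sync after more concurrent
                 * marking. See Note [Sync phase marking budget]. */
                resume_mutators();
                goto concurrent_marking;
            }
        } while (!all_caps_syncd);

        /* All capabilities synchronized within budget: proceed to the
         * final mark and sweep. */
    }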


=====================================
rts/sm/NonMoving.h
=====================================
@@ -85,8 +85,7 @@ struct NonmovingAllocator {
     struct NonmovingSegment *filled;
     struct NonmovingSegment *saved_filled;
     struct NonmovingSegment *active;
-    // indexed by capability number
-    struct NonmovingSegment *current[];
+    // N.B. Per-capability "current" segment lives in Capability
 };
 
 // first allocator is of size 2^NONMOVING_ALLOCA0 (in bytes)
@@ -100,7 +99,7 @@ struct NonmovingAllocator {
 #define NONMOVING_MAX_FREE 16
 
 struct NonmovingHeap {
-    struct NonmovingAllocator *allocators[NONMOVING_ALLOCA_CNT];
+    struct NonmovingAllocator allocators[NONMOVING_ALLOCA_CNT];
     // free segment list. This is a cache where we keep up to
     // NONMOVING_MAX_FREE segments to avoid thrashing the block allocator.
     // Note that segments in this list are still counted towards
@@ -151,7 +150,7 @@ void nonmovingCollect(StgWeak **dead_weaks,
                        StgTSO **resurrected_threads);
 
 void *nonmovingAllocate(Capability *cap, StgWord sz);
-void nonmovingAddCapabilities(uint32_t new_n_caps);
+void nonmovingInitCapability(Capability *cap);
 void nonmovingPushFreeSegment(struct NonmovingSegment *seg);
 void nonmovingClearBitmap(struct NonmovingSegment *seg);
 
@@ -168,7 +167,7 @@ INLINE_HEADER uint8_t nonmovingSegmentLogBlockSize(struct NonmovingSegment *seg)
 INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg)
 {
     struct NonmovingAllocator *alloc =
-        nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
+        &nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
     SET_SEGMENT_STATE(seg, ACTIVE);
     while (true) {
         struct NonmovingSegment *current_active = RELAXED_LOAD(&alloc->active);
@@ -183,7 +182,7 @@ INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg)
 INLINE_HEADER void nonmovingPushFilledSegment(struct NonmovingSegment *seg)
 {
     struct NonmovingAllocator *alloc =
-        nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
+        &nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
     SET_SEGMENT_STATE(seg, FILLED);
     while (true) {
         struct NonmovingSegment *current_filled = (struct NonmovingSegment*) RELAXED_LOAD(&alloc->filled);
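
In short, the header loses its flexible array member: the allocator struct now has a fixed size, so NonmovingHeap can embed the allocators by value, and growing the capability count no longer requires reallocating and copying every allocator. Schematically — an abridged sketch with renamed tags so both versions can coexist; the real name in both cases is struct NonmovingAllocator:

    struct NonmovingSegment;

    /* Before: the flexible array member was sized by the capability
     * count, forcing heap allocation and the copy/grow dance in the
     * now-removed nonmovingAddCapabilities. */
    struct NonmovingAllocatorBefore {
        struct NonmovingSegment *filled, *saved_filled, *active;
        struct NonmovingSegment *current[];   /* indexed by capability no. */
    };

    /* After: fixed size, embedded by value in struct NonmovingHeap; each
     * capability's "current" segment lives in cap->current_segments. */
    struct NonmovingAllocatorAfter {
        struct NonmovingSegment *filled, *saved_filled, *active;
    };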


=====================================
rts/sm/NonMovingCensus.c
=====================================
@@ -21,10 +21,12 @@
 // stopped. In this case it is safe to look at active and current segments so we can
 // also collect statistics on live words.
 static struct NonmovingAllocCensus
-nonmovingAllocatorCensus_(struct NonmovingAllocator *alloc, bool collect_live_words)
+nonmovingAllocatorCensus_(uint32_t alloc_idx, bool collect_live_words)
 {
     struct NonmovingAllocCensus census = {collect_live_words, 0, 0, 0, 0};
+    struct NonmovingAllocator *alloc = &nonmovingHeap.allocators[alloc_idx];
 
+    // filled segments
     for (struct NonmovingSegment *seg = alloc->filled;
          seg != NULL;
          seg = seg->link)
@@ -40,6 +42,7 @@ nonmovingAllocatorCensus_(struct NonmovingAllocator *alloc, bool collect_live_wo
         }
     }
 
+    // active segments
     for (struct NonmovingSegment *seg = alloc->active;
          seg != NULL;
          seg = seg->link)
@@ -56,9 +59,11 @@ nonmovingAllocatorCensus_(struct NonmovingAllocator *alloc, bool collect_live_wo
         }
     }
 
-    for (unsigned int cap=0; cap < getNumCapabilities(); cap++)
+    // current segments
+    for (unsigned int cap_n=0; cap_n < getNumCapabilities(); cap_n++)
     {
-        struct NonmovingSegment *seg = alloc->current[cap];
+        Capability *cap = getCapability(cap_n);
+        struct NonmovingSegment *seg = cap->current_segments[alloc_idx];
         unsigned int n = nonmovingSegmentBlockCount(seg);
         for (unsigned int i=0; i < n; i++) {
             if (nonmovingGetMark(seg, i)) {
@@ -76,15 +81,15 @@ nonmovingAllocatorCensus_(struct NonmovingAllocator *alloc, bool collect_live_wo
  * all blocks in nonmoving heap are valid closures.
  */
 struct NonmovingAllocCensus
-nonmovingAllocatorCensusWithWords(struct NonmovingAllocator *alloc)
+nonmovingAllocatorCensusWithWords(uint32_t alloc_idx)
 {
-    return nonmovingAllocatorCensus_(alloc, true);
+    return nonmovingAllocatorCensus_(alloc_idx, true);
 }
 
 struct NonmovingAllocCensus
-nonmovingAllocatorCensus(struct NonmovingAllocator *alloc)
+nonmovingAllocatorCensus(uint32_t alloc_idx)
 {
-    return nonmovingAllocatorCensus_(alloc, false);
+    return nonmovingAllocatorCensus_(alloc_idx, false);
 }
 
 
@@ -130,7 +135,7 @@ void nonmovingPrintAllocatorCensus(bool collect_live_words)
 
     for (int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
         struct NonmovingAllocCensus census =
-            nonmovingAllocatorCensus_(nonmovingHeap.allocators[i], collect_live_words);
+            nonmovingAllocatorCensus_(i, collect_live_words);
 
         print_alloc_census(i, census);
     }
@@ -143,8 +148,7 @@ void nonmovingTraceAllocatorCensus()
         return;
 
     for (int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
-        const struct NonmovingAllocCensus census =
-            nonmovingAllocatorCensus(nonmovingHeap.allocators[i]);
+        const struct NonmovingAllocCensus census = nonmovingAllocatorCensus(i);
         const uint32_t log_blk_size = i + NONMOVING_ALLOCA0;
         traceNonmovingHeapCensus(log_blk_size, &census);
     }


=====================================
rts/sm/NonMovingCensus.h
=====================================
@@ -20,10 +20,10 @@ struct NonmovingAllocCensus {
 
 
 struct NonmovingAllocCensus
-nonmovingAllocatorCensusWithWords(struct NonmovingAllocator *alloc);
+nonmovingAllocatorCensusWithWords(uint32_t alloc_idx);
 
 struct NonmovingAllocCensus
-nonmovingAllocatorCensus(struct NonmovingAllocator *alloc);
+nonmovingAllocatorCensus(uint32_t alloc_idx);
 
 void nonmovingPrintAllocatorCensus(bool collect_live_words);
 void nonmovingTraceAllocatorCensus(void);


=====================================
rts/sm/NonMovingMark.c
=====================================
@@ -253,7 +253,7 @@ StgWord nonmoving_write_barrier_enabled = false;
 MarkQueue *current_mark_queue = NULL;
 
 /* Initialise update remembered set data structures */
-void nonmovingMarkInitUpdRemSet() {
+void nonmovingMarkInit() {
 #if defined(THREADED_RTS)
     initMutex(&upd_rem_set_lock);
     initCondition(&upd_rem_set_flushed_cond);
@@ -295,7 +295,9 @@ static void nonmovingAddUpdRemSetBlocks_lock(MarkQueue *rset)
 
     nonmovingAddUpdRemSetBlocks_(rset);
     // Reset the state of the remembered set.
+    ACQUIRE_SM_LOCK;
     init_mark_queue_(rset);
+    RELEASE_SM_LOCK;
     rset->is_upd_rem_set = true;
 }
 
@@ -928,7 +930,7 @@ static MarkQueueEnt markQueuePop (MarkQueue *q)
 /* Must hold sm_mutex. */
 static void init_mark_queue_ (MarkQueue *queue)
 {
-    bdescr *bd = allocGroup_lock(MARK_QUEUE_BLOCKS);
+    bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
     ASSERT(queue->blocks == NULL);
     queue->blocks = bd;
     queue->top = (MarkQueueBlock *) bd->start;
@@ -939,12 +941,14 @@ static void init_mark_queue_ (MarkQueue *queue)
 #endif
 }
 
+/* Must hold sm_mutex */
 void initMarkQueue (MarkQueue *queue)
 {
     init_mark_queue_(queue);
     queue->is_upd_rem_set = false;
 }
 
+/* Must hold sm_mutex */
 void nonmovingInitUpdRemSet (UpdRemSet *rset)
 {
     init_mark_queue_(&rset->queue);
@@ -1763,15 +1767,23 @@ done:
  *  b. the nursery has been fully evacuated into the non-moving generation.
  *  c. the mark queue has been seeded with a set of roots.
  *
+ * If budget is not UNLIMITED_MARK_BUDGET, then we will mark no more than the
+ * indicated number of objects and deduct the work done from the budget.
  */
 GNUC_ATTR_HOT void
-nonmovingMark (MarkQueue *queue)
+nonmovingMark (MarkBudget* budget, MarkQueue *queue)
 {
     traceConcMarkBegin();
     debugTrace(DEBUG_nonmoving_gc, "Starting mark pass");
-    unsigned int count = 0;
+    uint64_t count = 0;
     while (true) {
         count++;
+        if (*budget == 0) {
+            return;
+        } else if (*budget != UNLIMITED_MARK_BUDGET) {
+            *budget -= 1;
+        }
+
         MarkQueueEnt ent = markQueuePop(queue);
 
         switch (nonmovingMarkQueueEntryType(&ent)) {


=====================================
rts/sm/NonMovingMark.h
=====================================
@@ -112,6 +112,11 @@ typedef struct {
     MarkQueue queue;
 } UpdRemSet;
 
+// How much marking work we are allowed to perform
+// See Note [Sync phase marking budget] in NonMoving.c
+typedef int64_t MarkBudget;
+#define UNLIMITED_MARK_BUDGET INT64_MIN
+
 // Number of blocks to allocate for a mark queue
 #define MARK_QUEUE_BLOCKS 16
 
@@ -136,7 +141,7 @@ extern MarkQueue *current_mark_queue;
 extern bdescr *upd_rem_set_block_list;
 
 
-void nonmovingMarkInitUpdRemSet(void);
+void nonmovingMarkInit(void);
 
 void nonmovingInitUpdRemSet(UpdRemSet *rset);
 void updateRemembSetPushClosure(Capability *cap, StgClosure *p);
@@ -156,7 +161,12 @@ void markQueueAddRoot(MarkQueue* q, StgClosure** root);
 
 void initMarkQueue(MarkQueue *queue);
 void freeMarkQueue(MarkQueue *queue);
-void nonmovingMark(struct MarkQueue_ *__restrict__ queue);
+void nonmovingMark(MarkBudget *budget, struct MarkQueue_ *__restrict__ queue);
+INLINE_HEADER void nonmovingMarkUnlimitedBudget(struct MarkQueue_ *restrict queue) {
+    MarkBudget budget = UNLIMITED_MARK_BUDGET;
+    nonmovingMark(&budget, queue);
+}
+
 
 void nonmovingMarkWeakPtrList(struct MarkQueue_ *queue);
 bool nonmovingTidyWeaks(struct MarkQueue_ *queue);
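
A note on the sentinel: UNLIMITED_MARK_BUDGET is INT64_MIN rather than 0 or -1 because nonmovingMark treats a zero budget as depleted and skips the decrement only for the exact sentinel value, so an unlimited budget can never count down to zero. A self-contained toy of that bookkeeping, mirroring the check order in the NonMovingMark.c hunk above:

    #include <stdint.h>
    #include <stdio.h>

    typedef int64_t MarkBudget;
    #define UNLIMITED_MARK_BUDGET INT64_MIN

    /* Mirrors nonmovingMark's per-object check: zero means "stop now";
     * the sentinel is never decremented, so it never reaches zero. */
    static int spend_one(MarkBudget *budget)
    {
        if (*budget == 0) return 0;                        /* depleted */
        if (*budget != UNLIMITED_MARK_BUDGET) *budget -= 1;
        return 1;                              /* may mark one object */
    }

    int main(void)
    {
        MarkBudget b = 3;
        int marked = 0;
        while (spend_one(&b)) marked++;
        printf("marked %d objects, remaining budget %lld\n",
               marked, (long long)b);          /* marked 3, remaining 0 */
        return 0;
    }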


=====================================
rts/sm/Sanity.c
=====================================
@@ -637,12 +637,13 @@ void checkNonmovingHeap (const struct NonmovingHeap *heap)
     checkLargeObjects(nonmoving_marked_large_objects);
     checkCompactObjects(nonmoving_compact_objects);
     for (unsigned int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
-        const struct NonmovingAllocator *alloc = heap->allocators[i];
+        const struct NonmovingAllocator *alloc = &heap->allocators[i];
         checkNonmovingSegments(alloc->filled);
         checkNonmovingSegments(alloc->saved_filled);
         checkNonmovingSegments(alloc->active);
-        for (unsigned int cap=0; cap < getNumCapabilities(); cap++) {
-            checkNonmovingSegments(alloc->current[cap]);
+        for (unsigned int cap_n=0; cap_n < getNumCapabilities(); cap_n++) {
+            Capability *cap = getCapability(cap_n);
+            checkNonmovingSegments(cap->current_segments[i]);
         }
     }
 }
@@ -926,7 +927,7 @@ static void checkGeneration (generation *gen,
     uint32_t n;
     gen_workspace *ws;
 
-    //ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
+    ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
     ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 
 #if defined(THREADED_RTS)
@@ -1070,12 +1071,13 @@ findMemoryLeak (void)
         markBlocks(nonmoving_compact_objects);
         markBlocks(nonmoving_marked_compact_objects);
         for (i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
-            struct NonmovingAllocator *alloc = nonmovingHeap.allocators[i];
+            struct NonmovingAllocator *alloc = &nonmovingHeap.allocators[i];
             markNonMovingSegments(alloc->filled);
             markNonMovingSegments(alloc->saved_filled);
             markNonMovingSegments(alloc->active);
             for (j = 0; j < getNumCapabilities(); j++) {
-                markNonMovingSegments(alloc->current[j]);
+                Capability *cap = getCapability(j);
+                markNonMovingSegments(cap->current_segments[i]);
             }
         }
         markNonMovingSegments(nonmovingHeap.sweep_list);
@@ -1180,23 +1182,18 @@ countNonMovingSegments(struct NonmovingSegment *segs)
     return ret;
 }
 
-static W_
-countNonMovingAllocator(struct NonmovingAllocator *alloc)
-{
-    W_ ret = countNonMovingSegments(alloc->filled)
-           + countNonMovingSegments(alloc->active);
-    for (uint32_t i = 0; i < getNumCapabilities(); ++i) {
-        ret += countNonMovingSegments(alloc->current[i]);
-    }
-    return ret;
-}
-
 static W_
 countNonMovingHeap(struct NonmovingHeap *heap)
 {
     W_ ret = 0;
     for (int alloc_idx = 0; alloc_idx < NONMOVING_ALLOCA_CNT; alloc_idx++) {
-        ret += countNonMovingAllocator(heap->allocators[alloc_idx]);
+        struct NonmovingAllocator *alloc = &heap->allocators[alloc_idx];
+        ret += countNonMovingSegments(alloc->filled);
+        ret += countNonMovingSegments(alloc->active);
+        for (uint32_t c = 0; c < getNumCapabilities(); ++c) {
+            Capability *cap = getCapability(c);
+            ret += countNonMovingSegments(cap->current_segments[alloc_idx]);
+        }
     }
     ret += countNonMovingSegments(heap->sweep_list);
     ret += countNonMovingSegments(heap->free);


=====================================
rts/sm/Storage.c
=====================================
@@ -213,13 +213,10 @@ initStorage (void)
       generations[g].to = &generations[g+1];
   }
   oldest_gen->to = oldest_gen;
-  RELEASE_SM_LOCK;
 
   // Nonmoving heap uses oldest_gen so initialize it after initializing oldest_gen
   nonmovingInit();
-
-  if (RtsFlags.GcFlags.useNonmoving)
-      nonmovingAddCapabilities(getNumCapabilities());
+  RELEASE_SM_LOCK;
 
   /* The oldest generation has one step. */
   if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
@@ -313,16 +310,14 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
                 allocBlockOnNode(capNoToNumaNode(n));
         }
     }
-    RELEASE_SM_LOCK;
 
-    // Initialize NonmovingAllocators and UpdRemSets
+    // Initialize non-moving collector
     if (RtsFlags.GcFlags.useNonmoving) {
-        nonmovingAddCapabilities(to);
         for (i = from; i < to; i++) {
-            getCapability(i)->upd_rem_set.queue.blocks = NULL;
-            nonmovingInitUpdRemSet(&getCapability(i)->upd_rem_set);
+            nonmovingInitCapability(getCapability(i));
         }
     }
+    RELEASE_SM_LOCK;
 
 #if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
     newThreadLocalKey(&gctKey);



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/4a91acb13ad757f710c3cbcd35d29c06176cdef0...3d7baa30746e92f39981d2db95de78eb80bd1cdb




