[Git][ghc/ghc][master] nonmoving: Add support for heap profiling

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Mon Feb 12 23:51:04 UTC 2024



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
bedb4f0d by Teo Camarasu at 2024-02-12T18:50:33-05:00
nonmoving: Add support for heap profiling

Add support for heap profiling while using the nonmoving collector.

We greatly simplify the implementation by disabling concurrent collection for
GCs when heap profiling is enabled. This entails that the marked objects on
the nonmoving heap are exactly the live objects.

Note that we match the behaviour for live bytes accounting by taking the size
of objects on the nonmoving heap to be that of the segment's block
rather than the object itself.

Resolves #22221

- - - - -


6 changed files:

- docs/users_guide/9.10.1-notes.rst
- rts/Capability.h
- rts/ProfHeap.c
- rts/RtsFlags.c
- rts/sm/GC.c
- testsuite/tests/profiling/should_run/all.T


Changes:

=====================================
docs/users_guide/9.10.1-notes.rst
=====================================
@@ -168,6 +168,8 @@ Runtime system
   In one real-world application, this has reduced resident set size by about 20% and modestly improved run-time.
   See :ghc-ticket:`23340`.
   :rts-flag:`--nonmoving-dense-allocator-count=⟨count⟩` has been added to fine-tune this behaviour.
+- Add support for heap profiling with the non-moving GC.
+  See :ghc-ticket:`22221`.
 
 - Add a :rts-flag:`--no-automatic-time-samples` flag which stops time profiling samples being automatically started on
   startup. Time profiling can be controlled manually using functions in ``GHC.Profiling``.


=====================================
rts/Capability.h
=====================================
@@ -98,7 +98,7 @@ struct Capability_ {
     // The update remembered set for the non-moving collector
     UpdRemSet upd_rem_set;
     // Array of current segments for the non-moving collector.
-    // Of length NONMOVING_ALLOCA_CNT.
+    // Of length nonmoving_alloca_cnt.
     struct NonmovingSegment **current_segments;
 
     // block for allocating pinned objects into


=====================================
rts/ProfHeap.c
=====================================
@@ -1280,6 +1280,116 @@ heapCensusBlock(Census *census, bdescr *bd)
     }
 }
 
+// determine whether a closure should be assigned to the PRIM cost-centre.
+static bool
+closureIsPrim (StgPtr p)
+{
+  bool prim = false;
+  const StgInfoTable *info = get_itbl((const StgClosure *)p);
+  switch (info->type) {
+    case THUNK:
+    case THUNK_1_1:
+    case THUNK_0_2:
+    case THUNK_2_0:
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_SELECTOR:
+    case FUN:
+    case BLACKHOLE:
+    case BLOCKING_QUEUE:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_1_1:
+    case FUN_0_2:
+    case FUN_2_0:
+    case CONSTR:
+    case CONSTR_NOCAF:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+    case CONSTR_2_0:
+    case IND:
+    case AP:
+    case PAP:
+    case AP_STACK:
+    case CONTINUATION:
+        prim = false;
+        break;
+
+    case BCO:
+    case MVAR_CLEAN:
+    case MVAR_DIRTY:
+    case TVAR:
+    case WEAK:
+    case PRIM:
+    case MUT_PRIM:
+    case MUT_VAR_CLEAN:
+    case MUT_VAR_DIRTY:
+    case ARR_WORDS:
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
+    case MUT_ARR_PTRS_FROZEN_CLEAN:
+    case MUT_ARR_PTRS_FROZEN_DIRTY:
+    case SMALL_MUT_ARR_PTRS_CLEAN:
+    case SMALL_MUT_ARR_PTRS_DIRTY:
+    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+    case TSO:
+    case STACK:
+    case TREC_CHUNK:
+        prim = true;
+        break;
+
+    case COMPACT_NFDATA:
+        barf("heapCensus, found compact object in the wrong list");
+        break;
+
+    default:
+        barf("heapCensus, unknown object: %d", info->type);
+  }
+  return prim;
+}
+
+static void
+heapCensusSegment (Census* census, struct NonmovingSegment* seg )
+{
+  unsigned int block_size = nonmovingSegmentBlockSize(seg);
+  unsigned int block_count = nonmovingSegmentBlockCount(seg);
+
+  for (unsigned int b = 0; b < block_count; b++) {
+    StgPtr p = nonmovingSegmentGetBlock(seg, b);
+    // ignore unmarked heap objects
+    if (!nonmovingClosureMarkedThisCycle(p)) continue;
+    // NB: We round up the size of objects to the segment block size.
+    // This aligns with live bytes accounting for the nonmoving collector.
+    heapProfObject(census, (StgClosure*)p, block_size / sizeof(W_), closureIsPrim(p));
+  }
+}
+
+/* Note [Non-concurrent nonmoving collector heap census]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * When using the nonmoving collector, we currently disable concurrent collection
+ * to simplify heap census accounting.
+ *
+ * Without concurrent allocation, marked objects on the nonmoving heap are exactly
+ * the live objects.
+ *
+ * We disable concurrent collection both for GCs that lead to a heap census and
+ * for those that do not.
+ * This is because a concurrent collection can overlap with a GC that is meant
+ * to perform a heap census. Alternatively we could better handle the case where
+ * a non-concurrent collection is triggered while a concurrent collection
+ * is running.
+ */
+
+static void
+heapCensusSegmentList (Census* census, struct NonmovingSegment* seg )
+{
+  for (; seg; seg = seg->link) {
+    heapCensusSegment(census, seg);
+  }
+}
+
 /* -----------------------------------------------------------------------------
  * Code to perform a heap census.
  * -------------------------------------------------------------------------- */
@@ -1350,6 +1460,24 @@ void heapCensus (Time t)
       }
   }
 
+  if (RtsFlags.GcFlags.useNonmoving) {
+    for (unsigned int i = 0; i < nonmoving_alloca_cnt; i++) {
+      heapCensusSegmentList(census, nonmovingHeap.allocators[i].filled);
+      heapCensusSegmentList(census, nonmovingHeap.allocators[i].saved_filled);
+      heapCensusSegmentList(census, nonmovingHeap.allocators[i].active);
+
+      heapCensusChain(census, nonmoving_large_objects);
+      heapCensusCompactList(census, nonmoving_compact_objects);
+
+      // segments living on capabilities
+      for (unsigned int j = 0; j < getNumCapabilities(); j++) {
+        Capability* cap = getCapability(j);
+        heapCensusSegment(census, cap->current_segments[i]);
+      }
+    }
+
+  }
+
   // dump out the census info
 #if defined(PROFILING)
     // We can't generate any info for LDV profiling until


=====================================
rts/RtsFlags.c
=====================================
@@ -1987,11 +1987,6 @@ static void normaliseRtsOpts (void)
     }
 #endif
 
-    if (RtsFlags.ProfFlags.doHeapProfile != NO_HEAP_PROFILING &&
-            RtsFlags.GcFlags.useNonmoving) {
-        barf("The non-moving collector doesn't support profiling");
-    }
-
     if (RtsFlags.GcFlags.compact && RtsFlags.GcFlags.useNonmoving) {
         errorBelch("The non-moving collector cannot be used in conjunction with\n"
                    "the compacting collector.");


=====================================
rts/sm/GC.c
=====================================
@@ -874,7 +874,9 @@ GarbageCollect (struct GcConfig config,
       ASSERT(oldest_gen->old_weak_ptr_list == NULL);
 
 #if defined(THREADED_RTS)
-      concurrent = !config.nonconcurrent;
+      // Concurrent collection is currently incompatible with heap profiling.
+      // See Note [Non-concurrent nonmoving collector heap census]
+      concurrent = !config.nonconcurrent && !RtsFlags.ProfFlags.doHeapProfile;
 #else
       // In the non-threaded runtime this is the only time we push to the
       // upd_rem_set


=====================================
testsuite/tests/profiling/should_run/all.T
=====================================
@@ -13,9 +13,9 @@ test('T11489', [req_profiling], makefile_test, ['T11489'])
 
 test('dynamic-prof', [], compile_and_run, [''])
 
-test('dynamic-prof2', [only_ways(['normal']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
+test('dynamic-prof2', [only_ways(['normal', 'nonmoving_thr']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
 
-test('dynamic-prof3', [only_ways(['normal']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
+test('dynamic-prof3', [only_ways(['normal', 'nonmoving_thr']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
 
 # Remove the ipName field as it's volatile (depends on e.g. architecture and may change with every new GHC version)
 def normalise_InfoProv_ipName(str):
@@ -34,9 +34,7 @@ test('staticcallstack002',
      ['-O0 -g3 -fdistinct-constructor-tables -finfo-table-map'])
 
 test('T21455',
-     [extra_run_opts('+RTS -hT -postem'),
-      # Nonmoving collector doesn't support -hT
-      omit_ways(['nonmoving', 'nonmoving_thr', 'nonmoving_thr_sanity'])],
+     [extra_run_opts('+RTS -hT -postem')],
      compile_and_run,
      [''])
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/bedb4f0de102936099bda4e995cc83f1c344366c

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/bedb4f0de102936099bda4e995cc83f1c344366c
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240212/c1243038/attachment-0001.html>


More information about the ghc-commits mailing list