[Git][ghc/ghc][wip/expansions-appdo] 4 commits: nonmoving: Add support for heap profiling

Apoorv Ingle (@ani) gitlab at gitlab.haskell.org
Tue Feb 13 01:10:56 UTC 2024



Apoorv Ingle pushed to branch wip/expansions-appdo at Glasgow Haskell Compiler / GHC


Commits:
bedb4f0d by Teo Camarasu at 2024-02-12T18:50:33-05:00
nonmoving: Add support for heap profiling

Add support for heap profiling while using the nonmoving collector.

We greatly simply the implementation by disabling concurrent collection for
GCs when heap profiling is enabled. This entails that the marked objects on
the nonmoving heap are exactly the live objects.

Note that we match the behaviour for live bytes accounting by taking the size
of objects on the nonmoving heap to be that of the segment's block
rather than the object itself.

Resolves #22221

- - - - -
d0d5acb5 by Teo Camarasu at 2024-02-12T18:51:09-05:00
doc: Add requires prof annotation to options that require it

Resolves #24421

- - - - -
22ebb064 by Apoorv Ingle at 2024-02-12T19:10:40-06:00
make applicative do work with expansions, possibly badly

Fixes: #24406

- - - - -
38c3d11c by Apoorv Ingle at 2024-02-12T19:10:40-06:00
enable the expansion flow

- - - - -


10 changed files:

- compiler/GHC/Tc/Gen/Do.hs
- compiler/GHC/Tc/Gen/Match.hs
- + compiler/ghc-llvm-version.h
- docs/users_guide/9.10.1-notes.rst
- docs/users_guide/profiling.rst
- rts/Capability.h
- rts/ProfHeap.c
- rts/RtsFlags.c
- rts/sm/GC.c
- testsuite/tests/profiling/should_run/all.T


Changes:

=====================================
compiler/GHC/Tc/Gen/Do.hs
=====================================
@@ -80,11 +80,6 @@ expand_do_stmts _ (stmt@(L _ (ParStmt {})):_) =
   pprPanic "expand_do_stmts: ParStmt" $ ppr stmt
   -- handeled by `GHC.Tc.Gen.Match.tcLcStmt`
 
-expand_do_stmts _ (stmt@(L _ (ApplicativeStmt{})): _) =
-  pprPanic "expand_do_stmts: Applicative Stmt" $ ppr stmt
-  -- Handeled by tcSyntaxOp see `GHC.Tc.Gen.Match.tcStmtsAndThen`
-
-
 expand_do_stmts _ [stmt@(L loc (LastStmt _ (L body_loc body) _ ret_expr))]
 -- See  Note [Expanding HsDo with XXExprGhcRn] Equation (5) below
 -- last statement of a list comprehension, needs to explicitly return it
@@ -191,6 +186,60 @@ expand_do_stmts do_or_lc
                              -- NB: LazyPat because we do not want to eagerly evaluate the pattern
                              -- and potentially loop forever
 
+
+expand_do_stmts do_or_lc ((L _ (ApplicativeStmt _ args mb_join)): lstmts) =
+-- See Note [Applicative BodyStmt]
+--
+--                  stmts ~~> stmts'
+--   -------------------------------------------------------------------------
+--     [(<$>, \ x -> e1), (<*>, e2), (<*>, e3), .. ] ; stmts  ~~> (\ x -> stmts') <$> e1 <*> e2 ...
+--
+-- Very similar to HsToCore.Expr.dsDo
+
+-- args are [(<$>, e1), (<*>, e2), .., ]
+  do { expr' <- unLoc <$> expand_do_stmts do_or_lc lstmts
+     -- extracts pats and arg bodies (rhss) from args
+     ; (pats_can_fail, rhss) <- unzip <$> mapM (do_arg . snd) args
+
+     -- add blocks for failable patterns
+     ; body_with_fails <- foldrM match_args expr' pats_can_fail
+
+     -- builds (body <$> e1 <*> e2 ...)
+     ; let expand_ado_expr = foldl mk_apps body_with_fails (zip (map fst args) rhss)
+
+     -- wrap the expanded expression with a `join` if needed
+     ; let final_expr = case mb_join of
+                          Just (SyntaxExprRn join_op) -> wrapGenSpan $ genHsApp join_op (wrapGenSpan expand_ado_expr)
+                          _ -> wrapGenSpan expand_ado_expr
+     ; traceTc "expand_do_stmts AppStmt" (ppr final_expr)
+     ; return final_expr
+     }
+  where
+    do_arg :: ApplicativeArg GhcRn -> TcM ((LPat GhcRn, FailOperator GhcRn), LHsExpr GhcRn)
+    do_arg (ApplicativeArgOne
+            { xarg_app_arg_one = mb_fail_op
+            , app_arg_pattern = pat@(L loc _)
+            , arg_expr        = rhs
+            }) =
+      return ((pat, mb_fail_op), mkExpandedStmtAt loc (L loc (BindStmt xbsn pat rhs)) (unLoc rhs))
+    do_arg (ApplicativeArgMany _ stmts ret pat ctxt) =
+      do { expr <- expand_do_stmts ctxt $ stmts ++ [wrapGenSpan $ mkLastStmt (wrapGenSpan ret)]
+         ; return ((pat, Nothing)
+                  , {- wrapGenSpan $ mkExpandedExpr (HsDo noExtField ctxt (wrapGenSpan stmts)) (unLoc expr)-} expr) }
+
+    match_args :: (LPat GhcRn, FailOperator GhcRn) -> HsExpr GhcRn -> TcM (HsExpr GhcRn)
+    match_args (pat, fail_op) body = unLoc <$> mk_failable_expr do_or_lc pat (wrapGenSpan body) fail_op
+
+    mk_apps :: HsExpr GhcRn -> (SyntaxExprRn, LHsExpr GhcRn) -> HsExpr GhcRn
+    mk_apps l_expr (op, r_expr) =
+      case op of
+        SyntaxExprRn op -> genHsExpApps op [ wrapGenSpan l_expr, r_expr ]
+        NoSyntaxExprRn -> pprPanic "expand_do_stmts applicative op:" (ppr op)
+
+    xbsn :: XBindStmtRn
+    xbsn = XBindStmtRn NoSyntaxExprRn Nothing
+
+
 expand_do_stmts _ stmts = pprPanic "expand_do_stmts: impossible happened" $ (ppr stmts)
 
 -- checks the pattern `pat`for irrefutability which decides if we need to wrap it with a fail block
@@ -229,7 +278,7 @@ mk_fail_block doFlav pat@(L ploc _) e (Just (SyntaxExprRn fail_op)) =
           mk_fail_msg_expr :: DynFlags -> LPat GhcRn -> LHsExpr GhcRn
           mk_fail_msg_expr dflags pat
             = nlHsLit $ mkHsString $ showPpr dflags $
-              text "Pattern match failure in" <+> pprHsDoFlavour (DoExpr Nothing)
+              text "Pattern match failure in" <+> pprHsDoFlavour doFlav
                    <+> text "at" <+> ppr (getLocA pat)
 
 


=====================================
compiler/GHC/Tc/Gen/Match.hs
=====================================
@@ -354,14 +354,8 @@ tcDoStmts ListComp (L l stmts) res_ty
         ; return $ mkHsWrapCo co (HsDo list_ty ListComp (L l stmts')) }
 
 tcDoStmts doExpr@(DoExpr _) ss@(L l stmts) res_ty
-  = do  { isApplicativeDo <- xoptM LangExt.ApplicativeDo
-        ; if isApplicativeDo
-          then do { stmts' <- tcStmts (HsDoStmt doExpr) tcDoStmt stmts res_ty
-                  ; res_ty <- readExpType res_ty
-                  ; return (HsDo res_ty doExpr (L l stmts')) }
-          else do { expanded_expr <- expandDoStmts doExpr stmts
-                                               -- Do expansion on the fly
-                  ; mkExpandedExprTc (HsDo noExtField doExpr ss) <$> tcExpr (unLoc expanded_expr) res_ty }
+  = do  { expanded_expr <- expandDoStmts doExpr stmts -- Do expansion on the fly
+        ; mkExpandedExprTc (HsDo noExtField doExpr ss) <$> tcExpr (unLoc expanded_expr) res_ty
         }
 
 tcDoStmts mDoExpr@(MDoExpr _) ss@(L _ stmts) res_ty


=====================================
compiler/ghc-llvm-version.h
=====================================
@@ -0,0 +1,11 @@
+/* compiler/ghc-llvm-version.h.  Generated from ghc-llvm-version.h.in by configure.  */
+#if !defined(__GHC_LLVM_VERSION_H__)
+#define __GHC_LLVM_VERSION_H__
+
+/* The maximum supported LLVM version number */
+#define sUPPORTED_LLVM_VERSION_MAX (16)
+
+/* The minimum supported LLVM version number */
+#define sUPPORTED_LLVM_VERSION_MIN (13)
+
+#endif /* __GHC_LLVM_VERSION_H__ */


=====================================
docs/users_guide/9.10.1-notes.rst
=====================================
@@ -168,6 +168,8 @@ Runtime system
   In one real-world application, this has reduced resident set size by about 20% and modestly improved run-time.
   See :ghc-ticket:`23340`.
   :rts-flag:`--nonmoving-dense-allocator-count=⟨count⟩` has been added to fine-tune this behaviour.
+- Add support for heap profiling with the non-moving GC.
+  See :ghc-ticket:`22221`.
 
 - Add a :rts-flag:`--no-automatic-time-samples` flag which stops time profiling samples being automatically started on
   startup. Time profiling can be controlled manually using functions in ``GHC.Profiling``.


=====================================
docs/users_guide/profiling.rst
=====================================
@@ -951,47 +951,47 @@ follows:
 .. rts-flag:: -hc ⟨name⟩
     :noindex:
 
-    Restrict the profile to closures produced by cost-centre stacks with
+    *Requires* :ghc-flag:`-prof`. Restrict the profile to closures produced by cost-centre stacks with
     one of the specified cost centres at the top.
 
 .. rts-flag:: -hC ⟨name⟩
     :noindex:
 
-    Restrict the profile to closures produced by cost-centre stacks with
+    *Requires* :ghc-flag:`-prof`. Restrict the profile to closures produced by cost-centre stacks with
     one of the specified cost centres anywhere in the stack.
 
 .. rts-flag:: -hm ⟨module⟩
     :noindex:
 
-    Restrict the profile to closures produced by the specified modules.
+    *Requires* :ghc-flag:`-prof`. Restrict the profile to closures produced by the specified modules.
 
 .. rts-flag:: -hd ⟨desc⟩
     :noindex:
 
-    Restrict the profile to closures with the specified description
+    *Requires* :ghc-flag:`-prof`. Restrict the profile to closures with the specified description
     strings.
 
 .. rts-flag:: -hy ⟨type⟩
     :noindex:
 
-    Restrict the profile to closures with the specified types.
+    *Requires* :ghc-flag:`-prof`. Restrict the profile to closures with the specified types.
 
 .. rts-flag:: -he ⟨era⟩
     :noindex:
 
-    Restrict the profile to the specified era.
+    *Requires* :ghc-flag:`-prof`. Restrict the profile to the specified era.
 
 .. rts-flag:: -hr ⟨cc⟩
     :noindex:
 
-    Restrict the profile to closures with retainer sets containing
+    *Requires* :ghc-flag:`-prof`. Restrict the profile to closures with retainer sets containing
     cost-centre stacks with one of the specified cost centres at the
     top.
 
 .. rts-flag:: -hb ⟨bio⟩
     :noindex:
 
-    Restrict the profile to closures with one of the specified
+    *Requires* :ghc-flag:`-prof`. Restrict the profile to closures with one of the specified
     biographies, where ⟨bio⟩ is one of ``lag``, ``drag``, ``void``, or
     ``use``.
 


=====================================
rts/Capability.h
=====================================
@@ -98,7 +98,7 @@ struct Capability_ {
     // The update remembered set for the non-moving collector
     UpdRemSet upd_rem_set;
     // Array of current segments for the non-moving collector.
-    // Of length NONMOVING_ALLOCA_CNT.
+    // Of length nonmoving_alloca_cnt.
     struct NonmovingSegment **current_segments;
 
     // block for allocating pinned objects into


=====================================
rts/ProfHeap.c
=====================================
@@ -1280,6 +1280,116 @@ heapCensusBlock(Census *census, bdescr *bd)
     }
 }
 
+// determine whether a closure should be assigned to the PRIM cost-centre.
+static bool
+closureIsPrim (StgPtr p)
+{
+  bool prim = false;
+  const StgInfoTable *info = get_itbl((const StgClosure *)p);
+  switch (info->type) {
+    case THUNK:
+    case THUNK_1_1:
+    case THUNK_0_2:
+    case THUNK_2_0:
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_SELECTOR:
+    case FUN:
+    case BLACKHOLE:
+    case BLOCKING_QUEUE:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_1_1:
+    case FUN_0_2:
+    case FUN_2_0:
+    case CONSTR:
+    case CONSTR_NOCAF:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+    case CONSTR_2_0:
+    case IND:
+    case AP:
+    case PAP:
+    case AP_STACK:
+    case CONTINUATION:
+        prim = false;
+        break;
+
+    case BCO:
+    case MVAR_CLEAN:
+    case MVAR_DIRTY:
+    case TVAR:
+    case WEAK:
+    case PRIM:
+    case MUT_PRIM:
+    case MUT_VAR_CLEAN:
+    case MUT_VAR_DIRTY:
+    case ARR_WORDS:
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
+    case MUT_ARR_PTRS_FROZEN_CLEAN:
+    case MUT_ARR_PTRS_FROZEN_DIRTY:
+    case SMALL_MUT_ARR_PTRS_CLEAN:
+    case SMALL_MUT_ARR_PTRS_DIRTY:
+    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+    case TSO:
+    case STACK:
+    case TREC_CHUNK:
+        prim = true;
+        break;
+
+    case COMPACT_NFDATA:
+        barf("heapCensus, found compact object in the wrong list");
+        break;
+
+    default:
+        barf("heapCensus, unknown object: %d", info->type);
+  }
+  return prim;
+}
+
+static void
+heapCensusSegment (Census* census, struct NonmovingSegment* seg )
+{
+  unsigned int block_size = nonmovingSegmentBlockSize(seg);
+  unsigned int block_count = nonmovingSegmentBlockCount(seg);
+
+  for (unsigned int b = 0; b < block_count; b++) {
+    StgPtr p = nonmovingSegmentGetBlock(seg, b);
+    // ignore unmarked heap objects
+    if (!nonmovingClosureMarkedThisCycle(p)) continue;
+    // NB: We round up the size of objects to the segment block size.
+    // This aligns with live bytes accounting for the nonmoving collector.
+    heapProfObject(census, (StgClosure*)p, block_size / sizeof(W_), closureIsPrim(p));
+  }
+}
+
+/* Note [Non-concurrent nonmoving collector heap census]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * When using the nonmoving collector, we currently disable concurrent collection
+ * to simplify heap census accounting.
+ *
+ * Without concurrent allocation, marked objects on the nonmoving heap are exactly
+ * the live objects.
+ *
+ * We disable concurrent collection both for GCs that lead to a heap census and not.
+ * This is because a concurrent collection can overlap with a GC that is meant
+ * to perform a heap census. Alternatively we could better handle the case where
+ * a non-concurrent collection is triggered while a non-concurrent collection
+ * is running.
+ */
+
+static void
+heapCensusSegmentList (Census* census, struct NonmovingSegment* seg )
+{
+  for (; seg; seg = seg->link) {
+    heapCensusSegment(census, seg);
+  }
+}
+
 /* -----------------------------------------------------------------------------
  * Code to perform a heap census.
  * -------------------------------------------------------------------------- */
@@ -1350,6 +1460,24 @@ void heapCensus (Time t)
       }
   }
 
+  if (RtsFlags.GcFlags.useNonmoving) {
+    for (unsigned int i = 0; i < nonmoving_alloca_cnt; i++) {
+      heapCensusSegmentList(census, nonmovingHeap.allocators[i].filled);
+      heapCensusSegmentList(census, nonmovingHeap.allocators[i].saved_filled);
+      heapCensusSegmentList(census, nonmovingHeap.allocators[i].active);
+
+      heapCensusChain(census, nonmoving_large_objects);
+      heapCensusCompactList(census, nonmoving_compact_objects);
+
+      // segments living on capabilities
+      for (unsigned int j = 0; j < getNumCapabilities(); j++) {
+        Capability* cap = getCapability(j);
+        heapCensusSegment(census, cap->current_segments[i]);
+      }
+    }
+
+  }
+
   // dump out the census info
 #if defined(PROFILING)
     // We can't generate any info for LDV profiling until


=====================================
rts/RtsFlags.c
=====================================
@@ -1987,11 +1987,6 @@ static void normaliseRtsOpts (void)
     }
 #endif
 
-    if (RtsFlags.ProfFlags.doHeapProfile != NO_HEAP_PROFILING &&
-            RtsFlags.GcFlags.useNonmoving) {
-        barf("The non-moving collector doesn't support profiling");
-    }
-
     if (RtsFlags.GcFlags.compact && RtsFlags.GcFlags.useNonmoving) {
         errorBelch("The non-moving collector cannot be used in conjunction with\n"
                    "the compacting collector.");


=====================================
rts/sm/GC.c
=====================================
@@ -874,7 +874,9 @@ GarbageCollect (struct GcConfig config,
       ASSERT(oldest_gen->old_weak_ptr_list == NULL);
 
 #if defined(THREADED_RTS)
-      concurrent = !config.nonconcurrent;
+      // Concurrent collection is currently incompatible with heap profiling.
+      // See Note [Non-concurrent nonmoving collector heap census]
+      concurrent = !config.nonconcurrent && !RtsFlags.ProfFlags.doHeapProfile;
 #else
       // In the non-threaded runtime this is the only time we push to the
       // upd_rem_set


=====================================
testsuite/tests/profiling/should_run/all.T
=====================================
@@ -13,9 +13,9 @@ test('T11489', [req_profiling], makefile_test, ['T11489'])
 
 test('dynamic-prof', [], compile_and_run, [''])
 
-test('dynamic-prof2', [only_ways(['normal']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
+test('dynamic-prof2', [only_ways(['normal', 'nonmoving_thr']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
 
-test('dynamic-prof3', [only_ways(['normal']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
+test('dynamic-prof3', [only_ways(['normal', 'nonmoving_thr']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
 
 # Remove the ipName field as it's volatile (depends on e.g. architecture and may change with every new GHC version)
 def normalise_InfoProv_ipName(str):
@@ -34,9 +34,7 @@ test('staticcallstack002',
      ['-O0 -g3 -fdistinct-constructor-tables -finfo-table-map'])
 
 test('T21455',
-     [extra_run_opts('+RTS -hT -postem'),
-      # Nonmoving collector doesn't support -hT
-      omit_ways(['nonmoving', 'nonmoving_thr', 'nonmoving_thr_sanity'])],
+     [extra_run_opts('+RTS -hT -postem')],
      compile_and_run,
      [''])
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e534decffde95c067238cd1a905a0b86892fde9d...38c3d11c5926a48fcc1451afd0b6b1e20c0b6eec

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e534decffde95c067238cd1a905a0b86892fde9d...38c3d11c5926a48fcc1451afd0b6b1e20c0b6eec
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240212/e803cc2b/attachment-0001.html>


More information about the ghc-commits mailing list