[Git][ghc/ghc][master] rts: Refine memory retention behaviour to account for pinned/compacted objects

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Thu May 11 15:57:07 UTC 2023



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
05cea68c by Matthew Pickering at 2023-05-11T11:56:36-04:00
rts: Refine memory retention behaviour to account for pinned/compacted objects

When using the copying collector there is still a lot of data which
isn't copied (such as pinned, compacted, large objects etc). The logic
to decide how much memory to retain didn't take into account that these
wouldn't be copied. Therefore we pessimistically retained 2* the amount
of memory for these blocks even though they wouldn't be copied by the
collector.

The solution is to split up the heap into two parts, the parts which
will be copied and the parts which won't be copied. Then the appropiate
factor is applied to each part individually (2 * for copying and 1.2 *
for not copying).

The T23221 test demonstrates this improvement with a program which first
allocates many unpinned ByteArray# followed by many pinned ByteArray#
and observes the difference in the ultimate memory baseline between the
two.

There are some charts on #23221.

Fixes #23221

- - - - -


5 changed files:

- rts/sm/GC.c
- rts/sm/Storage.c
- rts/sm/Storage.h
- + testsuite/tests/rts/T23221.hs
- testsuite/tests/rts/all.T


Changes:

=====================================
rts/sm/GC.c
=====================================
@@ -994,14 +994,40 @@ GarbageCollect (struct GcConfig config,
   commitMBlockFreeing();
 
   if (major_gc) {
-      W_ need_prealloc, need_live, need, got;
+      W_ need_prealloc, need_copied_live, need_uncopied_live, need, got, extra_needed;
       uint32_t i;
 
-      need_live = 0;
+      need_copied_live = 0;
+      need_uncopied_live = 0;
       for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
-          need_live += genLiveBlocks(&generations[i]);
+          need_copied_live += genLiveCopiedBlocks(&generations[i]);
+          need_uncopied_live += genLiveUncopiedBlocks(&generations[i]);
       }
-      need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live);
+
+      debugTrace(DEBUG_gc, "(before) copied_live: %d; uncopied_live: %d", need_copied_live, need_uncopied_live );
+
+
+      // minOldGenSize states that the size of the oldest generation must be at least
+      // as big as a certain value, so make sure to save enough memory for that.
+      extra_needed = 0;
+      if (RtsFlags.GcFlags.minOldGenSize >= need_copied_live + need_uncopied_live){
+        extra_needed = RtsFlags.GcFlags.minOldGenSize - (need_copied_live + need_uncopied_live);
+      }
+      debugTrace(DEBUG_gc, "(minOldGen: %d; extra_needed: %d", RtsFlags.GcFlags.minOldGenSize, extra_needed);
+
+      // If oldest gen is uncopying in some manner (compact or non-moving) then
+      // add the extra requested by minOldGenSize to uncopying portion of memory.
+      // Otherwise, the last generation is copying so add it to copying portion.
+      if (oldest_gen -> compact || RtsFlags.GcFlags.useNonmoving) {
+        need_uncopied_live += extra_needed;
+      }
+      else {
+        need_copied_live += extra_needed;
+      }
+
+      ASSERT(need_uncopied_live + need_copied_live >= RtsFlags.GcFlags.minOldGenSize );
+
+      debugTrace(DEBUG_gc, "(after) copyied_live: %d; uncopied_live: %d", need_copied_live, need_uncopied_live );
 
       need_prealloc = 0;
       for (i = 0; i < n_nurseries; i++) {
@@ -1027,14 +1053,19 @@ GarbageCollect (struct GcConfig config,
 
       debugTrace(DEBUG_gc, "factors: %f %d %f", RtsFlags.GcFlags.oldGenFactor, consec_idle_gcs, scaled_factor  );
 
-      // Unavoidable need depends on GC strategy
+      // Unavoidable need for copying memory depends on GC strategy
       // * Copying need 2 * live
       // * Compacting need 1.x * live (we choose 1.2)
-      // * Nonmoving needs ~ 1.x * live
-      double unavoidable_need_factor = (oldest_gen->compact || RtsFlags.GcFlags.useNonmoving)
-                                          ? 1.2 : 2;
-      W_ scaled_needed = (scaled_factor + unavoidable_need_factor) * need_live;
-      debugTrace(DEBUG_gc, "factors_2: %f %d", unavoidable_need_factor, scaled_needed);
+      double unavoidable_copied_need_factor = (oldest_gen->compact)
+                                              ? 1.2 : 2;
+
+      // Unmoving blocks (compacted, pinned, nonmoving GC blocks) are not going
+      // to be copied so don't need to save 2* the memory for them.
+      double unavoidable_uncopied_need_factor = 1.2;
+
+      W_ scaled_needed = ((scaled_factor + unavoidable_copied_need_factor) * need_copied_live)
+                       + ((scaled_factor + unavoidable_uncopied_need_factor) * need_uncopied_live);
+      debugTrace(DEBUG_gc, "factors_2: %f %d", ((scaled_factor + unavoidable_copied_need_factor) * need_copied_live), ((scaled_factor + unavoidable_uncopied_need_factor) * need_uncopied_live));
       need = need_prealloc + scaled_needed;
 
       /* Also, if user set heap size, do not drop below it.


=====================================
rts/sm/Storage.c
=====================================
@@ -1608,20 +1608,34 @@ W_ countOccupied (bdescr *bd)
     return words;
 }
 
+// Returns the total number of live blocks
 W_ genLiveWords (generation *gen)
 {
     return (gen->live_estimate ? gen->live_estimate : gen->n_words) +
         gen->n_large_words + gen->n_compact_blocks * BLOCK_SIZE_W;
 }
 
-W_ genLiveBlocks (generation *gen)
+// The number of live blocks which will be copied by the copying collector.
+W_ genLiveCopiedBlocks (generation *gen)
+{
+  return gen->n_blocks;
+}
+
+// The number of live blocks which will not be copied by the copying collector
+// This includes non-moving collector segments, compact blocks and large/pinned blocks.
+W_ genLiveUncopiedBlocks (generation *gen)
 {
   W_ nonmoving_blocks = 0;
   // The nonmoving heap contains some blocks that live outside the regular generation structure.
   if (gen == oldest_gen && RtsFlags.GcFlags.useNonmoving){
     nonmoving_blocks = n_nonmoving_large_blocks + n_nonmoving_marked_large_blocks + n_nonmoving_compact_blocks + n_nonmoving_marked_compact_blocks;
   }
-  return gen->n_blocks + gen->n_large_blocks + gen->n_compact_blocks + nonmoving_blocks;
+  return gen->n_large_blocks + gen->n_compact_blocks + nonmoving_blocks;
+}
+
+W_ genLiveBlocks (generation *gen)
+{
+  return genLiveCopiedBlocks(gen) + genLiveUncopiedBlocks(gen);
 }
 
 W_ gcThreadLiveWords (uint32_t i, uint32_t g)


=====================================
rts/sm/Storage.h
=====================================
@@ -121,6 +121,8 @@ StgWord gcThreadLiveBlocks (uint32_t i, uint32_t g);
 
 StgWord genLiveWords  (generation *gen);
 StgWord genLiveBlocks (generation *gen);
+StgWord genLiveCopiedBlocks (generation *gen);
+StgWord genLiveUncopiedBlocks (generation *gen);
 
 StgWord calcTotalLargeObjectsW (void);
 StgWord calcTotalCompactW (void);


=====================================
testsuite/tests/rts/T23221.hs
=====================================
@@ -0,0 +1,70 @@
+{-# LANGUAGE MagicHash, UnboxedTuples, NumericUnderscores #-}
+
+module Main where
+
+import GHC.Exts
+import GHC.IO
+import System.Mem
+import System.Environment
+import Debug.Trace
+import Control.Monad
+import GHC.Stats
+import Data.Word
+
+-- This test is for checking the memory return behaviour of blocks which will be
+-- copied and blocks which are not copied (#23221)
+main :: IO ()
+main = do
+  [sn] <- getArgs
+  let n = read sn
+  -- By checking that lower bound of unpinned is the upper bound of pinned then we
+  -- check that unpinned has lower memory baseline than pinned.
+  loop newByteArray 2 3 n
+  loop newPinnedByteArray 1 2 n
+
+
+-- The upper_bound is the upper bound on how much total memory should be live at the end
+-- of the test as a factor of the expected live bytes.
+loop f lower_bound upper_bound n = do
+  ba <- mapM (\_ -> f 128) [0..n]
+  traceMarkerIO "Allocated_all"
+  performGC
+  let !ba' = take (n `div` 4) ba
+  evaluate (length ba')
+  traceMarkerIO "GC_4"
+  performGC
+  evaluate (length (reverse ba'))
+  replicateM_ 20 performGC
+  total_mem <- checkStats lower_bound upper_bound (n `div` 4)
+  evaluate (length (reverse ba'))
+  return total_mem
+
+checkStats :: Double -> Double -> Int -> IO ()
+checkStats lower_bound upper_bound n = do
+  stats <- getRTSStats
+  let expected_live_memory = fromIntegral n -- How many objects
+                             * (3     -- One list cons
+                                + 2   -- One BA constructor
+                                + 18) -- ByteArray# object (size 16 + 2 overhead)
+                                  -- size of each object
+                             * 8            -- word size
+  let bytes_used = gcdetails_mem_in_use_bytes (gc stats)
+      mblocks = bytes_used  `div` (2 ^ 20)
+  when (truncate (expected_live_memory * upper_bound) < bytes_used) $
+    error ("Upper Memory bound failed: " ++ show (truncate expected_live_memory, upper_bound, bytes_used))
+  when (truncate (expected_live_memory * lower_bound) >= bytes_used) $
+    error ("Lower Memory bound failed: " ++ show (truncate expected_live_memory, lower_bound, bytes_used))
+
+data BA = BA ByteArray#
+
+newByteArray :: Int -> IO BA
+newByteArray (I# sz#) = IO $ \s -> case newByteArray# sz# s of
+    (# s', k #) -> case unsafeFreezeByteArray# k s' of
+                    (# s'', ba# #) -> (# s'', BA ba# #)
+
+newPinnedByteArray :: Int -> IO BA
+newPinnedByteArray (I# sz#) = IO $ \s -> case newPinnedByteArray# sz# s of
+    (# s', k #) -> case unsafeFreezeByteArray# k s' of
+                    (# s'', ba# #) -> (# s'', BA ba# #)
+
+


=====================================
testsuite/tests/rts/all.T
=====================================
@@ -590,4 +590,6 @@ test('T22795c', [only_ways(['normal']), js_skip], compile_and_run, ['-threaded -
 
 test('T17574', [js_skip], compile_and_run, ['-with-rtsopts -T'])
 
+test('T23221', [js_skip, high_memory_usage, extra_run_opts('1500000'), unless(wordsize(64), skip)], compile_and_run, ['-O -with-rtsopts -T'])
+
 test('T23142', [unless(debug_rts(), skip), req_interp], makefile_test, ['T23142'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/05cea68c0f883999e8fc69edd305906041f44829

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/05cea68c0f883999e8fc69edd305906041f44829
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230511/1392926c/attachment-0001.html>


More information about the ghc-commits mailing list