[commit: ghc] master: Make RTS keep less memory (fixes #14702) (0171e09)
git at git.haskell.org
git at git.haskell.org
Thu Feb 1 04:29:58 UTC 2018
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/0171e09e4d073d8466953ebbf01292e55829fb20/ghc
>---------------------------------------------------------------
commit 0171e09e4d073d8466953ebbf01292e55829fb20
Author: Andrey Sverdlichenko <blaze at ruddy.ru>
Date: Wed Jan 31 21:33:58 2018 -0500
Make RTS keep less memory (fixes #14702)
Currently runtime keeps hold to 4*used_memory. This includes, in
particular, nursery, which can be quite large on multiprocessor
machines: 16 CPUs x 64Mb each is 1GB. Multiplying it by 4 means whatever
actual memory usage is, runtime will never release memory under 4GB, and
this is quite excessive for processes which only need a lot of memory
shortly (think building data structures from large files).
This diff makes multiplier to apply only to GC-managed memory, leaving
all "static" allocations alone.
Test Plan: make test TEST="T14702"
Reviewers: bgamari, erikd, simonmar
Reviewed By: simonmar
Subscribers: rwbarton, thomie, carter
GHC Trac Issues: #14702
Differential Revision: https://phabricator.haskell.org/D4338
>---------------------------------------------------------------
0171e09e4d073d8466953ebbf01292e55829fb20
rts/RetainerProfile.c | 2 --
rts/RetainerProfile.h | 2 --
rts/sm/GC.c | 52 ++++++++++++++++++++++++++++++++++---------
testsuite/tests/rts/T14702.hs | 36 ++++++++++++++++++++++++++++++
testsuite/tests/rts/all.T | 5 +++++
5 files changed, 83 insertions(+), 14 deletions(-)
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
index 7a9b9cc..4badbfe 100644
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -279,7 +279,6 @@ isEmptyRetainerStack( void )
/* -----------------------------------------------------------------------------
* Returns size of stack
* -------------------------------------------------------------------------- */
-#if defined(DEBUG)
W_
retainerStackBlocks( void )
{
@@ -291,7 +290,6 @@ retainerStackBlocks( void )
return res;
}
-#endif
/* -----------------------------------------------------------------------------
* Returns true if stackTop is at the stack boundary of the current stack,
diff --git a/rts/RetainerProfile.h b/rts/RetainerProfile.h
index 6882a2a..bc11cc7 100644
--- a/rts/RetainerProfile.h
+++ b/rts/RetainerProfile.h
@@ -41,9 +41,7 @@ retainerSetOf( const StgClosure *c )
}
// Used by Storage.c:memInventory()
-#if defined(DEBUG)
extern W_ retainerStackBlocks ( void );
-#endif
#include "EndPrivate.h"
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index c5ab7a8..197b466 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -28,6 +28,7 @@
#include "Sparks.h"
#include "Sweep.h"
+#include "Arena.h"
#include "Storage.h"
#include "RtsUtils.h"
#include "Apply.h"
@@ -50,6 +51,10 @@
#include "CNF.h"
#include "RtsFlags.h"
+#if defined(PROFILING)
+#include "RetainerProfile.h"
+#endif
+
#include <string.h> // for memset()
#include <unistd.h>
@@ -756,24 +761,51 @@ GarbageCollect (uint32_t collect_gen,
ACQUIRE_SM_LOCK;
if (major_gc) {
- W_ need, got;
- need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
- got = mblocks_allocated;
+ W_ need_prealloc, need_live, need, got;
+ uint32_t i;
+
+ need_live = 0;
+ for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
+ need_live += genLiveBlocks(&generations[i]);
+ }
+ need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live);
+
+ need_prealloc = 0;
+ for (i = 0; i < n_nurseries; i++) {
+ need_prealloc += nurseries[i].n_blocks;
+ }
+ need_prealloc += RtsFlags.GcFlags.largeAllocLim;
+ need_prealloc += countAllocdBlocks(exec_block);
+ need_prealloc += arenaBlocks();
+#if defined(PROFILING)
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
+ need_prealloc = retainerStackBlocks();
+ }
+#endif
+
/* If the amount of data remains constant, next major GC we'll
- require (F+1)*need. We leave (F+2)*need in order to reduce
- repeated deallocation and reallocation. */
- need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
+ * require (F+1)*live + prealloc. We leave (F+2)*live + prealloc
+ * in order to reduce repeated deallocation and reallocation. #14702
+ */
+ need = need_prealloc + (RtsFlags.GcFlags.oldGenFactor + 2) * need_live;
+
+ /* Also, if user set heap size, do not drop below it.
+ */
+ need = stg_max(RtsFlags.GcFlags.heapSizeSuggestion, need);
+
/* But with a large nursery, the above estimate might exceed
* maxHeapSize. A large resident set size might make the OS
* kill this process, or swap unnecessarily. Therefore we
* ensure that our estimate does not exceed maxHeapSize.
*/
if (RtsFlags.GcFlags.maxHeapSize != 0) {
- W_ max = BLOCKS_TO_MBLOCKS(RtsFlags.GcFlags.maxHeapSize);
- if (need > max) {
- need = max;
- }
+ need = stg_min(RtsFlags.GcFlags.maxHeapSize, need);
}
+
+ need = BLOCKS_TO_MBLOCKS(need);
+
+ got = mblocks_allocated;
+
if (got > need) {
returnMemoryToOS(got - need);
}
diff --git a/testsuite/tests/rts/T14702.hs b/testsuite/tests/rts/T14702.hs
new file mode 100644
index 0000000..8e07529
--- /dev/null
+++ b/testsuite/tests/rts/T14702.hs
@@ -0,0 +1,36 @@
+module Main where
+
+import Control.Monad
+import Data.Array.IO.Safe
+import Data.Word
+import GHC.Stats
+import System.Exit
+import System.Mem
+
+printAlloc :: String -> IO (Word64, Word64)
+printAlloc name = do
+ performGC
+ details <- gc <$> getRTSStats
+ let dat = (gcdetails_live_bytes details, gcdetails_mem_in_use_bytes details)
+ putStrLn $ name ++ ": " ++ show dat
+ pure dat
+
+allocateAndPrint :: IO ()
+allocateAndPrint = do
+ -- allocate and touch a lot of memory (4MB * 260 ~ 1GB)
+ memoryHog <- forM [1 .. 300] $ \_ ->
+ (newArray (0, 1000000) 0 :: IO (IOUArray Word Word32))
+ _ <- printAlloc "with large allocation"
+ -- do something with memory to prevent it from being GC'ed until now
+ forM_ memoryHog $ \a -> void $ readArray a 0
+
+main :: IO ()
+main = do
+ (firstLive, firstTotal) <- printAlloc "initial"
+ allocateAndPrint
+ (lastLive, lastTotal) <- printAlloc "final"
+
+ -- Now there is no reason to have more memory allocated than at start
+ let ratio = fromIntegral lastTotal / fromIntegral firstTotal
+ putStrLn $ "alloc ratio " ++ show ratio
+ when (ratio > 1.5) $ exitFailure
diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T
index fe86dd1..ef77d57 100644
--- a/testsuite/tests/rts/all.T
+++ b/testsuite/tests/rts/all.T
@@ -383,3 +383,8 @@ test('T13832', exit_code(1), compile_and_run, ['-threaded'])
test('T13894', normal, compile_and_run, [''])
test('T14497', normal, compile_and_run, ['-O'])
test('T14695', normal, run_command, ['$MAKE -s --no-print-directory T14695'])
+test('T14702', [ ignore_stdout
+ , only_ways(['threaded1', 'threaded2'])
+ , extra_run_opts('+RTS -A32m -N8 -T -RTS')
+ ]
+ , compile_and_run, [''])
More information about the ghc-commits
mailing list