[commit: ghc] master: Turn on -n4m with -A16m or greater (85e81a8)

git at git.haskell.org git at git.haskell.org
Sun Oct 9 22:55:20 UTC 2016


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/85e81a850a3e79d965e18f267a0e0b1c4bc69fae/ghc

>---------------------------------------------------------------

commit 85e81a850a3e79d965e18f267a0e0b1c4bc69fae
Author: Simon Marlow <marlowsd at gmail.com>
Date:   Sun Oct 9 18:21:35 2016 -0400

    Turn on -n4m with -A16m or greater
    
    Nursery chunks help reduce the cost of GC when capabilities are unevenly
    loaded, by ensuring that we use more of the available nursery.
    
    The rationale for enabling this at -A16m is that any negative effects
    due to loss of cache locality are less likely to be an issue at -A16m
    and above.  It's a conservative guess.  If we had a lot of benchmark
    data we could probably do better.
    
    Results for nofib/parallel at -N4 -A32m with and without -n4m:
    
    ```
    ------------------------------------------------------------------------
            Program           Size    Allocs   Runtime   Elapsed  TotalMem
    ------------------------------------------------------------------------
       blackscholes           0.0%     -9.5%     -9.0%    -15.0%     -2.2%
              coins           0.0%     -4.7%     -3.6%     -0.6%    -13.6%
             mandel           0.0%     -0.3%     +7.7%    +13.1%     +0.1%
            matmult           0.0%     +1.5%    +10.0%     +7.7%     +0.1%
              nbody           0.0%     -4.1%     -2.9%     0.085      0.0%
             parfib           0.0%     -1.4%     +1.0%     +1.5%     +0.2%
            partree           0.0%     -0.3%     +0.8%     +2.9%     -0.8%
               prsa           0.0%     -0.5%     -2.1%     -7.6%      0.0%
             queens           0.0%     -3.2%     -1.4%     +2.2%     +1.3%
                ray           0.0%     -5.6%    -14.5%     -7.6%     +0.8%
           sumeuler           0.0%     -0.4%     +2.4%     +1.1%      0.0%
    ------------------------------------------------------------------------
                Min           0.0%     -9.5%    -14.5%    -15.0%    -13.6%
                Max           0.0%     +1.5%    +10.0%    +13.1%     +1.3%
     Geometric Mean          +0.0%     -2.6%     -1.3%     -0.5%     -1.4%
    ```
    
    Not conclusive, but slightly better.  This matters a lot more when you
    have more cores.
    
    Test Plan: validate, nofib/parallel
    
    Reviewers: niteria, ezyang, nh2, trofi, austin, erikd, bgamari
    
    Reviewed By: bgamari
    
    Subscribers: thomie
    
    Differential Revision: https://phabricator.haskell.org/D2581
    
    GHC Trac Issues: #9221


>---------------------------------------------------------------

85e81a850a3e79d965e18f267a0e0b1c4bc69fae
 docs/users_guide/runtime_control.rst |  2 +-
 rts/RtsFlags.c                       | 18 ++++++++++++++++++
 rts/sm/Storage.c                     | 13 -------------
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/docs/users_guide/runtime_control.rst b/docs/users_guide/runtime_control.rst
index 0ffb1d8..54c7508 100644
--- a/docs/users_guide/runtime_control.rst
+++ b/docs/users_guide/runtime_control.rst
@@ -327,7 +327,7 @@ performance.
 
 .. rts-flag:: -n ⟨size⟩
 
-    :default: 0
+    :default: 4m with ``-A16m`` or larger, otherwise 0.
 
     .. index::
        single: allocation area, chunk size
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 4bd544e..d86b154 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -1454,6 +1454,24 @@ static void normaliseRtsOpts (void)
         errorUsage();
     }
 
+    if (RtsFlags.GcFlags.maxHeapSize != 0 &&
+        RtsFlags.GcFlags.heapSizeSuggestion >
+        RtsFlags.GcFlags.maxHeapSize) {
+        RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
+    }
+
+    if (RtsFlags.GcFlags.maxHeapSize != 0 &&
+        RtsFlags.GcFlags.minAllocAreaSize >
+        RtsFlags.GcFlags.maxHeapSize) {
+        errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
+        RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
+    }
+
+    // If we have -A16m or larger, use -n4m.
+    if (RtsFlags.GcFlags.minAllocAreaSize >= (16*1024*1024) / BLOCK_SIZE) {
+        RtsFlags.GcFlags.nurseryChunkSize = (4*1024*1024) / BLOCK_SIZE;
+    }
+
     if (RtsFlags.ParFlags.parGcLoadBalancingGen == ~0u) {
         StgWord alloc_area_bytes
             = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 4d0c8d5..357e018 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -140,19 +140,6 @@ initStorage (void)
   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 
-  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
-      RtsFlags.GcFlags.heapSizeSuggestion >
-      RtsFlags.GcFlags.maxHeapSize) {
-      RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
-  }
-
-  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
-      RtsFlags.GcFlags.minAllocAreaSize >
-      RtsFlags.GcFlags.maxHeapSize) {
-      errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
-      RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
-  }
-
   initBlockAllocator();
 
 #if defined(THREADED_RTS)



More information about the ghc-commits mailing list