[commit: ghc] ghc-8.2: rts: Always collect stats (f19ab07)
git at git.haskell.org
Mon Jun 26 22:25:39 UTC 2017
Repository : ssh://git@git.haskell.org/ghc
On branch : ghc-8.2
Link : http://ghc.haskell.org/trac/ghc/changeset/f19ab07b660589bc7cc04073b9c91fac4be384e1/ghc
>---------------------------------------------------------------
commit f19ab07b660589bc7cc04073b9c91fac4be384e1
Author: Ben Gamari <bgamari.foss at gmail.com>
Date: Mon Jun 26 16:27:23 2017 -0400
rts: Always collect stats
It seems that 12ad4d417b89462ba8e19a3c7772a931b3a93f0e enabled stats
collection by default as it needs stats.allocated_bytes to determine
whether the program has exceeded its grace limit.

However, enabling stats also enables some potentially expensive time
checks. In general GC statistics should be cheap to compute (relative
to the cost of the GC itself), so now we always compute them. This
allows us to once again disable giveStats by default.
Fixes #13864.
Reviewers: simonmar, austin, erikd
Reviewed By: simonmar
Subscribers: rwbarton, thomie
GHC Trac Issues: #13864
Differential Revision: https://phabricator.haskell.org/D3669
(cherry picked from commit 4bd4f561d79de4d056571eca61a5249a5091c985)
>---------------------------------------------------------------
f19ab07b660589bc7cc04073b9c91fac4be384e1
rts/RtsFlags.c | 2 +-
rts/Stats.c | 124 ++++++++++++++++++++++++++++++---------------------------
2 files changed, 67 insertions(+), 59 deletions(-)
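
[Editor's note] For orientation, here is a condensed sketch of the shape of
stat_endGC after this patch, reconstructed from the diff below. All names
(stat_endGC, giveStats, gcDoneHook, getProcessTimes) come from rts/Stats.c
and rts/RtsFlags.c; the elided bodies are summarised in comments.

/* Sketch only: the control flow of stat_endGC after this change. */
static void stat_endGC_shape (void)
{
    /* Cheap accounting now runs unconditionally: heap-overflow
     * detection needs stats.allocated_bytes to be current after
     * every GC. */
    /* ... fill in stats.gc.* (allocated, live, copied, slop, ...) ... */
    /* ... update the cumulative stats (gcs, major_gcs, maxima, ...) ... */

    /* Only the timing work stays gated, since getProcessTimes() may
     * require a system call and can be expensive on some platforms. */
    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
        rtsConfig.gcDoneHook != NULL ||
        RtsFlags.ProfFlags.doHeapProfile) /* needs GC_tot_time */
    {
        /* ... getProcessTimes() and the derived cpu/elapsed times ... */
    }
}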
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 8d71354..0a12ba3 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -130,7 +130,7 @@ void initRtsFlagsDefaults(void)
maxStkSize = 8 * 1024 * 1024;
RtsFlags.GcFlags.statsFile = NULL;
- RtsFlags.GcFlags.giveStats = COLLECT_GC_STATS;
+ RtsFlags.GcFlags.giveStats = NO_GC_STATS;
RtsFlags.GcFlags.maxStkSize = maxStkSize / sizeof(W_);
RtsFlags.GcFlags.initialStkSize = 1024 / sizeof(W_);
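
[Editor's note] For context, giveStats holds one of the verbosity levels
defined in includes/rts/Flags.h. The sketch below is from memory and worth
checking against that header; the RTS-flag mapping is noted in comments.

#define NO_GC_STATS       0   /* the default again after this patch */
#define COLLECT_GC_STATS  1   /* collect, but print nothing         */
#define ONELINE_GC_STATS  2   /* +RTS -t                            */
#define SUMMARY_GC_STATS  3   /* +RTS -s                            */
#define VERBOSE_GC_STATS  4   /* +RTS -S                            */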
diff --git a/rts/Stats.c b/rts/Stats.c
index 5f5fa58..552200c 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -285,29 +285,76 @@ stat_endGC (Capability *cap, gc_thread *gct,
W_ live, W_ copied, W_ slop, uint32_t gen,
uint32_t par_n_threads, W_ par_max_copied)
{
- if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
- rtsConfig.gcDoneHook != NULL ||
- RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time
- {
- // -------------------------------------------------
- // Collect all the stats about this GC in stats.gc
-
- stats.gc.gen = gen;
- stats.gc.threads = par_n_threads;
+ // -------------------------------------------------
+ // Collect all the stats about this GC in stats.gc. We always do this since
+ // it's relatively cheap and we need allocated_bytes to catch heap
+ // overflows.
+
+ stats.gc.gen = gen;
+ stats.gc.threads = par_n_threads;
+
+ uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
+
+ // allocated since the last GC
+ stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
+
+ stats.gc.live_bytes = live * sizeof(W_);
+ stats.gc.large_objects_bytes = calcTotalLargeObjectsW() * sizeof(W_);
+ stats.gc.compact_bytes = calcTotalCompactW() * sizeof(W_);
+ stats.gc.slop_bytes = slop * sizeof(W_);
+ stats.gc.mem_in_use_bytes = mblocks_allocated * MBLOCK_SIZE;
+ stats.gc.copied_bytes = copied * sizeof(W_);
+ stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_);
+
+ // -------------------------------------------------
+ // Update the cumulative stats
+
+ stats.gcs++;
+ stats.allocated_bytes = tot_alloc_bytes;
+ stats.max_mem_in_use_bytes = peak_mblocks_allocated * MBLOCK_SIZE;
+
+ GC_coll_cpu[gen] += stats.gc.cpu_ns;
+ GC_coll_elapsed[gen] += stats.gc.elapsed_ns;
+ if (GC_coll_max_pause[gen] < stats.gc.elapsed_ns) {
+ GC_coll_max_pause[gen] = stats.gc.elapsed_ns;
+ }
- uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
+ stats.copied_bytes += stats.gc.copied_bytes;
+ if (par_n_threads > 1) {
+ stats.par_copied_bytes += stats.gc.copied_bytes;
+ stats.cumulative_par_max_copied_bytes +=
+ stats.gc.par_max_copied_bytes;
+ }
+ stats.gc_cpu_ns += stats.gc.cpu_ns;
+ stats.gc_elapsed_ns += stats.gc.elapsed_ns;
- // allocated since the last GC
- stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
+ if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
+ stats.major_gcs++;
+ if (stats.gc.live_bytes > stats.max_live_bytes) {
+ stats.max_live_bytes = stats.gc.live_bytes;
+ }
+ if (stats.gc.large_objects_bytes > stats.max_large_objects_bytes) {
+ stats.max_large_objects_bytes = stats.gc.large_objects_bytes;
+ }
+ if (stats.gc.compact_bytes > stats.max_compact_bytes) {
+ stats.max_compact_bytes = stats.gc.compact_bytes;
+ }
+ if (stats.gc.slop_bytes > stats.max_slop_bytes) {
+ stats.max_slop_bytes = stats.gc.slop_bytes;
+ }
+ stats.cumulative_live_bytes += stats.gc.live_bytes;
+ }
- stats.gc.live_bytes = live * sizeof(W_);
- stats.gc.large_objects_bytes = calcTotalLargeObjectsW() * sizeof(W_);
- stats.gc.compact_bytes = calcTotalCompactW() * sizeof(W_);
- stats.gc.slop_bytes = slop * sizeof(W_);
- stats.gc.mem_in_use_bytes = mblocks_allocated * MBLOCK_SIZE;
- stats.gc.copied_bytes = copied * sizeof(W_);
- stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_);
+ // -------------------------------------------------
+ // Do the more expensive bits only when stats are enabled.
+ if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
+ rtsConfig.gcDoneHook != NULL ||
+ RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time
+ {
+ // We only update the times when stats are explicitly enabled since
+ // getProcessTimes (e.g. requiring a system call) can be expensive on
+ // some platforms.
Time current_cpu, current_elapsed;
getProcessTimes(&current_cpu, &current_elapsed);
stats.cpu_ns = current_cpu - start_init_cpu;
@@ -319,45 +366,6 @@ stat_endGC (Capability *cap, gc_thread *gct,
stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu;
// -------------------------------------------------
- // Update the cumulative stats
-
- stats.gcs++;
- stats.allocated_bytes = tot_alloc_bytes;
- stats.max_mem_in_use_bytes = peak_mblocks_allocated * MBLOCK_SIZE;
-
- GC_coll_cpu[gen] += stats.gc.cpu_ns;
- GC_coll_elapsed[gen] += stats.gc.elapsed_ns;
- if (GC_coll_max_pause[gen] < stats.gc.elapsed_ns) {
- GC_coll_max_pause[gen] = stats.gc.elapsed_ns;
- }
-
- stats.copied_bytes += stats.gc.copied_bytes;
- if (par_n_threads > 1) {
- stats.par_copied_bytes += stats.gc.copied_bytes;
- stats.cumulative_par_max_copied_bytes +=
- stats.gc.par_max_copied_bytes;
- }
- stats.gc_cpu_ns += stats.gc.cpu_ns;
- stats.gc_elapsed_ns += stats.gc.elapsed_ns;
-
- if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
- stats.major_gcs++;
- if (stats.gc.live_bytes > stats.max_live_bytes) {
- stats.max_live_bytes = stats.gc.live_bytes;
- }
- if (stats.gc.large_objects_bytes > stats.max_large_objects_bytes) {
- stats.max_large_objects_bytes = stats.gc.large_objects_bytes;
- }
- if (stats.gc.compact_bytes > stats.max_compact_bytes) {
- stats.max_compact_bytes = stats.gc.compact_bytes;
- }
- if (stats.gc.slop_bytes > stats.max_slop_bytes) {
- stats.max_slop_bytes = stats.gc.slop_bytes;
- }
- stats.cumulative_live_bytes += stats.gc.live_bytes;
- }
-
- // -------------------------------------------------
// Emit events to the event log
// Has to be emitted while all caps stopped for GC, but before GC_END.
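
[Editor's note] A closing note on motivation: the allocation accounting has
to stay current because of the heap-limit grace check introduced by 12ad4d41.
The sketch below is purely illustrative (the helper name and the grace
parameter are invented here; the real check lives elsewhere in the RTS).

/* Hypothetical illustration: a check of roughly this shape is why
 * stats.allocated_bytes (a global in rts/Stats.c) must be updated
 * after every GC, even when the user never asked for statistics. */
static bool exceededHeapLimitGrace (uint64_t grace_bytes)
{
    /* grace_bytes is an assumed parameter, not a real RTS field. */
    return stats.allocated_bytes > grace_bytes;
}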