[commit: ghc] master: rts: Always collect stats (4bd4f56)

git at git.haskell.org git at git.haskell.org
Mon Jun 26 21:26:12 UTC 2017


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/4bd4f561d79de4d056571eca61a5249a5091c985/ghc

>---------------------------------------------------------------

commit 4bd4f561d79de4d056571eca61a5249a5091c985
Author: Ben Gamari <bgamari.foss at gmail.com>
Date:   Mon Jun 26 16:27:23 2017 -0400

    rts: Always collect stats
    
    It seems that 12ad4d417b89462ba8e19a3c7772a931b3a93f0e enabled stats
    collection by default, as it needs stats.allocated_bytes to determine
    whether the program has exceeded its grace limit.
    
    However, enabling stats also enables some potentially expensive timing
    checks.  In general GC statistics should be cheap to compute (relative
    to the GC itself), so we now always compute them. This allows us to once
    again disable giveStats by default.
    
    Fixes #13864.
    
    Reviewers: simonmar, austin, erikd
    
    Reviewed By: simonmar
    
    Subscribers: rwbarton, thomie
    
    GHC Trac Issues: #13864
    
    Differential Revision: https://phabricator.haskell.org/D3669


>---------------------------------------------------------------

4bd4f561d79de4d056571eca61a5249a5091c985
 rts/RtsFlags.c |   2 +-
 rts/Stats.c    | 124 ++++++++++++++++++++++++++++++---------------------------
 2 files changed, 67 insertions(+), 59 deletions(-)
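
For orientation before the full diff: the change moves the cheap byte-counter
bookkeeping in stat_endGC out of the conditional so it runs on every GC, and
leaves only the process-time query behind the stats/hook/heap-profiling check.
Below is a minimal, standalone C sketch of that pattern. It is not RTS code;
the names (SketchStats, end_gc, process_cpu_ns, give_stats) are illustrative
stand-ins for the real structures and flags.

    #define _POSIX_C_SOURCE 200809L
    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    typedef struct {
        uint64_t gcs;             /* number of GCs so far                 */
        uint64_t allocated_bytes; /* total bytes allocated so far         */
        uint64_t gc_cpu_ns;       /* CPU time spent in GC (only if timed) */
    } SketchStats;

    static SketchStats stats;
    static bool give_stats = false; /* stands in for GcFlags.giveStats    */

    /* Stand-in for getProcessTimes(): typically a system call, so it is
     * the part we only want to pay for when the result will be used.     */
    static uint64_t process_cpu_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
    }

    /* Called at the end of every "GC" in this sketch. */
    static void end_gc(uint64_t allocated_since_last_gc,
                       uint64_t gc_start_cpu_ns)
    {
        /* Cheap counters: always updated, so a heap-limit check that
         * relies on allocated_bytes works even with stats output off.    */
        stats.gcs++;
        stats.allocated_bytes += allocated_since_last_gc;

        /* Expensive part: query process time only when stats are on.     */
        if (give_stats) {
            stats.gc_cpu_ns += process_cpu_ns() - gc_start_cpu_ns;
        }
    }

    int main(void)
    {
        give_stats = true;                 /* pretend stats were requested */
        uint64_t start = process_cpu_ns(); /* recorded at "GC start"       */
        end_gc(4096, start);
        printf("gcs=%" PRIu64 ", allocated=%" PRIu64 " bytes\n",
               stats.gcs, stats.allocated_bytes);
        return 0;
    }

The real diff follows the same split: the counter updates and per-generation
maxima become unconditional, while getProcessTimes and the event-log emission
stay inside the original guard.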

diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 73635cf..7b10d2a 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -130,7 +130,7 @@ void initRtsFlagsDefaults(void)
         maxStkSize = 8 * 1024 * 1024;
 
     RtsFlags.GcFlags.statsFile          = NULL;
-    RtsFlags.GcFlags.giveStats          = COLLECT_GC_STATS;
+    RtsFlags.GcFlags.giveStats          = NO_GC_STATS;
 
     RtsFlags.GcFlags.maxStkSize         = maxStkSize / sizeof(W_);
     RtsFlags.GcFlags.initialStkSize     = 1024 / sizeof(W_);
diff --git a/rts/Stats.c b/rts/Stats.c
index e31d124..b0c1be0 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -285,29 +285,76 @@ stat_endGC (Capability *cap, gc_thread *gct,
             W_ live, W_ copied, W_ slop, uint32_t gen,
             uint32_t par_n_threads, W_ par_max_copied)
 {
-    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
-        rtsConfig.gcDoneHook != NULL ||
-        RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time
-    {
-        // -------------------------------------------------
-        // Collect all the stats about this GC in stats.gc
-
-        stats.gc.gen = gen;
-        stats.gc.threads = par_n_threads;
+    // -------------------------------------------------
+    // Collect all the stats about this GC in stats.gc. We always do this since
+    // it's relatively cheap and we need allocated_bytes to catch heap
+    // overflows.
+
+    stats.gc.gen = gen;
+    stats.gc.threads = par_n_threads;
+
+    uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
+
+    // allocated since the last GC
+    stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
+
+    stats.gc.live_bytes = live * sizeof(W_);
+    stats.gc.large_objects_bytes = calcTotalLargeObjectsW() * sizeof(W_);
+    stats.gc.compact_bytes = calcTotalCompactW() * sizeof(W_);
+    stats.gc.slop_bytes = slop * sizeof(W_);
+    stats.gc.mem_in_use_bytes = mblocks_allocated * MBLOCK_SIZE;
+    stats.gc.copied_bytes = copied * sizeof(W_);
+    stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_);
+
+    // -------------------------------------------------
+    // Update the cumulative stats
+
+    stats.gcs++;
+    stats.allocated_bytes = tot_alloc_bytes;
+    stats.max_mem_in_use_bytes = peak_mblocks_allocated * MBLOCK_SIZE;
+
+    GC_coll_cpu[gen] += stats.gc.cpu_ns;
+    GC_coll_elapsed[gen] += stats.gc.elapsed_ns;
+    if (GC_coll_max_pause[gen] < stats.gc.elapsed_ns) {
+        GC_coll_max_pause[gen] = stats.gc.elapsed_ns;
+    }
 
-        uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
+    stats.copied_bytes += stats.gc.copied_bytes;
+    if (par_n_threads > 1) {
+        stats.par_copied_bytes += stats.gc.copied_bytes;
+        stats.cumulative_par_max_copied_bytes +=
+            stats.gc.par_max_copied_bytes;
+    }
+    stats.gc_cpu_ns += stats.gc.cpu_ns;
+    stats.gc_elapsed_ns += stats.gc.elapsed_ns;
 
-        // allocated since the last GC
-        stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
+    if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
+        stats.major_gcs++;
+        if (stats.gc.live_bytes > stats.max_live_bytes) {
+            stats.max_live_bytes = stats.gc.live_bytes;
+        }
+        if (stats.gc.large_objects_bytes > stats.max_large_objects_bytes) {
+            stats.max_large_objects_bytes = stats.gc.large_objects_bytes;
+        }
+        if (stats.gc.compact_bytes > stats.max_compact_bytes) {
+            stats.max_compact_bytes = stats.gc.compact_bytes;
+        }
+        if (stats.gc.slop_bytes > stats.max_slop_bytes) {
+            stats.max_slop_bytes = stats.gc.slop_bytes;
+        }
+        stats.cumulative_live_bytes += stats.gc.live_bytes;
+    }
 
-        stats.gc.live_bytes = live * sizeof(W_);
-        stats.gc.large_objects_bytes = calcTotalLargeObjectsW() * sizeof(W_);
-        stats.gc.compact_bytes = calcTotalCompactW() * sizeof(W_);
-        stats.gc.slop_bytes = slop * sizeof(W_);
-        stats.gc.mem_in_use_bytes = mblocks_allocated * MBLOCK_SIZE;
-        stats.gc.copied_bytes = copied * sizeof(W_);
-        stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_);
+    // -------------------------------------------------
+    // Do the more expensive bits only when stats are enabled.
 
+    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
+        rtsConfig.gcDoneHook != NULL ||
+        RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time
+    {
+        // We only update the times when stats are explicitly enabled since
+        // getProcessTimes (e.g. requiring a system call) can be expensive on
+        // some platforms.
         Time current_cpu, current_elapsed;
         getProcessTimes(&current_cpu, &current_elapsed);
         stats.cpu_ns = current_cpu - start_init_cpu;
@@ -319,45 +366,6 @@ stat_endGC (Capability *cap, gc_thread *gct,
         stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu;
 
         // -------------------------------------------------
-        // Update the cumulative stats
-
-        stats.gcs++;
-        stats.allocated_bytes = tot_alloc_bytes;
-        stats.max_mem_in_use_bytes = peak_mblocks_allocated * MBLOCK_SIZE;
-
-        GC_coll_cpu[gen] += stats.gc.cpu_ns;
-        GC_coll_elapsed[gen] += stats.gc.elapsed_ns;
-        if (GC_coll_max_pause[gen] < stats.gc.elapsed_ns) {
-            GC_coll_max_pause[gen] = stats.gc.elapsed_ns;
-        }
-
-        stats.copied_bytes += stats.gc.copied_bytes;
-        if (par_n_threads > 1) {
-            stats.par_copied_bytes += stats.gc.copied_bytes;
-            stats.cumulative_par_max_copied_bytes +=
-                stats.gc.par_max_copied_bytes;
-        }
-        stats.gc_cpu_ns += stats.gc.cpu_ns;
-        stats.gc_elapsed_ns += stats.gc.elapsed_ns;
-
-        if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
-            stats.major_gcs++;
-            if (stats.gc.live_bytes > stats.max_live_bytes) {
-                stats.max_live_bytes = stats.gc.live_bytes;
-            }
-            if (stats.gc.large_objects_bytes > stats.max_large_objects_bytes) {
-                stats.max_large_objects_bytes = stats.gc.large_objects_bytes;
-            }
-            if (stats.gc.compact_bytes > stats.max_compact_bytes) {
-                stats.max_compact_bytes = stats.gc.compact_bytes;
-            }
-            if (stats.gc.slop_bytes > stats.max_slop_bytes) {
-                stats.max_slop_bytes = stats.gc.slop_bytes;
-            }
-            stats.cumulative_live_bytes += stats.gc.live_bytes;
-        }
-
-        // -------------------------------------------------
         // Emit events to the event log
 
         // Has to be emitted while all caps stopped for GC, but before GC_END.


