[Git][ghc/ghc][wip/gc/instrumentation] 2 commits: rts: Add GetMyThreadCPUTime helper

Tue May 21 13:50:54 UTC 2019


Ben Gamari pushed to branch wip/gc/instrumentation at Glasgow Haskell Compiler / GHC


Commits:
698533af by Ben Gamari at 2019-05-17T19:40:26Z
rts: Add GetMyThreadCPUTime helper

- - - - -
3b34cfb1 by Ben Gamari at 2019-05-17T19:43:00Z
rts/Stats: Track time usage of nonmoving collector

- - - - -


11 changed files:

- includes/RtsAPI.h
- libraries/base/GHC/Stats.hsc
- rts/GetTime.h
- rts/Stats.c
- rts/Stats.h
- rts/posix/GetTime.c
- rts/sm/GC.c
- rts/sm/GCThread.h
- rts/sm/NonMoving.c
- rts/sm/NonMovingMark.c
- rts/win32/GetTime.c


Changes:

=====================================
includes/RtsAPI.h
=====================================
@@ -151,6 +151,23 @@ typedef struct GCDetails_ {
   Time cpu_ns;
     // The time elapsed during GC itself
   Time elapsed_ns;
+
+    //
+    // Concurrent garbage collector
+    //
+
+    // The CPU time used during the post-mark pause phase of the concurrent
+    // nonmoving GC.
+  Time nonmoving_gc_sync_cpu_ns;
+    // The time elapsed during the post-mark pause phase of the concurrent
+    // nonmoving GC.
+  Time nonmoving_gc_sync_elapsed_ns;
+    // The CPU time used during the post-mark pause phase of the concurrent
+    // nonmoving GC.
+  Time nonmoving_gc_cpu_ns;
+    // The time elapsed during the post-mark pause phase of the concurrent
+    // nonmoving GC.
+  Time nonmoving_gc_elapsed_ns;
 } GCDetails;
 
 //
@@ -241,6 +258,28 @@ typedef struct _RTSStats {
     // The number of times a GC thread has iterated it's outer loop across all
     // parallel GCs
   uint64_t scav_find_work;
+
+  // ----------------------------------
+  // Concurrent garbage collector
+
+    // The CPU time used during the post-mark pause phase of the concurrent
+    // nonmoving GC.
+  Time nonmoving_gc_sync_cpu_ns;
+    // The time elapsed during the post-mark pause phase of the concurrent
+    // nonmoving GC.
+  Time nonmoving_gc_sync_elapsed_ns;
+    // The maximum time elapsed during the post-mark pause phase of the
+    // concurrent nonmoving GC.
+  Time nonmoving_gc_sync_max_elapsed_ns;
+    // The CPU time used during the post-mark pause phase of the concurrent
+    // nonmoving GC.
+  Time nonmoving_gc_cpu_ns;
+    // The time elapsed during the post-mark pause phase of the concurrent
+    // nonmoving GC.
+  Time nonmoving_gc_elapsed_ns;
+    // The maximum time elapsed during the post-mark pause phase of the
+    // concurrent nonmoving GC.
+  Time nonmoving_gc_max_elapsed_ns;
 } RTSStats;
 
 void getRTSStats (RTSStats *s);


=====================================
libraries/base/GHC/Stats.hsc
=====================================
@@ -103,6 +103,25 @@ data RTSStats = RTSStats {
     -- | Total elapsed time (at the previous GC)
   , elapsed_ns :: RtsTime
 
+    -- | The CPU time used during the post-mark pause phase of the concurrent
+    -- nonmoving GC.
+  , nonmoving_gc_sync_cpu_ns :: RtsTime
+    -- | The time elapsed during the post-mark pause phase of the concurrent
+    -- nonmoving GC.
+  , nonmoving_gc_sync_elapsed_ns :: RtsTime
+    -- | The maximum time elapsed during the post-mark pause phase of the
+    -- concurrent nonmoving GC.
+  , nonmoving_gc_sync_max_elapsed_ns :: RtsTime
+    -- | The CPU time used during the post-mark pause phase of the concurrent
+    -- nonmoving GC.
+  , nonmoving_gc_cpu_ns :: RtsTime
+    -- | The time elapsed during the post-mark pause phase of the concurrent
+    -- nonmoving GC.
+  , nonmoving_gc_elapsed_ns :: RtsTime
+    -- | The maximum time elapsed during the post-mark pause phase of the
+    -- concurrent nonmoving GC.
+  , nonmoving_gc_max_elapsed_ns :: RtsTime
+
     -- | Details about the most recent GC
   , gc :: GCDetails
   } deriving ( Read -- ^ @since 4.10.0.0
@@ -146,6 +165,13 @@ data GCDetails = GCDetails {
   , gcdetails_cpu_ns :: RtsTime
     -- | The time elapsed during GC itself
   , gcdetails_elapsed_ns :: RtsTime
+
+    -- | The CPU time used during the post-mark pause phase of the concurrent
+    -- nonmoving GC.
+  , gcdetails_nonmoving_gc_sync_cpu_ns :: RtsTime
+    -- | The time elapsed during the post-mark pause phase of the concurrent
+    -- nonmoving GC.
+  , gcdetails_nonmoving_gc_sync_elapsed_ns :: RtsTime
   } deriving ( Read -- ^ @since 4.10.0.0
              , Show -- ^ @since 4.10.0.0
              )
@@ -192,6 +218,12 @@ getRTSStats = do
     gc_elapsed_ns <- (# peek RTSStats, gc_elapsed_ns) p
     cpu_ns <- (# peek RTSStats, cpu_ns) p
     elapsed_ns <- (# peek RTSStats, elapsed_ns) p
+    nonmoving_gc_sync_cpu_ns <- (# peek RTSStats, nonmoving_gc_sync_cpu_ns) p
+    nonmoving_gc_sync_elapsed_ns <- (# peek RTSStats, nonmoving_gc_sync_elapsed_ns) p
+    nonmoving_gc_sync_max_elapsed_ns <- (# peek RTSStats, nonmoving_gc_sync_max_elapsed_ns) p
+    nonmoving_gc_cpu_ns <- (# peek RTSStats, nonmoving_gc_cpu_ns) p
+    nonmoving_gc_elapsed_ns <- (# peek RTSStats, nonmoving_gc_elapsed_ns) p
+    nonmoving_gc_max_elapsed_ns <- (# peek RTSStats, nonmoving_gc_max_elapsed_ns) p
     let pgc = (# ptr RTSStats, gc) p
     gc <- do
       gcdetails_gen <- (# peek GCDetails, gen) pgc
@@ -211,5 +243,7 @@ getRTSStats = do
       gcdetails_sync_elapsed_ns <- (# peek GCDetails, sync_elapsed_ns) pgc
       gcdetails_cpu_ns <- (# peek GCDetails, cpu_ns) pgc
       gcdetails_elapsed_ns <- (# peek GCDetails, elapsed_ns) pgc
+      gcdetails_nonmoving_gc_sync_cpu_ns <- (# peek GCDetails, nonmoving_gc_sync_cpu_ns) pgc
+      gcdetails_nonmoving_gc_sync_elapsed_ns <- (# peek GCDetails, nonmoving_gc_sync_elapsed_ns) pgc
       return GCDetails{..}
     return RTSStats{..}


=====================================
rts/GetTime.h
=====================================
@@ -13,6 +13,7 @@
 void initializeTimer       (void);
 
 Time getProcessCPUTime     (void);
+Time getMyThreadCPUTime    (void);
 void getProcessTimes       (Time *user, Time *elapsed);
 
 /* Get the current date and time.


=====================================
rts/Stats.c
=====================================
@@ -33,7 +33,9 @@ static Time
     end_init_cpu,   end_init_elapsed,
     start_exit_cpu, start_exit_elapsed,
     start_exit_gc_elapsed, start_exit_gc_cpu,
-    end_exit_cpu,   end_exit_elapsed;
+    end_exit_cpu,   end_exit_elapsed,
+    start_nonmoving_gc_cpu, start_nonmoving_gc_elapsed,
+    start_nonmoving_gc_sync_elapsed;
 
 #if defined(PROFILING)
 static Time RP_start_time  = 0, RP_tot_time  = 0;  // retainer prof user time
@@ -84,7 +86,7 @@ Time stat_getElapsedTime(void)
 double
 mut_user_time_until( Time t )
 {
-    return TimeToSecondsDbl(t - stats.gc_cpu_ns);
+    return TimeToSecondsDbl(t - stats.gc_cpu_ns - stats.nonmoving_gc_cpu_ns);
     // heapCensus() time is included in GC_tot_cpu, so we don't need
     // to subtract it here.
 
@@ -125,6 +127,10 @@ initStats0(void)
     end_init_cpu     = 0;
     end_init_elapsed  = 0;
 
+    start_nonmoving_gc_cpu = 0;
+    start_nonmoving_gc_elapsed = 0;
+    start_nonmoving_gc_sync_elapsed = 0;
+
     start_exit_cpu    = 0;
     start_exit_elapsed = 0;
     start_exit_gc_cpu    = 0;
@@ -175,6 +181,11 @@ initStats0(void)
         .gc_elapsed_ns = 0,
         .cpu_ns = 0,
         .elapsed_ns = 0,
+        .nonmoving_gc_cpu_ns = 0,
+        .nonmoving_gc_elapsed_ns = 0,
+        .nonmoving_gc_max_elapsed_ns = 0,
+        .nonmoving_gc_sync_elapsed_ns = 0,
+        .nonmoving_gc_sync_max_elapsed_ns = 0,
         .gc = {
             .gen = 0,
             .threads = 0,
@@ -189,7 +200,10 @@ initStats0(void)
             .par_balanced_copied_bytes = 0,
             .sync_elapsed_ns = 0,
             .cpu_ns = 0,
-            .elapsed_ns = 0
+            .elapsed_ns = 0,
+            .nonmoving_gc_cpu_ns = 0,
+            .nonmoving_gc_elapsed_ns = 0,
+            .nonmoving_gc_sync_elapsed_ns = 0,
         }
     };
 }
@@ -274,6 +288,11 @@ stat_startExit(void)
     start_exit_gc_cpu = stats.gc_cpu_ns;
 }
 
+/* -----------------------------------------------------------------------------
+   Nonmoving (concurrent) collector statistics
+
+   These two measure the time taken in the concurrent mark & sweep collector.
+   -------------------------------------------------------------------------- */
 void
 stat_endExit(void)
 {
@@ -286,10 +305,87 @@ stat_startGCSync (gc_thread *gct)
     gct->gc_sync_start_elapsed = getProcessElapsedTime();
 }
 
+void
+stat_startNonmovingGc ()
+{
+    start_nonmoving_gc_cpu = getMyThreadCPUTime();
+    start_nonmoving_gc_elapsed = getProcessCPUTime();
+}
+
+void
+stat_endNonmovingGc ()
+{
+    Time cpu = getMyThreadCPUTime();
+    Time elapsed = getProcessCPUTime();
+    stats.gc.nonmoving_gc_elapsed_ns = elapsed - start_nonmoving_gc_elapsed;
+    stats.nonmoving_gc_elapsed_ns += stats.gc.nonmoving_gc_elapsed_ns;
+
+    stats.gc.nonmoving_gc_cpu_ns = cpu - start_nonmoving_gc_cpu;
+    stats.nonmoving_gc_cpu_ns += stats.gc.nonmoving_gc_cpu_ns;
+
+    stats.nonmoving_gc_max_elapsed_ns =
+      stg_max(stats.gc.nonmoving_gc_elapsed_ns,
+              stats.nonmoving_gc_max_elapsed_ns);
+}
+
+void
+stat_startNonmovingGcSync ()
+{
+    start_nonmoving_gc_sync_elapsed = getProcessElapsedTime();
+    traceConcSyncBegin();
+}
+
+void
+stat_endNonmovingGcSync ()
+{
+    Time end_elapsed = getProcessElapsedTime();
+    stats.gc.nonmoving_gc_sync_elapsed_ns = end_elapsed - start_nonmoving_gc_sync_elapsed;
+    stats.nonmoving_gc_sync_elapsed_ns +=  stats.gc.nonmoving_gc_sync_elapsed_ns;
+    stats.nonmoving_gc_sync_max_elapsed_ns =
+      stg_max(stats.gc.nonmoving_gc_sync_elapsed_ns,
+              stats.nonmoving_gc_sync_max_elapsed_ns);
+    traceConcSyncEnd();
+}
+
 /* -----------------------------------------------------------------------------
    Called at the beginning of each GC
    -------------------------------------------------------------------------- */
 
+/*
+ * GC CPU time is collected on a per-gc_thread basis: The CPU time of each GC
+ * thread worker is recorded in its gc_thread at the beginning and end of
+ * scavenging. These are then summed over at the end of the GC.
+ *
+ * By contrast, the elapsed time is recorded only by the thread driving the GC.
+ *
+ * Mutator time is derived from the process's CPU time, subtracting out
+ * contributions from stop-the-world and concurrent GCs.
+ */
+
+void
+stat_startGCWorker (Capability *cap STG_UNUSED, gc_thread *gct)
+{
+    bool stats_enabled =
+        RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
+        rtsConfig.gcDoneHook != NULL;
+
+    if (stats_enabled || RtsFlags.ProfFlags.doHeapProfile) {
+        gct->gc_start_cpu = getMyThreadCPUTime();
+    }
+}
+
+void
+stat_endGCWorker (Capability *cap STG_UNUSED, gc_thread *gct)
+{
+    bool stats_enabled =
+        RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
+        rtsConfig.gcDoneHook != NULL;
+
+    if (stats_enabled || RtsFlags.ProfFlags.doHeapProfile) {
+        gct->gc_end_cpu = getMyThreadCPUTime();
+    }
+}
+
 void
 stat_startGC (Capability *cap, gc_thread *gct)
 {
@@ -297,7 +393,15 @@ stat_startGC (Capability *cap, gc_thread *gct)
         debugBelch("\007");
     }
 
-    getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
+    bool stats_enabled =
+        RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
+        rtsConfig.gcDoneHook != NULL;
+
+    if (stats_enabled || RtsFlags.ProfFlags.doHeapProfile) {
+        gct->gc_start_cpu = getMyThreadCPUTime();
+    }
+
+    gct->gc_start_elapsed = getProcessElapsedTime();
 
     // Post EVENT_GC_START with the same timestamp as used for stats
     // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
@@ -320,9 +424,9 @@ stat_startGC (Capability *cap, gc_thread *gct)
    -------------------------------------------------------------------------- */
 
 void
-stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop,
-            uint32_t gen, uint32_t par_n_threads, W_ par_max_copied,
-            W_ par_balanced_copied, W_ gc_spin_spin, W_ gc_spin_yield,
+stat_endGC (Capability *cap, gc_thread *initiating_gct, W_ live, W_ copied, W_ slop,
+            uint32_t gen, uint32_t par_n_threads, gc_thread **gc_threads,
+            W_ par_max_copied, W_ par_balanced_copied, W_ gc_spin_spin, W_ gc_spin_yield,
             W_ mut_spin_spin, W_ mut_spin_yield, W_ any_work, W_ no_work,
             W_ scav_find_work)
 {
@@ -364,9 +468,13 @@ stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop,
         stats.elapsed_ns = current_elapsed - start_init_elapsed;
 
         stats.gc.sync_elapsed_ns =
-            gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
-        stats.gc.elapsed_ns = current_elapsed - gct->gc_start_elapsed;
-        stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu;
+            initiating_gct->gc_start_elapsed - initiating_gct->gc_sync_start_elapsed;
+        stats.gc.elapsed_ns = current_elapsed - initiating_gct->gc_start_elapsed;
+        stats.gc.cpu_ns = 0;
+        for (unsigned int i=0; i < par_n_threads; i++) {
+            gc_thread *gct = gc_threads[i];
+            stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu;
+        }
     }
     // -------------------------------------------------
     // Update the cumulative stats
@@ -473,8 +581,8 @@ stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop,
                     TimeToSecondsDbl(stats.gc.elapsed_ns),
                     TimeToSecondsDbl(stats.cpu_ns),
                     TimeToSecondsDbl(stats.elapsed_ns),
-                    faults - gct->gc_start_faults,
-                        gct->gc_start_faults - GC_end_faults,
+                    faults - initiating_gct->gc_start_faults,
+                        initiating_gct->gc_start_faults - GC_end_faults,
                     gen);
 
             GC_end_faults = faults;
@@ -709,6 +817,21 @@ static void report_summary(const RTSSummaryStats* sum)
                     TimeToSecondsDbl(gen_stats->avg_pause_ns),
                     TimeToSecondsDbl(gen_stats->max_pause_ns));
     }
+    if (RtsFlags.GcFlags.useNonmoving) {
+        const int n_major_colls = sum->gc_summary_stats[RtsFlags.GcFlags.generations-1].collections;
+        statsPrintf("  Gen  1     %5d syncs"
+                    ",                      %6.3fs     %3.4fs    %3.4fs\n",
+                    n_major_colls,
+                    TimeToSecondsDbl(stats.nonmoving_gc_sync_elapsed_ns),
+                    TimeToSecondsDbl(stats.nonmoving_gc_sync_elapsed_ns) / n_major_colls,
+                    TimeToSecondsDbl(stats.nonmoving_gc_sync_max_elapsed_ns));
+        statsPrintf("  Gen  1      concurrent"
+                    ",             %6.3fs  %6.3fs     %3.4fs    %3.4fs\n",
+                    TimeToSecondsDbl(stats.nonmoving_gc_cpu_ns),
+                    TimeToSecondsDbl(stats.nonmoving_gc_elapsed_ns),
+                    TimeToSecondsDbl(stats.nonmoving_gc_elapsed_ns) / n_major_colls,
+                    TimeToSecondsDbl(stats.nonmoving_gc_max_elapsed_ns));
+    }
 
     statsPrintf("\n");
 
@@ -745,6 +868,12 @@ static void report_summary(const RTSSummaryStats* sum)
     statsPrintf("  GC      time  %7.3fs  (%7.3fs elapsed)\n",
                 TimeToSecondsDbl(stats.gc_cpu_ns),
                 TimeToSecondsDbl(stats.gc_elapsed_ns));
+    if (RtsFlags.GcFlags.useNonmoving) {
+        statsPrintf(
+                "  CONC GC time  %7.3fs  (%7.3fs elapsed)\n",
+                TimeToSecondsDbl(stats.nonmoving_gc_cpu_ns),
+                TimeToSecondsDbl(stats.nonmoving_gc_elapsed_ns));
+    }
 
 #if defined(PROFILING)
     statsPrintf("  RP      time  %7.3fs  (%7.3fs elapsed)\n",
@@ -1103,7 +1232,8 @@ stat_exit (void)
 
             stats.mutator_cpu_ns     = start_exit_cpu
                                  - end_init_cpu
-                                 - (stats.gc_cpu_ns - exit_gc_cpu);
+                                 - (stats.gc_cpu_ns - exit_gc_cpu)
+                                 - stats.nonmoving_gc_cpu_ns;
             stats.mutator_elapsed_ns = start_exit_elapsed
                                  - end_init_elapsed
                                  - (stats.gc_elapsed_ns - exit_gc_elapsed);
@@ -1512,7 +1642,8 @@ void getRTSStats( RTSStats *s )
     s->cpu_ns = current_cpu - end_init_cpu;
     s->elapsed_ns = current_elapsed - end_init_elapsed;
 
-    s->mutator_cpu_ns = current_cpu - end_init_cpu - stats.gc_cpu_ns;
+    s->mutator_cpu_ns = current_cpu - end_init_cpu - stats.gc_cpu_ns -
+        stats.nonmoving_gc_cpu_ns;
     s->mutator_elapsed_ns = current_elapsed - end_init_elapsed -
         stats.gc_elapsed_ns;
 }


=====================================
rts/Stats.h
=====================================
@@ -30,13 +30,21 @@ void      stat_endInit(void);
 
 void      stat_startGCSync(struct gc_thread_ *_gct);
 void      stat_startGC(Capability *cap, struct gc_thread_ *_gct);
-void      stat_endGC  (Capability *cap, struct gc_thread_ *_gct, W_ live,
-                       W_ copied, W_ slop, uint32_t gen, uint32_t n_gc_threads,
+void      stat_startGCWorker (Capability *cap, struct gc_thread_ *_gct);
+void      stat_endGCWorker (Capability *cap, struct gc_thread_ *_gct);
+void      stat_endGC  (Capability *cap, struct gc_thread_ *initiating_gct, W_ live,
+                       W_ copied, W_ slop, uint32_t gen,
+                       uint32_t n_gc_threads, struct gc_thread_ **gc_threads,
                        W_ par_max_copied, W_ par_balanced_copied,
                        W_ gc_spin_spin, W_ gc_spin_yield, W_ mut_spin_spin,
                        W_ mut_spin_yield, W_ any_work, W_ no_work,
                        W_ scav_find_work);
 
+void      stat_startNonmovingGcSync(void);
+void      stat_endNonmovingGcSync(void);
+void      stat_startNonmovingGc (void);
+void      stat_endNonmovingGc (void);
+
 #if defined(PROFILING)
 void      stat_startRP(void);
 void      stat_endRP(uint32_t,


=====================================
rts/posix/GetTime.c
=====================================
@@ -44,11 +44,49 @@ void initializeTimer()
 #endif
 }
 
+#if defined(HAVE_CLOCK_GETTIME)
+static Time getClockTime(clockid_t clock)
+{
+    struct timespec ts;
+    int res = clock_gettime(clock, &ts);
+    if (res == 0) {
+        return SecondsToTime(ts.tv_sec) + NSToTime(ts.tv_nsec);
+    } else {
+        sysErrorBelch("clock_gettime");
+        stg_exit(EXIT_FAILURE);
+    }
+}
+#endif
+
+Time getMyThreadCPUTime(void)
+{
+#if defined(HAVE_CLOCK_GETTIME)          &&  \
+       defined(CLOCK_PROCESS_CPUTIME_ID) &&  \
+       defined(HAVE_SYSCONF)
+    return getClockTime(CLOCK_THREAD_CPUTIME_ID);
+#elif defined(darwin_HOST_OS)
+    mach_port_t port = pthread_mach_thread_np(GetCurrentThread());
+    thread_basic_info_data_t info = { 0 };
+    mach_msg_type_number_t info_count = THREAD_BASIC_INFO_COUNT;
+    kern_return_t kern_err = thread_info(mach_thread_self(), THREAD_BASIC_INFO,
+                                         (thread_info_t) &info, &info_count);
+    if (kern_err == KERN_SUCCESS) {
+        return SecondsToTime(info.user_time.seconds) + USToTime(info.user_time.microseconds);
+    } else {
+        sysErrorBelch("getThreadCPUTime");
+        stg_exit(EXIT_FAILURE);
+    }
+#else
+    // TODO: How to fallback here?
+    return getProcessCPUTime();
+#endif
+}
+
 Time getProcessCPUTime(void)
 {
 #if !defined(BE_CONSERVATIVE)            &&  \
        defined(HAVE_CLOCK_GETTIME)       &&  \
-       defined(_SC_CPUTIME)             &&  \
+       defined(_SC_CPUTIME)              &&  \
        defined(CLOCK_PROCESS_CPUTIME_ID) &&  \
        defined(HAVE_SYSCONF)
     static int checked_sysconf = 0;
@@ -59,15 +97,7 @@ Time getProcessCPUTime(void)
         checked_sysconf = 1;
     }
     if (sysconf_result != -1) {
-        struct timespec ts;
-        int res;
-        res = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
-        if (res == 0) {
-            return SecondsToTime(ts.tv_sec) + NSToTime(ts.tv_nsec);
-        } else {
-            sysErrorBelch("clock_gettime");
-            stg_exit(EXIT_FAILURE);
-        }
+        return getClockTime(CLOCK_PROCESS_CPUTIME_ID);
     }
 #endif
 
@@ -82,16 +112,7 @@ Time getProcessCPUTime(void)
 StgWord64 getMonotonicNSec(void)
 {
 #if defined(HAVE_CLOCK_GETTIME)
-    struct timespec ts;
-    int res;
-
-    res = clock_gettime(CLOCK_ID, &ts);
-    if (res != 0) {
-        sysErrorBelch("clock_gettime");
-        stg_exit(EXIT_FAILURE);
-    }
-    return (StgWord64)ts.tv_sec * 1000000000 +
-           (StgWord64)ts.tv_nsec;
+    return getClockTime(CLOCK_ID);
 
 #elif defined(darwin_HOST_OS)
 


=====================================
rts/sm/GC.c
=====================================
@@ -901,9 +901,11 @@ GarbageCollect (uint32_t collect_gen,
 #endif
 
   // ok, GC over: tell the stats department what happened.
+  stat_endGCWorker(cap, gct);
   stat_endGC(cap, gct, live_words, copied,
              live_blocks * BLOCK_SIZE_W - live_words /* slop */,
-             N, n_gc_threads, par_max_copied, par_balanced_copied,
+             N, n_gc_threads, gc_threads,
+             par_max_copied, par_balanced_copied,
              gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
              any_work, no_work, scav_find_work);
 
@@ -1181,6 +1183,7 @@ gcWorkerThread (Capability *cap)
 
     SET_GCT(gc_threads[cap->no]);
     gct->id = osThreadId();
+    stat_startGCWorker (cap, gct);
 
     // Wait until we're told to wake up
     RELEASE_SPIN_LOCK(&gct->mut_spin);
@@ -1222,6 +1225,7 @@ gcWorkerThread (Capability *cap)
     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
 
+    stat_endGCWorker (cap, gct);
     SET_GCT(saved_gct);
 }
 


=====================================
rts/sm/GCThread.h
=====================================
@@ -185,9 +185,11 @@ typedef struct gc_thread_ {
     W_ no_work;
     W_ scav_find_work;
 
-    Time gc_start_cpu;   // process CPU time
-    Time gc_sync_start_elapsed;  // start of GC sync
-    Time gc_start_elapsed;  // process elapsed time
+    Time gc_start_cpu;             // thread CPU time
+    Time gc_end_cpu;               // thread CPU time
+    Time gc_sync_start_elapsed;    // start of GC sync
+    Time gc_start_elapsed;         // thread elapsed time
+    Time gc_end_elapsed;
     W_ gc_start_faults;
 
     // -------------------


=====================================
rts/sm/NonMoving.c
=====================================
@@ -17,6 +17,7 @@
 #include "GCThread.h"
 #include "GCTDecl.h"
 #include "Schedule.h"
+#include "Stats.h"
 
 #include "NonMoving.h"
 #include "NonMovingMark.h"
@@ -588,6 +589,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
 {
     ACQUIRE_LOCK(&nonmoving_collection_mutex);
     debugTrace(DEBUG_nonmoving_gc, "Starting mark...");
+    stat_startNonmovingGc();
 
     // Do concurrent marking; most of the heap will get marked here.
     nonmovingMarkThreadsWeaks(mark_queue);
@@ -738,6 +740,7 @@ finish:
 
     // We are done...
     mark_thread = 0;
+    stat_endNonmovingGc();
 
     // Signal that the concurrent collection is finished, allowing the next
     // non-moving collection to proceed


=====================================
rts/sm/NonMovingMark.c
=====================================
@@ -20,6 +20,7 @@
 #include "Printer.h"
 #include "Schedule.h"
 #include "Weak.h"
+#include "Stats.h"
 #include "STM.h"
 #include "MarkWeak.h"
 #include "sm/Storage.h"
@@ -254,6 +255,7 @@ void nonmovingBeginFlush(Task *task)
     debugTrace(DEBUG_nonmoving_gc, "Starting update remembered set flush...");
     traceConcSyncBegin();
     upd_rem_set_flush_count = 0;
+    stat_startNonmovingGcSync();
     stopAllCapabilitiesWith(NULL, task, SYNC_FLUSH_UPD_REM_SET);
 
     // XXX: We may have been given a capability via releaseCapability (i.e. a
@@ -345,6 +347,7 @@ void nonmovingFinishFlush(Task *task)
 
     debugTrace(DEBUG_nonmoving_gc, "Finished update remembered set flush...");
     traceConcSyncEnd();
+    stat_endNonmovingGcSync();
     releaseAllCapabilities(n_capabilities, NULL, task);
 }
 #endif


=====================================
rts/win32/GetTime.c
=====================================
@@ -34,6 +34,19 @@ getProcessTimes(Time *user, Time *elapsed)
     *elapsed = getProcessElapsedTime();
 }
 
+Time
+getMyThreadCPUTime(void)
+{
+    FILETIME creationTime, exitTime, userTime, kernelTime = {0,0};
+
+    if (!GetThreadTimes(GetCurrentThread(), &creationTime,
+                        &exitTime, &kernelTime, &userTime)) {
+        return 0;
+    }
+
+    return fileTimeToRtsTime(userTime);
+}
+
 Time
 getProcessCPUTime(void)
 {



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/compare/fb15e2a6078f50e69a207f3f1217ac069b17cc49...3b34cfb14cdf9e0ac9df80257848053d48de4fbd

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/compare/fb15e2a6078f50e69a207f3f1217ac069b17cc49...3b34cfb14cdf9e0ac9df80257848053d48de4fbd
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20190521/06951314/attachment-0001.html>