[commit: ghc] wip/better-machine-readable-stats: [rts] Add spin and yield counters for reallyLockClosure and waitForGcThreads (854f886)
git at git.haskell.org
git at git.haskell.org
Thu Jan 11 02:04:01 UTC 2018
Repository : ssh://git@git.haskell.org/ghc
On branch : wip/better-machine-readable-stats
Link : http://ghc.haskell.org/trac/ghc/changeset/854f8865b7bf42070d7e12d4646c35a75c6c4b97/ghc
>---------------------------------------------------------------
commit 854f8865b7bf42070d7e12d4646c35a75c6c4b97
Author: Douglas Wilson <douglas.wilson at gmail.com>
Date: Thu Jan 11 14:17:46 2018 +1300
[rts] Add spin and yield counters for reallyLockClosure and waitForGcThreads
Summary:
Also add busy_wait_nops in these loops.
The loop in StgMiscClosures.cmm doesn't use the counters yet; I need help with
the cmm.
Test Plan:
./validate
Check it builds with #define PROF_SPIN removed from includes/rts/Config.h
Reviewers: bgamari, erikd, simonmar
Subscribers: rwbarton, thomie, carter
Differential Revision: https://phabricator.haskell.org/D4302
>---------------------------------------------------------------
854f8865b7bf42070d7e12d4646c35a75c6c4b97
rts/SMPClosureOps.h | 12 ++++++++++++
rts/Stats.c | 17 +++++++++++++++++
rts/StgMiscClosures.cmm | 7 +++++++
rts/sm/GC.c | 14 ++++++++++++++
rts/sm/GC.h | 2 ++
5 files changed, 52 insertions(+)
diff --git a/rts/SMPClosureOps.h b/rts/SMPClosureOps.h
index 4ea1c55..fa6fe01 100644
--- a/rts/SMPClosureOps.h
+++ b/rts/SMPClosureOps.h
@@ -38,6 +38,11 @@ EXTERN_INLINE void unlockClosure(StgClosure *p, const StgInfoTable *info);
#if defined(THREADED_RTS)
+#if defined(PROF_SPIN)
+extern volatile StgWord64 whitehole_lock_closure_spin;
+extern volatile StgWord64 whitehole_lock_closure_yield;
+#endif
+
/* -----------------------------------------------------------------------------
* Locking/unlocking closures
*
@@ -56,7 +61,14 @@ EXTERN_INLINE StgInfoTable *reallyLockClosure(StgClosure *p)
do {
info = xchg((P_)(void *)&p->header.info, (W_)&stg_WHITEHOLE_info);
if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info;
+#if defined(PROF_SPIN)
+ atomic_inc(&whitehole_lock_closure_spin, 1);
+#endif
+ busy_wait_nop();
} while (++i < SPIN_COUNT);
+#if defined(PROF_SPIN)
+ atomic_inc(&whitehole_lock_closure_yield, 1);
+#endif
yieldThread();
} while (1);
}
diff --git a/rts/Stats.c b/rts/Stats.c
index 07cad3f..341db7e 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -43,6 +43,13 @@ static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#define PROF_VAL(x) 0
#endif
+// TODO REVIEWERS: This seems a bit of an odd place to do this, where would be
+// better?
+#if defined(PROF_SPIN)
+volatile StgWord64 whitehole_lock_closure_spin = 0;
+volatile StgWord64 whitehole_lock_closure_yield = 0;
+#endif
+
//
// All the stats!
//
@@ -780,6 +787,16 @@ stat_exit (void)
, col_width[0], "whitehole_gc"
, col_width[1], whitehole_gc_spin
, col_width[2], (StgWord64)0);
+ statsPrintf("%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], "whitehole_lock_closure"
+ , col_width[1], whitehole_lock_closure_spin
+ , col_width[2], whitehole_lock_closure_yield);
+ // waitForGcThreads isn't really spin-locking (see the function)
+ // but these numbers still seem useful.
+ statsPrintf("%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
, col_width[0], "waitForGcThreads"
+ , col_width[1], waitForGcThreads_spin
+ , col_width[2], waitForGcThreads_yield);
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
int prefix_length = 0;
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 361989d..2f0d61a 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -375,11 +375,18 @@ loop:
// spin until the WHITEHOLE is updated
info = StgHeader_info(node);
if (info == stg_WHITEHOLE_info) {
+ // TODO REVIEWERS: I think these atomic_incs and the busy_wait_nop
+ // should happen, but I don't know how to write it in cmm. I think this
+ // code is only for the bytecode interpreter?
+
+ // atomic_inc(&whitehole_lock_closure_spin, 1);
i = i + 1;
if (i == SPIN_COUNT) {
i = 0;
+ // atomic_inc(&whitehole_lock_closure_yield, 1);
ccall yieldThread();
}
+ // busy_wait_nop();
goto loop;
}
jump %ENTRY_CODE(info) (node);
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index c5ab7a8..1ab9652 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -134,6 +134,9 @@ uint32_t n_gc_threads;
static long copied; // *words* copied & scavenged during this GC
#if defined(PROF_SPIN) && defined(THREADED_RTS)
+// spin and yield counts for the quasi-SpinLock in waitForGcThreads
+volatile StgWord64 waitForGcThreads_spin = 0;
+volatile StgWord64 waitForGcThreads_yield = 0;
volatile StgWord64 whitehole_gc_spin = 0;
#endif
@@ -1154,6 +1157,9 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
}
}
if (!retry) break;
+#if defined(PROF_SPIN)
+ waitForGcThreads_yield++;
+#endif
yieldThread();
}
@@ -1164,6 +1170,14 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
rtsConfig.longGCSync(cap->no, t2 - t0);
t1 = t2;
}
+#if defined(PROF_SPIN)
+ // This is a bit strange, we'll get more yields than spins.
+ // I guess that means it's not a spin-lock at all, but these
+ // numbers are still useful (I think).
+ if (retry) {
+ waitForGcThreads_spin++;
+ }
+#endif
}
if (RtsFlags.GcFlags.longGCSync != 0 &&
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 78f0549..7fce87e 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -47,6 +47,8 @@ extern uint32_t mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS,
#if defined(PROF_SPIN) && defined(THREADED_RTS)
extern volatile StgWord64 whitehole_gc_spin;
+extern volatile StgWord64 waitForGcThreads_spin;
+extern volatile StgWord64 waitForGcThreads_yield;
#endif
void gcWorkerThread (Capability *cap);
More information about the ghc-commits
mailing list