[Git][ghc/ghc][wip/T19146] 7 commits: rts: Clear block_info when unblocking
Ben Gamari (@bgamari)
gitlab at gitlab.haskell.org
Tue May 2 19:55:14 UTC 2023
Ben Gamari pushed to branch wip/T19146 at Glasgow Haskell Compiler / GHC
Commits:
d1540380 by Ben Gamari at 2023-05-02T15:50:41-04:00
rts: Clear block_info when unblocking
Otherwise we may end up with dangling pointers which may complicate
debugging.
- - - - -
251f4a5a by Ben Gamari at 2023-05-02T15:52:07-04:00
rts: Weak pointer cleanups
Various stylistic cleanups. No functional changes.
- - - - -
4102dbc3 by Ben Gamari at 2023-05-02T15:52:37-04:00
rts: Don't force debug output to stderr
Previously `+RTS -Dw -l` would emit debug output to the eventlog while
`+RTS -l -Dw` would emit it to stderr. This was because the parser for
`-D` would unconditionally override the debug output target. Now we
instead only do so if no it is currently `TRACE_NONE`.
- - - - -
c5a93c73 by Ben Gamari at 2023-05-02T15:53:50-04:00
rts: Forcibly flush eventlog on barf
Previously we would attempt to flush via `endEventLogging` which can
easily deadlock, e.g., if `barf` fails during GC.
Using `flushEventLog` directly may result in slightly less consistent
eventlog output (since we don't take all capabilities before flushing)
but avoids deadlocking.
- - - - -
71fb0a37 by Ben Gamari at 2023-05-02T15:53:53-04:00
rts: Assert that pointers aren't cleared
This turns many segmentation faults into much easier-to-debug assertion
failures by ensuring that LOOKS_LIKE_*_PTR checks recognize bit-patterns
produced by `+RTS -DZ` clearing as invalid pointers.
This is a bit ad-hoc but this is the debug runtime.
- - - - -
0aeb9d4f by Ben Gamari at 2023-05-02T15:54:40-04:00
rts: Don't sanity-check StgTSO.global_link
See Note [Avoid dangling global_link pointers].
Fixes #19146.
- - - - -
280bace4 by Ben Gamari at 2023-05-02T15:54:59-04:00
rts: Introduce printGlobalThreads
- - - - -
13 changed files:
- rts/RaiseAsync.c
- rts/RtsFlags.c
- rts/RtsMessages.c
- rts/Schedule.c
- rts/Threads.c
- rts/Threads.h
- rts/include/Cmm.h
- rts/include/MachDeps.h
- rts/include/rts/storage/ClosureMacros.h
- rts/posix/Select.c
- rts/sm/MarkWeak.c
- rts/sm/Sanity.c
- rts/win32/AsyncMIO.c
Changes:
=====================================
rts/RaiseAsync.c
=====================================
@@ -729,6 +729,7 @@ removeFromQueues(Capability *cap, StgTSO *tso)
done:
tso->why_blocked = NotBlocked;
+ tso->block_info.closure = (StgClosure *)END_TSO_QUEUE;
appendToRunQueue(cap, tso);
}
@@ -1092,6 +1093,7 @@ done:
// wake it up
if (tso->why_blocked != NotBlocked) {
tso->why_blocked = NotBlocked;
+ tso->block_info.closure = (StgClosure *)END_TSO_QUEUE;
appendToRunQueue(cap,tso);
}
=====================================
rts/RtsFlags.c
=====================================
@@ -2201,13 +2201,14 @@ static void read_debug_flags(const char* arg)
}
// -Dx also turns on -v. Use -l to direct trace
// events to the .eventlog file instead.
- RtsFlags.TraceFlags.tracing = TRACE_STDERR;
-
- // sanity implies zero_on_gc
- if(RtsFlags.DebugFlags.sanity){
- RtsFlags.DebugFlags.zero_on_gc = true;
- }
+ if (RtsFlags.TraceFlags.tracing == TRACE_NONE) {
+ RtsFlags.TraceFlags.tracing = TRACE_STDERR;
+ }
+ // sanity implies zero_on_gc
+ if(RtsFlags.DebugFlags.sanity){
+ RtsFlags.DebugFlags.zero_on_gc = true;
+ }
}
#endif
=====================================
rts/RtsMessages.c
=====================================
@@ -186,7 +186,12 @@ rtsFatalInternalErrorFn(const char *s, va_list ap)
#endif
#if defined(TRACING)
- if (RtsFlags.TraceFlags.tracing == TRACE_EVENTLOG) endEventLogging();
+ if (RtsFlags.TraceFlags.tracing == TRACE_EVENTLOG) {
+ // Use flushAllCapsEventsBufs rather than endEventLogging here since
+ // the latter insists on acquiring all capabilities to flush the eventlog;
+ // this would deadlock if we barfed during a GC.
+ flushAllCapsEventsBufs();
+ }
#endif
abort();
=====================================
rts/Schedule.c
=====================================
@@ -2565,7 +2565,8 @@ resumeThread (void *task_)
traceEventRunThread(cap, tso);
/* Reset blocking status */
- tso->why_blocked = NotBlocked;
+ tso->why_blocked = NotBlocked;
+ tso->block_info.closure = (StgClosure *)END_TSO_QUEUE;
if ((tso->flags & TSO_BLOCKEX) == 0) {
// avoid locking the TSO if we don't have to
=====================================
rts/Threads.c
=====================================
@@ -334,6 +334,7 @@ unblock:
// just run the thread now, if the BH is not really available,
// we'll block again.
tso->why_blocked = NotBlocked;
+ tso->block_info.closure = (StgClosure *)END_TSO_QUEUE;
appendToRunQueue(cap,tso);
// We used to set the context switch flag here, which would
@@ -1007,6 +1008,20 @@ printAllThreads(void)
}
}
+void
+printGlobalThreads(void)
+{
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ debugBelch("\ngen %d\n", g);
+ for (StgTSO *t = generations[g].threads; t != END_TSO_QUEUE; t = t->global_link) {
+ debugBelch("thread %p (id=%lu)\n", t, t->id);
+ }
+ for (StgTSO *t = generations[g].old_threads; t != END_TSO_QUEUE; t = t->global_link) {
+ debugBelch("thread %p (id=%lu) (old)\n", t, t->id);
+ }
+ }
+}
+
// useful from gdb
void
printThreadQueue(StgTSO *t)
=====================================
rts/Threads.h
=====================================
@@ -46,6 +46,7 @@ bool performTryPutMVar(Capability *cap, StgMVar *mvar, StgClosure *value);
void printThreadBlockage (StgTSO *tso);
void printThreadStatus (StgTSO *t);
void printAllThreads (void);
+void printGlobalThreads(void);
void printThreadQueue (StgTSO *t);
#endif
=====================================
rts/include/Cmm.h
=====================================
@@ -605,16 +605,20 @@
#define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK)
#define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT)
+#define LOOKS_LIKE_PTR(p) ((p) != NULL && (p) != INVALID_GHC_POINTER)
+
/* Debugging macros */
#define LOOKS_LIKE_INFO_PTR(p) \
- ((p) != NULL && \
+ (LOOKS_LIKE_PTR(p) && \
LOOKS_LIKE_INFO_PTR_NOT_NULL(p))
#define LOOKS_LIKE_INFO_PTR_NOT_NULL(p) \
( (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \
(TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES))
-#define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
+#define LOOKS_LIKE_CLOSURE_PTR(p) \
+ ( LOOKS_LIKE_PTR(p) && \
+ LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
/*
* The layout of the StgFunInfoExtra part of an info table changes
=====================================
rts/include/MachDeps.h
=====================================
@@ -107,6 +107,17 @@
#endif
#endif
+// This is the bit-pattern used by the RTS to clear freed memory
+// when +RTS -DZ is in use. The LOOKS_LIKE_*_PTR utilities use this
+// macro to catch clearly invalid pointers
+#if !defined(INVALID_GHC_POINTER)
+#if SIZEOF_HSWORD == 4
+#define INVALID_GHC_POINTER 0xaaaaaaaa
+#else
+#define INVALID_GHC_POINTER 0xaaaaaaaaaaaaaaaa
+#endif
+#endif
+
#if !defined(TAG_BITS)
#if SIZEOF_HSWORD == 4
#define TAG_BITS 2
=====================================
rts/include/rts/storage/ClosureMacros.h
=====================================
@@ -270,6 +270,12 @@ EXTERN_INLINE StgClosure *TAG_CLOSURE(StgWord tag,StgClosure * p)
make sense...
-------------------------------------------------------------------------- */
+EXTERN_INLINE bool LOOKS_LIKE_PTR (const void* p);
+EXTERN_INLINE bool LOOKS_LIKE_PTR (const void* p)
+{
+ return p && (p != (const void*) INVALID_GHC_POINTER);
+}
+
EXTERN_INLINE bool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p);
EXTERN_INLINE bool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p)
{
@@ -280,12 +286,13 @@ EXTERN_INLINE bool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p)
EXTERN_INLINE bool LOOKS_LIKE_INFO_PTR (StgWord p);
EXTERN_INLINE bool LOOKS_LIKE_INFO_PTR (StgWord p)
{
- return p && (IS_FORWARDING_PTR(p) || LOOKS_LIKE_INFO_PTR_NOT_NULL(p));
+ return LOOKS_LIKE_PTR((const void*) p) && (IS_FORWARDING_PTR(p) || LOOKS_LIKE_INFO_PTR_NOT_NULL(p));
}
EXTERN_INLINE bool LOOKS_LIKE_CLOSURE_PTR (const void *p);
EXTERN_INLINE bool LOOKS_LIKE_CLOSURE_PTR (const void *p)
{
+ if (!LOOKS_LIKE_PTR(p)) return false;
const StgInfoTable *info = RELAXED_LOAD(&UNTAG_CONST_CLOSURE((const StgClosure *) (p))->header.info);
return LOOKS_LIKE_INFO_PTR((StgWord) info);
}
=====================================
rts/posix/Select.c
=====================================
@@ -106,6 +106,7 @@ static bool wakeUpSleepingThreads (Capability *cap, LowResTime now)
}
iomgr->sleeping_queue = tso->_link;
tso->why_blocked = NotBlocked;
+ tso->block_info.closure = (StgClosure *)END_TSO_QUEUE;
tso->_link = END_TSO_QUEUE;
IF_DEBUG(scheduler, debugBelch("Waking up sleeping thread %"
FMT_StgThreadID "\n", tso->id));
@@ -437,6 +438,7 @@ awaitEvent(Capability *cap, bool wait)
debugBelch("Waking up blocked thread %" FMT_StgThreadID "\n",
tso->id));
tso->why_blocked = NotBlocked;
+ tso->block_info.closure = (StgClosure *)END_TSO_QUEUE;
tso->_link = END_TSO_QUEUE;
pushOnRunQueue(cap,tso);
break;
=====================================
rts/sm/MarkWeak.c
=====================================
@@ -251,7 +251,7 @@ static void collectDeadWeakPtrs (generation *gen, StgWeak **dead_weak_ptr_list)
*/
static bool resurrectUnreachableThreads (generation *gen, StgTSO **resurrected_threads)
{
- StgTSO *t, *tmp, *next;
+ StgTSO *t, *next;
bool flag = false;
for (t = gen->old_threads; t != END_TSO_QUEUE; t = next) {
@@ -272,12 +272,14 @@ static bool resurrectUnreachableThreads (generation *gen, StgTSO **resurrected_t
t->global_link = END_TSO_QUEUE;
continue;
default:
- tmp = t;
+ {
+ StgTSO *tmp = t;
evacuate((StgClosure **)&tmp);
tmp->global_link = *resurrected_threads;
*resurrected_threads = tmp;
flag = true;
}
+ }
}
gen->old_threads = END_TSO_QUEUE;
@@ -387,18 +389,21 @@ static bool tidyWeakList(generation *gen)
}
/*
- * Walk over the `old_threads` list of the given generation and move any
- * reachable threads onto the `threads` list.
+ * Walk over the given generation's thread list and promote TSOs which are
+ * reachable via the heap. This will move the TSO from gen->old_threads to
+ * new_gen->threads.
+ *
+ * This has the side-effect of updating the global thread list to account for
+ * indirections introduced by evacuation.
*/
static void tidyThreadList (generation *gen)
{
- StgTSO *t, *tmp, *next, **prev;
+ StgTSO *next;
+ StgTSO **prev = &gen->old_threads;
- prev = &gen->old_threads;
-
- for (t = gen->old_threads; t != END_TSO_QUEUE; t = next) {
+ for (StgTSO *t = gen->old_threads; t != END_TSO_QUEUE; t = next) {
- tmp = (StgTSO *)isAlive((StgClosure *)t);
+ StgTSO *tmp = (StgTSO *)isAlive((StgClosure *)t);
if (tmp != NULL) {
t = tmp;
@@ -426,10 +431,9 @@ static void tidyThreadList (generation *gen)
*prev = next;
// move this thread onto the correct threads list.
- generation *new_gen;
- new_gen = Bdescr((P_)t)->gen;
+ generation *new_gen = Bdescr((P_)t)->gen;
t->global_link = new_gen->threads;
- new_gen->threads = t;
+ new_gen->threads = t;
}
}
}
=====================================
rts/sm/Sanity.c
=====================================
@@ -737,14 +737,34 @@ checkSTACK (StgStack *stack)
checkStackChunk(sp, stack_end);
}
+/*
+ * Note [Avoid dangling global_link pointers]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * TSOs are a bit odd in that they have a global_link pointer field
+ * which is not scavenged by the GC. This field is used to track the
+ * generations[_].[old_]threads lists and is ultimately updated by
+ * MarkWeak.c:tidyThreadList.
+
+ * Typically the fact that this field is not scavenged is fine as all reachable
+ * TSOs on the heap are guaranteed to be on some generation's thread list and
+ * therefore will be scavenged by tidyThreadList. However, the sanity checker
+ * poses a bit of a challenge here as it walks heap blocks directly and
+ * therefore may encounter TSOs which aren't reachable via the heap.
+ * For this reason, checkTSO does not check global_link. Instead, we only do
+ * so in checkGlobalTSOList, which by definition will only look at
+ * threads which are reachable via a thread list (and therefore must have won
+ * the forwarding-pointer race).
+ *
+ * See #19146.
+ */
+
void
checkTSO(StgTSO *tso)
{
- StgTSO *next = tso->_link;
const StgInfoTable *info = (const StgInfoTable*) tso->_link->header.info;
load_load_barrier();
- ASSERT(next == END_TSO_QUEUE ||
+ ASSERT(tso->_link == END_TSO_QUEUE ||
info == &stg_MVAR_TSO_QUEUE_info ||
info == &stg_TSO_info ||
info == &stg_WHITEHOLE_info); // used to happen due to STM doing
@@ -762,9 +782,11 @@ checkTSO(StgTSO *tso)
ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->bq));
ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->blocked_exceptions));
ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->stackobj));
- ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->global_link) &&
- (tso->global_link == END_TSO_QUEUE ||
- get_itbl((StgClosure*)tso->global_link)->type == TSO));
+
+ // This assertion is sadly not viable. See Note [Sanity-checking global_link].
+ //ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->global_link) &&
+ // (tso->global_link == END_TSO_QUEUE ||
+ // get_itbl((StgClosure*)tso->global_link)->type == TSO));
if (tso->label) {
ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->label));
=====================================
rts/win32/AsyncMIO.c
=====================================
@@ -321,6 +321,7 @@ start:
// Terminates the run queue + this inner for-loop.
tso->_link = END_TSO_QUEUE;
tso->why_blocked = NotBlocked;
+ tso->block_info.closure = (StgClosure *)END_TSO_QUEUE;
// save the StgAsyncIOResult in the
// stg_block_async_info stack frame, because
// the block_info field will be overwritten by
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/593058d280912576c595a0d6e3619c7f90cbaf0a...280bace467a531d00be6e5d448c528b09b0dbc5a
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/593058d280912576c595a0d6e3619c7f90cbaf0a...280bace467a531d00be6e5d448c528b09b0dbc5a
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230502/6d70131e/attachment-0001.html>
More information about the ghc-commits
mailing list