[Git][ghc/ghc][master] 6 commits: rts: Weak pointer cleanups

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Mon May 15 22:02:42 UTC 2023



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
18ea2295 by Ben Gamari at 2023-05-15T18:02:20-04:00
rts: Weak pointer cleanups

Various stylistic cleanups. No functional changes.

- - - - -
c343112f by Ben Gamari at 2023-05-15T18:02:20-04:00
rts: Don't force debug output to stderr

Previously `+RTS -Dw -l` would emit debug output to the eventlog while
`+RTS -l -Dw` would emit it to stderr. This was because the parser for
`-D` would unconditionally override the debug output target. Now we
instead only do so if no it is currently `TRACE_NONE`.

- - - - -
a5f5f067 by Ben Gamari at 2023-05-15T18:02:20-04:00
rts: Forcibly flush eventlog on barf

Previously we would attempt to flush via `endEventLogging` which can
easily deadlock, e.g., if `barf` fails during GC.

Using `flushEventLog` directly may result in slightly less consistent
eventlog output (since we don't take all capabilities before flushing)
but avoids deadlocking.

- - - - -
73b1e87c by Ben Gamari at 2023-05-15T18:02:20-04:00
rts: Assert that pointers aren't cleared by -DZ

This turns many segmentation faults into much easier-to-debug assertion
failures by ensuring that LOOKS_LIKE_*_PTR checks recognize bit-patterns
produced by `+RTS -DZ` clearing as invalid pointers.

This is a bit ad-hoc but this is the debug runtime.

- - - - -
37fb61d8 by Ben Gamari at 2023-05-15T18:02:20-04:00
rts: Introduce printGlobalThreads

- - - - -
451d65a6 by Ben Gamari at 2023-05-15T18:02:20-04:00
rts: Don't sanity-check StgTSO.global_link

See Note [Avoid dangling global_link pointers].

Fixes #19146.

- - - - -


9 changed files:

- rts/RtsFlags.c
- rts/RtsMessages.c
- rts/Threads.c
- rts/Threads.h
- rts/include/Cmm.h
- rts/include/rts/Constants.h
- rts/include/rts/storage/ClosureMacros.h
- rts/sm/MarkWeak.c
- rts/sm/Sanity.c


Changes:

=====================================
rts/RtsFlags.c
=====================================
@@ -2201,13 +2201,14 @@ static void read_debug_flags(const char* arg)
     }
     // -Dx also turns on -v.  Use -l to direct trace
     // events to the .eventlog file instead.
-    RtsFlags.TraceFlags.tracing = TRACE_STDERR;
-
-   // sanity implies zero_on_gc
-   if(RtsFlags.DebugFlags.sanity){
-        RtsFlags.DebugFlags.zero_on_gc = true;
-   }
+    if (RtsFlags.TraceFlags.tracing == TRACE_NONE) {
+        RtsFlags.TraceFlags.tracing = TRACE_STDERR;
+    }
 
+    // sanity implies zero_on_gc
+    if(RtsFlags.DebugFlags.sanity){
+         RtsFlags.DebugFlags.zero_on_gc = true;
+    }
 }
 #endif
 


=====================================
rts/RtsMessages.c
=====================================
@@ -186,7 +186,12 @@ rtsFatalInternalErrorFn(const char *s, va_list ap)
 #endif
 
 #if defined(TRACING)
-  if (RtsFlags.TraceFlags.tracing == TRACE_EVENTLOG) endEventLogging();
+  if (RtsFlags.TraceFlags.tracing == TRACE_EVENTLOG) {
+    // Use flushAllCapsEventsBufs rather than endEventLogging here since
+    // the latter insists on acquiring all capabilities to flush the eventlog;
+    // this would deadlock if we barfed during a GC.
+    flushAllCapsEventsBufs();
+  }
 #endif
 
   abort();


=====================================
rts/Threads.c
=====================================
@@ -1007,6 +1007,20 @@ printAllThreads(void)
   }
 }
 
+void
+printGlobalThreads(void)
+{
+  for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
+    debugBelch("\ngen %d\n", g);
+    for (StgTSO *t = generations[g].threads; t != END_TSO_QUEUE; t = t->global_link) {
+      debugBelch("thread %p (id=%lu)\n", t, t->id);
+    }
+    for (StgTSO *t = generations[g].old_threads; t != END_TSO_QUEUE; t = t->global_link) {
+      debugBelch("thread %p (id=%lu) (old)\n", t, t->id);
+    }
+  }
+}
+
 // useful from gdb
 void
 printThreadQueue(StgTSO *t)


=====================================
rts/Threads.h
=====================================
@@ -46,6 +46,7 @@ bool performTryPutMVar(Capability *cap, StgMVar *mvar, StgClosure *value);
 void printThreadBlockage (StgTSO *tso);
 void printThreadStatus (StgTSO *t);
 void printAllThreads (void);
+void printGlobalThreads(void);
 void printThreadQueue (StgTSO *t);
 #endif
 


=====================================
rts/include/Cmm.h
=====================================
@@ -607,16 +607,20 @@
 #define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK)
 #define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT)
 
+#define LOOKS_LIKE_PTR(p) ((p) != NULL && (p) != INVALID_GHC_POINTER)
+
 /* Debugging macros */
 #define LOOKS_LIKE_INFO_PTR(p)                                  \
-   ((p) != NULL &&                                              \
+   (LOOKS_LIKE_PTR(p) &&                                        \
     LOOKS_LIKE_INFO_PTR_NOT_NULL(p))
 
 #define LOOKS_LIKE_INFO_PTR_NOT_NULL(p)                         \
    ( (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) &&     \
      (TO_W_(%INFO_TYPE(%STD_INFO(p))) <  N_CLOSURE_TYPES))
 
-#define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
+#define LOOKS_LIKE_CLOSURE_PTR(p)                               \
+   ( LOOKS_LIKE_PTR(p) &&                                       \
+     LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
 
 /*
  * The layout of the StgFunInfoExtra part of an info table changes


=====================================
rts/include/rts/Constants.h
=====================================
@@ -215,6 +215,21 @@
 #define LDV_STATE_USE           0x40000000
 #endif /* SIZEOF_VOID_P */
 
+/* See Note [Debugging predicates for pointers] in ClosureMacros.h */
+#if !defined(INVALID_GHC_POINTER)
+#if !defined(DEBUG)
+#define INVALID_GHC_POINTER 0x0
+#elif SIZEOF_VOID_P== 4
+/* N.B. this may result in false-negatives from LOOKS_LIKE_PTR on some
+ * platforms since this is a valid user-space address.
+ */
+#define INVALID_GHC_POINTER 0xaaaaaaaa
+#else
+/* N.B. this is typically a kernel-mode address on 64-bit platforms */
+#define INVALID_GHC_POINTER 0xaaaaaaaaaaaaaaaa
+#endif
+#endif
+
 /* -----------------------------------------------------------------------------
    TSO related constants
    -------------------------------------------------------------------------- */


=====================================
rts/include/rts/storage/ClosureMacros.h
=====================================
@@ -253,22 +253,35 @@ EXTERN_INLINE StgClosure *TAG_CLOSURE(StgWord tag,StgClosure * p)
 #define MK_FORWARDING_PTR(p) (((StgWord)p) | 1)
 #define UN_FORWARDING_PTR(p) (((StgWord)p) - 1)
 
-/* -----------------------------------------------------------------------------
-   DEBUGGING predicates for pointers
-
-   LOOKS_LIKE_INFO_PTR(p)    returns False if p is definitely not an info ptr
-   LOOKS_LIKE_CLOSURE_PTR(p) returns False if p is definitely not a closure ptr
-
-   These macros are complete but not sound.  That is, they might
-   return false positives.  Do not rely on them to distinguish info
-   pointers from closure pointers, for example.
+/*
+ * Note [Debugging predicates for pointers]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * LOOKS_LIKE_PTR(p)         returns False if p is definitely not a valid pointer
+ * LOOKS_LIKE_INFO_PTR(p)    returns False if p is definitely not an info ptr
+ * LOOKS_LIKE_CLOSURE_PTR(p) returns False if p is definitely not a closure ptr
+ *
+ * These macros are complete but not sound.  That is, they might
+ * return false positives.  Do not rely on them to distinguish info
+ * pointers from closure pointers, for example.
+ *
+ * We for the most part don't use address-space predicates these days, for
+ * portability reasons, and the fact that code/data can be scattered about the
+ * address space in a dynamically-linked environment.  Our best option is to
+ * look at the alleged info table and see whether it seems to make sense.
+ *
+ * The one exception here is the use of INVALID_GHC_POINTER, which catches
+ * the bit-pattern used by `+RTS -DZ` to zero freed memory (that is 0xaaaaa...).
+ * In the case of most 64-bit platforms, this INVALID_GHC_POINTER is a
+ * kernel-mode address, making this check free of false-negatives. On the other
+ * hand, on 32-bit platforms this typically isn't the case. Consequently, we
+ * only use this check in the DEBUG RTS.
+ */
 
-   We don't use address-space predicates these days, for portability
-   reasons, and the fact that code/data can be scattered about the
-   address space in a dynamically-linked environment.  Our best option
-   is to look at the alleged info table and see whether it seems to
-   make sense...
-   -------------------------------------------------------------------------- */
+EXTERN_INLINE bool LOOKS_LIKE_PTR (const void* p);
+EXTERN_INLINE bool LOOKS_LIKE_PTR (const void* p)
+{
+    return p && (p != (const void*) INVALID_GHC_POINTER);
+}
 
 EXTERN_INLINE bool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p);
 EXTERN_INLINE bool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p)
@@ -280,12 +293,13 @@ EXTERN_INLINE bool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p)
 EXTERN_INLINE bool LOOKS_LIKE_INFO_PTR (StgWord p);
 EXTERN_INLINE bool LOOKS_LIKE_INFO_PTR (StgWord p)
 {
-    return p && (IS_FORWARDING_PTR(p) || LOOKS_LIKE_INFO_PTR_NOT_NULL(p));
+    return LOOKS_LIKE_PTR((const void*) p) && (IS_FORWARDING_PTR(p) || LOOKS_LIKE_INFO_PTR_NOT_NULL(p));
 }
 
 EXTERN_INLINE bool LOOKS_LIKE_CLOSURE_PTR (const void *p);
 EXTERN_INLINE bool LOOKS_LIKE_CLOSURE_PTR (const void *p)
 {
+    if (!LOOKS_LIKE_PTR(p)) return false;
     const StgInfoTable *info = RELAXED_LOAD(&UNTAG_CONST_CLOSURE((const StgClosure *) (p))->header.info);
     return LOOKS_LIKE_INFO_PTR((StgWord) info);
 }


=====================================
rts/sm/MarkWeak.c
=====================================
@@ -251,7 +251,7 @@ static void collectDeadWeakPtrs (generation *gen, StgWeak **dead_weak_ptr_list)
  */
 static bool resurrectUnreachableThreads (generation *gen, StgTSO **resurrected_threads)
 {
-    StgTSO *t, *tmp, *next;
+    StgTSO *t, *next;
     bool flag = false;
 
     for (t = gen->old_threads; t != END_TSO_QUEUE; t = next) {
@@ -272,12 +272,14 @@ static bool resurrectUnreachableThreads (generation *gen, StgTSO **resurrected_t
             t->global_link = END_TSO_QUEUE;
             continue;
         default:
-            tmp = t;
+        {
+            StgTSO *tmp = t;
             evacuate((StgClosure **)&tmp);
             tmp->global_link = *resurrected_threads;
             *resurrected_threads = tmp;
             flag = true;
         }
+        }
     }
 
     gen->old_threads = END_TSO_QUEUE;
@@ -387,18 +389,21 @@ static bool tidyWeakList(generation *gen)
 }
 
 /*
- * Walk over the `old_threads` list of the given generation and move any
- * reachable threads onto the `threads` list.
+ * Walk over the given generation's thread list and promote TSOs which are
+ * reachable via the heap. This will move the TSO from gen->old_threads to
+ * new_gen->threads.
+ *
+ * This has the side-effect of updating the global thread list to account for
+ * indirections introduced by evacuation.
  */
 static void tidyThreadList (generation *gen)
 {
-    StgTSO *t, *tmp, *next, **prev;
+    StgTSO *next;
+    StgTSO **prev = &gen->old_threads;
 
-    prev = &gen->old_threads;
-
-    for (t = gen->old_threads; t != END_TSO_QUEUE; t = next) {
+    for (StgTSO *t = gen->old_threads; t != END_TSO_QUEUE; t = next) {
 
-        tmp = (StgTSO *)isAlive((StgClosure *)t);
+        StgTSO *tmp = (StgTSO *)isAlive((StgClosure *)t);
 
         if (tmp != NULL) {
             t = tmp;
@@ -426,10 +431,9 @@ static void tidyThreadList (generation *gen)
             *prev = next;
 
             // move this thread onto the correct threads list.
-            generation *new_gen;
-            new_gen = Bdescr((P_)t)->gen;
+            generation *new_gen = Bdescr((P_)t)->gen;
             t->global_link = new_gen->threads;
-            new_gen->threads  = t;
+            new_gen->threads = t;
         }
     }
 }


=====================================
rts/sm/Sanity.c
=====================================
@@ -737,14 +737,45 @@ checkSTACK (StgStack *stack)
     checkStackChunk(sp, stack_end);
 }
 
+/*
+ * Note [Sanity-checking global_link]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * TSOs are a bit odd in that they have a global_link pointer field
+ * which is not scavenged by the GC. This field is used to track the
+ * generations[_].[old_]threads lists and is ultimately updated by
+ * MarkWeak.c:tidyThreadList, which walks the thread lists and updates
+ * the global_link references of all TSOs that it finds.
+ *
+ * Typically the fact that this field is not scavenged is fine as all reachable
+ * TSOs on the heap are guaranteed to be on some generation's thread list and
+ * therefore will be scavenged by tidyThreadList. However, the sanity checker
+ * poses a bit of a challenge here as it walks heap blocks directly and
+ * therefore may encounter TSOs which aren't reachable via the the global
+ * thread lists.
+ *
+ * How might such orphan TSOs arise? One such way is via racing evacuation.
+ * Specifically, if two GC threads attempt to simultaneously evacuate a
+ * TSO, both threads will produce a copy of the TSO in their respective
+ * to-space. However, only one will succeed in turning the from-space TSO into
+ * a forwarding pointer. Consequently, tidyThreadList will find and update the
+ * copy which "won". Meanwhile, the "losing" copy will contain a dangling
+ * global_link pointer into from-space.
+ *
+ * For this reason, checkTSO does not check global_link. Instead, we only do
+ * so in checkGlobalTSOList, which by definition will only look at
+ * threads which are reachable via a thread list (and therefore must have won
+ * the forwarding-pointer race).
+ *
+ * See #19146.
+ */
+
 void
 checkTSO(StgTSO *tso)
 {
-    StgTSO *next = tso->_link;
     const StgInfoTable *info = (const StgInfoTable*) tso->_link->header.info;
     load_load_barrier();
 
-    ASSERT(next == END_TSO_QUEUE ||
+    ASSERT(tso->_link == END_TSO_QUEUE ||
            info == &stg_MVAR_TSO_QUEUE_info ||
            info == &stg_TSO_info ||
            info == &stg_WHITEHOLE_info); // used to happen due to STM doing
@@ -762,9 +793,12 @@ checkTSO(StgTSO *tso)
     ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->bq));
     ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->blocked_exceptions));
     ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->stackobj));
-    ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->global_link) &&
-            (tso->global_link == END_TSO_QUEUE ||
-             get_itbl((StgClosure*)tso->global_link)->type == TSO));
+
+    // This assertion sadly does not always hold.
+    // See Note [Sanity-checking global_link] for why.
+    //ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->global_link) &&
+    //        (tso->global_link == END_TSO_QUEUE ||
+    //         get_itbl((StgClosure*)tso->global_link)->type == TSO));
 
     if (tso->label) {
         ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->label));



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/21f3aae7371469beb3950a6170db6c5668379ff3...451d65a6913d85088a350be8e9b7a6d834453326

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/21f3aae7371469beb3950a6170db6c5668379ff3...451d65a6913d85088a350be8e9b7a6d834453326
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230515/f67ae666/attachment-0001.html>


More information about the ghc-commits mailing list