[Git][ghc/ghc][wip/ghc-debug] 4 commits: Remove addition of findPtrCb and related changes

David Eichmann gitlab at gitlab.haskell.org
Mon Sep 21 19:58:36 UTC 2020



David Eichmann pushed to branch wip/ghc-debug at Glasgow Haskell Compiler / GHC


Commits:
e6b5bbb2 by David Eichmann at 2020-09-18T10:33:29+01:00
Remove addition of findPtrCb and related changes

- - - - -
fba0e4d8 by David Eichmann at 2020-09-18T14:23:11+01:00
Rename rts_unpause to rts_resume

- - - - -
9d010ebc by David Eichmann at 2020-09-21T20:36:40+01:00
Simplify and speedup test

- - - - -
1762210f by David Eichmann at 2020-09-21T20:57:00+01:00
WIP Clarify multi-threaded behavior and correct usage of the ghc-debug API

- - - - -


18 changed files:

- includes/RtsAPI.h
- − includes/rts/PrinterAPI.h
- libraries/ghc-heap/tests/list_threads_and_misc_roots_c.c
- rts/Printer.c
- rts/Printer.h
- rts/RtsAPI.c
- testsuite/tests/rts/ghc-debug/all.T
- + testsuite/tests/rts/ghc-debug/ghc_debug.c
- + testsuite/tests/rts/ghc-debug/ghc_debug.h
- + testsuite/tests/rts/ghc-debug/ghc_debug_01.hs
- + testsuite/tests/rts/ghc-debug/ghc_debug_02.hs
- + testsuite/tests/rts/ghc-debug/ghc_debug_03.hs
- − testsuite/tests/rts/ghc-debug/rts_pause_and_unpause.hs
- − testsuite/tests/rts/ghc-debug/rts_pause_and_unpause_c.c
- − testsuite/tests/rts/ghc-debug/rts_pause_and_unpause_c.h
- testsuite/tests/rts/ghc-debug/shouldfail/all.T
- testsuite/tests/rts/ghc-debug/shouldfail/unsafe_rts_pause.hs
- + testsuite/tests/rts/ghc-debug/shouldfail/unsafe_rts_pause.stderr


Changes:

=====================================
includes/RtsAPI.h
=====================================
@@ -485,36 +485,23 @@ void rts_checkSchedStatus (char* site, Capability *);
 
 SchedulerStatus rts_getSchedStatus (Capability *cap);
 
-// Various bits of information that need to be persisted between rts_pause and
-// rts_unpause.
-typedef struct RtsPaused_ {
-    // The task (i.e. OS thread) on which rts_pause() was called. This is used
-    // in rts_unpause() to check that it is called on the same OS thread.
-    Task *pausing_task;
-
-    // The capability owned by pausing_task (possibly NULL) just before calling
-    // rts_unpause(). On rts_unpause(), the pausing_task will retain ownership
-    // of this capability (if not NULL).
-    Capability *capability;
-} RtsPaused;
-
-// Halt execution of all Haskell threads by acquiring all capabilities. It is
-// different to rts_lock() because rts_pause() pauses all capabilities while
-// rts_lock() only pauses a single capability. rts_pause() and rts_unpause()
-// have to be executed from the same OS thread (i.e. myTask() must stay the
-// same). Returns the currently owned capability (possibly NULL). This must be
-// passed back to rts_unpause().
-RtsPaused rts_pause (void);
-
-// Counterpart of rts_pause: Continue from a pause.
-// rts_pause() and rts_unpause() have to be executed from the same OS thread
-// (i.e. myTask() must stay the same).
-void rts_unpause (RtsPaused);
-
-// Tells the current state of the RTS regarding rts_pause() and rts_unpause().
+// Halt execution of all Haskell threads (OS threads may continue) by acquiring
+// all capabilities. Blocks untill pausing is completed. This is different to
+// rts_lock() because rts_pause() pauses all capabilities while rts_lock() only
+// pauses a single capability. rts_pause() and rts_resume() must be executed
+// from the same OS thread. Must not be called when the rts is already paused.
+void rts_pause (void);
+
+// Counterpart of rts_pause: Continue from a pause. All capabilities are
+// released. Must be done while RTS is paused and on the same thread as
+// rts_pause().
+void rts_resume (void);
+
+// Tells the current state of the RTS regarding rts_pause() and rts_resume().
 bool rts_isPaused(void);
 
-// List all live threads. Must be done while RTS is paused (see rts_pause()).
+// List all live threads. Must be done while RTS is paused and on the same
+// thread as rts_pause().
 typedef void (*ListThreadsCb)(void *user, StgTSO *);
 void rts_listThreads(ListThreadsCb cb, void *user);
 


=====================================
includes/rts/PrinterAPI.h deleted
=====================================
@@ -1,23 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The GHC Team, 2020
- *
- * Public API of closure printing functions.
- *
- * ---------------------------------------------------------------------------*/
-
-#pragma once
-
-// findPtrCb takes a callback of type FindPtrCb, so external tools (such as
-// ghc-debug) can invoke it and intercept the intermediate results.
-// When findPtrCb successfully finds a closure containing an address then the
-// callback is called on the address of that closure.
-// The `StgClosure` argument is an untagged closure pointer.
-// `user` points to any data provided by the caller. It's not used internally.
-typedef void (*FindPtrCb)(void *user, StgClosure *);
-
-void findPtrCb(FindPtrCb cb, void *, P_ p);
-
-// Special case of findPtrCb: Uses a default callback, that prints the closure
-// pointed to by p.
-void findPtr(P_ p, int follow);


=====================================
libraries/ghc-heap/tests/list_threads_and_misc_roots_c.c
=====================================
@@ -26,7 +26,7 @@ void* listThreads_thread(void* unused){
     RtsPaused paused = rts_pause();
     rts_listThreads(&collectTSOsCallback, NULL);
     rts_listMiscRoots(&collectMiscRootsCallback, NULL);
-    rts_unpause(paused);
+    rts_resume(paused);
 
     return NULL;
 }


=====================================
rts/Printer.c
=====================================
@@ -852,69 +852,37 @@ extern void DEBUG_LoadSymbols( const char *name STG_UNUSED )
 
 #endif /* USING_LIBBFD */
 
-static void
-findPtr_default_callback(void *user STG_UNUSED, StgClosure * closure){
-  debugBelch("%p = ", closure);
-  printClosure((StgClosure *)closure);
-}
-
+void findPtr(P_ p, int);                /* keep gcc -Wall happy */
 
 int searched = 0;
 
-// Search through a block (and it's linked blocks) for closures that reference
-// p. The size of arr is respected and the search is stoped when arr is full.
-// TODO: This may produce false positives if e.g. a closure contains an Int that
-// happens to have the same value as memory address p. Returns the new i value
-// i.e. the next free position in the arr array.
 static int
-findPtrBlocks
-    ( FindPtrCb cb      // [in] callback called whenever a closure referencing p is found.
-    , void* user        // [in] unused other than to pass to the callback.
-    , StgPtr p          // [in] The pointer to search for.
-    , bdescr *bd        // [in] The block descriptor of the block from which to start searching.
-    , StgPtr arr[]      // [in/out] All found closure addresses are written into this array.
-    , int arr_size      // [in] The size of arr.
-    , int i             // [in] The current position in arr.
-    )
+findPtrBlocks (StgPtr p, bdescr *bd, StgPtr arr[], int arr_size, int i)
 {
-    StgPtr candidate, retainer, end;
-
-    // Iterate over all blocks.
+    StgPtr q, r, end;
     for (; bd; bd = bd->link) {
         searched++;
-        // Scan the block looking for a pointer equal to p.
-        for (candidate = bd->start; candidate < bd->free; candidate++) {
-            if (UNTAG_CONST_CLOSURE((StgClosure*)*candidate) == (const StgClosure *)p) {
-                // *candidate looks like a pointer equal to p, but it might not
-                // be a pointer type i.e. it may just be an Int that happens to
-                // have the same value as memory address p.
-
-                // We stop if the output array is full.
+        for (q = bd->start; q < bd->free; q++) {
+            if (UNTAG_CONST_CLOSURE((StgClosure*)*q) == (const StgClosure *)p) {
                 if (i < arr_size) {
-                    for (retainer = bd->start; retainer < bd->free; retainer = end) {
+                    for (r = bd->start; r < bd->free; r = end) {
                         // skip over zeroed-out slop
-                        while (*retainer == 0) retainer++;
-
-                        // A quick check that retainer looks like a InfoTable pointer.
-                        if (!LOOKS_LIKE_CLOSURE_PTR(retainer)) {
+                        while (*r == 0) r++;
+                        if (!LOOKS_LIKE_CLOSURE_PTR(r)) {
                             debugBelch("%p found at %p, no closure at %p\n",
-                                       p, candidate, retainer);
+                                       p, q, r);
                             break;
                         }
-                        end = retainer + closure_sizeW((StgClosure*)retainer);
-                        if (candidate < end) {
-                            // end has just increased past candidate. Hence
-                            // candidate is in the closure starting at retainer.
-                            cb(user, (StgClosure *) retainer);
-                            arr[i++] = retainer;
+                        end = r + closure_sizeW((StgClosure*)r);
+                        if (q < end) {
+                            debugBelch("%p = ", r);
+                            printClosure((StgClosure *)r);
+                            arr[i++] = r;
                             break;
                         }
                     }
-                    if (retainer >= bd->free) {
-                        // TODO: How is this case reachable? Perhaps another
-                        // thread overwrote *q after we found q and before we
-                        // found the corresponding closure retainer.
-                        debugBelch("%p found at %p, closure?", p, candidate);
+                    if (r >= bd->free) {
+                        debugBelch("%p found at %p, closure?", p, q);
                     }
                 } else {
                     return i;
@@ -925,19 +893,8 @@ findPtrBlocks
     return i;
 }
 
-// Search for for closures that reference p. This may NOT find all such closures
-// (e.g. the nursery is not searched). This may also find false positives if
-// e.g. a closure contains an Int that happens to have the same value as memory
-// address p. The number of results is capped at 1024. The callback is called
-// for each closure found.
-static void
-findPtr_gen
-    ( FindPtrCb cb  // [in] Callback called for each closure found referencing p.
-    , void *user    // [in] Unused other than to pass to the callback.
-    , P_ p          // [in] Search for closures referencing this address.
-    , int follow    // [in] If set to 1 and only a single closure was found,
-                    //      recursively find pointers to that  if to recurse (call findPtr on the ). May only be 1 if cb==findPtr_default_callback.
-    )
+void
+findPtr(P_ p, int follow)
 {
   uint32_t g, n;
   bdescr *bd;
@@ -959,38 +916,24 @@ findPtr_gen
 
   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
       bd = generations[g].blocks;
-      i = findPtrBlocks(cb, user,p,bd,arr,arr_size,i);
+      i = findPtrBlocks(p,bd,arr,arr_size,i);
       bd = generations[g].large_objects;
-      i = findPtrBlocks(cb, user, p,bd,arr,arr_size,i);
+      i = findPtrBlocks(p,bd,arr,arr_size,i);
       if (i >= arr_size) return;
       for (n = 0; n < n_capabilities; n++) {
-          i = findPtrBlocks(cb, user, p, gc_threads[n]->gens[g].part_list,
+          i = findPtrBlocks(p, gc_threads[n]->gens[g].part_list,
                             arr, arr_size, i);
-          i = findPtrBlocks(cb, user, p, gc_threads[n]->gens[g].todo_bd,
+          i = findPtrBlocks(p, gc_threads[n]->gens[g].todo_bd,
                             arr, arr_size, i);
       }
       if (i >= arr_size) return;
   }
   if (follow && i == 1) {
-      ASSERT(cb == &findPtr_default_callback);
       debugBelch("-->\n");
-      // Non-standard callback expects follow=0
       findPtr(arr[0], 1);
   }
 }
 
-// Special case of findPtrCb: Uses a default callback, that prints the closure
-// pointed to by p.
-void findPtr(P_ p, int follow){
-  findPtr_gen(&findPtr_default_callback, NULL, p, follow);
-}
-
-// Call cb on the closure pointed to by p.
-// FindPtrCb is documented where it's defined.
-void findPtrCb(FindPtrCb cb, void* user, P_ p){
-  findPtr_gen(cb, user, p, 0);
-}
-
 const char *what_next_strs[] = {
   [0]               = "(unknown)",
   [ThreadRunGHC]    = "ThreadRunGHC",


=====================================
rts/Printer.h
=====================================
@@ -10,8 +10,6 @@
 
 #include "BeginPrivate.h"
 
-#include "rts/PrinterAPI.h"
-
 extern void        printPtr        ( StgPtr p );
 extern void        printObj        ( StgClosure *obj );
 


=====================================
rts/RtsAPI.c
=====================================
@@ -647,63 +647,173 @@ rts_unlock (Capability *cap)
 }
 
 #if defined(THREADED_RTS)
-static bool rts_paused = false;
+// The task that paused the RTS. The rts_pausing_task variable is owned by the
+// task that owns all capabilities (there is at most one such task).
+Task * rts_pausing_task = NULL;
 
 // See RtsAPI.h
-RtsPaused rts_pause (void)
-{
-    if (rts_isPaused())
+void rts_pause (void)
+{
+    // The current task must not own a capability. This is true when a new
+    // thread is stareted, or when making a safe FFI call. If
+    // `task->cap->running_task == task` then that is also ok because the
+    // capability can be taken by other capabilities. Note that we always check
+    // (rather than ASSERT which only happens with `-debug`) because this is a
+    // user facing function and we want good error reporting. We also don't
+    // expect rts_pause to be performance critical.
+    Task * task = getMyTask();
+    if (task->cap && task->cap->running_task == task)
     {
-        errorBelch("error: rts_pause: attempting to pause an already paused RTS.");
+        // This task owns a capability (at it can't be taken by other capabilities).
+        errorBelch(task->cap->in_haskell
+            ? ("error: rts_pause: attempting to pause via an unsafe FFI call.\n"
+               "   Perhaps a 'foreign import unsafe' should be 'safe'?")
+            : ("error: rts_pause: attempting to pause from a Task that owns a capability.\n"
+               "   Have you already acquired a capability e.g. with rts_lock?"));
         stg_exit(EXIT_FAILURE);
     }
 
-    RtsPaused rtsPaused;
-    rtsPaused.pausing_task = newBoundTask();
-
-    // Check if we own a capability. This is needed to correctly call
-    // stopAllCapabilities() and to know if to keep ownership or release the
-    // capability on rts_unpause().
-    Capability * cap = rtsPaused.pausing_task->cap;
-    bool taskOwnsCap = cap != NULL && cap->running_task == rtsPaused.pausing_task;
-    rtsPaused.capability = taskOwnsCap ? cap : NULL;
-    stopAllCapabilities(taskOwnsCap ? &rtsPaused.capability : NULL, rtsPaused.pausing_task);
+    // Note that if the rts was paused by another task/thread, then we block
+    // instead of error. It's only an error if the same thread tries to pause
+    // twice in a row.
+    if (rts_pausing_task == task)
+    {
+        errorBelch("error: rts_pause: attempting to pause an already paused RTS.");
+        stg_exit(EXIT_FAILURE);
+    }
 
-    rts_paused = true;
-    return rtsPaused;
+    // NOTE ghc-debug deadlock:
+    //
+    // stopAllCapabilities attempts to acquire all capabilities and will only
+    // block if an existing thread/task:
+    //
+    // 1. Owns a capability and
+    // 2. Is deadlocked i.e. refuses to yield/release its capability.
+    //
+    // Let's assume the rest of the RTS is deadlock free (tasks will eventually
+    // yield their capability) outside of using the ghc-debug API:
+    //
+    // * rts_pause
+    // * rts_resume
+    // * rts_isPaused
+    // * rts_listThreads
+    // * rts_listMiscRoots
+    //
+    // Except rts_pause, none of these functions acquire a lock and so cannot
+    // block. rts_pause may block on stopAllCapabilities, but we ensure that the
+    // current task does not own a capability before calling
+    // stopAllCapabilities. Hence, (1) does not hold given an isolated call to
+    // rts_pause. The only lose end is that after rts_pause, we now have a task
+    // that (by design) owns all capabilities (point (1) above) and is refusing
+    // to yield them (point (2) above). Indeed, if 2 threads concurrently call
+    // rts_pause, one will block until the other calls rts_resume. As "correct
+    // usage" of this API requires calling rts_resume, this case is a non-issue,
+    // but does imply the awkward quirk that if you call rts_pause on many
+    // threads, they will all "take turns" pausing the rts, blocking until it is
+    // their turn. In adition, any API function that attempts to acquire a
+    // capability (e.g. rts_lock), will block until rts_resume is called. Of
+    // course, all ghc-debug API functions  besides rts_pause do not attempt to
+    // acquire a capability.
+    //
+    // The moral to this story is that you will not dealock as long as you, on
+    // the same thread:
+    //
+    // * First call rts_pause
+    // * Then avoid rts functions other than:
+    //      * rts_isPaused
+    //      * rts_listThreads
+    //      * rts_listMiscRoots
+    //      * AND dereferencing/inspect the heap directly e.g. using
+    //        rts_listThreads/rts_listMiscRoots and the ghc-heap library.
+    // * Finally call rts_resume
+    //
+    // TODO
+    //
+    // I think we should return Capability*. We should be able to use the rest
+    // of the rts API with that token. There are a few functions that take
+    // `Capability **` implying that it may change capabilities. I need to
+    // confirm, but I think that in our case, we'll just end up with the same
+    // capability since all others are acquired already. These other API
+    // functions may change the heap, but it is up to the caller to account for
+    // that. Is it possible that the API can be used to start executing a
+    // haskell thread?!?!?! That's perhaps ok as long as we reacquire the
+    // capability at the end so we're paused.
+    task = newBoundTask(); // TODO I'm not sure why we need this. rts_lock does this.
+    stopAllCapabilities(NULL, task);
+
+    // Now we own all capabilities so we own rts_pausing_task.
+    rts_pausing_task = task;
 }
 
 // See RtsAPI.h
-void rts_unpause (RtsPaused rtsPaused)
+void rts_resume (void)
 {
-    if (!rts_isPaused())
+    Task * task = getMyTask(); // This thread has ownership over its Task.
+
+    if (task != rts_pausing_task)
     {
-        errorBelch("error: rts_pause: attempting to resume an RTS that is not paused.");
+        // We don't have a lock on rts_pausing_task but we are garanteed that
+        // rts_pausing_task won't be set the current task (because the current
+        // task is here now!), so the error messages are still correct.
+        errorBelch (rts_isPaused()
+            ? "error: rts_resume: called from a different OS thread than rts_pause."
+            : "error: rts_resume: the rts is not paused. Did you forget to call rts_pause?");
+
         stg_exit(EXIT_FAILURE);
     }
-    if (rtsPaused.pausing_task != getMyTask())
+
+    // Check that we own all capabilities.
+    for (uint i = 0; i < n_capabilities; i++)
     {
-        errorBelch("error: rts_unpause was called from a different OS thread than rts_pause.");
-        stg_exit(EXIT_FAILURE);
+        Capability *cap = capabilities[i];
+        if (cap->running_task != task)
+        {
+            errorBelch ("error: rts_resume: the pausing thread does not own all capabilities."
+                        "   Have you manually released a capability after calling rts_pause?");
+            stg_exit(EXIT_FAILURE);
+        }
     }
 
-    rts_paused = false;
-    releaseAllCapabilities(n_capabilities, rtsPaused.capability, getMyTask());
+    // Now we own all capabilities so we own rts_pausing_task.
+    rts_pausing_task = NULL;
+
+    // releaseAllCapabilities will not block because the current task owns all
+    // capabilities.
+    releaseAllCapabilities(n_capabilities, NULL, task);
     exitMyTask();
 }
 
 // See RtsAPI.h
 bool rts_isPaused(void)
 {
-    return rts_paused;
+    return rts_pausing_task != NULL;
 }
 
-// Call cb for all StgTSOs. *user is a user defined payload to cb. It's not
-// used by the RTS.
-// rts_listThreads should only be called when the RTS is paused, i.e. rts_pause
-// was called before.
+// Check that the rts_pause was called on this thread/task. If not, outputs an
+// error and exits with EXIT_FAILURE.
+void assert_isPausedOnMyTask(void)
+{
+    if (rts_pausing_task == NULL)
+    {
+        errorBelch ("error: rts_listThreads: the rts is not paused. Did you forget to call rts_pause?");
+        stg_exit(EXIT_FAILURE);
+    }
+    else if (rts_pausing_task != myTask())
+    {
+        errorBelch ("error: rts_listThreads: must be called from the same thread as rts_pause.");
+        stg_exit(EXIT_FAILURE);
+    }
+}
+
+// See RtsAPI.h
 void rts_listThreads(ListThreadsCb cb, void *user)
 {
+    assert_isPausedOnMyTask();
+
+    // rts_pausing_task == myTask(). This implies that the rts is paused and can
+    // only be resumed by the current thread. Hence it is safe to read global
+    // thread data.
+
     for (uint32_t g=0; g < RtsFlags.GcFlags.generations; g++) {
         StgTSO *tso = generations[g].threads;
         while (tso != END_TSO_QUEUE) {
@@ -724,13 +834,11 @@ static void list_roots_helper(void *user, StgClosure **p) {
     ctx->cb(ctx->user, *p);
 }
 
-// Call cb for all StgClosures reachable from threadStableNameTable and
-// threadStablePtrTable. *user is a user defined payload to cb. It's not
-// used by the RTS.
-// rts_listMiscRoots should only be called when the RTS is paused, i.e.
-// rts_pause was called before.
+// See RtsAPI.h
 void rts_listMiscRoots (ListRootsCb cb, void *user)
 {
+    assert_isPausedOnMyTask();
+
     struct list_roots_ctx ctx;
     ctx.cb = cb;
     ctx.user = user;
@@ -740,21 +848,18 @@ void rts_listMiscRoots (ListRootsCb cb, void *user)
 }
 
 #else
-RtsPaused rts_pause (void)
+void rts_pause (void)
 {
     errorBelch("Warning: Pausing the RTS is only possible for "
                "multithreaded RTS.");
-    RtsPaused rtsPaused = {
-            .pausing_task = NULL,
-            .capability = NULL
-        };
-    return rtsPaused;
+    stg_exit(EXIT_FAILURE);
 }
 
-void rts_unpause (RtsPaused cap STG_UNUSED)
+void rts_resume (void)
 {
     errorBelch("Warning: Unpausing the RTS is only possible for "
                "multithreaded RTS.");
+    stg_exit(EXIT_FAILURE);
 }
 
 bool rts_isPaused()


=====================================
testsuite/tests/rts/ghc-debug/all.T
=====================================
@@ -1,6 +1,18 @@
-test('rts_pause_and_unpause',
-     [ extra_files(['rts_pause_and_unpause_c.c','rts_pause_and_unpause_c.h']),
+test('ghc_debug_01',
+     [ extra_files(['ghc_debug.c','ghc_debug.h']),
       ignore_stdout,
       ignore_stderr
      ],
-     multi_compile_and_run, ['rts_pause_and_unpause', [('rts_pause_and_unpause_c.c','')], '-threaded '])
\ No newline at end of file
+     multi_compile_and_run, ['ghc_debug_01', [('ghc_debug.c','')], '-threaded '])
+test('ghc_debug_02',
+     [ extra_files(['ghc_debug.c','ghc_debug.h']),
+      ignore_stdout,
+      ignore_stderr
+     ],
+     multi_compile_and_run, ['ghc_debug_02', [('ghc_debug.c','')], '-threaded '])
+test('ghc_debug_03',
+     [ extra_files(['ghc_debug.c','ghc_debug.h']),
+      ignore_stdout,
+      ignore_stderr
+     ],
+     multi_compile_and_run, ['ghc_debug_03', [('ghc_debug.c','')], '-threaded '])


=====================================
testsuite/tests/rts/ghc-debug/ghc_debug.c
=====================================
@@ -0,0 +1,120 @@
+#include <stdio.h>
+#include <unistd.h>
+
+#include "Rts.h"
+#include "RtsAPI.h"
+
+#include "ghc_debug.h"
+
+void expectNoChange(const char * msg, volatile unsigned int * count);
+void expectChange(const char * msg, volatile unsigned int * count);
+
+// Test rts_pause/rts_resume by observing a count that we expect to be
+// incremented by concurrent Haskell thread(s). We expect rts_pause to stop
+// those threads and hence stop incrementing the count.
+void pauseAndResume
+    ( bool assertNotPaused // [in] True to enable assertions before rts_pause and after rts_resume.
+                           // Often disabled when calling this concurrently.
+    , volatile unsigned int * count  // [in] Haskell threads should be forever incrementing this.
+    )
+{
+    // Assert the RTS is resumed.
+    if (assertNotPaused)
+    {
+        expectChange("RTS should be running", count);
+        if(rts_isPaused()) {
+            errorBelch("Expected the RTS to be resumed.");
+            exit(1);
+        }
+    }
+
+    // Pause and assert.
+    rts_pause();
+    if(!rts_isPaused()) {
+        errorBelch("Expected the RTS to be paused.");
+        exit(1);
+    }
+
+    expectNoChange("RTS should be paused", count);
+
+    // Resume.
+    rts_resume();
+
+    // Assert the RTS is resumed.
+    if (assertNotPaused)
+    {
+        expectChange("RTS should be resumed", count);
+        if(rts_isPaused()) {
+            errorBelch("Expected the RTS to be resumed.");
+            exit(1);
+        }
+    }
+}
+
+void* pauseAndResumeViaThread_helper(volatile unsigned int * count)
+{
+    pauseAndResume(false, count);
+    return NULL;
+}
+
+// Call pauseAndResume via a new thread and return the thread ID.
+unsigned long pauseAndResumeViaThread
+    ( volatile unsigned int * count  // [in] Haskell threads should be forever incrementing this.
+    )
+{
+    pthread_t threadId;
+    pthread_create(&threadId, NULL, &pauseAndResumeViaThread_helper, count);
+    return threadId;
+}
+
+const int TIMEOUT = 1000000; // 1 second
+
+// Wait for &count to change (else exit(1) after TIMEOUT).
+void expectChange(const char * msg, volatile unsigned int * count)
+{
+    unsigned int count_0 = *count;
+    int microSecondsLeft = TIMEOUT;
+    unsigned int sleepTime = 10000;
+    while (true)
+    {
+        usleep(sleepTime);
+        microSecondsLeft -= sleepTime;
+
+        if (count_0 != *count)
+        {
+            // Change detected.
+            return;
+        }
+
+        if (microSecondsLeft < 0)
+        {
+            printf("Expected: %s\n", msg);
+            exit(1);
+        }
+    }
+}
+
+// Ensure &count does NOT change (for TIMEOUT else exit(1)).
+void expectNoChange(const char * msg, volatile unsigned int * count)
+{
+    unsigned int count_0 = *count;
+    int microSecondsLeft = TIMEOUT;
+    unsigned int sleepTime = 10000;
+    while (true)
+    {
+        usleep(sleepTime);
+        microSecondsLeft -= sleepTime;
+
+        if (count_0 != *count)
+        {
+            // Change detected.
+            printf("Expected: %s\n", msg);
+            exit(1);
+        }
+
+        if (microSecondsLeft < 0)
+        {
+            return;
+        }
+    }
+}


=====================================
testsuite/tests/rts/ghc-debug/ghc_debug.h
=====================================
@@ -0,0 +1,3 @@
+
+void pauseAndResume(bool assertNotPaused, volatile unsigned int * count);
+unsigned long pauseAndResumeViaThread(volatile unsigned int * count);


=====================================
testsuite/tests/rts/ghc-debug/ghc_debug_01.hs
=====================================
@@ -0,0 +1,38 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+
+import Control.Concurrent
+import Control.Concurrent.MVar
+import Control.Monad
+import Foreign.C.Types
+import Foreign.Marshal.Alloc
+import Foreign.Ptr
+import Foreign.Storable
+import GHC.Stack
+
+foreign import ccall safe "ghc_debug.h pauseAndResume"
+    safe_pauseAndResume_c :: CBool -> Ptr CUInt -> IO ()
+
+-- Simple test of rts_pause() followed by rts_resume()
+main :: IO ()
+main = do
+  alloca $ \countPtr -> do
+    poke countPtr 0
+
+    -- forever increment count. Changes will be observed from the c code.
+    sequence_ $ replicate 4 $ forkIO $ forever $ do
+      count <- peek countPtr
+      poke countPtr (count + 1)
+      threadDelay 10000   -- 10 milliseconds
+
+    -- Test rts_pause/rts_resume.
+    safe_pauseAndResume_c cTrue countPtr
+
+    -- Test rts_pause/rts_resume from a unbound (worker) thread.
+    mvar <- newEmptyMVar
+    forkIO $ do
+      safe_pauseAndResume_c cTrue countPtr
+      putMVar mvar ()
+    takeMVar mvar
+
+cTrue :: CBool
+cTrue = 1


=====================================
testsuite/tests/rts/ghc-debug/ghc_debug_02.hs
=====================================
@@ -0,0 +1,38 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+
+import Control.Concurrent
+import Control.Concurrent.MVar
+import Control.Monad
+import Foreign.C.Types
+import Foreign.Marshal.Alloc
+import Foreign.Ptr
+import Foreign.Storable
+
+foreign import ccall safe "ghc_debug.h pauseAndResumeViaThread"
+    safe_pauseAndResumeViaThread_c :: Ptr CUInt -> IO CULong
+
+foreign import ccall safe "pthread.h pthread_join"
+    safe_pthread_join_c :: CULong -> IO ()
+
+-- Simple test of rts_pause() followed by rts_resume() via a new thread created
+-- in c code.
+main :: IO ()
+main = do
+  alloca $ \countPtr -> do
+    poke countPtr 0
+
+    -- forever increment count. Changes will be observed from the c code.
+    sequence_ $ replicate 4 $ forkIO $ forever $ do
+      count <- peek countPtr
+      poke countPtr (count + 1)
+      threadDelay 10000   -- 10 milliseconds
+
+    -- Test rts_pause/rts_resume.
+    safe_pthread_join_c =<< safe_pauseAndResumeViaThread_c countPtr
+
+    -- Test rts_pause/rts_resume from a unbound (worker) thread.
+    mvar <- newEmptyMVar
+    forkIO $ do
+      safe_pthread_join_c =<< safe_pauseAndResumeViaThread_c countPtr
+      putMVar mvar ()
+    takeMVar mvar


=====================================
testsuite/tests/rts/ghc-debug/ghc_debug_03.hs
=====================================
@@ -0,0 +1,52 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+
+import Control.Concurrent
+import Control.Concurrent.MVar
+import Control.Monad
+import Foreign.C.Types
+import Foreign.Marshal.Alloc
+import Foreign.Ptr
+import Foreign.Storable
+import System.Exit
+import System.Timeout
+
+foreign import ccall safe "ghc_debug.h pauseAndResume"
+    safe_pauseAndResume_c :: CBool -> Ptr CUInt -> IO ()
+
+-- Test that concurrent calls to rts_pause()/rts_resume() doesn't cause deadlock.
+main :: IO ()
+main = do
+  alloca $ \countPtr -> do
+    poke countPtr 0
+
+    -- forever increment count. Changes will be observed from the c code.
+    sequence_ $ replicate 4 $ forkIO $ forever $ do
+      count <- peek countPtr
+      poke countPtr (count + 1)
+      threadDelay 10000   -- 10 milliseconds
+
+    -- Note that each call blocks for about a second, so this will take 5
+    -- seconds to complete.
+    let n = 5
+    mvars <- sequence $ replicate n newEmptyMVar
+    forM_ mvars $ \mvar -> forkIO $ do
+      safe_pauseAndResume_c
+        -- Don't check rts_isPaused() before rts_pause nore after rts_resume
+        -- because we're doing this concurrently so that would introduce a race
+        -- condition.
+        cFalse
+        countPtr
+      putMVar mvar ()
+
+    -- Wait (at least 2n seconds to be safe) for all threads to finish.
+    result <- timeout (2 * n * 1000000) (mapM_ takeMVar mvars)
+    case result of
+      Nothing -> do
+        putStrLn "Not all rts_pause/rts_resume threads have finished. Assuming deadlocked and failing test."
+        exitFailure
+      Just () -> do
+        putStrLn "All threads finished"
+        exitSuccess
+
+cFalse :: CBool
+cFalse = 0


=====================================
testsuite/tests/rts/ghc-debug/rts_pause_and_unpause.hs deleted
=====================================
@@ -1,98 +0,0 @@
-{-# LANGUAGE ForeignFunctionInterface #-}
-
-import Data.Word
-import Data.IORef
-import GHC.Clock
-import Control.Concurrent
-import Foreign.C.Types
-import System.Mem
-import Control.Monad
-
-foreign import ccall safe "rts_pause_and_unpause_c.h pauseAndUnpause"
-    safe_pauseAndUnpause_c :: IO ()
-
-foreign import ccall unsafe "rts_pause_and_unpause_c.h pauseAndUnpause"
-    unsafe_pauseAndUnpause_c :: IO ()
-
-foreign import ccall unsafe "rts_pause_and_unpause_c.h pauseAndUnpauseViaNewThread"
-    unsafe_pauseAndUnpauseViaNewThread_c :: IO ()
-
--- Note that these should be unsafe FFI calls. rts_pause() does not pause or
--- wait for safe FFI calls, as they do not own a capability.
-foreign import ccall unsafe "rts_pause_and_unpause_c.h getUnixTime"
-    getUnixTime_c :: IO CTime
-
-foreign import ccall unsafe "rts_pause_and_unpause_c.h getPauseBegin"
-    getPauseBegin_c :: IO CTime
-
-foreign import ccall unsafe "rts_pause_and_unpause_c.h getPauseEnd"
-    getPauseEnd_c :: IO CTime
-
-clockEachSecond :: IORef [CTime] -> IO ()
-clockEachSecond ref = forever $ do
-  time <- getUnixTime_c
-  modifyIORef ref $ (time:)
-
-  sleepSeconds 1
-
-{- To show that rts_pause() and rts_unpause() work, clockEachSecond adds the
-current unix time to a list (once per Second). pauseAndUnpause_c stops the RTS
-for 5 Seconds. Thus there's an invariant that there should be no timestamp in
-the list that is in this 5 Seconds wide timeframe, which is defined by
-getPauseBegin_c and getPauseEnd_c. -}
-main :: IO ()
-main = do
-  -- Start thread that forever writes the current time to an IORef
-  ref <- newIORef []
-  forkIO $ clockEachSecond ref
-
-  -- Attempt pause and unpause in various forms
-  withPauseAndUnpause ref
-    "Pause and unpause via safe FFI call"
-    safe_pauseAndUnpause_c
-
-  withPauseAndUnpause ref
-    "Pause and unpause via unsafe FFI call"
-    unsafe_pauseAndUnpause_c
-
-  withPauseAndUnpause ref
-    "Pause and unpause via unsafe FFI call that creates a new OS thread"
-    unsafe_pauseAndUnpauseViaNewThread_c
-
-withPauseAndUnpause :: IORef [CTime] -> String -> IO () -> IO ()
-withPauseAndUnpause ref startMsg pauseAndUnpause = do
-    putStrLn startMsg
-
-    writeIORef ref []
-    sleepSeconds 3
-    pauseAndUnpause
-
-    -- This seems to sleep for 8 - 5 Seconds. That's strange, but should be
-    -- good enough for this test.
-    -- 5 Seconds is the time the whole RTS is paused. But I (Sven) don't
-    -- understand how this relates.
-    sleepSeconds 8
-
-    times <- readIORef ref
-
-    pauseBegin <- getPauseBegin_c
-    pauseEnd <- getPauseEnd_c
-    filter (\t -> pauseBegin < t && t < pauseEnd) times `shouldBe` []
-    filter (\t -> t <= pauseBegin) times `shouldNotBe` []
-    filter (\t -> t >= pauseEnd) times `shouldNotBe` []
-
-    putStrLn "DONE"
-
-sleepSeconds :: Int -> IO ()
-sleepSeconds t = threadDelay $ oneSecondInMicroSeconds * t
-
-oneSecondInMicroSeconds :: Int
-oneSecondInMicroSeconds = 1000000
-
-shouldBe :: (Eq a, Show a) => a -> a -> IO ()
-shouldBe x y =
-  unless (x == y) $ fail $ show x ++ " is not equal to " ++ show y
-
-shouldNotBe :: (Eq a, Show a) => a -> a -> IO ()
-shouldNotBe x y =
-  unless (x /= y) $ fail $ show x ++ " is equal to " ++ show y


=====================================
testsuite/tests/rts/ghc-debug/rts_pause_and_unpause_c.c deleted
=====================================
@@ -1,54 +0,0 @@
-#include <pthread.h>
-#include <time.h>
-#include <unistd.h>
-#include "rts_pause_and_unpause_c.h"
-#include "Rts.h"
-#include "RtsAPI.h"
-
-#include <stdio.h>
-
-struct PauseTimestamps timestamps = {0, 0};
-
-void* pauseAndUnpause_thread(void* unused){
-    RtsPaused rtsPaused = rts_pause();
-
-    if(!rts_isPaused()) {
-        errorBelch("Expected the RTS to be paused.");
-        exit(1);
-    }
-
-    timestamps.begin = time(NULL);
-    sleep(5);
-    timestamps.end = time(NULL);
-
-    rts_unpause(rtsPaused);
-
-    if(rts_isPaused()) {
-        errorBelch("Expected the RTS to be unpaused.");
-        exit(1);
-    }
-
-    return NULL;
-}
-
-void pauseAndUnpause(void){
-    pauseAndUnpause_thread(NULL);
-}
-
-void pauseAndUnpauseViaNewThread(void){
-    pthread_t threadId;
-    pthread_create(&threadId, NULL, &pauseAndUnpause_thread, NULL);
-    pthread_detach(threadId);
-}
-
-time_t getPauseBegin(void) {
-    return timestamps.begin;
-}
-
-time_t getPauseEnd(void) {
-    return timestamps.end;
-}
-
-time_t getUnixTime(void){
-    return time(NULL);
-}


=====================================
testsuite/tests/rts/ghc-debug/rts_pause_and_unpause_c.h deleted
=====================================
@@ -1,11 +0,0 @@
-#include <time.h>
-
-struct PauseTimestamps{
-    time_t begin;
-    time_t end;
-};
-
-void pauseAndUnpause(void);
-time_t getPauseBegin(void);
-time_t getPauseEnd(void);
-time_t getUnixTime(void);


=====================================
testsuite/tests/rts/ghc-debug/shouldfail/all.T
=====================================
@@ -1 +1 @@
-test('unsafe_rts_pause', [ignore_stderr, exit_code(134)], compile_and_run, ['-threaded '])
\ No newline at end of file
+test('unsafe_rts_pause', [exit_code(1)], compile_and_run, ['-threaded '])
\ No newline at end of file


=====================================
testsuite/tests/rts/ghc-debug/shouldfail/unsafe_rts_pause.hs
=====================================
@@ -15,7 +15,7 @@ foreign import ccall unsafe "RtsAPI.h rts_pause"
 
 main :: IO ()
 main = do
-  putStrLn "Making a unsafe call to rts_pause() should fail on return. We \
+  putStrLn "Making a unsafe call to rts_pause() should fail. We \
            \cannot allow this haskell thread to continue if the RTS is paused."
   _ <- unsafe_rts_pause_c
   putStrLn "Oops! Haskell thread has continued even though RTS was paused."


=====================================
testsuite/tests/rts/ghc-debug/shouldfail/unsafe_rts_pause.stderr
=====================================
@@ -0,0 +1,2 @@
+unsafe_rts_pause: error: rts_pause: attempting to pause via an unsafe FFI call.
+   Perhaps a 'foreign import unsafe' should be 'safe'?



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/47b915a196db03d57ca3358a7c03e6ab97e7aac4...1762210f8b17de124dd2a20594eca75f97f4db9b

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/47b915a196db03d57ca3358a7c03e6ab97e7aac4...1762210f8b17de124dd2a20594eca75f97f4db9b
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200921/733d0bae/attachment-0001.html>


More information about the ghc-commits mailing list