[Git][ghc/ghc][wip/ghc-debug] 5 commits: Use `exitMyTask()` instead of `freeTask()` in `rts_unpause()`

David Eichmann gitlab at gitlab.haskell.org
Thu Sep 17 13:55:37 UTC 2020



David Eichmann pushed to branch wip/ghc-debug at Glasgow Haskell Compiler / GHC


Commits:
2f8ca0e5 by David Eichmann at 2020-09-14T15:26:20+01:00
Use `exitMyTask()` instead of `freeTask()` in `rts_unpause()`

- - - - -
781607b2 by David Eichmann at 2020-09-14T17:06:25+01:00
Correct documentation for Task_::stopped

- - - - -
a5d9429f by David Eichmann at 2020-09-16T20:21:56+01:00
Improve documentation

- - - - -
56596dd7 by David Eichmann at 2020-09-17T14:47:46+01:00
Record and restor owned capability on pause/unpause

- - - - -
b7e42c0a by David Eichmann at 2020-09-17T14:49:52+01:00
Add tests for calling ghc-debug API via safe/unsafe FFI call and via a new thread

- - - - -


12 changed files:

- includes/RtsAPI.h
- rts/Capability.c
- rts/Printer.c
- rts/RtsAPI.c
- rts/Schedule.c
- rts/Task.h
- testsuite/tests/rts/ghc-debug/all.T
- testsuite/tests/rts/ghc-debug/pause_and_unpause.hs → testsuite/tests/rts/ghc-debug/rts_pause_and_unpause.hs
- testsuite/tests/rts/ghc-debug/pause_and_unpause_thread.c → testsuite/tests/rts/ghc-debug/rts_pause_and_unpause_c.c
- testsuite/tests/rts/ghc-debug/pause_and_unpause_thread.h → testsuite/tests/rts/ghc-debug/rts_pause_and_unpause_c.h
- + testsuite/tests/rts/ghc-debug/shouldfail/all.T
- + testsuite/tests/rts/ghc-debug/shouldfail/unsafe_rts_pause.hs


Changes:

=====================================
includes/RtsAPI.h
=====================================
@@ -488,21 +488,28 @@ SchedulerStatus rts_getSchedStatus (Capability *cap);
 // Various bits of information that need to be persisted between rts_pause and
 // rts_unpause.
 typedef struct RtsPaused_ {
+    // The task (i.e. OS thread) on which rts_pause() was called. This is used
+    // in rts_unpause() to check that it is called on the same OS thread.
     Task *pausing_task;
-    Capability *capabilities;
+
+    // The capability owned by pausing_task (possibly NULL) just before calling
+    // rts_unpause(). On rts_unpause(), the pausing_task will retain ownership
+    // of this capability (if not NULL).
+    Capability *capability;
 } RtsPaused;
 
-// Halt execution of all Haskell threads.
-// It is different to rts_lock because it pauses all capabilities. rts_lock
-// only pauses a single capability.
-// rts_pause() and rts_unpause() have to be executed from the same OS thread
-// (i.e. myTask() must stay the same).
+// Halt execution of all Haskell threads by acquiring all capabilities. It is
+// different to rts_lock() because rts_pause() pauses all capabilities while
+// rts_lock() only pauses a single capability. rts_pause() and rts_unpause()
+// have to be executed from the same OS thread (i.e. myTask() must stay the
+// same). Returns the currently owned capability (possibly NULL). This must be
+// passed back to rts_unpause().
 RtsPaused rts_pause (void);
 
 // Counterpart of rts_pause: Continue from a pause.
 // rts_pause() and rts_unpause() have to be executed from the same OS thread
 // (i.e. myTask() must stay the same).
-void rts_unpause (RtsPaused paused);
+void rts_unpause (RtsPaused);
 
 // Tells the current state of the RTS regarding rts_pause() and rts_unpause().
 bool rts_isPaused(void);


=====================================
rts/Capability.c
=====================================
@@ -858,7 +858,13 @@ void waitForCapability (Capability **pCap, Task *task)
 /* See Note [GC livelock] in Schedule.c for why we have gcAllowed
    and return the bool */
 bool /* Did we GC? */
-yieldCapability (Capability** pCap, Task *task, bool gcAllowed)
+yieldCapability
+    ( Capability** pCap     // [in/out] Task's currently owned capability
+                            //      pCap != NULL
+                            //      *pCap != NULL
+    , Task *task            // [in] This thread's task.
+    , bool gcAllowed
+    )
 {
     Capability *cap = *pCap;
 


=====================================
rts/Printer.c
=====================================
@@ -861,32 +861,60 @@ findPtr_default_callback(void *user STG_UNUSED, StgClosure * closure){
 
 int searched = 0;
 
+// Search through a block (and it's linked blocks) for closures that reference
+// p. The size of arr is respected and the search is stoped when arr is full.
+// TODO: This may produce false positives if e.g. a closure contains an Int that
+// happens to have the same value as memory address p. Returns the new i value
+// i.e. the next free position in the arr array.
 static int
-findPtrBlocks (FindPtrCb cb, void* user, StgPtr p, bdescr *bd, StgPtr arr[], int arr_size, int i)
+findPtrBlocks
+    ( FindPtrCb cb      // [in] callback called whenever a closure referencing p is found.
+    , void* user        // [in] unused other than to pass to the callback.
+    , StgPtr p          // [in] The pointer to search for.
+    , bdescr *bd        // [in] The block descriptor of the block from which to start searching.
+    , StgPtr arr[]      // [in/out] All found closure addresses are written into this array.
+    , int arr_size      // [in] The size of arr.
+    , int i             // [in] The current position in arr.
+    )
 {
-    StgPtr q, r, end;
+    StgPtr candidate, retainer, end;
+
+    // Iterate over all blocks.
     for (; bd; bd = bd->link) {
         searched++;
-        for (q = bd->start; q < bd->free; q++) {
-            if (UNTAG_CONST_CLOSURE((StgClosure*)*q) == (const StgClosure *)p) {
+        // Scan the block looking for a pointer equal to p.
+        for (candidate = bd->start; candidate < bd->free; candidate++) {
+            if (UNTAG_CONST_CLOSURE((StgClosure*)*candidate) == (const StgClosure *)p) {
+                // *candidate looks like a pointer equal to p, but it might not
+                // be a pointer type i.e. it may just be an Int that happens to
+                // have the same value as memory address p.
+
+                // We stop if the output array is full.
                 if (i < arr_size) {
-                    for (r = bd->start; r < bd->free; r = end) {
+                    for (retainer = bd->start; retainer < bd->free; retainer = end) {
                         // skip over zeroed-out slop
-                        while (*r == 0) r++;
-                        if (!LOOKS_LIKE_CLOSURE_PTR(r)) {
+                        while (*retainer == 0) retainer++;
+
+                        // A quick check that retainer looks like a InfoTable pointer.
+                        if (!LOOKS_LIKE_CLOSURE_PTR(retainer)) {
                             debugBelch("%p found at %p, no closure at %p\n",
-                                       p, q, r);
+                                       p, candidate, retainer);
                             break;
                         }
-                        end = r + closure_sizeW((StgClosure*)r);
-                        if (q < end) {
-                            cb(user, (StgClosure *) r);
-                            arr[i++] = r;
+                        end = retainer + closure_sizeW((StgClosure*)retainer);
+                        if (candidate < end) {
+                            // end has just increased past candidate. Hence
+                            // candidate is in the closure starting at retainer.
+                            cb(user, (StgClosure *) retainer);
+                            arr[i++] = retainer;
                             break;
                         }
                     }
-                    if (r >= bd->free) {
-                        debugBelch("%p found at %p, closure?", p, q);
+                    if (retainer >= bd->free) {
+                        // TODO: How is this case reachable? Perhaps another
+                        // thread overwrote *q after we found q and before we
+                        // found the corresponding closure retainer.
+                        debugBelch("%p found at %p, closure?", p, candidate);
                     }
                 } else {
                     return i;
@@ -897,8 +925,19 @@ findPtrBlocks (FindPtrCb cb, void* user, StgPtr p, bdescr *bd, StgPtr arr[], int
     return i;
 }
 
+// Search for for closures that reference p. This may NOT find all such closures
+// (e.g. the nursery is not searched). This may also find false positives if
+// e.g. a closure contains an Int that happens to have the same value as memory
+// address p. The number of results is capped at 1024. The callback is called
+// for each closure found.
 static void
-findPtr_gen(FindPtrCb cb, void *user, P_ p, int follow)
+findPtr_gen
+    ( FindPtrCb cb  // [in] Callback called for each closure found referencing p.
+    , void *user    // [in] Unused other than to pass to the callback.
+    , P_ p          // [in] Search for closures referencing this address.
+    , int follow    // [in] If set to 1 and only a single closure was found,
+                    //      recursively find pointers to that  if to recurse (call findPtr on the ). May only be 1 if cb==findPtr_default_callback.
+    )
 {
   uint32_t g, n;
   bdescr *bd;


=====================================
rts/RtsAPI.c
=====================================
@@ -648,31 +648,51 @@ rts_unlock (Capability *cap)
 
 #if defined(THREADED_RTS)
 static bool rts_paused = false;
-// Halt execution of all Haskell threads.
-// It is different to rts_lock because it pauses all capabilities. rts_lock
-// only pauses a single capability.
-// rts_pause() and rts_unpause() have to be executed from the same OS thread
-// (i.e. myTask() must stay the same).
+
+// See RtsAPI.h
 RtsPaused rts_pause (void)
 {
-    struct RtsPaused_ paused;
-    paused.pausing_task = newBoundTask();
-    stopAllCapabilities(&paused.capabilities, paused.pausing_task);
+    if (rts_isPaused())
+    {
+        errorBelch("error: rts_pause: attempting to pause an already paused RTS.");
+        stg_exit(EXIT_FAILURE);
+    }
+
+    RtsPaused rtsPaused;
+    rtsPaused.pausing_task = newBoundTask();
+
+    // Check if we own a capability. This is needed to correctly call
+    // stopAllCapabilities() and to know if to keep ownership or release the
+    // capability on rts_unpause().
+    Capability * cap = rtsPaused.pausing_task->cap;
+    bool taskOwnsCap = cap != NULL && cap->running_task == rtsPaused.pausing_task;
+    rtsPaused.capability = taskOwnsCap ? cap : NULL;
+    stopAllCapabilities(taskOwnsCap ? &rtsPaused.capability : NULL, rtsPaused.pausing_task);
+
     rts_paused = true;
-    return paused;
+    return rtsPaused;
 }
 
-// Counterpart of rts_pause: Continue from a pause.
-// rts_pause() and rts_unpause() have to be executed from the same OS thread
-// (i.e. myTask() must stay the same).
-void rts_unpause (RtsPaused paused)
+// See RtsAPI.h
+void rts_unpause (RtsPaused rtsPaused)
 {
+    if (!rts_isPaused())
+    {
+        errorBelch("error: rts_pause: attempting to resume an RTS that is not paused.");
+        stg_exit(EXIT_FAILURE);
+    }
+    if (rtsPaused.pausing_task != getMyTask())
+    {
+        errorBelch("error: rts_unpause was called from a different OS thread than rts_pause.");
+        stg_exit(EXIT_FAILURE);
+    }
+
     rts_paused = false;
-    releaseAllCapabilities(n_capabilities, paused.capabilities, paused.pausing_task);
-    freeTask(paused.pausing_task);
+    releaseAllCapabilities(n_capabilities, rtsPaused.capability, getMyTask());
+    exitMyTask();
 }
 
-// Tells the current state of the RTS regarding rts_pause() and rts_unpause().
+// See RtsAPI.h
 bool rts_isPaused(void)
 {
     return rts_paused;
@@ -684,7 +704,6 @@ bool rts_isPaused(void)
 // was called before.
 void rts_listThreads(ListThreadsCb cb, void *user)
 {
-    ASSERT(rts_paused);
     for (uint32_t g=0; g < RtsFlags.GcFlags.generations; g++) {
         StgTSO *tso = generations[g].threads;
         while (tso != END_TSO_QUEUE) {
@@ -716,7 +735,6 @@ void rts_listMiscRoots (ListRootsCb cb, void *user)
     ctx.cb = cb;
     ctx.user = user;
 
-    ASSERT(rts_paused);
     threadStableNameTable(&list_roots_helper, (void *)&ctx);
     threadStablePtrTable(&list_roots_helper, (void *)&ctx);
 }
@@ -726,19 +744,20 @@ RtsPaused rts_pause (void)
 {
     errorBelch("Warning: Pausing the RTS is only possible for "
                "multithreaded RTS.");
-    struct RtsPaused_ paused;
-    paused.pausing_task = NULL;
-    paused.capabilities = NULL;
-    return paused;
+    RtsPaused rtsPaused = {
+            .pausing_task = NULL,
+            .capability = NULL
+        };
+    return rtsPaused;
 }
 
-void rts_unpause (RtsPaused paused STG_UNUSED)
+void rts_unpause (RtsPaused cap STG_UNUSED)
 {
     errorBelch("Warning: Unpausing the RTS is only possible for "
                "multithreaded RTS.");
 }
 
-bool rts_isPaused(void)
+bool rts_isPaused()
 {
     errorBelch("Warning: (Un-) Pausing the RTS is only possible for "
                "multithreaded RTS.");


=====================================
rts/Schedule.c
=====================================
@@ -1411,7 +1411,15 @@ scheduleNeedHeapProfile( bool ready_to_gc )
  * -------------------------------------------------------------------------- */
 
 #if defined(THREADED_RTS)
-void stopAllCapabilities (Capability **pCap, Task *task)
+void stopAllCapabilities
+    ( Capability **pCap     // [in/out] This thread's task's owned capability.
+                            //      pCap may be NULL if no capability is owned.
+                            //      *pCap != NULL
+                            // On return, set to the task's newly owned
+                            // capability (task->cap). Though, the Task will
+                            // technically own all capabilities.
+    , Task *task            // [in] This thread's task.
+    )
 {
     stopAllCapabilitiesWith(pCap, task, SYNC_OTHER);
 }
@@ -1463,9 +1471,15 @@ void stopAllCapabilitiesWith (Capability **pCap, Task *task, SyncType sync_type)
  * -------------------------------------------------------------------------- */
 
 #if defined(THREADED_RTS)
-static bool requestSync (
-    Capability **pcap, Task *task, PendingSync *new_sync,
-    SyncType *prev_sync_type)
+static bool requestSync
+    ( Capability **pcap         // [in/out] This thread's task's owned capability.
+                                // May change if there is an existing sync (true is returned).
+                                //      pcap may be NULL
+                                //      *pcap != NULL
+    , Task *task                // [in] This thread's task.
+    , PendingSync *new_sync     // [in] The new requested synch.
+    , SyncType *prev_sync_type  // [out] Only set if there is an existing previous sync (true is returned).
+    )
 {
     PendingSync *sync;
 


=====================================
rts/Task.h
=====================================
@@ -149,7 +149,7 @@ typedef struct Task_ {
     struct InCall_ *spare_incalls;
 
     bool    worker;          // == true if this is a worker Task
-    bool    stopped;         // == true between newBoundTask and
+    bool    stopped;         // == false between newBoundTask and
                                 // exitMyTask, or in a worker Task.
 
     // So that we can detect when a finalizer illegally calls back into Haskell


=====================================
testsuite/tests/rts/ghc-debug/all.T
=====================================
@@ -1,6 +1,6 @@
-test('pause_and_unpause',
-     [ extra_files(['pause_and_unpause_thread.c','pause_and_unpause_thread.h']),
+test('rts_pause_and_unpause',
+     [ extra_files(['rts_pause_and_unpause_c.c','rts_pause_and_unpause_c.h']),
       ignore_stdout,
       ignore_stderr
      ],
-     multi_compile_and_run, ['pause_and_unpause', [('pause_and_unpause_thread.c','')], '-threaded'])
+     multi_compile_and_run, ['rts_pause_and_unpause', [('rts_pause_and_unpause_c.c','')], '-threaded '])
\ No newline at end of file


=====================================
testsuite/tests/rts/ghc-debug/pause_and_unpause.hs → testsuite/tests/rts/ghc-debug/rts_pause_and_unpause.hs
=====================================
@@ -8,23 +8,30 @@ import Foreign.C.Types
 import System.Mem
 import Control.Monad
 
-foreign import ccall safe "pause_and_unpause_thread.h pauseAndUnpause"
-    pauseAndUnpause_c :: IO ()
+foreign import ccall safe "rts_pause_and_unpause_c.h pauseAndUnpause"
+    safe_pauseAndUnpause_c :: IO ()
 
-foreign import ccall safe "pause_and_unpause_thread.h getUnixTime"
+foreign import ccall unsafe "rts_pause_and_unpause_c.h pauseAndUnpause"
+    unsafe_pauseAndUnpause_c :: IO ()
+
+foreign import ccall unsafe "rts_pause_and_unpause_c.h pauseAndUnpauseViaNewThread"
+    unsafe_pauseAndUnpauseViaNewThread_c :: IO ()
+
+-- Note that these should be unsafe FFI calls. rts_pause() does not pause or
+-- wait for safe FFI calls, as they do not own a capability.
+foreign import ccall unsafe "rts_pause_and_unpause_c.h getUnixTime"
     getUnixTime_c :: IO CTime
 
-foreign import ccall safe "pause_and_unpause_thread.h getPauseBegin"
+foreign import ccall unsafe "rts_pause_and_unpause_c.h getPauseBegin"
     getPauseBegin_c :: IO CTime
 
-foreign import ccall safe "pause_and_unpause_thread.h getPauseEnd"
+foreign import ccall unsafe "rts_pause_and_unpause_c.h getPauseEnd"
     getPauseEnd_c :: IO CTime
 
 clockEachSecond :: IORef [CTime] -> IO ()
 clockEachSecond ref = forever $ do
   time <- getUnixTime_c
-  timesList <- readIORef ref
-  writeIORef ref $ time : timesList
+  modifyIORef ref $ (time:)
 
   sleepSeconds 1
 
@@ -35,12 +42,30 @@ the list that is in this 5 Seconds wide timeframe, which is defined by
 getPauseBegin_c and getPauseEnd_c. -}
 main :: IO ()
 main = do
-    ref <- newIORef []
-    forkIO $ clockEachSecond ref
+  -- Start thread that forever writes the current time to an IORef
+  ref <- newIORef []
+  forkIO $ clockEachSecond ref
 
-    sleepSeconds 3
+  -- Attempt pause and unpause in various forms
+  withPauseAndUnpause ref
+    "Pause and unpause via safe FFI call"
+    safe_pauseAndUnpause_c
+
+  withPauseAndUnpause ref
+    "Pause and unpause via unsafe FFI call"
+    unsafe_pauseAndUnpause_c
 
-    pauseAndUnpause_c
+  withPauseAndUnpause ref
+    "Pause and unpause via unsafe FFI call that creates a new OS thread"
+    unsafe_pauseAndUnpauseViaNewThread_c
+
+withPauseAndUnpause :: IORef [CTime] -> String -> IO () -> IO ()
+withPauseAndUnpause ref startMsg pauseAndUnpause = do
+    putStrLn startMsg
+
+    writeIORef ref []
+    sleepSeconds 3
+    pauseAndUnpause
 
     -- This seems to sleep for 8 - 5 Seconds. That's strange, but should be
     -- good enough for this test.
@@ -56,7 +81,7 @@ main = do
     filter (\t -> t <= pauseBegin) times `shouldNotBe` []
     filter (\t -> t >= pauseEnd) times `shouldNotBe` []
 
-    return ()
+    putStrLn "DONE"
 
 sleepSeconds :: Int -> IO ()
 sleepSeconds t = threadDelay $ oneSecondInMicroSeconds * t


=====================================
testsuite/tests/rts/ghc-debug/pause_and_unpause_thread.c → testsuite/tests/rts/ghc-debug/rts_pause_and_unpause_c.c
=====================================
@@ -1,7 +1,7 @@
 #include <pthread.h>
 #include <time.h>
 #include <unistd.h>
-#include "pause_and_unpause_thread.h"
+#include "rts_pause_and_unpause_c.h"
 #include "Rts.h"
 #include "RtsAPI.h"
 
@@ -10,7 +10,7 @@
 struct PauseTimestamps timestamps = {0, 0};
 
 void* pauseAndUnpause_thread(void* unused){
-    RtsPaused r_paused = rts_pause();
+    RtsPaused rtsPaused = rts_pause();
 
     if(!rts_isPaused()) {
         errorBelch("Expected the RTS to be paused.");
@@ -21,7 +21,7 @@ void* pauseAndUnpause_thread(void* unused){
     sleep(5);
     timestamps.end = time(NULL);
 
-    rts_unpause(r_paused);
+    rts_unpause(rtsPaused);
 
     if(rts_isPaused()) {
         errorBelch("Expected the RTS to be unpaused.");
@@ -32,6 +32,10 @@ void* pauseAndUnpause_thread(void* unused){
 }
 
 void pauseAndUnpause(void){
+    pauseAndUnpause_thread(NULL);
+}
+
+void pauseAndUnpauseViaNewThread(void){
     pthread_t threadId;
     pthread_create(&threadId, NULL, &pauseAndUnpause_thread, NULL);
     pthread_detach(threadId);


=====================================
testsuite/tests/rts/ghc-debug/pause_and_unpause_thread.h → testsuite/tests/rts/ghc-debug/rts_pause_and_unpause_c.h
=====================================


=====================================
testsuite/tests/rts/ghc-debug/shouldfail/all.T
=====================================
@@ -0,0 +1 @@
+test('unsafe_rts_pause', normal, compile_and_run, ['-threaded '])
\ No newline at end of file


=====================================
testsuite/tests/rts/ghc-debug/shouldfail/unsafe_rts_pause.hs
=====================================
@@ -0,0 +1,21 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+
+import Data.Word
+import Data.IORef
+import GHC.Clock
+import Control.Concurrent
+import Foreign.Ptr
+import System.Mem
+import Control.Monad
+
+data RtsPause
+
+foreign import ccall unsafe "RtsAPI.h rts_pause"
+    unsafe_rts_pause_c :: IO (Ptr RtsPause)
+
+main :: IO ()
+main = do
+  putStrLn "Making a unsafe call to rts_pause() should fail on return. We \
+           \cannot allow this haskell thread to continue if the RTS is paused."
+  _ <- unsafe_rts_pause_c
+  putStrLn "Oops! Haskell thread has continued even though RTS was paused."
\ No newline at end of file



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f8b8f2a147e7df9a57cdcbd61c9ae1f7a9c0eb0e...b7e42c0a0181a5bb8e57fcef2547a7b045821e54

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f8b8f2a147e7df9a57cdcbd61c9ae1f7a9c0eb0e...b7e42c0a0181a5bb8e57fcef2547a7b045821e54
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200917/b8b1bab4/attachment-0001.html>


More information about the ghc-commits mailing list