[commit: ghc] master: fix EBADF unqueueing in select backend (Trac #10590) (5857e0a)

git at git.haskell.org git at git.haskell.org
Tue Jul 7 17:17:40 UTC 2015


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/5857e0afb5823987e84e6d3dd8d0b269b7546166/ghc

>---------------------------------------------------------------

commit 5857e0afb5823987e84e6d3dd8d0b269b7546166
Author: Sergei Trofimovich <slyfox at gentoo.org>
Date:   Tue Jul 7 17:00:23 2015 +0200

    fix EBADF unqueueing in select backend (Trac #10590)
    
    Alexander found a interesting case:
    1. We have a queue of two waiters in a blocked_queue
    2. first file descriptor changes state to RUNNABLE,
       second changes to INVALID
    3. awaitEvent function dequeued RUNNABLE thread to a
       run queue and attempted to dequeue INVALID descriptor
       to a run queue.
    
    Unqueueing INVALID fails thusly:
            #3  0x000000000045cf1c in barf (s=0x4c1cb0 "removeThreadFromDeQueue: not found")
                                   at rts/RtsMessages.c:42
            #4  0x000000000046848b in removeThreadFromDeQueue (...) at rts/Threads.c:249
            #5  0x000000000049a120 in removeFromQueues (...) at rts/RaiseAsync.c:719
            #6  0x0000000000499502 in throwToSingleThreaded__ (...) at rts/RaiseAsync.c:67
            #7  0x0000000000499555 in throwToSingleThreaded (..) at rts/RaiseAsync.c:75
            #8  0x000000000047c27d in awaitEvent (wait=rtsFalse) at rts/posix/Select.c:415
    
    The problem here is a throwToSingleThreaded function that tries
    to unqueue a TSO from blocked_queue, but awaitEvent function
    leaves blocked_queue in a inconsistent state while traverses
    over blocked_queue:
    
          case RTS_FD_IS_READY:
              IF_DEBUG(scheduler,
                  debugBelch("Waking up blocked thread %lu\n",
                             (unsigned long)tso->id));
              tso->why_blocked = NotBlocked;
              tso->_link = END_TSO_QUEUE;              // Here we break the queue head
              pushOnRunQueue(&MainCapability,tso);
              break;
    
    Signed-off-by: Sergei Trofimovich <siarheit at google.com>
    
    Test Plan: tested on a sample from T10590
    
    Reviewers: austin, bgamari, simonmar
    
    Reviewed By: bgamari, simonmar
    
    Subscribers: qnikst, thomie, bgamari
    
    Differential Revision: https://phabricator.haskell.org/D1024
    
    GHC Trac Issues: #10590, #4934


>---------------------------------------------------------------

5857e0afb5823987e84e6d3dd8d0b269b7546166
 rts/RaiseAsync.c              |  8 +-------
 rts/RaiseAsync.h              |  6 ++++++
 rts/posix/Select.c            | 10 ++++++++--
 testsuite/tests/rts/T10590.hs | 37 +++++++++++++++++++++++++++++++++++++
 testsuite/tests/rts/all.T     |  5 +++++
 5 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index 3b206ff..267707c 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -23,12 +23,6 @@
 #include "win32/IOManager.h"
 #endif
 
-static StgTSO* raiseAsync (Capability *cap,
-                           StgTSO *tso,
-                           StgClosure *exception,
-                           rtsBool stop_at_atomically,
-                           StgUpdateFrame *stop_here);
-
 static void removeFromQueues(Capability *cap, StgTSO *tso);
 
 static void removeFromMVarBlockedQueue (StgTSO *tso);
@@ -777,7 +771,7 @@ removeFromQueues(Capability *cap, StgTSO *tso)
  *
  * -------------------------------------------------------------------------- */
 
-static StgTSO *
+StgTSO *
 raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
            rtsBool stop_at_atomically, StgUpdateFrame *stop_here)
 {
diff --git a/rts/RaiseAsync.h b/rts/RaiseAsync.h
index 6bfed8d..1f939d4 100644
--- a/rts/RaiseAsync.h
+++ b/rts/RaiseAsync.h
@@ -19,6 +19,12 @@
 void blockedThrowTo (Capability *cap,
                      StgTSO *target, MessageThrowTo *msg);
 
+StgTSO* raiseAsync (Capability *cap,
+                    StgTSO *tso,
+                    StgClosure *exception,
+                    rtsBool stop_at_atomically,
+                    StgUpdateFrame *stop_here);
+
 void throwToSingleThreaded (Capability *cap,
                             StgTSO *tso,
                             StgClosure *exception);
diff --git a/rts/posix/Select.c b/rts/posix/Select.c
index 4b19235..d5c9b55 100644
--- a/rts/posix/Select.c
+++ b/rts/posix/Select.c
@@ -375,6 +375,12 @@ awaitEvent(rtsBool wait)
 
       prev = NULL;
       {
+          /*
+           * The queue is being rebuilt in this loop:
+           * 'blocked_queue_hd' will contain already
+           * traversed blocked TSOs. As a result you
+           * can't use functions accessing 'blocked_queue_hd'.
+           */
           for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) {
               next = tso->_link;
               int fd;
@@ -412,8 +418,8 @@ awaitEvent(rtsBool wait)
                   IF_DEBUG(scheduler,
                       debugBelch("Killing blocked thread %lu on bad fd=%i\n",
                                  (unsigned long)tso->id, fd));
-                  throwToSingleThreaded(&MainCapability, tso,
-                                        (StgClosure *)blockedOnBadFD_closure);
+                  raiseAsync(&MainCapability, tso,
+                      (StgClosure *)blockedOnBadFD_closure, rtsFalse, NULL);
                   break;
               case RTS_FD_IS_READY:
                   IF_DEBUG(scheduler,
diff --git a/testsuite/tests/rts/T10590.hs b/testsuite/tests/rts/T10590.hs
new file mode 100644
index 0000000..24198ab
--- /dev/null
+++ b/testsuite/tests/rts/T10590.hs
@@ -0,0 +1,37 @@
+import Foreign.C
+import Foreign.Marshal.Array
+import Foreign.Storable
+import Control.Concurrent
+
+-- The test works only on UNIX like.
+-- unportable bits:
+import qualified System.Posix.Internals as SPI
+import qualified System.Posix.Types as SPT
+
+pipe :: IO (CInt, CInt)
+pipe = allocaArray 2 $ \fds -> do
+    throwErrnoIfMinus1_ "pipe" $ SPI.c_pipe fds
+    rd <- peekElemOff fds 0
+    wr <- peekElemOff fds 1
+    return (rd, wr)
+
+main :: IO ()
+main = do
+    (r1, w1)  <- pipe
+    (r2, _w2) <- pipe
+    _ <- forkIO $ do -- thread A
+                     threadWaitRead (SPT.Fd r1)
+    _ <- forkIO $ do -- thread B
+                     threadWaitRead (SPT.Fd r2)
+    yield -- switch to A, then B
+          -- now both are blocked
+    _ <- SPI.c_close w1 -- unblocking thread A fd
+    _ <- SPI.c_close r2 -- breaking   thread B fd
+    yield -- kick RTS IO manager
+
+{-
+ Trac #10590 exposed a bug as:
+   T10590: internal error: removeThreadFromDeQueue: not found
+    (GHC version 7.11.20150702 for x86_64_unknown_linux)
+    Please report this as a GHC bug:  http://www.haskell.org/ghc/reportabug
+ -}
diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T
index 8f6137f..0e89249 100644
--- a/testsuite/tests/rts/all.T
+++ b/testsuite/tests/rts/all.T
@@ -318,3 +318,8 @@ test('T9839_05', [ only_ways(prof_ways), ignore_output, exit_code(1), extra_run_
 test('T9839_06', [ only_ways(prof_ways), ignore_output, exit_code(1), extra_run_opts('+RTS -xtx')],
                 compile_and_run,
                 [''])
+
+# ignore_output as RTS reports slightly different error messages
+# in 'epoll' and 'select' backends on reading from EBADF
+# mingw32 skip as UNIX pipe and close(fd) is used to exercise the problem
+test('T10590', [ignore_output, when(opsys('mingw32'),skip)], compile_and_run, [''])



More information about the ghc-commits mailing list