[commit: ghc] master: event manager: Don't worry if attempt to wake dead manager fails (d5cd505)

git at git.haskell.org git at git.haskell.org
Tue Jan 17 21:37:44 UTC 2017


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/d5cd505bc484edee3dbd5d41fb7a27c2e18d528d/ghc

>---------------------------------------------------------------

commit d5cd505bc484edee3dbd5d41fb7a27c2e18d528d
Author: Ben Gamari <bgamari.foss at gmail.com>
Date:   Tue Jan 17 15:52:37 2017 -0500

    event manager: Don't worry if attempt to wake dead manager fails
    
    This fixes #12038, where the TimerManager would attempt to wake up a
    manager that was already dead, resulting in setnumcapabilities001
    occassionally failing during shutdown with unexpected output on stderr.
    
    I'm frankly still not entirely confident in this solution but perhaps it
    will help to get a few more eyes on this.
    
    My hypothesis is that the TimerManager is racing:
    
      thread                   TimerManager worker
      -------                  --------------------
      requests that thread
      manager shuts down
    
                               begins to clean up,
                               closing eventfd
    
      calls wakeManager,
      which tries to write
      to closed eventfd
    
    To prevent this `wakeManager` will need to synchronize with the
    TimerManger worker to ensure that the worker doesn't clean up the
    `Control` while another thread is trying to send a wakeup. However, this
    would add a bit of overhead on every timer interaction, which feels
    rather costly for what is really a problem only at shutdown.  Moreover,
    it seems that the event manager (e.g.  `GHC.Event.Manager`) is also
    afflicted by a similar race.
    
    This patch instead simply tries to catch the write failure after it has
    happened and silence it in the case that the fd has vanished. It feels
    rather hacky but it seems to work.
    
    Test Plan: Run `setnumcapabilities001` repeatedly
    
    Reviewers: hvr, austin, simonmar
    
    Subscribers: thomie
    
    Differential Revision: https://phabricator.haskell.org/D2957
    
    GHC Trac Issues: #12038


>---------------------------------------------------------------

d5cd505bc484edee3dbd5d41fb7a27c2e18d528d
 libraries/base/GHC/Event/Control.hs | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/libraries/base/GHC/Event/Control.hs b/libraries/base/GHC/Event/Control.hs
index 0b0f558..9e3940a 100644
--- a/libraries/base/GHC/Event/Control.hs
+++ b/libraries/base/GHC/Event/Control.hs
@@ -30,11 +30,12 @@ module GHC.Event.Control
 
 import Foreign.ForeignPtr (ForeignPtr)
 import GHC.Base
+import GHC.IORef
 import GHC.Conc.Signal (Signal)
 import GHC.Real (fromIntegral)
 import GHC.Show (Show)
 import GHC.Word (Word8)
-import Foreign.C.Error (throwErrnoIfMinus1_)
+import Foreign.C.Error (throwErrnoIfMinus1_, throwErrno, getErrno)
 import Foreign.C.Types (CInt(..), CSize(..))
 import Foreign.ForeignPtr (mallocForeignPtrBytes, withForeignPtr)
 import Foreign.Marshal (alloca, allocaBytes)
@@ -46,10 +47,10 @@ import System.Posix.Internals (c_close, c_pipe, c_read, c_write,
 import System.Posix.Types (Fd)
 
 #if defined(HAVE_EVENTFD)
-import Foreign.C.Error (throwErrnoIfMinus1)
+import Foreign.C.Error (throwErrnoIfMinus1, eBADF)
 import Foreign.C.Types (CULLong(..))
 #else
-import Foreign.C.Error (eAGAIN, eWOULDBLOCK, getErrno, throwErrno)
+import Foreign.C.Error (eAGAIN, eWOULDBLOCK)
 #endif
 
 data ControlMessage = CMsgWakeup
@@ -69,7 +70,9 @@ data Control = W {
     , wakeupWriteFd  :: {-# UNPACK #-} !Fd
 #endif
     , didRegisterWakeupFd :: !Bool
-    } deriving (Show)
+      -- | Have this Control's fds been cleaned up?
+    , controlIsDead  :: !(IORef Bool)
+    }
 
 #if defined(HAVE_EVENTFD)
 wakeupReadFd :: Control -> Fd
@@ -101,6 +104,7 @@ newControl shouldRegister = allocaArray 2 $ \fds -> do
   (wake_rd, wake_wr) <- createPipe
   when shouldRegister $ c_setIOManagerWakeupFd wake_wr
 #endif
+  isDead <- newIORef False
   return W { controlReadFd  = fromIntegral ctrl_rd
            , controlWriteFd = fromIntegral ctrl_wr
 #if defined(HAVE_EVENTFD)
@@ -110,6 +114,7 @@ newControl shouldRegister = allocaArray 2 $ \fds -> do
            , wakeupWriteFd  = fromIntegral wake_wr
 #endif
            , didRegisterWakeupFd = shouldRegister
+           , controlIsDead  = isDead
            }
 
 -- | Close the control structure used by the IO manager thread.
@@ -119,6 +124,7 @@ newControl shouldRegister = allocaArray 2 $ \fds -> do
 -- file after it has been closed.
 closeControl :: Control -> IO ()
 closeControl w = do
+  atomicModifyIORef (controlIsDead w) (\_ -> (True, ()))
   _ <- c_close . fromIntegral . controlReadFd $ w
   _ <- c_close . fromIntegral . controlWriteFd $ w
   when (didRegisterWakeupFd w) $ c_setIOManagerWakeupFd (-1)
@@ -172,9 +178,21 @@ readControlMessage ctrl fd
 
 sendWakeup :: Control -> IO ()
 #if defined(HAVE_EVENTFD)
-sendWakeup c =
-  throwErrnoIfMinus1_ "sendWakeup" $
-  c_eventfd_write (fromIntegral (controlEventFd c)) 1
+sendWakeup c = do
+  n <- c_eventfd_write (fromIntegral (controlEventFd c)) 1
+  case n of
+    0     -> return ()
+    _     -> do errno <- getErrno
+                -- Check that Control is still alive if we failed, since it's
+                -- possible that someone cleaned up the fds behind our backs and
+                -- consequently eventfd_write failed with EBADF. If it is dead
+                -- then just swallow the error since we are shutting down
+                -- anyways. Otherwise we will see failures during shutdown from
+                -- setnumcapabilities001 (#12038)
+                isDead <- readIORef (controlIsDead c)
+                if isDead && errno == eBADF
+                  then return ()
+                  else throwErrno "sendWakeup"
 #else
 sendWakeup c = do
   n <- sendMessage (wakeupWriteFd c) CMsgWakeup



More information about the ghc-commits mailing list