[Git][ghc/ghc][wip/temp-files2] Use deterministic names for temporary files

Matthew Pickering (@mpickering) gitlab at gitlab.haskell.org
Tue Sep 3 14:40:43 UTC 2024



Matthew Pickering pushed to branch wip/temp-files2 at Glasgow Haskell Compiler / GHC


Commits:
1c39a2d1 by Matthew Pickering at 2024-09-03T15:40:12+01:00
Use deterministic names for temporary files

When there are multiple threads they can race to create a temporary
file, in some situations the thread will create ghc_1.c and in some it
will create ghc_2.c. This filename ends up in the debug info for object
files after compiling a C file, therefore contributes to object
nondeterminism.

In order to fix this we store a prefix in `TmpFs` which serves to
namespace temporary files. The prefix is populated from the counter in
TmpFs when the TmpFs is forked. Therefore the TmpFs must be forked
outside the thread which consumes it, in a deterministic order, so each
thread always receives a TmpFs with the same prefix.

This assumes that after the initial TmpFs is created, all other TmpFs
are created from forking the original TmpFs. Which should have been try
anyway as otherwise there would be file collisions and non-determinism.

Fixes #25224

- - - - -


3 changed files:

- compiler/GHC/Driver/Make.hs
- compiler/GHC/Utils/TmpFs.hs
- testsuite/tests/ghc-e/should_fail/T9930fail.stderr


Changes:

=====================================
compiler/GHC/Driver/Make.hs
=====================================
@@ -2924,19 +2924,22 @@ runParPipelines worker_limit plugin_hsc_env diag_wrapper mHscMessager all_pipeli
     atomically $ writeTVar stopped_var True
     wait_log_thread
 
-withLocalTmpFS :: RunMakeM a -> RunMakeM a
-withLocalTmpFS act = do
+withLocalTmpFS :: TmpFs -> (TmpFs -> IO a) -> IO a
+withLocalTmpFS tmpfs act = do
   let initialiser = do
-        MakeEnv{..} <- ask
-        lcl_tmpfs <- liftIO $ forkTmpFsFrom (hsc_tmpfs hsc_env)
-        return $ hsc_env { hsc_tmpfs  = lcl_tmpfs }
-      finaliser lcl_env = do
-        gbl_env <- ask
-        liftIO $ mergeTmpFsInto (hsc_tmpfs lcl_env) (hsc_tmpfs (hsc_env gbl_env))
+        liftIO $ forkTmpFsFrom tmpfs
+      finaliser tmpfs_local = do
+        liftIO $ mergeTmpFsInto tmpfs_local tmpfs
        -- Add remaining files which weren't cleaned up into local tmp fs for
        -- clean-up later.
        -- Clear the logQueue if this node had it's own log queue
-  MC.bracket initialiser finaliser $ \lcl_hsc_env -> local (\env -> env { hsc_env = lcl_hsc_env}) act
+  MC.bracket initialiser finaliser act
+
+withLocalTmpFSMake :: MakeEnv -> (MakeEnv -> IO a) -> IO a
+withLocalTmpFSMake env k =
+  withLocalTmpFS (hsc_tmpfs (hsc_env env)) $ \lcl_tmpfs
+    -> k (env { hsc_env = (hsc_env env) { hsc_tmpfs = lcl_tmpfs }})
+
 
 -- | Run the given actions and then wait for them all to finish.
 runAllPipelines :: WorkerLimit -> MakeEnv -> [MakeAction] -> IO ()
@@ -2958,16 +2961,18 @@ runAllPipelines worker_limit env acts = do
 runLoop :: (((forall a. IO a -> IO a) -> IO ()) -> IO a) -> MakeEnv -> [MakeAction] -> IO [a]
 runLoop _ _env [] = return []
 runLoop fork_thread env (MakeAction act res_var :acts) = do
-  new_thread <-
+
+  -- withLocalTmpFs has to occur outside of fork to remain deterministic
+  new_thread <- withLocalTmpFSMake env $ \lcl_env ->
     fork_thread $ \unmask -> (do
-            mres <- (unmask $ run_pipeline (withLocalTmpFS act))
+            mres <- (unmask $ run_pipeline lcl_env act)
                       `MC.onException` (putMVar res_var Nothing) -- Defensive: If there's an unhandled exception then still signal the failure.
             putMVar res_var mres)
   threads <- runLoop fork_thread env acts
   return (new_thread : threads)
   where
-      run_pipeline :: RunMakeM a -> IO (Maybe a)
-      run_pipeline p = runMaybeT (runReaderT p env)
+      run_pipeline :: MakeEnv -> RunMakeM a -> IO (Maybe a)
+      run_pipeline env p = runMaybeT (runReaderT p env)
 
 data MakeAction = forall a . MakeAction !(RunMakeM a) !(MVar (Maybe a))
 


=====================================
compiler/GHC/Utils/TmpFs.hs
=====================================
@@ -64,6 +64,8 @@ data TmpFs = TmpFs
       --
       -- Shared with forked TmpFs.
 
+  , tmp_dir_prefix :: String
+
   , tmp_files_to_clean :: IORef PathsToClean
       -- ^ Files to clean (per session or per module)
       --
@@ -121,6 +123,7 @@ initTmpFs = do
         , tmp_subdirs_to_clean = subdirs
         , tmp_dirs_to_clean    = dirs
         , tmp_next_suffix      = next
+        , tmp_dir_prefix       = "tmp"
         }
 
 -- | Initialise an empty TmpFs sharing unique numbers and per-process temporary
@@ -132,11 +135,16 @@ forkTmpFsFrom :: TmpFs -> IO TmpFs
 forkTmpFsFrom old = do
     files <- newIORef emptyPathsToClean
     subdirs <- newIORef emptyPathsToClean
+    counter <- newIORef 0
+    prefix  <- newTempSuffix old
+
+
     return $ TmpFs
         { tmp_files_to_clean   = files
         , tmp_subdirs_to_clean = subdirs
         , tmp_dirs_to_clean    = tmp_dirs_to_clean old
-        , tmp_next_suffix      = tmp_next_suffix old
+        , tmp_next_suffix      = counter
+        , tmp_dir_prefix       = prefix
         }
 
 -- | Merge the first TmpFs into the second.
@@ -259,9 +267,11 @@ changeTempFilesLifetime tmpfs lifetime files = do
   addFilesToClean tmpfs lifetime existing_files
 
 -- Return a unique numeric temp file suffix
-newTempSuffix :: TmpFs -> IO Int
-newTempSuffix tmpfs =
-  atomicModifyIORef' (tmp_next_suffix tmpfs) $ \n -> (n+1,n)
+newTempSuffix :: TmpFs -> IO String
+newTempSuffix tmpfs = do
+  n <- atomicModifyIORef' (tmp_next_suffix tmpfs) $ \n -> (n+1,n)
+  return $ tmp_dir_prefix tmpfs ++ "_" ++ show n
+
 
 -- Find a temporary name that doesn't already exist.
 newTempName :: Logger -> TmpFs -> TempDir -> TempFileLifetime -> Suffix -> IO FilePath
@@ -271,8 +281,8 @@ newTempName logger tmpfs tmp_dir lifetime extn
   where
     findTempName :: FilePath -> IO FilePath
     findTempName prefix
-      = do n <- newTempSuffix tmpfs
-           let filename = prefix ++ show n <.> extn
+      = do suffix <- newTempSuffix tmpfs
+           let filename = prefix ++ suffix <.> extn
            b <- doesFileExist filename
            if b then findTempName prefix
                 else do -- clean it up later
@@ -295,8 +305,8 @@ newTempSubDir logger tmpfs tmp_dir
   where
     findTempDir :: FilePath -> IO FilePath
     findTempDir prefix
-      = do n <- newTempSuffix tmpfs
-           let name = prefix ++ show n
+      = do suffix <- newTempSuffix tmpfs
+           let name = prefix ++ suffix
            b <- doesDirectoryExist name
            if b then findTempDir prefix
                 else (do
@@ -314,8 +324,8 @@ newTempLibName logger tmpfs tmp_dir lifetime extn
   where
     findTempName :: FilePath -> String -> IO (FilePath, FilePath, String)
     findTempName dir prefix
-      = do n <- newTempSuffix tmpfs -- See Note [Deterministic base name]
-           let libname = prefix ++ show n
+      = do suffix <- newTempSuffix tmpfs -- See Note [Deterministic base name]
+           let libname = prefix ++ suffix
                filename = dir </> "lib" ++ libname <.> extn
            b <- doesFileExist filename
            if b then findTempName dir prefix
@@ -340,8 +350,8 @@ getTempDir logger tmpfs (TempDir tmp_dir) = do
 
     mkTempDir :: FilePath -> IO FilePath
     mkTempDir prefix = do
-        n <- newTempSuffix tmpfs
-        let our_dir = prefix ++ show n
+        suffix <- newTempSuffix tmpfs
+        let our_dir = prefix ++ suffix
 
         -- 1. Speculatively create our new directory.
         createDirectory our_dir
@@ -376,6 +386,11 @@ the temporary file no longer contains random information (it used to contain
 the process id).
 
 This is ok, as the temporary directory used contains the pid (see getTempDir).
+
+In addition to this, multiple threads can race against each other creating temporary
+files. Therefore we supply a prefix when creating temporary files, when a thread is
+forked, each thread must be given an TmpFs with a unique prefix. This is achieved
+by forkTmpFsFrom creating a fresh prefix from the parent TmpFs.
 -}
 
 manyWithTrace :: Logger -> String -> ([FilePath] -> IO ()) -> [FilePath] -> IO ()


=====================================
testsuite/tests/ghc-e/should_fail/T9930fail.stderr
=====================================
@@ -1,18 +1,12 @@
-ghc: Exception:
+ghc-9.11.20240830: Exception:
 
 default output name would overwrite the input file; must specify -o explicitly
 Usage: For basic information, try the `--help' option.
 
-Package: ghc-inplace
+Package: ghc-9.11-inplace
 Module: GHC.Utils.Panic
 Type: GhcException
 
 HasCallStack backtrace:
-    collectBacktraces, called at libraries/ghc-internal/src/GHC/Internal/Exception.hs:92:13 in ghc-internal:GHC.Internal.Exception
-    toExceptionWithBacktrace, called at libraries/ghc-internal/src/GHC/Internal/IO.hs:260:11 in ghc-internal:GHC.Internal.IO
-    throwIO, called at libraries/exceptions/src/Control/Monad/Catch.hs:371:12 in exceptions-0.10.7-inplace:Control.Monad.Catch
-    throwM, called at libraries/exceptions/src/Control/Monad/Catch.hs:860:84 in exceptions-0.10.7-inplace:Control.Monad.Catch
-    onException, called at compiler/GHC/Driver/Make.hs:2974:23 in ghc-9.9-inplace:GHC.Driver.Make
-
-
+  bracket, called at compiler/GHC/Driver/Make.hs:2936:3 in ghc-9.11-inplace:GHC.Driver.Make
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/1c39a2d14774a242f3bbec752393a215b284c158

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/1c39a2d14774a242f3bbec752393a215b284c158
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240903/1aba3319/attachment-0001.html>


More information about the ghc-commits mailing list