[Git][ghc/ghc][wip/marge_bot_batch_merge_job] Avoid serializing BCOs with the internal interpreter

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Tue Sep 12 21:30:59 UTC 2023



Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC


Commits:
6f969e06 by Krzysztof Gogolewski at 2023-09-12T17:30:54-04:00
Avoid serializing BCOs with the internal interpreter

Refs #23919

- - - - -


5 changed files:

- compiler/GHC/Runtime/Interpreter.hs
- compiler/GHC/Utils/Misc.hs
- libraries/ghci/GHCi/Message.hs
- libraries/ghci/GHCi/Run.hs
- libraries/ghci/GHCi/TH.hs


Changes:

=====================================
compiler/GHC/Runtime/Interpreter.hs
=====================================
@@ -93,7 +93,6 @@ import GHC.Utils.Panic
 import GHC.Utils.Exception as Ex
 import GHC.Utils.Outputable(brackets, ppr, showSDocUnsafe)
 import GHC.Utils.Fingerprint
-import GHC.Utils.Misc
 
 import GHC.Unit.Module
 import GHC.Unit.Module.ModIface
@@ -110,9 +109,7 @@ import Control.Monad
 import Control.Monad.IO.Class
 import Control.Monad.Catch as MC (mask)
 import Data.Binary
-import Data.Binary.Put
 import Data.ByteString (ByteString)
-import qualified Data.ByteString.Lazy as LB
 import Data.Array ((!))
 import Data.IORef
 import Foreign hiding (void)
@@ -120,7 +117,6 @@ import qualified GHC.Exts.Heap as Heap
 import GHC.Stack.CCS (CostCentre,CostCentreStack)
 import System.Directory
 import System.Process
-import GHC.Conc (pseq, par)
 
 {- Note [Remote GHCi]
    ~~~~~~~~~~~~~~~~~~
@@ -353,19 +349,7 @@ mkCostCentres interp mod ccs =
 -- | Create a set of BCOs that may be mutually recursive.
 createBCOs :: Interp -> [ResolvedBCO] -> IO [HValueRef]
 createBCOs interp rbcos = do
-  -- Serializing ResolvedBCO is expensive, so we do it in parallel
-  interpCmd interp (CreateBCOs puts)
- where
-  puts = parMap doChunk (chunkList 100 rbcos)
-
-  -- make sure we force the whole lazy ByteString
-  doChunk c = pseq (LB.length bs) bs
-    where bs = runPut (put c)
-
-  -- We don't have the parallel package, so roll our own simple parMap
-  parMap _ [] = []
-  parMap f (x:xs) = fx `par` (fxs `pseq` (fx : fxs))
-    where fx = f x; fxs = parMap f xs
+  interpCmd interp (CreateBCOs rbcos)
 
 addSptEntry :: Interp -> Fingerprint -> ForeignHValue -> IO ()
 addSptEntry interp fpr ref =


=====================================
compiler/GHC/Utils/Misc.hs
=====================================
@@ -37,8 +37,6 @@ module GHC.Utils.Misc (
         isSingleton, only, expectOnly, GHC.Utils.Misc.singleton,
         notNull, expectNonEmpty, snocView,
 
-        chunkList,
-
         holes,
 
         changeLast,
@@ -494,11 +492,6 @@ expectOnly _   (a:_) = a
 #endif
 expectOnly msg _     = panic ("expectOnly: " ++ msg)
 
--- | Split a list into chunks of /n/ elements
-chunkList :: Int -> [a] -> [[a]]
-chunkList _ [] = []
-chunkList n xs = as : chunkList n bs where (as,bs) = splitAt n xs
-
 -- | Compute all the ways of removing a single element from a list.
 --
 --  > holes [1,2,3] = [(1, [2,3]), (2, [1,3]), (3, [1,2])]


=====================================
libraries/ghci/GHCi/Message.hs
=====================================
@@ -30,11 +30,13 @@ import GHCi.RemoteTypes
 import GHCi.FFI
 import GHCi.TH.Binary () -- For Binary instances
 import GHCi.BreakArray
+import GHCi.ResolvedBCO
 
 import GHC.LanguageExtensions
 import qualified GHC.Exts.Heap as Heap
 import GHC.ForeignSrcLang
 import GHC.Fingerprint
+import GHC.Conc (pseq, par)
 import Control.Concurrent
 import Control.Exception
 import Data.Binary
@@ -84,10 +86,10 @@ data Message a where
   -- Interpreter -------------------------------------------
 
   -- | Create a set of BCO objects, and return HValueRefs to them
-  -- Note: Each ByteString contains a Binary-encoded [ResolvedBCO], not
-  -- a ResolvedBCO. The list is to allow us to serialise the ResolvedBCOs
-  -- in parallel. See @createBCOs@ in compiler/GHC/Runtime/Interpreter.hs.
-  CreateBCOs :: [LB.ByteString] -> Message [HValueRef]
+  -- See @createBCOs@ in compiler/GHC/Runtime/Interpreter.hs.
+  -- NB: this has a custom Binary behavior,
+  -- see Note [Parallelize CreateBCOs serialization]
+  CreateBCOs :: [ResolvedBCO] -> Message [HValueRef]
 
   -- | Release 'HValueRef's
   FreeHValueRefs :: [HValueRef] -> Message ()
@@ -513,7 +515,8 @@ getMessage = do
       9  -> Msg <$> RemoveLibrarySearchPath <$> get
       10 -> Msg <$> return ResolveObjs
       11 -> Msg <$> FindSystemLibrary <$> get
-      12 -> Msg <$> CreateBCOs <$> get
+      12 -> Msg <$> (CreateBCOs . concatMap (runGet get)) <$> (get :: Get [LB.ByteString])
+                    -- See Note [Parallelize CreateBCOs serialization]
       13 -> Msg <$> FreeHValueRefs <$> get
       14 -> Msg <$> MallocData <$> get
       15 -> Msg <$> MallocStrings <$> get
@@ -557,7 +560,8 @@ putMessage m = case m of
   RemoveLibrarySearchPath ptr -> putWord8 9  >> put ptr
   ResolveObjs                 -> putWord8 10
   FindSystemLibrary str       -> putWord8 11 >> put str
-  CreateBCOs bco              -> putWord8 12 >> put bco
+  CreateBCOs bco              -> putWord8 12 >> put (serializeBCOs bco)
+                              -- See Note [Parallelize CreateBCOs serialization]
   FreeHValueRefs val          -> putWord8 13 >> put val
   MallocData bs               -> putWord8 14 >> put bs
   MallocStrings bss           -> putWord8 15 >> put bss
@@ -586,6 +590,34 @@ putMessage m = case m of
   ResumeSeq a                 -> putWord8 38 >> put a
   NewBreakModule name          -> putWord8 39 >> put name
 
+{-
+Note [Parallelize CreateBCOs serialization]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Serializing ResolvedBCO is expensive, so we do it in parallel.
+We split the list [ResolvedBCO] into chunks of length <= 100,
+and serialize every chunk in parallel, getting a [LB.ByteString]
+where every bytestring corresponds to a single chunk (multiple ResolvedBCOs).
+
+Previously, we stored [LB.ByteString] in the Message object, but that
+incurs unneccessary serialization with the internal interpreter (#23919).
+-}
+
+serializeBCOs :: [ResolvedBCO] -> [LB.ByteString]
+serializeBCOs rbcos = parMap doChunk (chunkList 100 rbcos)
+ where
+  -- make sure we force the whole lazy ByteString
+  doChunk c = pseq (LB.length bs) bs
+    where bs = runPut (put c)
+
+  -- We don't have the parallel package, so roll our own simple parMap
+  parMap _ [] = []
+  parMap f (x:xs) = fx `par` (fxs `pseq` (fx : fxs))
+    where fx = f x; fxs = parMap f xs
+
+  chunkList :: Int -> [a] -> [[a]]
+  chunkList _ [] = []
+  chunkList n xs = as : chunkList n bs where (as,bs) = splitAt n xs
+
 -- -----------------------------------------------------------------------------
 -- Reading/writing messages
 


=====================================
libraries/ghci/GHCi/Run.hs
=====================================
@@ -17,8 +17,6 @@ import Prelude -- See note [Why do we import Prelude here?]
 #if !defined(javascript_HOST_ARCH)
 import GHCi.CreateBCO
 import GHCi.InfoTable
-import Data.Binary
-import Data.Binary.Get
 #endif
 
 import GHCi.FFI
@@ -78,7 +76,7 @@ run m = case m of
     toRemotePtr <$> mkConInfoTable tc ptrs nptrs tag ptrtag desc
   ResolveObjs -> resolveObjs
   FindSystemLibrary str -> findSystemLibrary str
-  CreateBCOs bcos -> createBCOs (concatMap (runGet get) bcos)
+  CreateBCOs bcos -> createBCOs bcos
   LookupClosure str -> lookupClosure str
 #endif
   RtsRevertCAFs -> rts_revertCAFs


=====================================
libraries/ghci/GHCi/TH.hs
=====================================
@@ -38,7 +38,7 @@ For each splice
 1. GHC compiles a splice to byte code, and sends it to the server: in
    a CreateBCOs message:
 
-   CreateBCOs :: [LB.ByteString] -> Message [HValueRef]
+   CreateBCOs :: [ResolvedBCOs] -> Message [HValueRef]
 
 2. The server creates the real byte-code objects in its heap, and
    returns HValueRefs to GHC.  HValueRef is the same as RemoteRef



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6f969e06823befd50e7cb7c06123a180dc0e4a73

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6f969e06823befd50e7cb7c06123a180dc0e4a73
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230912/547624f9/attachment-0001.html>


More information about the ghc-commits mailing list