[Git][ghc/ghc][wip/fendor/ghc-iface-sharing] Improve sharing of duplicated values in `ModIface`

Hannes Siebenhandl (@fendor) gitlab at gitlab.haskell.org
Thu May 2 08:11:10 UTC 2024



Hannes Siebenhandl pushed to branch wip/fendor/ghc-iface-sharing at Glasgow Haskell Compiler / GHC


Commits:
4c11762e by Fendor at 2024-05-02T10:10:40+02:00
Improve sharing of duplicated values in `ModIface`

As a `ModIface` contains often duplicated values that are not
necessarily shared, we improve sharing by serialising the `ModIface`
to an in-memory byte array. Serialisation uses deduplication tables, and
deserialisation implicitly shares duplicated values.

This helps reducing the peak memory usage while compiling in
`--make` mode. The peak memory usage is especially reduced when
generating interface files with core expressions
(`-fwrite-if-simplified-core`).

On agda, this reduces the peak memory usage:

* `2.2 GB` to `1.9 GB` for a ghci session.

On `lib:Cabal`, we report:

* `570 MB` to `500 MB` for a ghci session
* `790 MB` to `667 MB` for compiling `lib:Cabal` with ghc

The execution time is not affected.

- - - - -


4 changed files:

- compiler/GHC/Driver/Main.hs
- compiler/GHC/Iface/Binary.hs
- compiler/GHC/Iface/Make.hs
- compiler/GHC/Utils/Binary.hs


Changes:

=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -966,10 +966,11 @@ loadByteCode iface mod_sum = do
 --------------------------------------------------------------
 
 
+
 -- Knot tying!  See Note [Knot-tying typecheckIface]
 -- See Note [ModDetails and --make mode]
 initModDetails :: HscEnv -> ModIface -> IO ModDetails
-initModDetails hsc_env iface =
+initModDetails hsc_env iface = do
   fixIO $ \details' -> do
     let act hpt  = addToHpt hpt (moduleName $ mi_module iface)
                                 (HomeModInfo iface details' emptyHomeModInfoLinkable)


=====================================
compiler/GHC/Iface/Binary.hs
=====================================
@@ -18,6 +18,8 @@ module GHC.Iface.Binary (
         getSymtabName,
         CheckHiWay(..),
         TraceBinIFace(..),
+        getIfaceWithExtFields,
+        putIfaceWithExtFields,
         getWithUserData,
         putWithUserData,
 
@@ -156,18 +158,25 @@ readBinIface
 readBinIface profile name_cache checkHiWay traceBinIface hi_path = do
     (src_hash, bh) <- readBinIfaceHeader profile name_cache checkHiWay traceBinIface hi_path
 
-    extFields_p <- get bh
-
-    mod_iface <- getWithUserData name_cache bh
-
-    seekBinReader bh extFields_p
-    extFields <- get bh
+    mod_iface <- getIfaceWithExtFields name_cache bh
 
     return mod_iface
-      { mi_ext_fields = extFields
-      , mi_src_hash = src_hash
+      { mi_src_hash = src_hash
       }
 
+getIfaceWithExtFields :: NameCache -> ReadBinHandle -> IO ModIface
+getIfaceWithExtFields name_cache bh = do
+  extFields_p <- get bh
+
+  mod_iface <- getWithUserData name_cache bh
+
+  seekBinReader bh extFields_p
+  extFields <- get bh
+  pure mod_iface
+    { mi_ext_fields = extFields
+    }
+
+
 -- | This performs a get action after reading the dictionary and symbol
 -- table. It is necessary to run this before trying to deserialise any
 -- Names or FastStrings.
@@ -227,19 +236,17 @@ writeBinIface profile traceBinIface compressionLevel hi_path mod_iface = do
     put_  bh tag
     put_  bh (mi_src_hash mod_iface)
 
-    extFields_p_p <- tellBinWriter bh
-    put_ bh extFields_p_p
-
-    putWithUserData traceBinIface compressionLevel bh mod_iface
-
-    extFields_p <- tellBinWriter bh
-    putAt bh extFields_p_p extFields_p
-    seekBinWriter bh extFields_p
-    put_ bh (mi_ext_fields mod_iface)
+    putIfaceWithExtFields traceBinIface compressionLevel bh mod_iface
 
     -- And send the result to the file
     writeBinMem bh hi_path
 
+-- | Puts the 'ModIface'
+putIfaceWithExtFields :: TraceBinIFace -> CompressionIFace -> WriteBinHandle -> ModIface -> IO ()
+putIfaceWithExtFields traceBinIface compressionLevel bh mod_iface =
+  forwardPut_ bh (\_ -> put_ bh (mi_ext_fields mod_iface)) $ do
+    putWithUserData traceBinIface compressionLevel bh mod_iface
+
 -- | Put a piece of data with an initialised `UserData` field. This
 -- is necessary if you want to serialise Names or FastStrings.
 -- It also writes a symbol table and the dictionary.


=====================================
compiler/GHC/Iface/Make.hs
=====================================
@@ -69,10 +69,13 @@ import GHC.Types.HpcInfo
 import GHC.Types.CompleteMatch
 import GHC.Types.SourceText
 import GHC.Types.SrcLoc ( unLoc )
+import GHC.Types.Name.Cache
 
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Utils.Logger
+import GHC.Utils.Binary
+import GHC.Iface.Binary
 
 import GHC.Data.FastString
 import GHC.Data.Maybe
@@ -147,8 +150,25 @@ mkFullIface hsc_env partial_iface mb_stg_infos mb_cmm_infos = do
     let unit_state = hsc_units hsc_env
     putDumpFileMaybe (hsc_logger hsc_env) Opt_D_dump_hi "FINAL INTERFACE" FormatText
       (pprModIface unit_state full_iface)
+    final_iface <- shareIface (hsc_NC hsc_env) (flagsToIfCompression $ hsc_dflags hsc_env) full_iface
+    return final_iface
+
+shareIface :: NameCache -> CompressionIFace -> ModIface -> IO ModIface
+shareIface _ NormalCompression  mi = pure mi
+shareIface nc compressionLevel  mi = do
+  bh <- openBinMem (1024 * 1024)
+  start <- tellBinWriter bh
+  putIfaceWithExtFields QuietBinIFace compressionLevel bh mi
+  rbh <- shrinkBinBuffer bh
+  seekBinReader rbh start
+  res <- getIfaceWithExtFields nc rbh
+  let resiface = res
+        { mi_src_hash = mi_src_hash mi
+        , mi_globals = mi_globals mi
+        }
+  forceModIface resiface
+  return resiface
 
-    return full_iface
 
 updateDecl :: [IfaceDecl] -> Maybe StgCgInfos -> Maybe CmmCgInfos -> [IfaceDecl]
 updateDecl decls Nothing Nothing = decls


=====================================
compiler/GHC/Utils/Binary.hs
=====================================
@@ -36,6 +36,9 @@ module GHC.Utils.Binary
    tellBinWriter,
    castBin,
    withBinBuffer,
+   freezeWriteHandle,
+   shrinkBinBuffer,
+   thawReadHandle,
 
    foldGet, foldGet',
 
@@ -123,7 +126,7 @@ import Foreign hiding (shiftL, shiftR, void)
 import Data.Array
 import Data.Array.IO
 import Data.Array.Unsafe
-import Data.ByteString (ByteString)
+import Data.ByteString (ByteString, copy)
 import Data.Coerce
 import qualified Data.ByteString.Internal as BS
 import qualified Data.ByteString.Unsafe   as BS
@@ -327,6 +330,44 @@ openBinMem size
     , wbm_arr_r = arr_r
     }
 
+-- | Freeze the given 'WriteBinHandle' and turn it into an equivalent 'ReadBinHandle'.
+--
+-- The current offset of the 'WriteBinHandle' is maintained in the new 'ReadBinHandle'.
+freezeWriteHandle :: WriteBinHandle -> IO ReadBinHandle
+freezeWriteHandle wbm = do
+  rbm_off_r <- newFastMutInt =<< readFastMutInt (wbm_off_r wbm)
+  rbm_sz_r <- readFastMutInt (wbm_sz_r wbm)
+  rbm_arr_r <- readIORef (wbm_arr_r wbm)
+  pure $ ReadBinMem
+    { rbm_userData = noReaderUserData
+    , rbm_off_r = rbm_off_r
+    , rbm_sz_r = rbm_sz_r
+    , rbm_arr_r = rbm_arr_r
+    }
+
+-- Copy the BinBuffer to a new BinBuffer which is exactly the right size.
+-- This performs a copy of the underlying buffer.
+-- The buffer may be truncated if the offset is not at the end of the written
+-- output.
+--
+-- UserData is also discarded during the copy
+-- You should just use this when translating a Put handle into a Get handle.
+shrinkBinBuffer :: WriteBinHandle -> IO ReadBinHandle
+shrinkBinBuffer bh = withBinBuffer bh $ \bs -> do
+  unsafeUnpackBinBuffer (copy bs)
+
+thawReadHandle :: ReadBinHandle -> IO WriteBinHandle
+thawReadHandle rbm = do
+  wbm_off_r <- newFastMutInt =<< readFastMutInt (rbm_off_r rbm)
+  wbm_sz_r <- newFastMutInt (rbm_sz_r rbm)
+  wbm_arr_r <- newIORef (rbm_arr_r rbm)
+  pure $ WriteBinMem
+    { wbm_userData = noWriterUserData
+    , wbm_off_r = wbm_off_r
+    , wbm_sz_r = wbm_sz_r
+    , wbm_arr_r = wbm_arr_r
+    }
+
 tellBinWriter :: WriteBinHandle -> IO (Bin a)
 tellBinWriter (WriteBinMem _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix)
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4c11762e1cf44673a5da5fda5653daa402e7a6cb

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4c11762e1cf44673a5da5fda5653daa402e7a6cb
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240502/c0a00092/attachment-0001.html>


More information about the ghc-commits mailing list