[Git][ghc/ghc][wip/fendor/ghc-iface-sharing] Improve sharing of duplicated values in `ModIface`
Hannes Siebenhandl (@fendor)
gitlab at gitlab.haskell.org
Thu May 2 08:11:10 UTC 2024
Hannes Siebenhandl pushed to branch wip/fendor/ghc-iface-sharing at Glasgow Haskell Compiler / GHC
Commits:
4c11762e by Fendor at 2024-05-02T10:10:40+02:00
Improve sharing of duplicated values in `ModIface`
As a `ModIface` contains often duplicated values that are not
necessarily shared, we improve sharing by serialising the `ModIface`
to an in-memory byte array. Serialisation uses deduplication tables, and
deserialisation implicitly shares duplicated values.
This helps reducing the peak memory usage while compiling in
`--make` mode. The peak memory usage is especially reduced when
generating interface files with core expressions
(`-fwrite-if-simplified-core`).
On agda, this reduces the peak memory usage:
* `2.2 GB` to `1.9 GB` for a ghci session.
On `lib:Cabal`, we report:
* `570 MB` to `500 MB` for a ghci session
* `790 MB` to `667 MB` for compiling `lib:Cabal` with ghc
The execution time is not affected.
- - - - -
4 changed files:
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Iface/Binary.hs
- compiler/GHC/Iface/Make.hs
- compiler/GHC/Utils/Binary.hs
Changes:
=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -966,10 +966,11 @@ loadByteCode iface mod_sum = do
--------------------------------------------------------------
+
-- Knot tying! See Note [Knot-tying typecheckIface]
-- See Note [ModDetails and --make mode]
initModDetails :: HscEnv -> ModIface -> IO ModDetails
-initModDetails hsc_env iface =
+initModDetails hsc_env iface = do
fixIO $ \details' -> do
let act hpt = addToHpt hpt (moduleName $ mi_module iface)
(HomeModInfo iface details' emptyHomeModInfoLinkable)
=====================================
compiler/GHC/Iface/Binary.hs
=====================================
@@ -18,6 +18,8 @@ module GHC.Iface.Binary (
getSymtabName,
CheckHiWay(..),
TraceBinIFace(..),
+ getIfaceWithExtFields,
+ putIfaceWithExtFields,
getWithUserData,
putWithUserData,
@@ -156,18 +158,25 @@ readBinIface
readBinIface profile name_cache checkHiWay traceBinIface hi_path = do
(src_hash, bh) <- readBinIfaceHeader profile name_cache checkHiWay traceBinIface hi_path
- extFields_p <- get bh
-
- mod_iface <- getWithUserData name_cache bh
-
- seekBinReader bh extFields_p
- extFields <- get bh
+ mod_iface <- getIfaceWithExtFields name_cache bh
return mod_iface
- { mi_ext_fields = extFields
- , mi_src_hash = src_hash
+ { mi_src_hash = src_hash
}
+getIfaceWithExtFields :: NameCache -> ReadBinHandle -> IO ModIface
+getIfaceWithExtFields name_cache bh = do
+ extFields_p <- get bh
+
+ mod_iface <- getWithUserData name_cache bh
+
+ seekBinReader bh extFields_p
+ extFields <- get bh
+ pure mod_iface
+ { mi_ext_fields = extFields
+ }
+
+
-- | This performs a get action after reading the dictionary and symbol
-- table. It is necessary to run this before trying to deserialise any
-- Names or FastStrings.
@@ -227,19 +236,17 @@ writeBinIface profile traceBinIface compressionLevel hi_path mod_iface = do
put_ bh tag
put_ bh (mi_src_hash mod_iface)
- extFields_p_p <- tellBinWriter bh
- put_ bh extFields_p_p
-
- putWithUserData traceBinIface compressionLevel bh mod_iface
-
- extFields_p <- tellBinWriter bh
- putAt bh extFields_p_p extFields_p
- seekBinWriter bh extFields_p
- put_ bh (mi_ext_fields mod_iface)
+ putIfaceWithExtFields traceBinIface compressionLevel bh mod_iface
-- And send the result to the file
writeBinMem bh hi_path
+-- | Puts the 'ModIface'
+putIfaceWithExtFields :: TraceBinIFace -> CompressionIFace -> WriteBinHandle -> ModIface -> IO ()
+putIfaceWithExtFields traceBinIface compressionLevel bh mod_iface =
+ forwardPut_ bh (\_ -> put_ bh (mi_ext_fields mod_iface)) $ do
+ putWithUserData traceBinIface compressionLevel bh mod_iface
+
-- | Put a piece of data with an initialised `UserData` field. This
-- is necessary if you want to serialise Names or FastStrings.
-- It also writes a symbol table and the dictionary.
=====================================
compiler/GHC/Iface/Make.hs
=====================================
@@ -69,10 +69,13 @@ import GHC.Types.HpcInfo
import GHC.Types.CompleteMatch
import GHC.Types.SourceText
import GHC.Types.SrcLoc ( unLoc )
+import GHC.Types.Name.Cache
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Utils.Logger
+import GHC.Utils.Binary
+import GHC.Iface.Binary
import GHC.Data.FastString
import GHC.Data.Maybe
@@ -147,8 +150,25 @@ mkFullIface hsc_env partial_iface mb_stg_infos mb_cmm_infos = do
let unit_state = hsc_units hsc_env
putDumpFileMaybe (hsc_logger hsc_env) Opt_D_dump_hi "FINAL INTERFACE" FormatText
(pprModIface unit_state full_iface)
+ final_iface <- shareIface (hsc_NC hsc_env) (flagsToIfCompression $ hsc_dflags hsc_env) full_iface
+ return final_iface
+
+shareIface :: NameCache -> CompressionIFace -> ModIface -> IO ModIface
+shareIface _ NormalCompression mi = pure mi
+shareIface nc compressionLevel mi = do
+ bh <- openBinMem (1024 * 1024)
+ start <- tellBinWriter bh
+ putIfaceWithExtFields QuietBinIFace compressionLevel bh mi
+ rbh <- shrinkBinBuffer bh
+ seekBinReader rbh start
+ res <- getIfaceWithExtFields nc rbh
+ let resiface = res
+ { mi_src_hash = mi_src_hash mi
+ , mi_globals = mi_globals mi
+ }
+ forceModIface resiface
+ return resiface
- return full_iface
updateDecl :: [IfaceDecl] -> Maybe StgCgInfos -> Maybe CmmCgInfos -> [IfaceDecl]
updateDecl decls Nothing Nothing = decls
=====================================
compiler/GHC/Utils/Binary.hs
=====================================
@@ -36,6 +36,9 @@ module GHC.Utils.Binary
tellBinWriter,
castBin,
withBinBuffer,
+ freezeWriteHandle,
+ shrinkBinBuffer,
+ thawReadHandle,
foldGet, foldGet',
@@ -123,7 +126,7 @@ import Foreign hiding (shiftL, shiftR, void)
import Data.Array
import Data.Array.IO
import Data.Array.Unsafe
-import Data.ByteString (ByteString)
+import Data.ByteString (ByteString, copy)
import Data.Coerce
import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Unsafe as BS
@@ -327,6 +330,44 @@ openBinMem size
, wbm_arr_r = arr_r
}
+-- | Freeze the given 'WriteBinHandle' and turn it into an equivalent 'ReadBinHandle'.
+--
+-- The current offset of the 'WriteBinHandle' is maintained in the new 'ReadBinHandle'.
+freezeWriteHandle :: WriteBinHandle -> IO ReadBinHandle
+freezeWriteHandle wbm = do
+ rbm_off_r <- newFastMutInt =<< readFastMutInt (wbm_off_r wbm)
+ rbm_sz_r <- readFastMutInt (wbm_sz_r wbm)
+ rbm_arr_r <- readIORef (wbm_arr_r wbm)
+ pure $ ReadBinMem
+ { rbm_userData = noReaderUserData
+ , rbm_off_r = rbm_off_r
+ , rbm_sz_r = rbm_sz_r
+ , rbm_arr_r = rbm_arr_r
+ }
+
+-- Copy the BinBuffer to a new BinBuffer which is exactly the right size.
+-- This performs a copy of the underlying buffer.
+-- The buffer may be truncated if the offset is not at the end of the written
+-- output.
+--
+-- UserData is also discarded during the copy
+-- You should just use this when translating a Put handle into a Get handle.
+shrinkBinBuffer :: WriteBinHandle -> IO ReadBinHandle
+shrinkBinBuffer bh = withBinBuffer bh $ \bs -> do
+ unsafeUnpackBinBuffer (copy bs)
+
+thawReadHandle :: ReadBinHandle -> IO WriteBinHandle
+thawReadHandle rbm = do
+ wbm_off_r <- newFastMutInt =<< readFastMutInt (rbm_off_r rbm)
+ wbm_sz_r <- newFastMutInt (rbm_sz_r rbm)
+ wbm_arr_r <- newIORef (rbm_arr_r rbm)
+ pure $ WriteBinMem
+ { wbm_userData = noWriterUserData
+ , wbm_off_r = wbm_off_r
+ , wbm_sz_r = wbm_sz_r
+ , wbm_arr_r = wbm_arr_r
+ }
+
tellBinWriter :: WriteBinHandle -> IO (Bin a)
tellBinWriter (WriteBinMem _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix)
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4c11762e1cf44673a5da5fda5653daa402e7a6cb
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4c11762e1cf44673a5da5fda5653daa402e7a6cb
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240502/c0a00092/attachment-0001.html>
More information about the ghc-commits
mailing list