[Git][ghc/ghc][wip/fendor/ghc-iface-sharing-avoid-reserialisation] Avoid unneccessarily re-serialising the `ModIface`

Hannes Siebenhandl (@fendor) gitlab at gitlab.haskell.org
Fri Apr 26 13:20:30 UTC 2024



Hannes Siebenhandl pushed to branch wip/fendor/ghc-iface-sharing-avoid-reserialisation at Glasgow Haskell Compiler / GHC


Commits:
21816fcb by Fendor at 2024-04-26T15:19:55+02:00
Avoid unneccessarily re-serialising the `ModIface`

To reduce memory usage of `ModIface`, we serialise `ModIface` to an
in-memory byte array, which implicitly shares duplicated values.

This serailised byte array can be reused to avoid work when we actually
write the `ModIface` to disk.
We introduce a new field to `ModIface` which allows us to save the byte
array, and write it to disk if the `ModIface` wasn't changed after the
initial serialisation.

This requires us to change absolute offsets, for example to jump to the
deduplication table for `Name` or `FastString` with relative offsets, as
the deduplication byte array doesn't contain header information, such as
fingerprints.
To allow us to dump the binary blob to disk, we need to replace all
absolute offsets with relative ones.

This leads to new primitives for `ModIface`, which help to construct
relative offsets.

- - - - -


10 changed files:

- compiler/GHC/Iface/Binary.hs
- compiler/GHC/Iface/Ext/Binary.hs
- compiler/GHC/Iface/Ext/Fields.hs
- compiler/GHC/Iface/Load.hs
- compiler/GHC/Iface/Make.hs
- compiler/GHC/Iface/Recomp.hs
- compiler/GHC/Tc/Gen/Splice.hs
- compiler/GHC/Unit/Module/ModIface.hs
- compiler/GHC/Utils/Binary.hs
- utils/haddock


Changes:

=====================================
compiler/GHC/Iface/Binary.hs
=====================================
@@ -63,6 +63,7 @@ import Data.Map.Strict (Map)
 import Data.Word
 import System.IO.Unsafe
 import Data.Typeable (Typeable)
+import qualified GHC.Data.Strict as Strict
 
 
 -- ---------------------------------------------------------------------------
@@ -166,14 +167,18 @@ readBinIface profile name_cache checkHiWay traceBinIface hi_path = do
 
 getIfaceWithExtFields :: NameCache -> ReadBinHandle -> IO ModIface
 getIfaceWithExtFields name_cache bh = do
-  extFields_p <- get bh
+  start <- tellBinReader bh
+  extFields_p_rel <- getRelBin bh
 
   mod_iface <- getWithUserData name_cache bh
 
-  seekBinReader bh extFields_p
+  seekBinReader bh start
+  seekBinReaderRel bh extFields_p_rel
   extFields <- get bh
+  modIfaceData <- freezeBinHandle2 bh start
   pure mod_iface
     { mi_ext_fields = extFields
+    , mi_hi_bytes = FullIfaceBinHandle $ Strict.Just modIfaceData
     }
 
 
@@ -204,7 +209,7 @@ getTables name_cache bh = do
         -- add it to the 'ReaderUserData' of 'ReadBinHandle'.
         decodeReaderTable :: Typeable a => ReaderTable a -> ReadBinHandle -> IO ReadBinHandle
         decodeReaderTable tbl bh0 = do
-          table <- Binary.forwardGet bh (getTable tbl bh0)
+          table <- Binary.forwardGetRel bh (getTable tbl bh0)
           let binaryReader = mkReaderFromTable tbl table
           pure $ addReaderToUserData binaryReader bh0
 
@@ -244,8 +249,12 @@ writeBinIface profile traceBinIface compressionLevel hi_path mod_iface = do
 -- | Puts the 'ModIface'
 putIfaceWithExtFields :: TraceBinIFace -> CompressionIFace -> WriteBinHandle -> ModIface -> IO ()
 putIfaceWithExtFields traceBinIface compressionLevel bh mod_iface =
-  forwardPut_ bh (\_ -> put_ bh (mi_ext_fields mod_iface)) $ do
-    putWithUserData traceBinIface compressionLevel bh mod_iface
+  case mi_hi_bytes mod_iface of
+    -- FullIfaceBinHandle _ -> putWithUserData traceBinIface compressionLevel bh mod_iface
+    FullIfaceBinHandle Strict.Nothing -> do
+      forwardPutRel_ bh (\_ -> put_ bh (mi_ext_fields mod_iface)) $ do
+        putWithUserData traceBinIface compressionLevel bh mod_iface
+    FullIfaceBinHandle (Strict.Just binData) -> putFullBinData bh binData
 
 -- | Put a piece of data with an initialised `UserData` field. This
 -- is necessary if you want to serialise Names or FastStrings.
@@ -316,7 +325,7 @@ putAllTables _ [] act = do
   a <- act
   pure ([], a)
 putAllTables bh (x : xs) act = do
-  (r, (res, a)) <- forwardPut bh (const $ putTable x bh) $ do
+  (r, (res, a)) <- forwardPutRel bh (const $ putTable x bh) $ do
     putAllTables bh xs act
   pure (r : res, a)
 
@@ -468,7 +477,7 @@ to the table we need to deserialise first.
 What deduplication tables exist and the order of serialisation is currently statically specified
 in 'putWithTables'. 'putWithTables' also takes care of the serialisation of used deduplication tables.
 The deserialisation of the deduplication tables happens 'getTables', using 'Binary' utility
-functions such as 'forwardGet'.
+functions such as 'forwardGetRel'.
 
 Here, a visualisation of the table structure we currently have (ignoring 'ExtensibleFields'):
 
@@ -529,7 +538,6 @@ initWriteIfaceType compressionLevel = do
         putGenericSymTab sym_tab bh ty
       _ -> putIfaceType bh ty
 
-
     fullIfaceTypeSerialiser sym_tab bh ty = do
       put_ bh ifaceTypeSharedByte
       putGenericSymTab sym_tab bh ty


=====================================
compiler/GHC/Iface/Ext/Binary.hs
=====================================
@@ -235,7 +235,7 @@ readHieFileContents bh0 name_cache = do
   get bh1
   where
     get_dictionary tbl bin_handle = do
-      fsTable <- Binary.forwardGet bin_handle (getTable tbl bin_handle)
+      fsTable <- Binary.forwardGetRel bin_handle (getTable tbl bin_handle)
       let
         fsReader = mkReaderFromTable tbl fsTable
         bhFs = addReaderToUserData fsReader bin_handle


=====================================
compiler/GHC/Iface/Ext/Fields.hs
=====================================
@@ -41,7 +41,7 @@ instance Binary ExtensibleFields where
     -- to point to the start of each payload:
     forM_ header_entries $ \(field_p_p, dat) -> do
       field_p <- tellBinWriter bh
-      putAt bh field_p_p field_p
+      putAtRel bh field_p_p field_p
       seekBinWriter bh field_p
       put_ bh dat
 
@@ -50,11 +50,11 @@ instance Binary ExtensibleFields where
 
     -- Get the names and field pointers:
     header_entries <- replicateM n $
-      (,) <$> get bh <*> get bh
+      (,) <$> get bh <*> getRelBin bh
 
     -- Seek to and get each field's payload:
     fields <- forM header_entries $ \(name, field_p) -> do
-      seekBinReader bh field_p
+      seekBinReaderRel bh field_p
       dat <- get bh
       return (name, dat)
 


=====================================
compiler/GHC/Iface/Load.hs
=====================================
@@ -1107,7 +1107,7 @@ pprModIfaceSimple unit_state iface =
 --
 -- The UnitState is used to pretty-print units
 pprModIface :: UnitState -> ModIface -> SDoc
-pprModIface unit_state iface at ModIface{ mi_final_exts = exts }
+pprModIface unit_state iface
  = vcat [ text "interface"
                 <+> ppr (mi_module iface) <+> pp_hsc_src (mi_hsc_src iface)
                 <+> (if mi_orphan exts then text "[orphan module]" else Outputable.empty)
@@ -1148,6 +1148,7 @@ pprModIface unit_state iface at ModIface{ mi_final_exts = exts }
         , text "extensible fields:" $$ nest 2 (pprExtensibleFields (mi_ext_fields iface))
         ]
   where
+    exts = mi_final_exts iface
     pp_hsc_src HsBootFile = text "[boot]"
     pp_hsc_src HsigFile   = text "[hsig]"
     pp_hsc_src HsSrcFile  = Outputable.empty


=====================================
compiler/GHC/Iface/Make.hs
=====================================
@@ -153,17 +153,33 @@ mkFullIface hsc_env partial_iface mb_stg_infos mb_cmm_infos = do
     final_iface <- shareIface (hsc_NC hsc_env) (flagsToIfCompression $ hsc_dflags hsc_env) full_iface
     return final_iface
 
+-- | Compress an 'ModIface' and share as many values as possible, depending on the 'CompressionIFace' level.
+--
+-- We compress the 'ModIface' by serialising the 'ModIface' to an in-memory byte array, and then deserialising it.
+-- The deserialisation will deduplicate certain values depending on the 'CompressionIFace' level.
+-- See Note [Deduplication during iface binary serialisation] for how we do that.
+--
+-- Additionally, we cache the serialised byte array, so if the 'ModIface' is not modified
+-- after calling 'shareIface', 'writeBinIface' will reuse that buffer without serialising the 'ModIface' again.
+-- Modifying the 'ModIface' forces us to re-serialise it again.
 shareIface :: NameCache -> CompressionIFace -> ModIface -> IO ModIface
+shareIface _ NormalCompression mi = do
+  -- In 'NormalCompression', the sharing isn't reducing the memory usage, as 'Name's and 'FastString's are
+  -- already shared, and at this compression level, we don't compress/share anything else.
+  -- Thus, for a brief moment we simply double the memory residency for no reason.
+  -- Therefore, we only try to share expensive values if the compression mode is higher than
+  -- 'NormalCompression'
+  pure mi
 shareIface nc compressionLevel  mi = do
   bh <- openBinMem (1024 * 1024)
   start <- tellBinWriter bh
   putIfaceWithExtFields QuietBinIFace compressionLevel bh mi
-  rbh <- freezeWriteHandle bh
+  rbh <- shrinkBinBuffer bh
   seekBinReader rbh start
   res <- getIfaceWithExtFields nc rbh
   let resiface = res { mi_src_hash = mi_src_hash mi }
   forceModIface  resiface
-  return resiface
+  pure resiface
 
 
 updateDecl :: [IfaceDecl] -> Maybe StgCgInfos -> Maybe CmmCgInfos -> [IfaceDecl]
@@ -318,40 +334,40 @@ mkIface_ hsc_env
         icomplete_matches = map mkIfaceCompleteMatch complete_matches
         !rdrs = maybeGlobalRdrEnv rdr_env
 
-    ModIface {
-          mi_module      = this_mod,
+    emptyPartialModIface this_mod
           -- Need to record this because it depends on the -instantiated-with flag
           -- which could change
-          mi_sig_of      = if semantic_mod == this_mod
+          & set_mi_sig_of      ( if semantic_mod == this_mod
                             then Nothing
-                            else Just semantic_mod,
-          mi_hsc_src     = hsc_src,
-          mi_deps        = deps,
-          mi_usages      = usages,
-          mi_exports     = mkIfaceExports exports,
+                            else Just semantic_mod)
+          & set_mi_hsc_src     ( hsc_src)
+          & set_mi_deps        ( deps)
+          & set_mi_usages      ( usages)
+          & set_mi_exports     ( mkIfaceExports exports)
 
           -- Sort these lexicographically, so that
           -- the result is stable across compilations
-          mi_insts       = sortBy cmp_inst     iface_insts,
-          mi_fam_insts   = sortBy cmp_fam_inst iface_fam_insts,
-          mi_rules       = sortBy cmp_rule     iface_rules,
-
-          mi_fixities    = fixities,
-          mi_warns       = warns,
-          mi_anns        = annotations,
-          mi_globals     = rdrs,
-          mi_used_th     = used_th,
-          mi_decls       = decls,
-          mi_extra_decls = extra_decls,
-          mi_hpc         = isHpcUsed hpc_info,
-          mi_trust       = trust_info,
-          mi_trust_pkg   = pkg_trust_req,
-          mi_complete_matches = icomplete_matches,
-          mi_docs        = docs,
-          mi_final_exts  = (),
-          mi_ext_fields  = emptyExtensibleFields,
-          mi_src_hash = ms_hs_hash mod_summary
-          }
+          & set_mi_insts       ( sortBy cmp_inst     iface_insts)
+          & set_mi_fam_insts   ( sortBy cmp_fam_inst iface_fam_insts)
+          & set_mi_rules       ( sortBy cmp_rule     iface_rules)
+
+          & set_mi_fixities    ( fixities)
+          & set_mi_warns       ( warns)
+          & set_mi_anns        ( annotations)
+          & set_mi_globals     ( rdrs)
+          & set_mi_used_th     ( used_th)
+          & set_mi_decls       ( decls)
+          & set_mi_extra_decls ( extra_decls)
+          & set_mi_hpc         ( isHpcUsed hpc_info)
+          & set_mi_trust       ( trust_info)
+          & set_mi_trust_pkg   ( pkg_trust_req)
+          & set_mi_complete_matches ( icomplete_matches)
+          & set_mi_docs        ( docs)
+          & set_mi_final_exts  ( ())
+          & set_mi_ext_fields  ( emptyExtensibleFields)
+          & set_mi_src_hash ( ms_hs_hash mod_summary)
+          & set_mi_hi_bytes ( PartialIfaceBinHandle)
+
   where
      cmp_rule     = lexicalCompareFS `on` ifRuleName
      -- Compare these lexicographically by OccName, *not* by unique,


=====================================
compiler/GHC/Iface/Recomp.hs
=====================================
@@ -83,6 +83,7 @@ import Data.Ord
 import Data.Containers.ListUtils
 import Data.Bifunctor
 import GHC.Iface.Errors.Ppr
+import qualified GHC.Data.Strict as Strict
 
 {-
   -----------------------------------------------
@@ -1283,7 +1284,9 @@ addFingerprints hsc_env iface0
       , mi_fix_fn         = fix_fn
       , mi_hash_fn        = lookupOccEnv local_env
       }
-    final_iface = iface0 { mi_decls = sorted_decls, mi_extra_decls = sorted_extra_decls, mi_final_exts = final_iface_exts }
+    final_iface = completePartialModIface iface0
+        (sorted_decls) (sorted_extra_decls) (final_iface_exts)
+        (FullIfaceBinHandle Strict.Nothing)
    --
    return final_iface
 


=====================================
compiler/GHC/Tc/Gen/Splice.hs
=====================================
@@ -1562,7 +1562,8 @@ lookupDeclDoc nm = do
       -- Wasn't in the current module. Try searching other external ones!
       mIface <- getExternalModIface nm
       case mIface of
-        Just ModIface { mi_docs = Just Docs{docs_decls = dmap} } ->
+        Just iface
+          | Just Docs{docs_decls = dmap} <- mi_docs iface ->
           pure $ renderHsDocStrings . map hsDocString <$> lookupUniqMap dmap nm
         _ -> pure Nothing
 
@@ -1578,7 +1579,8 @@ lookupArgDoc i nm = do
     Nothing -> do
       mIface <- getExternalModIface nm
       case mIface of
-        Just ModIface { mi_docs = Just Docs{docs_args = amap} } ->
+        Just iface
+          | Just Docs{docs_args = amap} <- mi_docs iface->
           pure $ renderHsDocString . hsDocString <$> (lookupUniqMap amap nm >>= IntMap.lookup i)
         _ -> pure Nothing
 


=====================================
compiler/GHC/Unit/Module/ModIface.hs
=====================================
@@ -7,7 +7,59 @@
 
 module GHC.Unit.Module.ModIface
    ( ModIface
-   , ModIface_ (..)
+   , ModIface_
+   , mi_module
+   , mi_sig_of
+   , mi_hsc_src
+   , mi_src_hash
+   , mi_hi_bytes
+   , mi_deps
+   , mi_usages
+   , mi_exports
+   , mi_used_th
+   , mi_fixities
+   , mi_warns
+   , mi_anns
+   , mi_insts
+   , mi_fam_insts
+   , mi_rules
+   , mi_decls
+   , mi_extra_decls
+   , mi_globals
+   , mi_hpc
+   , mi_trust
+   , mi_trust_pkg
+   , mi_complete_matches
+   , mi_docs
+   , mi_final_exts
+   , mi_ext_fields
+   , set_mi_module
+   , set_mi_sig_of
+   , set_mi_hsc_src
+   , set_mi_src_hash
+   , set_mi_hi_bytes
+   , set_mi_deps
+   , set_mi_usages
+   , set_mi_exports
+   , set_mi_used_th
+   , set_mi_fixities
+   , set_mi_warns
+   , set_mi_anns
+   , set_mi_insts
+   , set_mi_fam_insts
+   , set_mi_rules
+   , set_mi_decls
+   , set_mi_extra_decls
+   , set_mi_globals
+   , set_mi_hpc
+   , set_mi_trust
+   , set_mi_trust_pkg
+   , set_mi_complete_matches
+   , set_mi_docs
+   , set_mi_final_exts
+   , set_mi_ext_fields
+   , completePartialModIface
+   , IfaceBinHandle(..)
    , PartialModIface
    , ModIfaceBackend (..)
    , IfaceDeclExts
@@ -58,6 +110,7 @@ import GHC.Utils.Binary
 
 import Control.DeepSeq
 import Control.Exception
+import qualified GHC.Data.Strict as Strict
 
 {- Note [Interface file stages]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -139,7 +192,9 @@ type family IfaceBackendExts (phase :: ModIfacePhase) = bk | bk -> phase where
   IfaceBackendExts 'ModIfaceCore = ()
   IfaceBackendExts 'ModIfaceFinal = ModIfaceBackend
 
-
+data IfaceBinHandle (phase :: ModIfacePhase) where
+  PartialIfaceBinHandle :: IfaceBinHandle 'ModIfaceCore
+  FullIfaceBinHandle :: Strict.Maybe FullBinData -> IfaceBinHandle 'ModIfaceFinal
 
 -- | A 'ModIface' plus a 'ModDetails' summarises everything we know
 -- about a compiled module.  The 'ModIface' is the stuff *before* linking,
@@ -262,8 +317,9 @@ data ModIface_ (phase :: ModIfacePhase)
                 -- chosen over `ByteString`s.
                 --
 
-        mi_src_hash :: !Fingerprint
+        mi_src_hash :: !Fingerprint,
                 -- ^ Hash of the .hs source, used for recompilation checking.
+        mi_hi_bytes :: !(IfaceBinHandle phase)
      }
 
 {-
@@ -349,6 +405,7 @@ instance Binary ModIface where
                  mi_src_hash = _src_hash, -- Don't `put_` this in the instance
                                           -- because we are going to write it
                                           -- out separately in the actual file
+                 mi_hi_bytes  = _hi_bytes, -- TODO: explain
                  mi_deps      = deps,
                  mi_usages    = usages,
                  mi_exports   = exports,
@@ -449,6 +506,7 @@ instance Binary ModIface where
                  mi_hsc_src     = hsc_src,
                  mi_src_hash = fingerprint0, -- placeholder because this is dealt
                                              -- with specially when the file is read
+                 mi_hi_bytes    = FullIfaceBinHandle Strict.Nothing,
                  mi_deps        = deps,
                  mi_usages      = usages,
                  mi_exports     = exports,
@@ -487,6 +545,7 @@ instance Binary ModIface where
                    mi_hash_fn = mkIfaceHashCache decls
                  }})
 
+
 -- | The original names declared of a certain module that are exported
 type IfaceExport = AvailInfo
 
@@ -496,6 +555,7 @@ emptyPartialModIface mod
                mi_sig_of      = Nothing,
                mi_hsc_src     = HsSrcFile,
                mi_src_hash    = fingerprint0,
+               mi_hi_bytes    = PartialIfaceBinHandle,
                mi_deps        = noDependencies,
                mi_usages      = [],
                mi_exports     = [],
@@ -522,6 +582,7 @@ emptyFullModIface :: Module -> ModIface
 emptyFullModIface mod =
     (emptyPartialModIface mod)
       { mi_decls = []
+      , mi_hi_bytes = FullIfaceBinHandle Strict.Nothing
       , mi_final_exts = ModIfaceBackend
         { mi_iface_hash = fingerprint0,
           mi_mod_hash = fingerprint0,
@@ -626,5 +687,97 @@ type WhetherHasOrphans   = Bool
 -- | Does this module define family instances?
 type WhetherHasFamInst = Bool
 
+completePartialModIface :: PartialModIface
+  -> [(Fingerprint, IfaceDecl)]
+  -> Maybe [IfaceBindingX IfaceMaybeRhs IfaceTopBndrInfo]
+  -> ModIfaceBackend
+  -> IfaceBinHandle 'ModIfaceFinal
+  -> ModIface
+completePartialModIface partial decls extra_decls final_exts hi_bytes = partial
+  { mi_decls = decls
+  , mi_extra_decls = extra_decls
+  , mi_final_exts = final_exts
+  , mi_hi_bytes = hi_bytes
+  }
+
+set_mi_module :: Module -> ModIface_ phase -> ModIface_ phase
+set_mi_module val iface = clear_mi_hi_bytes $ iface { mi_module = val }
+
+set_mi_sig_of :: Maybe Module -> ModIface_ phase -> ModIface_ phase
+set_mi_sig_of val iface = clear_mi_hi_bytes $ iface { mi_sig_of = val }
+
+set_mi_hsc_src :: HscSource -> ModIface_ phase -> ModIface_ phase
+set_mi_hsc_src val iface = clear_mi_hi_bytes $ iface { mi_hsc_src = val }
+
+set_mi_src_hash :: Fingerprint -> ModIface_ phase -> ModIface_ phase
+set_mi_src_hash val iface = clear_mi_hi_bytes $ iface { mi_src_hash = val }
+
+set_mi_hi_bytes :: IfaceBinHandle phase -> ModIface_ phase -> ModIface_ phase
+set_mi_hi_bytes val iface = iface { mi_hi_bytes = val }
+
+set_mi_deps :: Dependencies -> ModIface_ phase -> ModIface_ phase
+set_mi_deps val iface = clear_mi_hi_bytes $ iface { mi_deps = val }
+
+set_mi_usages :: [Usage] -> ModIface_ phase -> ModIface_ phase
+set_mi_usages val iface = clear_mi_hi_bytes $ iface { mi_usages = val }
+
+set_mi_exports :: [IfaceExport] -> ModIface_ phase -> ModIface_ phase
+set_mi_exports val iface = clear_mi_hi_bytes $ iface { mi_exports = val }
+
+set_mi_used_th :: Bool -> ModIface_ phase -> ModIface_ phase
+set_mi_used_th val iface = clear_mi_hi_bytes $ iface { mi_used_th = val }
+
+set_mi_fixities :: [(OccName, Fixity)] -> ModIface_ phase -> ModIface_ phase
+set_mi_fixities val iface = clear_mi_hi_bytes $ iface { mi_fixities = val }
+
+set_mi_warns :: IfaceWarnings -> ModIface_ phase -> ModIface_ phase
+set_mi_warns val iface = clear_mi_hi_bytes $ iface { mi_warns = val }
 
+set_mi_anns :: [IfaceAnnotation] -> ModIface_ phase -> ModIface_ phase
+set_mi_anns val iface = clear_mi_hi_bytes $ iface { mi_anns = val }
 
+set_mi_insts :: [IfaceClsInst] -> ModIface_ phase -> ModIface_ phase
+set_mi_insts val iface = clear_mi_hi_bytes $ iface { mi_insts = val }
+
+set_mi_fam_insts :: [IfaceFamInst] -> ModIface_ phase -> ModIface_ phase
+set_mi_fam_insts val iface = clear_mi_hi_bytes $ iface { mi_fam_insts = val }
+
+set_mi_rules :: [IfaceRule] -> ModIface_ phase -> ModIface_ phase
+set_mi_rules val iface = clear_mi_hi_bytes $ iface { mi_rules = val }
+
+set_mi_decls :: [IfaceDeclExts phase] -> ModIface_ phase -> ModIface_ phase
+set_mi_decls val iface = clear_mi_hi_bytes $ iface { mi_decls = val }
+
+set_mi_extra_decls :: Maybe [IfaceBindingX IfaceMaybeRhs IfaceTopBndrInfo] -> ModIface_ phase -> ModIface_ phase
+set_mi_extra_decls val iface = clear_mi_hi_bytes $ iface { mi_extra_decls = val }
+
+set_mi_globals :: Maybe IfGlobalRdrEnv -> ModIface_ phase -> ModIface_ phase
+set_mi_globals val iface = clear_mi_hi_bytes $ iface { mi_globals = val }
+
+set_mi_hpc :: AnyHpcUsage -> ModIface_ phase -> ModIface_ phase
+set_mi_hpc val iface = clear_mi_hi_bytes $ iface { mi_hpc = val }
+
+set_mi_trust :: IfaceTrustInfo -> ModIface_ phase -> ModIface_ phase
+set_mi_trust val iface = clear_mi_hi_bytes $ iface { mi_trust = val }
+
+set_mi_trust_pkg :: Bool -> ModIface_ phase -> ModIface_ phase
+set_mi_trust_pkg val iface = clear_mi_hi_bytes $ iface { mi_trust_pkg = val }
+
+set_mi_complete_matches :: [IfaceCompleteMatch] -> ModIface_ phase -> ModIface_ phase
+set_mi_complete_matches val iface = clear_mi_hi_bytes $ iface { mi_complete_matches = val }
+
+set_mi_docs :: Maybe Docs -> ModIface_ phase -> ModIface_ phase
+set_mi_docs val iface = clear_mi_hi_bytes $  iface { mi_docs = val }
+
+set_mi_final_exts :: IfaceBackendExts phase -> ModIface_ phase -> ModIface_ phase
+set_mi_final_exts val iface = clear_mi_hi_bytes $ iface { mi_final_exts = val }
+
+set_mi_ext_fields :: ExtensibleFields -> ModIface_ phase -> ModIface_ phase
+set_mi_ext_fields val iface = clear_mi_hi_bytes $ iface { mi_ext_fields = val }
+
+clear_mi_hi_bytes :: ModIface_ phase -> ModIface_ phase
+clear_mi_hi_bytes iface = iface
+  { mi_hi_bytes = case mi_hi_bytes iface of
+      PartialIfaceBinHandle -> PartialIfaceBinHandle
+      FullIfaceBinHandle _ -> FullIfaceBinHandle Strict.Nothing
+  }


=====================================
compiler/GHC/Utils/Binary.hs
=====================================
@@ -4,7 +4,6 @@
 {-# LANGUAGE UnboxedTuples #-}
 
 {-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
-{-# LANGUAGE TypeFamilies #-}
 -- We always optimise this, otherwise performance of a non-optimised
 -- compiler is severely affected
 
@@ -20,7 +19,7 @@
 --     http://www.cs.york.ac.uk/fp/nhc98/
 
 module GHC.Utils.Binary
-  ( {-type-}  Bin,
+  ( {-type-}  Bin, RelBin(..), getRelBin,
     {-class-} Binary(..),
     {-type-}  ReadBinHandle, WriteBinHandle,
     SymbolTable, Dictionary,
@@ -33,6 +32,7 @@ module GHC.Utils.Binary
 
    seekBinWriter,
    seekBinReader,
+   seekBinReaderRel,
    tellBinReader,
    tellBinWriter,
    castBin,
@@ -47,7 +47,9 @@ module GHC.Utils.Binary
    readBinMemN,
 
    putAt, getAt,
+   putAtRel,
    forwardPut, forwardPut_, forwardGet,
+   forwardPutRel, forwardPutRel_, forwardGetRel,
 
    -- * For writing instances
    putByte,
@@ -102,6 +104,10 @@ module GHC.Utils.Binary
    BindingName(..),
    simpleBindingNameWriter,
    simpleBindingNameReader,
+   FullBinData(..), freezeBinHandle, thawBinHandle, putFullBinData,
+   shrinkBinBuffer,
+   freezeBinHandle2,
+   BinArray,
   ) where
 
 import GHC.Prelude
@@ -126,7 +132,7 @@ import Foreign hiding (shiftL, shiftR, void)
 import Data.Array
 import Data.Array.IO
 import Data.Array.Unsafe
-import Data.ByteString (ByteString)
+import Data.ByteString (ByteString, copy)
 import Data.Coerce
 import qualified Data.ByteString.Internal as BS
 import qualified Data.ByteString.Unsafe   as BS
@@ -156,7 +162,6 @@ import GHC.ForeignPtr           ( unsafeWithForeignPtr )
 import Unsafe.Coerce (unsafeCoerce)
 
 import GHC.Data.TrieMap
-
 type BinArray = ForeignPtr Word8
 
 #if !MIN_VERSION_base(4,15,0)
@@ -196,6 +201,63 @@ dataHandle (BinData size bin) = do
 handleData :: WriteBinHandle -> IO BinData
 handleData (WriteBinMem _ ixr _ binr) = BinData <$> readFastMutInt ixr <*> readIORef binr
 
+---------------------------------------------------------------
+-- FullBinData
+---------------------------------------------------------------
+
+data FullBinData = FullBinData
+  { fbd_readerUserData :: ReaderUserData
+  , fbd_off_s :: {-# UNPACK #-} !Int
+  -- ^ start offset
+  , fbd_off_e :: {-# UNPACK #-} !Int
+  -- ^ end offset
+  , fbd_size :: {-# UNPACK #-} !Int
+  -- ^ total buffer size
+  , fbd_buffer :: {-# UNPACK #-} !BinArray
+  }
+
+-- Equality and Ord assume that two distinct buffers are different, even if they compare the same things.
+instance Eq FullBinData where
+  (FullBinData _ b c d e) == (FullBinData _ b1 c1 d1 e1) = b == b1 && c == c1 && d == d1 && e == e1
+
+instance Ord FullBinData where
+  compare (FullBinData _ b c d e) (FullBinData _ b1 c1 d1 e1) =
+    compare b b1 `mappend` compare c c1 `mappend` compare d d1 `mappend` compare e e1
+
+putFullBinData :: WriteBinHandle -> FullBinData -> IO ()
+putFullBinData bh (FullBinData _ o1 o2 _sz ba) = do
+  let sz = o2 - o1
+  putPrim bh sz $ \dest ->
+    unsafeWithForeignPtr (ba `plusForeignPtr` o1) $ \orig ->
+    copyBytes dest orig sz
+
+freezeBinHandle :: Bin () -> ReadBinHandle -> IO FullBinData
+freezeBinHandle (BinPtr len) (ReadBinMem user_data ixr sz binr) = do
+  ix <- readFastMutInt ixr
+  pure (FullBinData user_data ix len sz binr)
+
+freezeBinHandle2 :: ReadBinHandle -> Bin () -> IO FullBinData
+freezeBinHandle2 (ReadBinMem user_data ixr sz binr) (BinPtr start) = do
+  ix <- readFastMutInt ixr
+  pure (FullBinData user_data start ix sz binr)
+
+thawBinHandle :: FullBinData -> IO ReadBinHandle
+thawBinHandle (FullBinData user_data ix _end sz ba) = do
+  ixr <- newFastMutInt ix
+  return $ ReadBinMem user_data ixr sz ba
+
+-- Copy the BinBuffer to a new BinBuffer which is exactly the right size.
+-- This performs a copy of the underlying buffer.
+-- The buffer may be truncated if the offset is not at the end of the written
+-- output.
+--
+-- UserData is also discarded during the copy
+-- You should just use this when translating a Put handle into a Get handle.
+shrinkBinBuffer :: WriteBinHandle -> IO ReadBinHandle
+shrinkBinBuffer bh = withBinBuffer bh $ \bs -> do
+  unsafeUnpackBinBuffer (copy bs)
+
+
 ---------------------------------------------------------------
 -- BinHandle
 ---------------------------------------------------------------
@@ -289,9 +351,30 @@ unsafeUnpackBinBuffer (BS.BS arr len) = do
 newtype Bin a = BinPtr Int
   deriving (Eq, Ord, Show, Bounded)
 
+data RelBin a = RelBin !(Bin a) !(Bin a)
+  deriving (Eq, Ord, Show, Bounded)
+
+newtype RelBinPtr a = RelBinPtr (Bin a)
+
 castBin :: Bin a -> Bin b
 castBin (BinPtr i) = BinPtr i
 
+getRelBin :: ReadBinHandle -> IO (RelBin a)
+getRelBin bh = do
+  start <- tellBinReader bh
+  off <- get bh
+  pure $ RelBin start off
+
+makeAbsoluteBin ::  RelBin a -> Bin a
+makeAbsoluteBin (RelBin (BinPtr !start) (BinPtr !offset)) = BinPtr (start + offset)
+
+makeRelativeBin :: RelBin a -> RelBinPtr a
+makeRelativeBin (RelBin _ offset) = RelBinPtr offset
+
+toRelBin :: Bin (RelBinPtr a) -> Bin a -> RelBin a
+toRelBin (BinPtr !start) (BinPtr !goal) =
+  RelBin (BinPtr start) (BinPtr $! goal - start)
+
 ---------------------------------------------------------------
 -- class Binary
 ---------------------------------------------------------------
@@ -312,6 +395,9 @@ class Binary a where
 putAt  :: Binary a => WriteBinHandle -> Bin a -> a -> IO ()
 putAt bh p x = do seekBinWriter bh p; put_ bh x; return ()
 
+putAtRel :: WriteBinHandle -> Bin (RelBinPtr a) -> Bin a -> IO ()
+putAtRel bh from to = putAt bh from (makeRelativeBin $ toRelBin from to)
+
 getAt  :: Binary a => ReadBinHandle -> Bin a -> IO a
 getAt bh p = do seekBinReader bh p; get bh
 
@@ -382,12 +468,18 @@ seekBinNoExpandWriter (WriteBinMem _ ix_r sz_r _) (BinPtr !p) = do
         else writeFastMutInt ix_r p
 
 -- | SeekBin but without calling expandBin
-seekBinReader :: ReadBinHandle -> Bin a -> IO ()
+seekBinReader :: HasCallStack => ReadBinHandle -> Bin a -> IO ()
 seekBinReader (ReadBinMem _ ix_r sz_r _) (BinPtr !p) = do
   if (p > sz_r)
         then panic "seekBinReader: seek out of range"
         else writeFastMutInt ix_r p
 
+seekBinReaderRel :: HasCallStack => ReadBinHandle -> RelBin a -> IO ()
+seekBinReaderRel (ReadBinMem _ ix_r sz_r _) (RelBin (BinPtr !start) (BinPtr !offset)) = do
+  if (start + offset > sz_r)
+        then panic "seekBinReaderRel: seek out of range"
+        else writeFastMutInt ix_r (start + offset)
+
 writeBinMem :: WriteBinHandle -> FilePath -> IO ()
 writeBinMem (WriteBinMem _ ix_r _ arr_r) fn = do
   h <- openBinaryFile fn WriteMode
@@ -1108,6 +1200,11 @@ instance Binary (Bin a) where
   put_ bh (BinPtr i) = putWord32 bh (fromIntegral i :: Word32)
   get bh = do i <- getWord32 bh; return (BinPtr (fromIntegral (i :: Word32)))
 
+-- Instance uses fixed-width encoding to allow inserting
+-- Bin placeholders in the stream.
+instance Binary (RelBinPtr a) where
+  put_ bh (RelBinPtr i) = put_ bh i
+  get bh = RelBinPtr <$> get bh
 
 -- -----------------------------------------------------------------------------
 -- Forward reading/writing
@@ -1136,7 +1233,7 @@ forwardPut_ :: WriteBinHandle -> (b -> IO a) -> IO b -> IO ()
 forwardPut_ bh put_A put_B = void $ forwardPut bh put_A put_B
 
 -- | Read a value stored using a forward reference
-forwardGet :: ReadBinHandle -> IO a -> IO a
+forwardGet :: HasCallStack => ReadBinHandle -> IO a -> IO a
 forwardGet bh get_A = do
     -- read forward reference
     p <- get bh -- a BinPtr
@@ -1148,6 +1245,43 @@ forwardGet bh get_A = do
     seekBinReader bh p_a
     pure r
 
+
+-- | "forwardPutRel put_A put_B" outputs A after B but allows A to be read before B
+-- by using a forward reference.
+forwardPutRel :: WriteBinHandle -> (b -> IO a) -> IO b -> IO (a,b)
+forwardPutRel bh put_A put_B = do
+  -- write placeholder pointer to A
+  pre_a <- tellBinWriter bh
+  put_ bh pre_a
+
+  -- write B
+  r_b <- put_B
+
+  -- update A's pointer
+  a <- tellBinWriter bh
+  putAtRel bh pre_a a
+  seekBinNoExpandWriter bh a
+
+  -- write A
+  r_a <- put_A r_b
+  pure (r_a,r_b)
+
+forwardPutRel_ :: WriteBinHandle -> (b -> IO a) -> IO b -> IO ()
+forwardPutRel_ bh put_A put_B = void $ forwardPutRel bh put_A put_B
+
+-- | Read a value stored using a forward reference
+forwardGetRel :: ReadBinHandle -> IO a -> IO a
+forwardGetRel bh get_A = do
+    -- read forward reference
+    p <- getRelBin bh
+    -- store current position
+    p_a <- tellBinReader bh
+    -- go read the forward value, then seek back
+    seekBinReader bh $ makeAbsoluteBin p
+    r <- get_A
+    seekBinReader bh p_a
+    pure r
+
 -- -----------------------------------------------------------------------------
 -- Lazy reading/writing
 
@@ -1157,19 +1291,19 @@ lazyPut = lazyPut' put_
 lazyGet :: Binary a => ReadBinHandle -> IO a
 lazyGet = lazyGet' get
 
-lazyPut' :: HasDebugCallStack => (WriteBinHandle -> a -> IO ()) -> WriteBinHandle -> a -> IO ()
+lazyPut' :: (WriteBinHandle -> a -> IO ()) -> WriteBinHandle -> a -> IO ()
 lazyPut' f bh a = do
     -- output the obj with a ptr to skip over it:
     pre_a <- tellBinWriter bh
     put_ bh pre_a       -- save a slot for the ptr
     f bh a           -- dump the object
     q <- tellBinWriter bh     -- q = ptr to after object
-    putAt bh pre_a q    -- fill in slot before a with ptr to q
+    putAtRel bh pre_a q    -- fill in slot before a with ptr to q
     seekBinWriter bh q        -- finally carry on writing at q
 
 lazyGet' :: HasDebugCallStack => (ReadBinHandle -> IO a) -> ReadBinHandle -> IO a
 lazyGet' f bh = do
-    p <- get bh -- a BinPtr
+    p <- getRelBin bh -- a BinPtr
     p_a <- tellBinReader bh
     a <- unsafeInterleaveIO $ do
         -- NB: Use a fresh rbm_off_r variable in the child thread, for thread
@@ -1178,7 +1312,7 @@ lazyGet' f bh = do
         let bh' = bh { rbm_off_r = off_r }
         seekBinReader bh' p_a
         f bh'
-    seekBinReader bh p -- skip over the object for now
+    seekBinReader bh (makeAbsoluteBin p) -- skip over the object for now
     return a
 
 -- | Serialize the constructor strictly but lazily serialize a value inside a
@@ -1472,13 +1606,13 @@ putGenericSymbolTable gen_sym_tab serialiser bh = do
                 mapM_ (\n -> serialiser bh n) (reverse todo)
                 loop
       snd <$>
-        (forwardPut bh (const $ readFastMutInt symtab_next >>= put_ bh) $
+        (forwardPutRel bh (const $ readFastMutInt symtab_next >>= put_ bh) $
           loop)
 
 -- | Read the elements of a 'GenericSymbolTable' from disk into a 'SymbolTable'.
 getGenericSymbolTable :: forall a . (ReadBinHandle -> IO a) -> ReadBinHandle -> IO (SymbolTable a)
 getGenericSymbolTable deserialiser bh = do
-  sz <- forwardGet bh (get bh) :: IO Int
+  sz <- forwardGetRel bh (get bh) :: IO Int
   mut_arr <- newArray_ (0, sz-1) :: IO (IOArray Int a)
   forM_ [0..(sz-1)] $ \i -> do
     f <- deserialiser bh


=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit fa76e1ee98906f5bc8fc4598524610020b653412
+Subproject commit e9eee9ea56dd21f22fff70106a1289d7f35440a3



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/21816fcb43f9ba80413a58febf1c9e4406d94aae

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/21816fcb43f9ba80413a58febf1c9e4406d94aae
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240426/8944dcf5/attachment-0001.html>


More information about the ghc-commits mailing list