[Git][ghc/ghc][wip/fendor/ghc-iface-sharing-avoid-reserialisation] Reuse the 'ReadBinMem' after sharing to avoid recomputations

Hannes Siebenhandl (@fendor) gitlab at gitlab.haskell.org
Wed Apr 24 11:23:56 UTC 2024



Hannes Siebenhandl pushed to branch wip/fendor/ghc-iface-sharing-avoid-reserialisation at Glasgow Haskell Compiler / GHC


Commits:
e534e60d by Fendor at 2024-04-24T13:23:40+02:00
Reuse the 'ReadBinMem' after sharing to avoid recomputations

- - - - -


7 changed files:

- compiler/GHC/Iface/Binary.hs
- compiler/GHC/Iface/Load.hs
- compiler/GHC/Iface/Make.hs
- compiler/GHC/Iface/Recomp.hs
- compiler/GHC/Tc/Gen/Splice.hs
- compiler/GHC/Unit/Module/ModIface.hs
- compiler/GHC/Utils/Binary.hs


Changes:

=====================================
compiler/GHC/Iface/Binary.hs
=====================================
@@ -63,6 +63,7 @@ import Data.Map.Strict (Map)
 import Data.Word
 import System.IO.Unsafe
 import Data.Typeable (Typeable)
+import qualified GHC.Data.Strict as Strict
 
 
 -- ---------------------------------------------------------------------------
@@ -166,14 +167,18 @@ readBinIface profile name_cache checkHiWay traceBinIface hi_path = do
 
 getIfaceWithExtFields :: NameCache -> ReadBinHandle -> IO ModIface
 getIfaceWithExtFields name_cache bh = do
-  extFields_p <- get bh
+  start <- tellBinReader bh
+  extFields_p_rel <- getRelBin bh
 
   mod_iface <- getWithUserData name_cache bh
 
-  seekBinReader bh extFields_p
+  seekBinReader bh start
+  seekBinReaderRel bh extFields_p_rel
   extFields <- get bh
+  modIfaceData <- freezeBinHandle2 bh start
   pure mod_iface
     { mi_ext_fields = extFields
+    , mi_hi_bytes = FullIfaceBinHandle $ Strict.Just modIfaceData
     }
 
 
@@ -204,7 +209,7 @@ getTables name_cache bh = do
         -- add it to the 'ReaderUserData' of 'ReadBinHandle'.
         decodeReaderTable :: Typeable a => ReaderTable a -> ReadBinHandle -> IO ReadBinHandle
         decodeReaderTable tbl bh0 = do
-          table <- Binary.forwardGet bh (getTable tbl bh0)
+          table <- Binary.forwardGetRel bh (getTable tbl bh0)
           let binaryReader = mkReaderFromTable tbl table
           pure $ addReaderToUserData binaryReader bh0
 
@@ -244,8 +249,12 @@ writeBinIface profile traceBinIface compressionLevel hi_path mod_iface = do
 -- | Puts the 'ModIface'
 putIfaceWithExtFields :: TraceBinIFace -> CompressionIFace -> WriteBinHandle -> ModIface -> IO ()
 putIfaceWithExtFields traceBinIface compressionLevel bh mod_iface =
-  forwardPut_ bh (\_ -> put_ bh (mi_ext_fields mod_iface)) $ do
-    putWithUserData traceBinIface compressionLevel bh mod_iface
+  case mi_hi_bytes mod_iface of
+    -- FullIfaceBinHandle _ -> putWithUserData traceBinIface compressionLevel bh mod_iface
+    FullIfaceBinHandle Strict.Nothing -> do
+      forwardPutRel_ bh (\_ -> put_ bh (mi_ext_fields mod_iface)) $ do
+        putWithUserData traceBinIface compressionLevel bh mod_iface
+    FullIfaceBinHandle (Strict.Just binData) -> putFullBinData bh binData
 
 -- | Put a piece of data with an initialised `UserData` field. This
 -- is necessary if you want to serialise Names or FastStrings.
@@ -316,7 +325,7 @@ putAllTables _ [] act = do
   a <- act
   pure ([], a)
 putAllTables bh (x : xs) act = do
-  (r, (res, a)) <- forwardPut bh (const $ putTable x bh) $ do
+  (r, (res, a)) <- forwardPutRel bh (const $ putTable x bh) $ do
     putAllTables bh xs act
   pure (r : res, a)
 
@@ -529,7 +538,6 @@ initWriteIfaceType compressionLevel = do
         putGenericSymTab sym_tab bh ty
       _ -> putIfaceType bh ty
 
-
     fullIfaceTypeSerialiser sym_tab bh ty = do
       put_ bh ifaceTypeSharedByte
       putGenericSymTab sym_tab bh ty


=====================================
compiler/GHC/Iface/Load.hs
=====================================
@@ -1107,7 +1107,7 @@ pprModIfaceSimple unit_state iface =
 --
 -- The UnitState is used to pretty-print units
 pprModIface :: UnitState -> ModIface -> SDoc
-pprModIface unit_state iface at ModIface{ mi_final_exts = exts }
+pprModIface unit_state iface
  = vcat [ text "interface"
                 <+> ppr (mi_module iface) <+> pp_hsc_src (mi_hsc_src iface)
                 <+> (if mi_orphan exts then text "[orphan module]" else Outputable.empty)
@@ -1148,6 +1148,7 @@ pprModIface unit_state iface at ModIface{ mi_final_exts = exts }
         , text "extensible fields:" $$ nest 2 (pprExtensibleFields (mi_ext_fields iface))
         ]
   where
+    exts = mi_final_exts iface
     pp_hsc_src HsBootFile = text "[boot]"
     pp_hsc_src HsigFile   = text "[hsig]"
     pp_hsc_src HsSrcFile  = Outputable.empty


=====================================
compiler/GHC/Iface/Make.hs
=====================================
@@ -153,17 +153,33 @@ mkFullIface hsc_env partial_iface mb_stg_infos mb_cmm_infos = do
     final_iface <- shareIface (hsc_NC hsc_env) (flagsToIfCompression $ hsc_dflags hsc_env) full_iface
     return final_iface
 
+-- | Compress an 'ModIface' and share as many values as possible, depending on the 'CompressionIFace' level.
+--
+-- We compress the 'ModIface' by serialising the 'ModIface' to an in-memory byte array, and then deserialising it.
+-- The deserialisation will deduplicate certain values depending on the 'CompressionIFace' level.
+-- See Note [Deduplication during iface binary serialisation] for how we do that.
+--
+-- Additionally, we cache the serialised byte array, so if the 'ModIface' is not modified
+-- after calling 'shareIface', 'writeBinIface' will reuse that buffer without serialising the 'ModIface' again.
+-- Modifying the 'ModIface' forces us to re-serialise it again.
 shareIface :: NameCache -> CompressionIFace -> ModIface -> IO ModIface
+shareIface _ NormalCompression mi = do
+  -- In 'NormalCompression', the sharing isn't reducing the memory usage, as 'Name's and 'FastString's are
+  -- already shared, and at this compression level, we don't compress/share anything else.
+  -- Thus, for a brief moment we simply double the memory residency for no reason.
+  -- Therefore, we only try to share expensive values if the compression mode is higher than
+  -- 'NormalCompression'
+  pure mi
 shareIface nc compressionLevel  mi = do
   bh <- openBinMem (1024 * 1024)
   start <- tellBinWriter bh
   putIfaceWithExtFields QuietBinIFace compressionLevel bh mi
-  rbh <- freezeWriteHandle bh
+  rbh <- shrinkBinBuffer bh
   seekBinReader rbh start
   res <- getIfaceWithExtFields nc rbh
   let resiface = res { mi_src_hash = mi_src_hash mi }
   forceModIface  resiface
-  return resiface
+  pure resiface
 
 
 updateDecl :: [IfaceDecl] -> Maybe StgCgInfos -> Maybe CmmCgInfos -> [IfaceDecl]
@@ -318,40 +334,40 @@ mkIface_ hsc_env
         icomplete_matches = map mkIfaceCompleteMatch complete_matches
         !rdrs = maybeGlobalRdrEnv rdr_env
 
-    ModIface {
-          mi_module      = this_mod,
+    emptyPartialModIface this_mod
           -- Need to record this because it depends on the -instantiated-with flag
           -- which could change
-          mi_sig_of      = if semantic_mod == this_mod
+          & set_mi_sig_of      ( if semantic_mod == this_mod
                             then Nothing
-                            else Just semantic_mod,
-          mi_hsc_src     = hsc_src,
-          mi_deps        = deps,
-          mi_usages      = usages,
-          mi_exports     = mkIfaceExports exports,
+                            else Just semantic_mod)
+          & set_mi_hsc_src     ( hsc_src)
+          & set_mi_deps        ( deps)
+          & set_mi_usages      ( usages)
+          & set_mi_exports     ( mkIfaceExports exports)
 
           -- Sort these lexicographically, so that
           -- the result is stable across compilations
-          mi_insts       = sortBy cmp_inst     iface_insts,
-          mi_fam_insts   = sortBy cmp_fam_inst iface_fam_insts,
-          mi_rules       = sortBy cmp_rule     iface_rules,
-
-          mi_fixities    = fixities,
-          mi_warns       = warns,
-          mi_anns        = annotations,
-          mi_globals     = rdrs,
-          mi_used_th     = used_th,
-          mi_decls       = decls,
-          mi_extra_decls = extra_decls,
-          mi_hpc         = isHpcUsed hpc_info,
-          mi_trust       = trust_info,
-          mi_trust_pkg   = pkg_trust_req,
-          mi_complete_matches = icomplete_matches,
-          mi_docs        = docs,
-          mi_final_exts  = (),
-          mi_ext_fields  = emptyExtensibleFields,
-          mi_src_hash = ms_hs_hash mod_summary
-          }
+          & set_mi_insts       ( sortBy cmp_inst     iface_insts)
+          & set_mi_fam_insts   ( sortBy cmp_fam_inst iface_fam_insts)
+          & set_mi_rules       ( sortBy cmp_rule     iface_rules)
+
+          & set_mi_fixities    ( fixities)
+          & set_mi_warns       ( warns)
+          & set_mi_anns        ( annotations)
+          & set_mi_globals     ( rdrs)
+          & set_mi_used_th     ( used_th)
+          & set_mi_decls       ( decls)
+          & set_mi_extra_decls ( extra_decls)
+          & set_mi_hpc         ( isHpcUsed hpc_info)
+          & set_mi_trust       ( trust_info)
+          & set_mi_trust_pkg   ( pkg_trust_req)
+          & set_mi_complete_matches ( icomplete_matches)
+          & set_mi_docs        ( docs)
+          & set_mi_final_exts  ( ())
+          & set_mi_ext_fields  ( emptyExtensibleFields)
+          & set_mi_src_hash ( ms_hs_hash mod_summary)
+          & set_mi_hi_bytes ( PartialIfaceBinHandle)
+
   where
      cmp_rule     = lexicalCompareFS `on` ifRuleName
      -- Compare these lexicographically by OccName, *not* by unique,


=====================================
compiler/GHC/Iface/Recomp.hs
=====================================
@@ -83,6 +83,7 @@ import Data.Ord
 import Data.Containers.ListUtils
 import Data.Bifunctor
 import GHC.Iface.Errors.Ppr
+import qualified GHC.Data.Strict as Strict
 
 {-
   -----------------------------------------------
@@ -1283,7 +1284,9 @@ addFingerprints hsc_env iface0
       , mi_fix_fn         = fix_fn
       , mi_hash_fn        = lookupOccEnv local_env
       }
-    final_iface = iface0 { mi_decls = sorted_decls, mi_extra_decls = sorted_extra_decls, mi_final_exts = final_iface_exts }
+    final_iface = completePartialModIface iface0
+        (sorted_decls) (sorted_extra_decls) (final_iface_exts)
+        (FullIfaceBinHandle Strict.Nothing)
    --
    return final_iface
 


=====================================
compiler/GHC/Tc/Gen/Splice.hs
=====================================
@@ -1562,7 +1562,8 @@ lookupDeclDoc nm = do
       -- Wasn't in the current module. Try searching other external ones!
       mIface <- getExternalModIface nm
       case mIface of
-        Just ModIface { mi_docs = Just Docs{docs_decls = dmap} } ->
+        Just iface
+          | Just Docs{docs_decls = dmap} <- mi_docs iface ->
           pure $ renderHsDocStrings . map hsDocString <$> lookupUniqMap dmap nm
         _ -> pure Nothing
 
@@ -1578,7 +1579,8 @@ lookupArgDoc i nm = do
     Nothing -> do
       mIface <- getExternalModIface nm
       case mIface of
-        Just ModIface { mi_docs = Just Docs{docs_args = amap} } ->
+        Just iface
+          | Just Docs{docs_args = amap} <- mi_docs iface->
           pure $ renderHsDocString . hsDocString <$> (lookupUniqMap amap nm >>= IntMap.lookup i)
         _ -> pure Nothing
 


=====================================
compiler/GHC/Unit/Module/ModIface.hs
=====================================
@@ -7,7 +7,59 @@
 
 module GHC.Unit.Module.ModIface
    ( ModIface
-   , ModIface_ (..)
+   , ModIface_
+   , mi_module
+   , mi_sig_of
+   , mi_hsc_src
+   , mi_src_hash
+   , mi_hi_bytes
+   , mi_deps
+   , mi_usages
+   , mi_exports
+   , mi_used_th
+   , mi_fixities
+   , mi_warns
+   , mi_anns
+   , mi_insts
+   , mi_fam_insts
+   , mi_rules
+   , mi_decls
+   , mi_extra_decls
+   , mi_globals
+   , mi_hpc
+   , mi_trust
+   , mi_trust_pkg
+   , mi_complete_matches
+   , mi_docs
+   , mi_final_exts
+   , mi_ext_fields
+   , set_mi_module
+   , set_mi_sig_of
+   , set_mi_hsc_src
+   , set_mi_src_hash
+   , set_mi_hi_bytes
+   , set_mi_deps
+   , set_mi_usages
+   , set_mi_exports
+   , set_mi_used_th
+   , set_mi_fixities
+   , set_mi_warns
+   , set_mi_anns
+   , set_mi_insts
+   , set_mi_fam_insts
+   , set_mi_rules
+   , set_mi_decls
+   , set_mi_extra_decls
+   , set_mi_globals
+   , set_mi_hpc
+   , set_mi_trust
+   , set_mi_trust_pkg
+   , set_mi_complete_matches
+   , set_mi_docs
+   , set_mi_final_exts
+   , set_mi_ext_fields
+   , completePartialModIface
+   , IfaceBinHandle(..)
    , PartialModIface
    , ModIfaceBackend (..)
    , IfaceDeclExts
@@ -58,6 +110,7 @@ import GHC.Utils.Binary
 
 import Control.DeepSeq
 import Control.Exception
+import qualified GHC.Data.Strict as Strict
 
 {- Note [Interface file stages]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -139,7 +192,9 @@ type family IfaceBackendExts (phase :: ModIfacePhase) = bk | bk -> phase where
   IfaceBackendExts 'ModIfaceCore = ()
   IfaceBackendExts 'ModIfaceFinal = ModIfaceBackend
 
-
+data IfaceBinHandle (phase :: ModIfacePhase) where
+  PartialIfaceBinHandle :: IfaceBinHandle 'ModIfaceCore
+  FullIfaceBinHandle :: Strict.Maybe FullBinData -> IfaceBinHandle 'ModIfaceFinal
 
 -- | A 'ModIface' plus a 'ModDetails' summarises everything we know
 -- about a compiled module.  The 'ModIface' is the stuff *before* linking,
@@ -262,8 +317,9 @@ data ModIface_ (phase :: ModIfacePhase)
                 -- chosen over `ByteString`s.
                 --
 
-        mi_src_hash :: !Fingerprint
+        mi_src_hash :: !Fingerprint,
                 -- ^ Hash of the .hs source, used for recompilation checking.
+        mi_hi_bytes :: !(IfaceBinHandle phase)
      }
 
 {-
@@ -349,6 +405,7 @@ instance Binary ModIface where
                  mi_src_hash = _src_hash, -- Don't `put_` this in the instance
                                           -- because we are going to write it
                                           -- out separately in the actual file
+                 mi_hi_bytes  = _hi_bytes, -- TODO: explain
                  mi_deps      = deps,
                  mi_usages    = usages,
                  mi_exports   = exports,
@@ -449,6 +506,7 @@ instance Binary ModIface where
                  mi_hsc_src     = hsc_src,
                  mi_src_hash = fingerprint0, -- placeholder because this is dealt
                                              -- with specially when the file is read
+                 mi_hi_bytes    = FullIfaceBinHandle Strict.Nothing,
                  mi_deps        = deps,
                  mi_usages      = usages,
                  mi_exports     = exports,
@@ -487,6 +545,7 @@ instance Binary ModIface where
                    mi_hash_fn = mkIfaceHashCache decls
                  }})
 
+
 -- | The original names declared of a certain module that are exported
 type IfaceExport = AvailInfo
 
@@ -496,6 +555,7 @@ emptyPartialModIface mod
                mi_sig_of      = Nothing,
                mi_hsc_src     = HsSrcFile,
                mi_src_hash    = fingerprint0,
+               mi_hi_bytes    = PartialIfaceBinHandle,
                mi_deps        = noDependencies,
                mi_usages      = [],
                mi_exports     = [],
@@ -522,6 +582,7 @@ emptyFullModIface :: Module -> ModIface
 emptyFullModIface mod =
     (emptyPartialModIface mod)
       { mi_decls = []
+      , mi_hi_bytes = FullIfaceBinHandle Strict.Nothing
       , mi_final_exts = ModIfaceBackend
         { mi_iface_hash = fingerprint0,
           mi_mod_hash = fingerprint0,
@@ -626,5 +687,97 @@ type WhetherHasOrphans   = Bool
 -- | Does this module define family instances?
 type WhetherHasFamInst = Bool
 
+completePartialModIface :: PartialModIface
+  -> [(Fingerprint, IfaceDecl)]
+  -> Maybe [IfaceBindingX IfaceMaybeRhs IfaceTopBndrInfo]
+  -> ModIfaceBackend
+  -> IfaceBinHandle 'ModIfaceFinal
+  -> ModIface
+completePartialModIface partial decls extra_decls final_exts hi_bytes = partial
+  { mi_decls = decls
+  , mi_extra_decls = extra_decls
+  , mi_final_exts = final_exts
+  , mi_hi_bytes = hi_bytes
+  }
+
+set_mi_module :: Module -> ModIface_ phase -> ModIface_ phase
+set_mi_module val iface = clear_mi_hi_bytes $ iface { mi_module = val }
+
+set_mi_sig_of :: Maybe Module -> ModIface_ phase -> ModIface_ phase
+set_mi_sig_of val iface = clear_mi_hi_bytes $ iface { mi_sig_of = val }
+
+set_mi_hsc_src :: HscSource -> ModIface_ phase -> ModIface_ phase
+set_mi_hsc_src val iface = clear_mi_hi_bytes $ iface { mi_hsc_src = val }
+
+set_mi_src_hash :: Fingerprint -> ModIface_ phase -> ModIface_ phase
+set_mi_src_hash val iface = clear_mi_hi_bytes $ iface { mi_src_hash = val }
+
+set_mi_hi_bytes :: IfaceBinHandle phase -> ModIface_ phase -> ModIface_ phase
+set_mi_hi_bytes val iface = iface { mi_hi_bytes = val }
+
+set_mi_deps :: Dependencies -> ModIface_ phase -> ModIface_ phase
+set_mi_deps val iface = clear_mi_hi_bytes $ iface { mi_deps = val }
+
+set_mi_usages :: [Usage] -> ModIface_ phase -> ModIface_ phase
+set_mi_usages val iface = clear_mi_hi_bytes $ iface { mi_usages = val }
+
+set_mi_exports :: [IfaceExport] -> ModIface_ phase -> ModIface_ phase
+set_mi_exports val iface = clear_mi_hi_bytes $ iface { mi_exports = val }
+
+set_mi_used_th :: Bool -> ModIface_ phase -> ModIface_ phase
+set_mi_used_th val iface = clear_mi_hi_bytes $ iface { mi_used_th = val }
+
+set_mi_fixities :: [(OccName, Fixity)] -> ModIface_ phase -> ModIface_ phase
+set_mi_fixities val iface = clear_mi_hi_bytes $ iface { mi_fixities = val }
+
+set_mi_warns :: IfaceWarnings -> ModIface_ phase -> ModIface_ phase
+set_mi_warns val iface = clear_mi_hi_bytes $ iface { mi_warns = val }
 
+set_mi_anns :: [IfaceAnnotation] -> ModIface_ phase -> ModIface_ phase
+set_mi_anns val iface = clear_mi_hi_bytes $ iface { mi_anns = val }
 
+set_mi_insts :: [IfaceClsInst] -> ModIface_ phase -> ModIface_ phase
+set_mi_insts val iface = clear_mi_hi_bytes $ iface { mi_insts = val }
+
+set_mi_fam_insts :: [IfaceFamInst] -> ModIface_ phase -> ModIface_ phase
+set_mi_fam_insts val iface = clear_mi_hi_bytes $ iface { mi_fam_insts = val }
+
+set_mi_rules :: [IfaceRule] -> ModIface_ phase -> ModIface_ phase
+set_mi_rules val iface = clear_mi_hi_bytes $ iface { mi_rules = val }
+
+set_mi_decls :: [IfaceDeclExts phase] -> ModIface_ phase -> ModIface_ phase
+set_mi_decls val iface = clear_mi_hi_bytes $ iface { mi_decls = val }
+
+set_mi_extra_decls :: Maybe [IfaceBindingX IfaceMaybeRhs IfaceTopBndrInfo] -> ModIface_ phase -> ModIface_ phase
+set_mi_extra_decls val iface = clear_mi_hi_bytes $ iface { mi_extra_decls = val }
+
+set_mi_globals :: Maybe IfGlobalRdrEnv -> ModIface_ phase -> ModIface_ phase
+set_mi_globals val iface = clear_mi_hi_bytes $ iface { mi_globals = val }
+
+set_mi_hpc :: AnyHpcUsage -> ModIface_ phase -> ModIface_ phase
+set_mi_hpc val iface = clear_mi_hi_bytes $ iface { mi_hpc = val }
+
+set_mi_trust :: IfaceTrustInfo -> ModIface_ phase -> ModIface_ phase
+set_mi_trust val iface = clear_mi_hi_bytes $ iface { mi_trust = val }
+
+set_mi_trust_pkg :: Bool -> ModIface_ phase -> ModIface_ phase
+set_mi_trust_pkg val iface = clear_mi_hi_bytes $ iface { mi_trust_pkg = val }
+
+set_mi_complete_matches :: [IfaceCompleteMatch] -> ModIface_ phase -> ModIface_ phase
+set_mi_complete_matches val iface = clear_mi_hi_bytes $ iface { mi_complete_matches = val }
+
+set_mi_docs :: Maybe Docs -> ModIface_ phase -> ModIface_ phase
+set_mi_docs val iface = clear_mi_hi_bytes $  iface { mi_docs = val }
+
+set_mi_final_exts :: IfaceBackendExts phase -> ModIface_ phase -> ModIface_ phase
+set_mi_final_exts val iface = clear_mi_hi_bytes $ iface { mi_final_exts = val }
+
+set_mi_ext_fields :: ExtensibleFields -> ModIface_ phase -> ModIface_ phase
+set_mi_ext_fields val iface = clear_mi_hi_bytes $ iface { mi_ext_fields = val }
+
+clear_mi_hi_bytes :: ModIface_ phase -> ModIface_ phase
+clear_mi_hi_bytes iface = iface
+  { mi_hi_bytes = case mi_hi_bytes iface of
+      PartialIfaceBinHandle -> PartialIfaceBinHandle
+      FullIfaceBinHandle _ -> FullIfaceBinHandle Strict.Nothing
+  }


=====================================
compiler/GHC/Utils/Binary.hs
=====================================
@@ -20,7 +20,7 @@
 --     http://www.cs.york.ac.uk/fp/nhc98/
 
 module GHC.Utils.Binary
-  ( {-type-}  Bin,
+  ( {-type-}  Bin, RelBin(..), getRelBin,
     {-class-} Binary(..),
     {-type-}  ReadBinHandle, WriteBinHandle,
     SymbolTable, Dictionary,
@@ -33,6 +33,7 @@ module GHC.Utils.Binary
 
    seekBinWriter,
    seekBinReader,
+   seekBinReaderRel,
    tellBinReader,
    tellBinWriter,
    castBin,
@@ -48,6 +49,7 @@ module GHC.Utils.Binary
 
    putAt, getAt,
    forwardPut, forwardPut_, forwardGet,
+   forwardPutRel, forwardPutRel_, forwardGetRel,
 
    -- * For writing instances
    putByte,
@@ -102,6 +104,10 @@ module GHC.Utils.Binary
    BindingName(..),
    simpleBindingNameWriter,
    simpleBindingNameReader,
+   FullBinData(..), freezeBinHandle, thawBinHandle, putFullBinData,
+   shrinkBinBuffer,
+   freezeBinHandle2,
+   BinArray,
   ) where
 
 import GHC.Prelude
@@ -126,7 +132,7 @@ import Foreign hiding (shiftL, shiftR, void)
 import Data.Array
 import Data.Array.IO
 import Data.Array.Unsafe
-import Data.ByteString (ByteString)
+import Data.ByteString (ByteString, copy)
 import Data.Coerce
 import qualified Data.ByteString.Internal as BS
 import qualified Data.ByteString.Unsafe   as BS
@@ -156,7 +162,6 @@ import GHC.ForeignPtr           ( unsafeWithForeignPtr )
 import Unsafe.Coerce (unsafeCoerce)
 
 import GHC.Data.TrieMap
-
 type BinArray = ForeignPtr Word8
 
 #if !MIN_VERSION_base(4,15,0)
@@ -196,6 +201,63 @@ dataHandle (BinData size bin) = do
 handleData :: WriteBinHandle -> IO BinData
 handleData (WriteBinMem _ ixr _ binr) = BinData <$> readFastMutInt ixr <*> readIORef binr
 
+---------------------------------------------------------------
+-- FullBinData
+---------------------------------------------------------------
+
+data FullBinData = FullBinData
+  { fbd_readerUserData :: ReaderUserData
+  , fbd_off_s :: {-# UNPACK #-} !Int
+  -- ^ start offset
+  , fbd_off_e :: {-# UNPACK #-} !Int
+  -- ^ end offset
+  , fbd_size :: {-# UNPACK #-} !Int
+  -- ^ total buffer size
+  , fbd_buffer :: {-# UNPACK #-} !BinArray
+  }
+
+-- Equality and Ord assume that two distinct buffers are different, even if they compare the same things.
+instance Eq FullBinData where
+  (FullBinData _ b c d e) == (FullBinData _ b1 c1 d1 e1) = b == b1 && c == c1 && d == d1 && e == e1
+
+instance Ord FullBinData where
+  compare (FullBinData _ b c d e) (FullBinData _ b1 c1 d1 e1) =
+    compare b b1 `mappend` compare c c1 `mappend` compare d d1 `mappend` compare e e1
+
+putFullBinData :: WriteBinHandle -> FullBinData -> IO ()
+putFullBinData bh (FullBinData _ o1 o2 _sz ba) = do
+  let sz = o2 - o1
+  putPrim bh sz $ \dest ->
+    unsafeWithForeignPtr (ba `plusForeignPtr` o1) $ \orig ->
+    copyBytes dest orig sz
+
+freezeBinHandle :: Bin () -> ReadBinHandle -> IO FullBinData
+freezeBinHandle (BinPtr len) (ReadBinMem user_data ixr sz binr) = do
+  ix <- readFastMutInt ixr
+  pure (FullBinData user_data ix len sz binr)
+
+freezeBinHandle2 :: ReadBinHandle -> Bin () -> IO FullBinData
+freezeBinHandle2 (ReadBinMem user_data ixr sz binr) (BinPtr start) = do
+  ix <- readFastMutInt ixr
+  pure (FullBinData user_data start ix sz binr)
+
+thawBinHandle :: FullBinData -> IO ReadBinHandle
+thawBinHandle (FullBinData user_data ix _end sz ba) = do
+  ixr <- newFastMutInt ix
+  return $ ReadBinMem user_data ixr sz ba
+
+-- Copy the BinBuffer to a new BinBuffer which is exactly the right size.
+-- This performs a copy of the underlying buffer.
+-- The buffer may be truncated if the offset is not at the end of the written
+-- output.
+--
+-- UserData is also discarded during the copy
+-- You should just use this when translating a Put handle into a Get handle.
+shrinkBinBuffer :: WriteBinHandle -> IO ReadBinHandle
+shrinkBinBuffer bh = withBinBuffer bh $ \bs -> do
+  unsafeUnpackBinBuffer (copy bs)
+
+
 ---------------------------------------------------------------
 -- BinHandle
 ---------------------------------------------------------------
@@ -289,9 +351,30 @@ unsafeUnpackBinBuffer (BS.BS arr len) = do
 newtype Bin a = BinPtr Int
   deriving (Eq, Ord, Show, Bounded)
 
+data RelBin a = RelBin !(Bin a) !(Bin a)
+  deriving (Eq, Ord, Show, Bounded)
+
+newtype RelBinPtr a = RelBinPtr (Bin a)
+
 castBin :: Bin a -> Bin b
 castBin (BinPtr i) = BinPtr i
 
+getRelBin :: ReadBinHandle -> IO (RelBin a)
+getRelBin bh = do
+  start <- tellBinReader bh
+  off <- get bh
+  pure $ RelBin start off
+
+makeAbsoluteBin ::  RelBin a -> Bin a
+makeAbsoluteBin (RelBin (BinPtr !start) (BinPtr !offset)) = BinPtr (start + offset)
+
+makeRelativeBin :: RelBin a -> RelBinPtr a
+makeRelativeBin (RelBin _ offset) = RelBinPtr offset
+
+toRelBin :: Bin (RelBinPtr ()) -> Bin a -> RelBin a
+toRelBin (BinPtr !start) (BinPtr !goal) =
+  RelBin (BinPtr start) (BinPtr $! goal - start)
+
 ---------------------------------------------------------------
 -- class Binary
 ---------------------------------------------------------------
@@ -382,12 +465,18 @@ seekBinNoExpandWriter (WriteBinMem _ ix_r sz_r _) (BinPtr !p) = do
         else writeFastMutInt ix_r p
 
 -- | SeekBin but without calling expandBin
-seekBinReader :: ReadBinHandle -> Bin a -> IO ()
+seekBinReader :: HasCallStack => ReadBinHandle -> Bin a -> IO ()
 seekBinReader (ReadBinMem _ ix_r sz_r _) (BinPtr !p) = do
   if (p > sz_r)
         then panic "seekBinReader: seek out of range"
         else writeFastMutInt ix_r p
 
+seekBinReaderRel :: HasCallStack => ReadBinHandle -> RelBin a -> IO ()
+seekBinReaderRel (ReadBinMem _ ix_r sz_r _) (RelBin (BinPtr !start) (BinPtr !offset)) = do
+  if (start + offset > sz_r)
+        then panic "seekBinReaderRel: seek out of range"
+        else writeFastMutInt ix_r (start + offset)
+
 writeBinMem :: WriteBinHandle -> FilePath -> IO ()
 writeBinMem (WriteBinMem _ ix_r _ arr_r) fn = do
   h <- openBinaryFile fn WriteMode
@@ -1108,6 +1197,11 @@ instance Binary (Bin a) where
   put_ bh (BinPtr i) = putWord32 bh (fromIntegral i :: Word32)
   get bh = do i <- getWord32 bh; return (BinPtr (fromIntegral (i :: Word32)))
 
+-- Instance uses fixed-width encoding to allow inserting
+-- Bin placeholders in the stream.
+instance Binary (RelBinPtr a) where
+  put_ bh (RelBinPtr i) = put_ bh i
+  get bh = RelBinPtr <$> get bh
 
 -- -----------------------------------------------------------------------------
 -- Forward reading/writing
@@ -1136,7 +1230,7 @@ forwardPut_ :: WriteBinHandle -> (b -> IO a) -> IO b -> IO ()
 forwardPut_ bh put_A put_B = void $ forwardPut bh put_A put_B
 
 -- | Read a value stored using a forward reference
-forwardGet :: ReadBinHandle -> IO a -> IO a
+forwardGet :: HasCallStack => ReadBinHandle -> IO a -> IO a
 forwardGet bh get_A = do
     -- read forward reference
     p <- get bh -- a BinPtr
@@ -1148,6 +1242,45 @@ forwardGet bh get_A = do
     seekBinReader bh p_a
     pure r
 
+
+-- | "forwardPut put_A put_B" outputs A after B but allows A to be read before B
+-- by using a forward reference
+forwardPutRel :: WriteBinHandle -> (b -> IO a) -> IO b -> IO (a,b)
+forwardPutRel bh put_A put_B = do
+  -- write placeholder pointer to A
+  pre_a <- tellBinWriter bh
+  put_ bh pre_a
+
+  -- write B
+  r_b <- put_B
+
+  -- update A's pointer
+  a <- tellBinWriter bh
+  let relBin = toRelBin pre_a a
+  putAt bh pre_a (makeRelativeBin relBin)
+  seekBinNoExpandWriter bh a
+
+  -- write A
+  r_a <- put_A r_b
+  pure (r_a,r_b)
+
+
+forwardPutRel_ :: WriteBinHandle -> (b -> IO a) -> IO b -> IO ()
+forwardPutRel_ bh put_A put_B = void $ forwardPutRel bh put_A put_B
+
+-- | Read a value stored using a forward reference
+forwardGetRel :: ReadBinHandle -> IO a -> IO a
+forwardGetRel bh get_A = do
+    -- read forward reference
+    p <- getRelBin bh
+    -- store current position
+    p_a <- tellBinReader bh
+    -- go read the forward value, then seek back
+    seekBinReader bh $ makeAbsoluteBin p
+    r <- get_A
+    seekBinReader bh p_a
+    pure r
+
 -- -----------------------------------------------------------------------------
 -- Lazy reading/writing
 
@@ -1157,19 +1290,19 @@ lazyPut = lazyPut' put_
 lazyGet :: Binary a => ReadBinHandle -> IO a
 lazyGet = lazyGet' get
 
-lazyPut' :: HasDebugCallStack => (WriteBinHandle -> a -> IO ()) -> WriteBinHandle -> a -> IO ()
+lazyPut' :: (WriteBinHandle -> a -> IO ()) -> WriteBinHandle -> a -> IO ()
 lazyPut' f bh a = do
     -- output the obj with a ptr to skip over it:
     pre_a <- tellBinWriter bh
     put_ bh pre_a       -- save a slot for the ptr
     f bh a           -- dump the object
     q <- tellBinWriter bh     -- q = ptr to after object
-    putAt bh pre_a q    -- fill in slot before a with ptr to q
+    putAt bh pre_a (makeRelativeBin $ toRelBin pre_a q)    -- fill in slot before a with ptr to q
     seekBinWriter bh q        -- finally carry on writing at q
 
 lazyGet' :: HasDebugCallStack => (ReadBinHandle -> IO a) -> ReadBinHandle -> IO a
 lazyGet' f bh = do
-    p <- get bh -- a BinPtr
+    p <- getRelBin bh -- a BinPtr
     p_a <- tellBinReader bh
     a <- unsafeInterleaveIO $ do
         -- NB: Use a fresh rbm_off_r variable in the child thread, for thread
@@ -1178,7 +1311,7 @@ lazyGet' f bh = do
         let bh' = bh { rbm_off_r = off_r }
         seekBinReader bh' p_a
         f bh'
-    seekBinReader bh p -- skip over the object for now
+    seekBinReader bh (makeAbsoluteBin p) -- skip over the object for now
     return a
 
 -- | Serialize the constructor strictly but lazily serialize a value inside a
@@ -1472,13 +1605,13 @@ putGenericSymbolTable gen_sym_tab serialiser bh = do
                 mapM_ (\n -> serialiser bh n) (reverse todo)
                 loop
       snd <$>
-        (forwardPut bh (const $ readFastMutInt symtab_next >>= put_ bh) $
+        (forwardPutRel bh (const $ readFastMutInt symtab_next >>= put_ bh) $
           loop)
 
 -- | Read the elements of a 'GenericSymbolTable' from disk into a 'SymbolTable'.
 getGenericSymbolTable :: forall a . (ReadBinHandle -> IO a) -> ReadBinHandle -> IO (SymbolTable a)
 getGenericSymbolTable deserialiser bh = do
-  sz <- forwardGet bh (get bh) :: IO Int
+  sz <- forwardGetRel bh (get bh) :: IO Int
   mut_arr <- newArray_ (0, sz-1) :: IO (IOArray Int a)
   forM_ [0..(sz-1)] $ \i -> do
     f <- deserialiser bh



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e534e60d2574d7d6603b62b0aa7c58778535effd

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e534e60d2574d7d6603b62b0aa7c58778535effd
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240424/a2353905/attachment-0001.html>


More information about the ghc-commits mailing list