[Git][ghc/ghc][wip/bytecode-serialize] WIP: ghc bytecode serialization logic
Cheng Shao (@TerrorJack)
gitlab at gitlab.haskell.org
Sat Feb 15 05:49:43 UTC 2025
Cheng Shao pushed to branch wip/bytecode-serialize at Glasgow Haskell Compiler / GHC
Commits:
cf1c2f6c by Cheng Shao at 2025-02-15T05:49:25+00:00
WIP: ghc bytecode serialization logic
- - - - -
3 changed files:
- + compiler/GHC/ByteCode/Serialize.hs
- compiler/GHC/Driver/Main.hs
- compiler/ghc.cabal.in
Changes:
=====================================
compiler/GHC/ByteCode/Serialize.hs
=====================================
@@ -0,0 +1,268 @@
+{-# LANGUAGE MultiWayIf #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# OPTIONS_GHC -Wno-orphans #-}
+
+module GHC.ByteCode.Serialize where
+
+import Control.Exception
+import Control.Monad
+import Data.Binary qualified as Binary
+import Data.ByteString.Lazy qualified as LBS
+import Data.Foldable
+import Data.IORef
+import Data.Proxy
+import Data.Word
+import GHC.Builtin.PrimOps
+import GHC.ByteCode.Types
+import GHC.Data.FlatBag as FlatBag
+import GHC.Driver.Env.Types
+import GHC.Iface.Binary
+import GHC.Prelude
+import GHC.Types.Name
+import GHC.Types.Name.Cache
+import GHC.Types.SptEntry
+import GHC.Types.SrcLoc
+import GHC.Utils.Binary
+import GHC.Utils.Panic
+import GHCi.FFI
+import GHCi.Message
+
+import System.FilePath
+import GHC.Utils.TmpFs
+
+unsupportedBCO :: UnlinkedBCO -> Bool
+unsupportedBCO UnlinkedBCO {..} = any w unlinkedBCOPtrs
+ where
+ w BCOPtrBreakArray {} = True
+ w (BCOPtrBCO bco) = unsupportedBCO bco
+ w _ = False
+
+testBinByteCode :: HscEnv -> CompiledByteCode -> IO CompiledByteCode
+testBinByteCode hsc_env cbc at CompiledByteCode {..}
+ | Just _ <- bc_breaks = evaluate cbc
+ | any unsupportedBCO bc_bcos = evaluate cbc
+ | otherwise = withSystemTempDirectory "ghc-bbc" $ \tmpdir -> do
+ appendFile "/tmp/ghc-bbc.log" "1"
+ let f = tmpdir </> "ghc-bbc"
+ roundtripBinByteCode hsc_env f cbc
+
+roundtripBinByteCode :: HscEnv -> FilePath -> CompiledByteCode -> IO CompiledByteCode
+roundtripBinByteCode hsc_env f cbc = do
+ writeBinByteCode f cbc
+ readBinByteCode hsc_env f
+
+readBinByteCode :: HscEnv -> FilePath -> IO CompiledByteCode
+readBinByteCode hsc_env f = do
+ bh' <- readBinMem f
+ bh <- addSerializableNameReader hsc_env bh'
+ getWithUserData (hsc_NC hsc_env) bh
+
+writeBinByteCode :: FilePath -> CompiledByteCode -> IO ()
+writeBinByteCode f cbc = do
+ bh' <- openBinMem (1024*1024)
+ bh <- addSerializableNameWriter bh'
+ putWithUserData QuietBinIFace NormalCompression bh cbc
+ writeBinMem bh f
+
+instance Binary CompiledByteCode where
+ get bh = do
+ bc_bcos <- get bh
+ bc_itbls_len <- get bh
+ bc_itbls <- replicateM bc_itbls_len $ do
+ nm <- getViaSerializableName bh
+ msg <-
+ MkConInfoTable
+ <$> get bh
+ <*> get bh
+ <*> get bh
+ <*> get bh
+ <*> get bh
+ <*> get bh
+ evaluate (nm, msg)
+ bc_strs_len <- get bh
+ bc_strs <-
+ replicateM bc_strs_len
+ $ (,)
+ <$> getViaSerializableName bh
+ <*> get bh
+ bc_spt_entries <- get bh
+ evaluate
+ CompiledByteCode
+ { bc_bcos,
+ bc_itbls,
+ bc_strs,
+ bc_breaks = Nothing,
+ bc_spt_entries
+ }
+
+ put_ bh CompiledByteCode {..} = do
+ put_ bh bc_bcos
+ put_ bh $ length bc_itbls
+ for_ bc_itbls
+ $ \(nm, MkConInfoTable tntc ptrs nptrs conNo tag descr) -> do
+ putViaSerializableName bh nm
+ put_ bh tntc
+ put_ bh ptrs
+ put_ bh nptrs
+ put_ bh conNo
+ put_ bh tag
+ put_ bh descr
+ put_ bh $ length bc_strs
+ for_ bc_strs
+ $ \(nm, str) -> putViaSerializableName bh nm *> put_ bh str
+ put_ bh bc_spt_entries
+
+instance Binary UnlinkedBCO where
+ get bh =
+ UnlinkedBCO
+ <$> getViaSerializableName bh
+ <*> get bh
+ <*> (Binary.decode . LBS.fromStrict <$> get bh)
+ <*> (Binary.decode . LBS.fromStrict <$> get bh)
+ <*> get bh
+ <*> get bh
+
+ put_ bh UnlinkedBCO {..} = do
+ putViaSerializableName bh unlinkedBCOName
+ put_ bh unlinkedBCOArity
+ put_ bh $ LBS.toStrict $ Binary.encode unlinkedBCOInstrs
+ put_ bh $ LBS.toStrict $ Binary.encode unlinkedBCOBitmap
+ put_ bh unlinkedBCOLits
+ put_ bh unlinkedBCOPtrs
+
+instance Binary BCOPtr where
+ get bh = do
+ t <- getByte bh
+ case t of
+ 0 -> BCOPtrName <$> getViaSerializableName bh
+ 1 -> BCOPtrPrimOp <$> get bh
+ 2 -> BCOPtrBCO <$> get bh
+ _ -> panic "GHC.ByteCode.Serialize.BCOPtr.get"
+
+ put_ bh ptr = case ptr of
+ BCOPtrName nm -> putByte bh 0 *> putViaSerializableName bh nm
+ BCOPtrPrimOp op -> putByte bh 1 *> put_ bh op
+ BCOPtrBCO bco -> putByte bh 2 *> put_ bh bco
+ BCOPtrBreakArray {} -> panic "GHC.ByteCode.Serialize.BCOPtr.put"
+
+instance Binary BCONPtr where
+ get bh = do
+ t <- getByte bh
+ case t of
+ 0 -> BCONPtrWord . fromIntegral <$> (get bh :: IO Word64)
+ 1 -> BCONPtrLbl <$> get bh
+ 2 -> BCONPtrItbl <$> getViaSerializableName bh
+ 3 -> BCONPtrAddr <$> getViaSerializableName bh
+ 4 -> BCONPtrStr <$> get bh
+ 5 -> BCONPtrFFIInfo <$> get bh
+ _ -> panic "GHC.ByteCode.Serialize.BCONPtr.get"
+
+ put_ bh ptr = case ptr of
+ BCONPtrWord lit -> putByte bh 0 *> put_ bh (fromIntegral lit :: Word64)
+ BCONPtrLbl sym -> putByte bh 1 *> put_ bh sym
+ BCONPtrItbl nm -> putByte bh 2 *> putViaSerializableName bh nm
+ BCONPtrAddr nm -> putByte bh 3 *> putViaSerializableName bh nm
+ BCONPtrStr str -> putByte bh 4 *> put_ bh str
+ BCONPtrFFIInfo ffi -> putByte bh 5 *> put_ bh ffi
+
+instance Binary SptEntry where
+ get bh = SptEntry <$> getViaSerializableName bh <*> get bh
+
+ put_ bh (SptEntry nm fp) = putViaSerializableName bh nm *> put_ bh fp
+
+newtype SerializableName = SerializableName {unSerializableName :: Name}
+
+getViaSerializableName :: ReadBinHandle -> IO Name
+getViaSerializableName bh = case findUserDataReader Proxy bh of
+ BinaryReader f -> unSerializableName <$> f bh
+
+putViaSerializableName :: WriteBinHandle -> Name -> IO ()
+putViaSerializableName bh nm = case findUserDataWriter Proxy bh of
+ BinaryWriter f -> f bh $ SerializableName nm
+
+addSerializableNameWriter :: WriteBinHandle -> IO WriteBinHandle
+addSerializableNameWriter bh' =
+ evaluate
+ $ flip addWriterToUserData bh'
+ $ BinaryWriter
+ $ \bh
+ ( SerializableName
+ nm
+ ) ->
+ if
+ | isExternalName nm -> do
+ putByte bh 0
+ put_ bh nm
+ | otherwise -> do
+ putByte bh 1
+ put_ bh $ occName nm
+
+addSerializableNameReader :: HscEnv -> ReadBinHandle -> IO ReadBinHandle
+addSerializableNameReader HscEnv {..} bh' = do
+ nc <- evaluate hsc_NC
+ env_ref <- newIORef emptyOccEnv
+ evaluate $ flip addReaderToUserData bh' $ BinaryReader $ \bh -> do
+ t <- getByte bh
+ case t of
+ 0 -> do
+ nm <- get bh
+ evaluate $ SerializableName nm
+ 1 -> do
+ occ <- get bh
+ u <- takeUniqFromNameCache nc
+ nm' <- evaluate $ mkInternalName u occ noSrcSpan
+ fmap SerializableName
+ $ atomicModifyIORef' env_ref
+ $ \env -> case lookupOccEnv env occ of
+ Just nm -> (env, nm)
+ _ -> (extendOccEnv env occ nm', nm')
+ _ -> panic "GHC.ByteCode.Serialize.addSerializableNameReader"
+
+instance Binary PrimOp where
+ get bh = (allThePrimOps !!) <$> get bh
+
+ put_ bh = put_ bh . primOpTag
+
+instance Binary FFIInfo where
+ get bh = FFIInfo <$> get bh <*> get bh
+
+ put_ bh FFIInfo {..} = put_ bh ffiInfoArgs *> put_ bh ffiInfoRet
+
+instance Binary FFIType where
+ get bh = do
+ t <- getByte bh
+ evaluate $ case t of
+ 0 -> FFIVoid
+ 1 -> FFIPointer
+ 2 -> FFIFloat
+ 3 -> FFIDouble
+ 4 -> FFISInt8
+ 5 -> FFISInt16
+ 6 -> FFISInt32
+ 7 -> FFISInt64
+ 8 -> FFIUInt8
+ 9 -> FFIUInt16
+ 10 -> FFIUInt32
+ 11 -> FFIUInt64
+ _ -> panic "GHC.ByteCode.Serialize.FFIType.get"
+
+ put_ bh t = putByte bh $ case t of
+ FFIVoid -> 0
+ FFIPointer -> 1
+ FFIFloat -> 2
+ FFIDouble -> 3
+ FFISInt8 -> 4
+ FFISInt16 -> 5
+ FFISInt32 -> 6
+ FFISInt64 -> 7
+ FFIUInt8 -> 8
+ FFIUInt16 -> 9
+ FFIUInt32 -> 10
+ FFIUInt64 -> 11
+
+instance (Binary a) => Binary (FlatBag a) where
+ get bh = do
+ xs <- get bh
+ evaluate $ FlatBag.fromList (fromIntegral $ length xs) xs
+
+ put_ bh = put_ bh . FlatBag.elemsFlatBag
=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -303,6 +303,8 @@ import Data.Bifunctor
import qualified GHC.Unit.Home.Graph as HUG
import GHC.Unit.Home.PackageTable
+import GHC.ByteCode.Serialize
+
{- **********************************************************************
%* *
Initialisation
@@ -2187,7 +2189,8 @@ generateByteCode :: HscEnv
-> ModLocation
-> IO (CompiledByteCode, [FilePath])
generateByteCode hsc_env cgguts mod_location = do
- (hasStub, comp_bc) <- hscInteractive hsc_env cgguts mod_location
+ (hasStub, comp_bc') <- hscInteractive hsc_env cgguts mod_location
+ comp_bc <- testBinByteCode hsc_env comp_bc'
compile_for_interpreter hsc_env $ \ i_env -> do
stub_o <- traverse (compileForeign i_env LangC) hasStub
foreign_files_o <- traverse (uncurry (compileForeign i_env)) (cgi_foreign_files cgguts)
=====================================
compiler/ghc.cabal.in
=====================================
@@ -226,6 +226,7 @@ Library
GHC.ByteCode.InfoTable
GHC.ByteCode.Instr
GHC.ByteCode.Linker
+ GHC.ByteCode.Serialize
GHC.ByteCode.Types
GHC.Cmm
GHC.Cmm.BlockId
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/cf1c2f6c5d9b2b90ea5f235453b0a71ae2412a73
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/cf1c2f6c5d9b2b90ea5f235453b0a71ae2412a73
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250215/378a55fd/attachment-0001.html>
More information about the ghc-commits
mailing list