[Git][ghc/ghc][wip/bytecode-serialize] 3 commits: compiler: make FFIInfo serializable in BCO

Cheng Shao (@TerrorJack) gitlab at gitlab.haskell.org
Sat Feb 15 05:54:16 UTC 2025



Cheng Shao pushed to branch wip/bytecode-serialize at Glasgow Haskell Compiler / GHC


Commits:
2e388adb by Cheng Shao at 2025-02-15T05:53:54+00:00
compiler: make FFIInfo serializable in BCO

- - - - -
b9e49a6a by Cheng Shao at 2025-02-15T05:53:58+00:00
compiler: make SptEntry serializable

- - - - -
e29d68ba by Cheng Shao at 2025-02-15T05:53:58+00:00
WIP: ghc bytecode serialization logic

- - - - -


12 changed files:

- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/ByteCode/Linker.hs
- + compiler/GHC/ByteCode/Serialize.hs
- compiler/GHC/ByteCode/Types.hs
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Iface/Tidy/StaticPtrTable.hs
- compiler/GHC/StgToByteCode.hs
- compiler/GHC/StgToJS/StaticPtr.hs
- compiler/GHC/Types/SptEntry.hs
- compiler/ghc.cabal.in
- testsuite/tests/bytecode/T22376/all.T


Changes:

=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -468,7 +468,7 @@ assembleI platform i = case i of
   ENTER                    -> emit bci_ENTER []
   RETURN rep               -> emit (return_non_tuple rep) []
   RETURN_TUPLE             -> emit bci_RETURN_T []
-  CCALL off m_addr i       -> do np <- addr m_addr
+  CCALL off ffi i          -> do np <- lit [BCONPtrFFIInfo ffi]
                                  emit bci_CCALL [wOp off, Op np, SmallOp i]
   PRIMCALL                 -> emit bci_PRIMCALL []
   BRK_FUN arr tick_mod tickx info_mod infox cc ->


=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -15,7 +15,6 @@ import GHC.Prelude
 
 import GHC.ByteCode.Types
 import GHCi.RemoteTypes
-import GHCi.FFI (C_ffi_cif)
 import GHC.StgToCmm.Layout     ( ArgRep(..) )
 import GHC.Utils.Outputable
 import GHC.Types.Name
@@ -201,7 +200,7 @@ data BCInstr
 
    -- For doing calls to C (via glue code generated by libffi)
    | CCALL            !WordOff  -- stack frame size
-                      (RemotePtr C_ffi_cif) -- addr of the glue code
+                      !FFIInfo  -- libffi ffi_cif function prototype
                       !Word16   -- flags.
                                 --
                                 -- 0x1: call is interruptible
@@ -382,9 +381,9 @@ instance Outputable BCInstr where
    ppr (TESTEQ_P  i lab)     = text "TESTEQ_P" <+> ppr i <+> text "__" <> ppr lab
    ppr CASEFAIL              = text "CASEFAIL"
    ppr (JMP lab)             = text "JMP"      <+> ppr lab
-   ppr (CCALL off marshal_addr flags) = text "CCALL   " <+> ppr off
+   ppr (CCALL off ffi flags) = text "CCALL   " <+> ppr off
                                                 <+> text "marshal code at"
-                                               <+> text (show marshal_addr)
+                                               <+> text (show ffi)
                                                <+> (case flags of
                                                       0x1 -> text "(interruptible)"
                                                       0x2 -> text "(unsafe)"


=====================================
compiler/GHC/ByteCode/Linker.hs
=====================================
@@ -3,6 +3,7 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings     #-}
 {-# LANGUAGE DataKinds             #-}
+{-# LANGUAGE RecordWildCards       #-}
 {-# OPTIONS_GHC -optc-DNON_POSIX_SOURCE #-}
 --
 --  (c) The University of Glasgow 2002-2006
@@ -86,6 +87,9 @@ lookupLiteral interp pkgs_loaded le ptr = case ptr of
   BCONPtrStr bs -> do
     RemotePtr p <- fmap head $ interpCmd interp $ MallocStrings [bs]
     pure $ fromIntegral p
+  BCONPtrFFIInfo (FFIInfo {..}) -> do
+    RemotePtr p <- interpCmd interp $ PrepFFI ffiInfoArgs ffiInfoRet
+    pure $ fromIntegral p
 
 lookupStaticPtr :: Interp -> FastString -> IO (Ptr ())
 lookupStaticPtr interp addr_of_label_string = do


=====================================
compiler/GHC/ByteCode/Serialize.hs
=====================================
@@ -0,0 +1,268 @@
+{-# LANGUAGE MultiWayIf #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# OPTIONS_GHC -Wno-orphans #-}
+
+module GHC.ByteCode.Serialize where
+
+import Control.Exception
+import Control.Monad
+import Data.Binary qualified as Binary
+import Data.ByteString.Lazy qualified as LBS
+import Data.Foldable
+import Data.IORef
+import Data.Proxy
+import Data.Word
+import GHC.Builtin.PrimOps
+import GHC.ByteCode.Types
+import GHC.Data.FlatBag as FlatBag
+import GHC.Driver.Env.Types
+import GHC.Iface.Binary
+import GHC.Prelude
+import GHC.Types.Name
+import GHC.Types.Name.Cache
+import GHC.Types.SptEntry
+import GHC.Types.SrcLoc
+import GHC.Utils.Binary
+import GHC.Utils.Panic
+import GHCi.FFI
+import GHCi.Message
+
+import System.FilePath
+import GHC.Utils.TmpFs
+
+unsupportedBCO :: UnlinkedBCO -> Bool
+unsupportedBCO UnlinkedBCO {..} = any w unlinkedBCOPtrs
+  where
+    w BCOPtrBreakArray {} = True
+    w (BCOPtrBCO bco) = unsupportedBCO bco
+    w _ = False
+
+testBinByteCode :: HscEnv -> CompiledByteCode -> IO CompiledByteCode
+testBinByteCode hsc_env cbc at CompiledByteCode {..}
+  | Just _ <- bc_breaks = evaluate cbc
+  | any unsupportedBCO bc_bcos = evaluate cbc
+  | otherwise = withSystemTempDirectory "ghc-bbc" $ \tmpdir -> do
+  appendFile "/tmp/ghc-bbc.log" "1"
+  let f = tmpdir </> "ghc-bbc"
+  roundtripBinByteCode hsc_env f cbc
+
+roundtripBinByteCode :: HscEnv -> FilePath -> CompiledByteCode -> IO CompiledByteCode
+roundtripBinByteCode hsc_env f cbc = do
+  writeBinByteCode f cbc
+  readBinByteCode hsc_env f
+
+readBinByteCode :: HscEnv -> FilePath -> IO CompiledByteCode
+readBinByteCode hsc_env f = do
+  bh' <- readBinMem f
+  bh <- addSerializableNameReader hsc_env bh'
+  getWithUserData (hsc_NC hsc_env) bh
+
+writeBinByteCode :: FilePath -> CompiledByteCode -> IO ()
+writeBinByteCode f cbc = do
+  bh' <- openBinMem (1024*1024)
+  bh <- addSerializableNameWriter bh'
+  putWithUserData QuietBinIFace NormalCompression bh cbc
+  writeBinMem bh f
+
+instance Binary CompiledByteCode where
+  get bh = do
+    bc_bcos <- get bh
+    bc_itbls_len <- get bh
+    bc_itbls <- replicateM bc_itbls_len $ do
+      nm <- getViaSerializableName bh
+      msg <-
+        MkConInfoTable
+          <$> get bh
+          <*> get bh
+          <*> get bh
+          <*> get bh
+          <*> get bh
+          <*> get bh
+      evaluate (nm, msg)
+    bc_strs_len <- get bh
+    bc_strs <-
+      replicateM bc_strs_len
+        $ (,)
+        <$> getViaSerializableName bh
+        <*> get bh
+    bc_spt_entries <- get bh
+    evaluate
+      CompiledByteCode
+        { bc_bcos,
+          bc_itbls,
+          bc_strs,
+          bc_breaks = Nothing,
+          bc_spt_entries
+        }
+
+  put_ bh CompiledByteCode {..} = do
+    put_ bh bc_bcos
+    put_ bh $ length bc_itbls
+    for_ bc_itbls
+      $ \(nm, MkConInfoTable tntc ptrs nptrs conNo tag descr) -> do
+        putViaSerializableName bh nm
+        put_ bh tntc
+        put_ bh ptrs
+        put_ bh nptrs
+        put_ bh conNo
+        put_ bh tag
+        put_ bh descr
+    put_ bh $ length bc_strs
+    for_ bc_strs
+      $ \(nm, str) -> putViaSerializableName bh nm *> put_ bh str
+    put_ bh bc_spt_entries
+
+instance Binary UnlinkedBCO where
+  get bh =
+    UnlinkedBCO
+      <$> getViaSerializableName bh
+      <*> get bh
+      <*> (Binary.decode . LBS.fromStrict <$> get bh)
+      <*> (Binary.decode . LBS.fromStrict <$> get bh)
+      <*> get bh
+      <*> get bh
+
+  put_ bh UnlinkedBCO {..} = do
+    putViaSerializableName bh unlinkedBCOName
+    put_ bh unlinkedBCOArity
+    put_ bh $ LBS.toStrict $ Binary.encode unlinkedBCOInstrs
+    put_ bh $ LBS.toStrict $ Binary.encode unlinkedBCOBitmap
+    put_ bh unlinkedBCOLits
+    put_ bh unlinkedBCOPtrs
+
+instance Binary BCOPtr where
+  get bh = do
+    t <- getByte bh
+    case t of
+      0 -> BCOPtrName <$> getViaSerializableName bh
+      1 -> BCOPtrPrimOp <$> get bh
+      2 -> BCOPtrBCO <$> get bh
+      _ -> panic "GHC.ByteCode.Serialize.BCOPtr.get"
+
+  put_ bh ptr = case ptr of
+    BCOPtrName nm -> putByte bh 0 *> putViaSerializableName bh nm
+    BCOPtrPrimOp op -> putByte bh 1 *> put_ bh op
+    BCOPtrBCO bco -> putByte bh 2 *> put_ bh bco
+    BCOPtrBreakArray {} -> panic "GHC.ByteCode.Serialize.BCOPtr.put"
+
+instance Binary BCONPtr where
+  get bh = do
+    t <- getByte bh
+    case t of
+      0 -> BCONPtrWord . fromIntegral <$> (get bh :: IO Word64)
+      1 -> BCONPtrLbl <$> get bh
+      2 -> BCONPtrItbl <$> getViaSerializableName bh
+      3 -> BCONPtrAddr <$> getViaSerializableName bh
+      4 -> BCONPtrStr <$> get bh
+      5 -> BCONPtrFFIInfo <$> get bh
+      _ -> panic "GHC.ByteCode.Serialize.BCONPtr.get"
+
+  put_ bh ptr = case ptr of
+    BCONPtrWord lit -> putByte bh 0 *> put_ bh (fromIntegral lit :: Word64)
+    BCONPtrLbl sym -> putByte bh 1 *> put_ bh sym
+    BCONPtrItbl nm -> putByte bh 2 *> putViaSerializableName bh nm
+    BCONPtrAddr nm -> putByte bh 3 *> putViaSerializableName bh nm
+    BCONPtrStr str -> putByte bh 4 *> put_ bh str
+    BCONPtrFFIInfo ffi -> putByte bh 5 *> put_ bh ffi
+
+instance Binary SptEntry where
+  get bh = SptEntry <$> getViaSerializableName bh <*> get bh
+
+  put_ bh (SptEntry nm fp) = putViaSerializableName bh nm *> put_ bh fp
+
+newtype SerializableName = SerializableName {unSerializableName :: Name}
+
+getViaSerializableName :: ReadBinHandle -> IO Name
+getViaSerializableName bh = case findUserDataReader Proxy bh of
+  BinaryReader f -> unSerializableName <$> f bh
+
+putViaSerializableName :: WriteBinHandle -> Name -> IO ()
+putViaSerializableName bh nm = case findUserDataWriter Proxy bh of
+  BinaryWriter f -> f bh $ SerializableName nm
+
+addSerializableNameWriter :: WriteBinHandle -> IO WriteBinHandle
+addSerializableNameWriter bh' =
+  evaluate
+    $ flip addWriterToUserData bh'
+    $ BinaryWriter
+    $ \bh
+       ( SerializableName
+           nm
+         ) ->
+        if
+          | isExternalName nm -> do
+              putByte bh 0
+              put_ bh nm
+          | otherwise -> do
+              putByte bh 1
+              put_ bh $ occName nm
+
+addSerializableNameReader :: HscEnv -> ReadBinHandle -> IO ReadBinHandle
+addSerializableNameReader HscEnv {..} bh' = do
+  nc <- evaluate hsc_NC
+  env_ref <- newIORef emptyOccEnv
+  evaluate $ flip addReaderToUserData bh' $ BinaryReader $ \bh -> do
+    t <- getByte bh
+    case t of
+      0 -> do
+        nm <- get bh
+        evaluate $ SerializableName nm
+      1 -> do
+        occ <- get bh
+        u <- takeUniqFromNameCache nc
+        nm' <- evaluate $ mkInternalName u occ noSrcSpan
+        fmap SerializableName
+          $ atomicModifyIORef' env_ref
+          $ \env -> case lookupOccEnv env occ of
+            Just nm -> (env, nm)
+            _ -> (extendOccEnv env occ nm', nm')
+      _ -> panic "GHC.ByteCode.Serialize.addSerializableNameReader"
+
+instance Binary PrimOp where
+  get bh = (allThePrimOps !!) <$> get bh
+
+  put_ bh = put_ bh . primOpTag
+
+instance Binary FFIInfo where
+  get bh = FFIInfo <$> get bh <*> get bh
+
+  put_ bh FFIInfo {..} = put_ bh ffiInfoArgs *> put_ bh ffiInfoRet
+
+instance Binary FFIType where
+  get bh = do
+    t <- getByte bh
+    evaluate $ case t of
+      0 -> FFIVoid
+      1 -> FFIPointer
+      2 -> FFIFloat
+      3 -> FFIDouble
+      4 -> FFISInt8
+      5 -> FFISInt16
+      6 -> FFISInt32
+      7 -> FFISInt64
+      8 -> FFIUInt8
+      9 -> FFIUInt16
+      10 -> FFIUInt32
+      11 -> FFIUInt64
+      _ -> panic "GHC.ByteCode.Serialize.FFIType.get"
+
+  put_ bh t = putByte bh $ case t of
+    FFIVoid -> 0
+    FFIPointer -> 1
+    FFIFloat -> 2
+    FFIDouble -> 3
+    FFISInt8 -> 4
+    FFISInt16 -> 5
+    FFISInt32 -> 6
+    FFISInt64 -> 7
+    FFIUInt8 -> 8
+    FFIUInt16 -> 9
+    FFIUInt32 -> 10
+    FFIUInt64 -> 11
+
+instance (Binary a) => Binary (FlatBag a) where
+  get bh = do
+    xs <- get bh
+    evaluate $ FlatBag.fromList (fromIntegral $ length xs) xs
+
+  put_ bh = put_ bh . FlatBag.elemsFlatBag


=====================================
compiler/GHC/ByteCode/Types.hs
=====================================
@@ -74,8 +74,8 @@ data CompiledByteCode = CompiledByteCode
     -- "GHC.Iface.Tidy.StaticPtrTable".
   }
                 -- ToDo: we're not tracking strings that we malloc'd
-newtype FFIInfo = FFIInfo (RemotePtr C_ffi_cif)
-  deriving (Show, NFData)
+data FFIInfo = FFIInfo { ffiInfoArgs :: ![FFIType], ffiInfoRet :: !FFIType }
+  deriving (Show)
 
 instance Outputable CompiledByteCode where
   ppr CompiledByteCode{..} = ppr $ elemsFlatBag bc_bcos
@@ -198,6 +198,8 @@ data BCONPtr
   | BCONPtrAddr  !Name
   -- | A top-level string literal.
   | BCONPtrStr   !ByteString
+  -- | A libffi ffi_cif function prototype.
+  | BCONPtrFFIInfo !FFIInfo
 
 instance NFData BCONPtr where
   rnf x = x `seq` ()


=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -303,6 +303,8 @@ import Data.Bifunctor
 import qualified GHC.Unit.Home.Graph as HUG
 import GHC.Unit.Home.PackageTable
 
+import GHC.ByteCode.Serialize
+
 {- **********************************************************************
 %*                                                                      *
                 Initialisation
@@ -2187,7 +2189,8 @@ generateByteCode :: HscEnv
   -> ModLocation
   -> IO (CompiledByteCode, [FilePath])
 generateByteCode hsc_env cgguts mod_location = do
-  (hasStub, comp_bc) <- hscInteractive hsc_env cgguts mod_location
+  (hasStub, comp_bc') <- hscInteractive hsc_env cgguts mod_location
+  comp_bc <- testBinByteCode hsc_env comp_bc'
   compile_for_interpreter hsc_env $ \ i_env -> do
     stub_o <- traverse (compileForeign i_env LangC) hasStub
     foreign_files_o <- traverse (uncurry (compileForeign i_env)) (cgi_foreign_files cgguts)
@@ -2567,7 +2570,7 @@ hscAddSptEntries hsc_env entries = do
     let add_spt_entry :: SptEntry -> IO ()
         add_spt_entry (SptEntry i fpr) = do
             -- These are only names from the current module
-            (val, _, _) <- loadName interp hsc_env (idName i)
+            (val, _, _) <- loadName interp hsc_env i
             addSptEntry interp fpr val
     mapM_ add_spt_entry entries
 


=====================================
compiler/GHC/Iface/Tidy/StaticPtrTable.hs
=====================================
@@ -144,6 +144,7 @@ import GHC.Utils.Outputable as Outputable
 import GHC.Linker.Types
 
 import GHC.Types.Id
+import GHC.Types.Id.Info
 import GHC.Types.ForeignStubs
 import GHC.Data.Maybe
 import GHC.Data.FastString
@@ -205,7 +206,7 @@ sptCreateStaticBinds opts this_mod binds = do
         Nothing      -> return (Nothing, (b, e))
         Just (_, t, info, arg) -> do
           (fp, e') <- mkStaticBind t info arg
-          return (Just (SptEntry b fp), (b, foldr Lam e' tvs))
+          return (Just (SptEntry (idName b) fp), (b, foldr Lam e' tvs))
 
     mkStaticBind :: Type -> CoreExpr -> CoreExpr
                  -> StateT Int IO (Fingerprint, CoreExpr)
@@ -257,11 +258,11 @@ sptModuleInitCode platform this_mod entries
         [  text "static StgWord64 k" <> int i <> text "[2] = "
            <> pprFingerprint fp <> semi
         $$ text "extern StgPtr "
-           <> (pprCLabel platform $ mkClosureLabel (idName n) (idCafInfo n)) <> semi
+           <> (pprCLabel platform $ mkClosureLabel n MayHaveCafRefs) <> semi
         $$ text "hs_spt_insert" <> parens
              (hcat $ punctuate comma
                 [ char 'k' <> int i
-                , char '&' <> pprCLabel platform (mkClosureLabel (idName n) (idCafInfo n))
+                , char '&' <> pprCLabel platform (mkClosureLabel n MayHaveCafRefs)
                 ]
              )
         <> semi


=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -1682,12 +1682,10 @@ generateCCall d0 s p (CCallSpec target _ safety) result_ty args
 
      let ffires = primRepToFFIType platform r_rep
          ffiargs = map (primRepToFFIType platform) a_reps
-     interp <- hscInterp <$> getHscEnv
-     token <- ioToBc $ interpCmd interp (PrepFFI ffiargs ffires)
 
      let
          -- do the call
-         do_call      = unitOL (CCALL stk_offset token flags)
+         do_call      = unitOL (CCALL stk_offset (FFIInfo ffiargs ffires) flags)
            where flags = case safety of
                            PlaySafe          -> 0x0
                            PlayInterruptible -> 0x1


=====================================
compiler/GHC/StgToJS/StaticPtr.hs
=====================================
@@ -6,8 +6,10 @@ module GHC.StgToJS.StaticPtr
 where
 
 import GHC.Prelude
+import GHC.Builtin.Types
 import GHC.Linker.Types (SptEntry(..))
 import GHC.Fingerprint.Type
+import GHC.Types.Id
 import GHC.Types.Literal
 
 import GHC.JS.JStg.Syntax
@@ -21,8 +23,8 @@ import GHC.StgToJS.Types
 initStaticPtrs :: [SptEntry] -> G JStgStat
 initStaticPtrs ptrs = mconcat <$> mapM initStatic ptrs
   where
-    initStatic (SptEntry sp_id (Fingerprint w1 w2)) = do
-      i <- varForId sp_id
+    initStatic (SptEntry sp_nm (Fingerprint w1 w2)) = do
+      i <- varForId $ mkVanillaGlobal sp_nm anyTy
       fpa <- concat <$> mapM (genLit . mkLitWord64 . fromIntegral) [w1,w2]
       let sptInsert = ApplStat hdHsSptInsert (fpa ++ [i])
       return $ (hdInitStatic .^ "push") `ApplStat` [Func [] sptInsert]


=====================================
compiler/GHC/Types/SptEntry.hs
=====================================
@@ -3,15 +3,13 @@ module GHC.Types.SptEntry
   )
 where
 
-import GHC.Types.Var           ( Id )
 import GHC.Fingerprint.Type    ( Fingerprint )
+import GHC.Types.Name
 import GHC.Utils.Outputable
 
 -- | An entry to be inserted into a module's static pointer table.
 -- See Note [Grand plan for static forms] in "GHC.Iface.Tidy.StaticPtrTable".
-data SptEntry = SptEntry Id Fingerprint
+data SptEntry = SptEntry !Name !Fingerprint
 
 instance Outputable SptEntry where
   ppr (SptEntry id fpr) = ppr id <> colon <+> ppr fpr
-
-


=====================================
compiler/ghc.cabal.in
=====================================
@@ -226,6 +226,7 @@ Library
         GHC.ByteCode.InfoTable
         GHC.ByteCode.Instr
         GHC.ByteCode.Linker
+        GHC.ByteCode.Serialize
         GHC.ByteCode.Types
         GHC.Cmm
         GHC.Cmm.BlockId


=====================================
testsuite/tests/bytecode/T22376/all.T
=====================================
@@ -1,2 +1,2 @@
-test('T22376', [req_interp, extra_files(['A.hs', 'B.hs'])], multimod_compile_and_run,
+test('T22376', [extra_files(['A.hs', 'B.hs'])], multimod_compile_and_run,
      ['T22376', '-O1 -fwrite-if-simplified-core -fbyte-code-and-object-code -fprefer-byte-code'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/cf1c2f6c5d9b2b90ea5f235453b0a71ae2412a73...e29d68bac5fa98d94d425affee3ae00a05350fa2

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/cf1c2f6c5d9b2b90ea5f235453b0a71ae2412a73...e29d68bac5fa98d94d425affee3ae00a05350fa2
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250215/cd32c0ee/attachment-0001.html>


More information about the ghc-commits mailing list