[Git][ghc/ghc][wip/bytecode-serialize] 6 commits: hackage-doc-tarball: Allow ghc-boot-th to be uploaded to hackage

Cheng Shao (@TerrorJack) gitlab at gitlab.haskell.org
Tue Feb 4 01:54:40 UTC 2025



Cheng Shao pushed to branch wip/bytecode-serialize at Glasgow Haskell Compiler / GHC


Commits:
7bfc93a7 by Zubin Duggal at 2025-01-29T21:41:17-05:00
hackage-doc-tarball: Allow ghc-boot-th to be uploaded to hackage

It can't refer to files outside its source directory, so patch that part out.
This is OK because those files are only used while bootstrapping.

Also add ghci to the list of packages to be uploaded

Fixes #25687

- - - - -
704eeb02 by Roman S at 2025-01-29T21:42:05-05:00
Fix Control.Arrow (***) diagram (fixes #25698)
- - - - -
662645f0 by Matthew Pickering at 2025-02-03T11:44:41-05:00
compiler: Always load GHC.Data.FastString optimised into GHCi

The FastString table is shared between the boot compiler and interpreted
compiler. Therefore it's very important the representation of
`FastString` matches in both cases. Otherwise, the interpreter will read
a FastString from the shared variable but place the fields in the wrong
place which leads to segfaults.

Ideally this state would not be shared, but for now we can always
compile both with `-O2` and this leads to a working interpreter.

- - - - -
05e5785a by Peter Trommler at 2025-02-03T11:45:17-05:00
RTS: Fix compile on powerpc64 ELF v1

Cabal does not know about the different ABIs for powerpc64 and compiles
StgCRunAsm.S unconditionally. The old make-based build system excluded
this file from the build and it was OK to signal an error when it was
compiled accidentally.

With this patch we compile StgCRunAsm.S to an empty file, which fixes
the build.

Fixes #25700

- - - - -
5c3cb265 by Cheng Shao at 2025-02-04T01:54:26+00:00
WIP

- - - - -
94e3652f by Cheng Shao at 2025-02-04T01:54:26+00:00
WIP

- - - - -


13 changed files:

- .gitlab/rel_eng/upload_ghc_libs.py
- compiler/GHC/ByteCode/Asm.hs
- + compiler/GHC/ByteCode/Serialize.hs
- compiler/GHC/ByteCode/Types.hs
- compiler/GHC/Data/FastString.hs
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Iface/Binary.hs
- compiler/GHC/Linker/Loader.hs
- compiler/GHC/StgToByteCode.hs
- compiler/GHC/Types/Name.hs
- compiler/ghc.cabal.in
- libraries/ghc-internal/src/GHC/Internal/Control/Arrow.hs
- rts/StgCRunAsm.S


Changes:

=====================================
.gitlab/rel_eng/upload_ghc_libs.py
=====================================
@@ -93,6 +93,11 @@ def prep_ghc():
     build_copy_file(PACKAGES['ghc'], 'GHC/Platform/Constants.hs')
     build_copy_file(PACKAGES['ghc'], 'GHC/Settings/Config.hs')
 
+def prep_ghc_boot_th():
+    # Drop ghc-internal from `hs-source-dirs` as Hackage rejects this
+    modify_file(PACKAGES['ghc-boot-th'], 'ghc-boot-th.cabal',
+                lambda s: s.replace('../ghc-internal/src', ''))
+
 PACKAGES = {
     pkg.name: pkg
     for pkg in [
@@ -105,9 +110,10 @@ PACKAGES = {
         Package('template-haskell', Path("libraries/template-haskell"), no_prep),
         Package('ghc-heap', Path("libraries/ghc-heap"), no_prep),
         Package('ghc-boot', Path("libraries/ghc-boot"), prep_ghc_boot),
-        Package('ghc-boot-th', Path("libraries/ghc-boot-th"), no_prep),
+        Package('ghc-boot-th', Path("libraries/ghc-boot-th"), prep_ghc_boot_th),
         Package('ghc-compact', Path("libraries/ghc-compact"), no_prep),
         Package('ghc', Path("compiler"), prep_ghc),
+        Package('ghci', Path("libraries/ghci"), no_prep),
     ]
 }
 # Dict[str, Package]


=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -52,6 +52,7 @@ import qualified Data.Array.Unboxed as Array
 import Data.Array.Base  ( UArray(..) )
 
 import Foreign hiding (shiftL, shiftR)
+import Data.ByteString  ( ByteString )
 import Data.Char        ( ord )
 import Data.List        ( genericLength )
 import Data.Map.Strict (Map)
@@ -94,7 +95,7 @@ assembleBCOs
   -> Profile
   -> FlatBag (ProtoBCO Name)
   -> [TyCon]
-  -> AddrEnv
+  -> [(Name, ByteString)]
   -> Maybe ModBreaks
   -> [SptEntry]
   -> IO CompiledByteCode


=====================================
compiler/GHC/ByteCode/Serialize.hs
=====================================
@@ -0,0 +1,108 @@
+{-# LANGUAGE RecordWildCards #-}
+{-# OPTIONS_GHC -Wno-orphans -fdefer-typed-holes -Wno-typed-holes #-}
+
+module GHC.ByteCode.Serialize where
+
+import GHC.Prelude
+import GHC.ByteCode.Types
+import GHC.Utils.Binary
+import GHC.Data.FlatBag as FlatBag
+import GHC.Builtin.PrimOps
+import Data.Word
+import GHC.Utils.Panic
+import qualified Data.Binary as Binary
+import GHCi.ResolvedBCO ()
+import qualified Data.ByteString.Lazy as LBS
+import Data.Foldable
+import GHC.Types.Name.Env
+import Control.Monad
+import GHC.Types.Name
+import GHC.Types.SrcLoc
+import GHC.Types.Unique.Supply
+import Data.Proxy
+
+instance Binary CompiledByteCode where
+  get bh = do
+    bc_bcos <- get bh
+    bc_strs_len <- get bh
+    bc_strs <- replicateM bc_strs_len $ (,) <$> getViaSerializableName bh <*> get bh
+    pure CompiledByteCode {bc_bcos, bc_itbls = emptyNameEnv, bc_ffis = [], bc_strs, bc_breaks = Nothing, bc_spt_entries = []}
+
+  put_ bh CompiledByteCode {..} = do
+    put_ bh bc_bcos
+    put_ bh $ length bc_strs
+    for_ bc_strs $ \(nm, str) -> putViaSerializableName bh nm *> put_ bh str
+
+instance Binary UnlinkedBCO where
+  get bh = UnlinkedBCO <$> getViaSerializableName bh <*> get bh <*> (Binary.decode . LBS.fromStrict <$> get bh) <*> (Binary.decode . LBS.fromStrict <$> get bh) <*> get bh <*> get bh
+
+  put_ bh UnlinkedBCO {..} = do
+    putViaSerializableName bh unlinkedBCOName
+    put_ bh unlinkedBCOArity
+    put_ bh $ LBS.toStrict $ Binary.encode unlinkedBCOInstrs
+    put_ bh $ LBS.toStrict $ Binary.encode unlinkedBCOBitmap
+    put_ bh unlinkedBCOLits
+    put_ bh unlinkedBCOPtrs
+
+instance Binary BCOPtr where
+  get bh = do
+    t <- getByte bh
+    case t of
+      0 -> BCOPtrName <$> getViaSerializableName bh
+      1 -> BCOPtrPrimOp <$> get bh
+      2 -> BCOPtrBCO <$> get bh
+      _ -> panic "GHC.ByteCode.Serialize.BCOPtr.get"
+  put_ bh ptr = case ptr of
+    BCOPtrName nm -> putByte bh 0 *> putViaSerializableName bh nm
+    BCOPtrPrimOp op -> putByte bh 1 *> put_ bh op
+    BCOPtrBCO bco -> putByte bh 2 *> put_ bh bco
+    BCOPtrBreakArray {} -> panic "GHC.ByteCode.Serialize.BCOPtr.put"
+
+instance Binary BCONPtr where
+  get bh = do
+    t <- getByte bh
+    case t of
+      0 -> BCONPtrWord . fromIntegral <$> (get bh :: IO Word64)
+      1 -> BCONPtrLbl <$> get bh
+      2 -> BCONPtrItbl <$> getViaSerializableName bh
+      3 -> BCONPtrAddr <$> getViaSerializableName bh
+      4 -> BCONPtrStr <$> get bh
+      _ -> panic "GHC.ByteCode.Serialize.BCONPtr.get"
+
+  put_ bh ptr = case ptr of
+    BCONPtrWord lit -> putByte bh 0 *> put_ bh (fromIntegral lit :: Word64)
+    BCONPtrLbl sym -> putByte bh 1 *> put_ bh sym
+    BCONPtrItbl nm -> putByte bh 2 *> putViaSerializableName bh nm
+    BCONPtrAddr nm -> putByte bh 3 *> putViaSerializableName bh nm
+    BCONPtrStr str -> putByte bh 4 *> put_ bh str
+
+newtype SerializableName = SerializableName { unSerializableName :: Name }
+
+instance Binary SerializableName where
+  get bh =
+    case findUserDataReader Proxy bh of
+      BinaryReader f -> f bh
+
+  put_ bh nm =
+    case findUserDataWriter Proxy bh of
+      BinaryWriter f -> f bh nm
+
+getViaSerializableName :: ReadBinHandle -> IO Name
+getViaSerializableName bh = unSerializableName <$> get bh
+
+putViaSerializableName :: WriteBinHandle -> Name -> IO ()
+putViaSerializableName bh nm = put_ bh $ SerializableName nm
+
+addSerializableNameWriter :: WriteBinHandle -> IO WriteBinHandle
+addSerializableNameWriter = undefined
+
+instance Binary PrimOp where
+  get bh = (allThePrimOps !!) <$> get bh
+  put_ bh = put_ bh . primOpTag
+
+instance Binary a => Binary (FlatBag a) where
+  get bh = do
+    xs <- get bh
+    pure $ FlatBag.fromList (fromIntegral $ length xs) xs
+
+  put_ bh = put_ bh . FlatBag.elemsFlatBag


=====================================
compiler/GHC/ByteCode/Types.hs
=====================================
@@ -64,7 +64,7 @@ data CompiledByteCode = CompiledByteCode
   , bc_ffis   :: [FFIInfo]
     -- ^ ffi blocks we allocated
 
-  , bc_strs   :: AddrEnv
+  , bc_strs   :: [(Name, ByteString)]
     -- ^ top-level strings (heap allocated)
 
   , bc_breaks :: Maybe ModBreaks
@@ -89,7 +89,7 @@ seqCompiledByteCode CompiledByteCode{..} =
   rnf bc_bcos `seq`
   seqEltsNameEnv rnf bc_itbls `seq`
   rnf bc_ffis `seq`
-  seqEltsNameEnv rnf bc_strs `seq`
+  rnf bc_strs `seq`
   rnf (fmap seqModBreaks bc_breaks)
 
 newtype ByteOff = ByteOff Int


=====================================
compiler/GHC/Data/FastString.hs
=====================================
@@ -2,10 +2,19 @@
 {-# LANGUAGE MagicHash #-}
 {-# LANGUAGE UnboxedTuples #-}
 {-# LANGUAGE UnliftedFFITypes #-}
+{-# LANGUAGE CPP #-}
 
 {-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
+#if MIN_VERSION_GLASGOW_HASKELL(9,8,0,0)
+{-# OPTIONS_GHC -fno-unoptimized-core-for-interpreter #-}
+#endif
 -- We always optimise this, otherwise performance of a non-optimised
 -- compiler is severely affected
+--
+-- Also important, if you load this module into GHCi then the data representation of
+-- FastString has to match that of the host compiler due to the shared FastString
+-- table. Otherwise you will get segfaults when the table is consulted and the fields
+-- from the FastString are in an incorrect order.
 
 -- |
 -- There are two principal string types used internally by GHC:


=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -2747,7 +2747,8 @@ hscCompileCoreExpr' hsc_env srcspan ds_expr = do
   -- The id has to be exported for the JS backend. This isn't required for the
   -- byte-code interpreter but it does no harm to always do it.
   u <- uniqFromTag 'I'
-  let binding_name = mkSystemVarName u (fsLit ("BCO_toplevel"))
+  let this_mod = mkInteractiveModule (show u)
+  let binding_name = mkExternalName u this_mod (mkVarOcc ("BCO_toplevel")) noSrcSpan
   let binding_id   = mkExportedVanillaId binding_name (exprType simpl_expr)
 
   {- Tidy it (temporary, until coreSat does cloning) -}
@@ -2781,7 +2782,6 @@ hscCompileCoreExpr' hsc_env srcspan ds_expr = do
   -- guaranteed.
   --
   -- We reuse the unique we obtained for the binding, but any unique would do.
-  let this_mod = mkInteractiveModule (show u)
   let for_bytecode = True
 
   (stg_binds_with_deps, _prov_map, _collected_ccs, _stg_cg_infos) <-


=====================================
compiler/GHC/Iface/Binary.hs
=====================================
@@ -43,7 +43,6 @@ import GHC.Types.Unique.FM
 import GHC.Utils.Panic
 import GHC.Utils.Binary as Binary
 import GHC.Data.FastMutInt
-import GHC.Data.FastString (FastString)
 import GHC.Types.Unique
 import GHC.Utils.Outputable
 import GHC.Types.Name.Cache
@@ -321,18 +320,7 @@ putWithTables compressionLevel bh' put_payload = do
   (ifaceType_wt, ifaceTypeWriter) <- initWriteIfaceType compressionLevel
 
   -- Initialise the 'WriterUserData'.
-  let writerUserData = mkWriterUserData
-        [ mkSomeBinaryWriter @FastString fsWriter
-        , mkSomeBinaryWriter @Name nameWriter
-        -- We sometimes serialise binding and non-binding names differently, but
-        -- not during 'ModIface' serialisation. Here, we serialise both to the same
-        -- deduplication table.
-        --
-        -- See Note [Binary UserData]
-        , mkSomeBinaryWriter @BindingName  $ mkWriter (\bh name -> putEntry nameWriter bh (getBindingName name))
-        , mkSomeBinaryWriter @IfaceType ifaceTypeWriter
-        ]
-  let bh = setWriterUserData bh' writerUserData
+  let bh = foldl' (&) bh' [addWriterToUserData fsWriter, addWriterToUserData nameWriter, addWriterToUserData $ mkWriter (\bh name -> putEntry nameWriter bh (getBindingName name)), addWriterToUserData ifaceTypeWriter]
 
   ([fs_count, name_count, ifacetype_count] , r) <-
     -- The order of these entries matters!


=====================================
compiler/GHC/Linker/Loader.hs
=====================================
@@ -97,6 +97,7 @@ import GHC.Linker.Types
 -- Standard libraries
 import Control.Monad
 
+import Data.ByteString (ByteString)
 import qualified Data.Set as Set
 import Data.Char (isSpace)
 import qualified Data.Foldable as Foldable
@@ -723,8 +724,9 @@ loadDecls interp hsc_env span linkable = do
         else do
           -- Link the expression itself
           let le  = linker_env pls
-              le2 = le { itbl_env = foldl' (\acc cbc -> plusNameEnv acc (bc_itbls cbc)) (itbl_env le) cbcs
-                       , addr_env = foldl' (\acc cbc -> plusNameEnv acc (bc_strs cbc)) (addr_env le) cbcs }
+          le2_addr_env <- foldlM (\env cbc -> allocateTopStrings interp (bc_strs cbc) env) (addr_env le) cbcs
+          let le2 = le { itbl_env = foldl' (\acc cbc -> plusNameEnv acc (bc_itbls cbc)) (itbl_env le) cbcs
+                       , addr_env = le2_addr_env }
 
           -- Link the necessary packages and linkables
           new_bindings <- linkSomeBCOs interp (pkgs_loaded pls) le2 cbcs
@@ -947,8 +949,8 @@ dynLinkBCOs interp pls bcos = do
 
             le1 = linker_env pls
             ie2 = foldr plusNameEnv (itbl_env le1) (map bc_itbls cbcs)
-            ae2 = foldr plusNameEnv (addr_env le1) (map bc_strs cbcs)
-            le2 = le1 { itbl_env = ie2, addr_env = ae2 }
+        ae2 <- foldlM (\env cbc -> allocateTopStrings interp (bc_strs cbc) env) (addr_env le1) cbcs
+        let le2 = le1 { itbl_env = ie2, addr_env = ae2 }
 
         names_and_refs <- linkSomeBCOs interp (pkgs_loaded pls) le2 cbcs
 
@@ -1649,3 +1651,17 @@ maybePutStr logger s = maybePutSDoc logger (text s)
 
 maybePutStrLn :: Logger -> String -> IO ()
 maybePutStrLn logger s = maybePutSDoc logger (text s <> text "\n")
+
+-- | see Note [Generating code for top-level string literal bindings]
+allocateTopStrings
+  :: Interp
+  -> [(Name, ByteString)]
+  -> AddrEnv
+  -> IO AddrEnv
+allocateTopStrings interp topStrings prev_env =
+  do
+  let !(bndrs, strings) = unzip topStrings
+  ptrs <- interpCmd interp $ MallocStrings strings
+  return $ extendNameEnvList prev_env (zipWith mk_entry bndrs ptrs)
+  where
+    mk_entry nm ptr = (nm, (nm, AddrPtr ptr))


=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -66,7 +66,6 @@ import GHC.Data.Bitmap
 import GHC.Data.FlatBag as FlatBag
 import GHC.Data.OrdList
 import GHC.Data.Maybe
-import GHC.Types.Name.Env (mkNameEnv)
 import GHC.Types.Tickish
 import GHC.Types.SptEntry
 
@@ -81,7 +80,6 @@ import GHC.Unit.Home.PackageTable (lookupHpt)
 
 import Data.Array
 import Data.Coerce (coerce)
-import Data.ByteString (ByteString)
 #if MIN_VERSION_rts(1,0,3)
 import qualified Data.ByteString.Char8 as BS
 #endif
@@ -96,6 +94,12 @@ import Data.Either ( partitionEithers )
 import GHC.Stg.Syntax
 import qualified Data.IntSet as IntSet
 import GHC.CoreToIface
+import GHC.Types.Name.Env
+import GHC.Utils.Binary
+import GHC.Iface.Binary
+import GHC.ByteCode.Serialize ()
+import GHC.Utils.TmpFs
+import System.FilePath
 
 -- -----------------------------------------------------------------------------
 -- Generating byte code for a complete module
@@ -117,10 +121,9 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks spt_entries
                 bnd <- binds
                 case bnd of
                   StgTopLifted bnd      -> [Right bnd]
-                  StgTopStringLit b str -> [Left (b, str)]
+                  StgTopStringLit b str -> [Left (idName b, str)]
             flattenBind (StgNonRec b e) = [(b,e)]
             flattenBind (StgRec bs)     = bs
-        stringPtrs <- allocateTopStrings interp strings
 
         (BcM_State{..}, proto_bcos) <-
            runBc hsc_env this_mod mb_modBreaks $ do
@@ -137,7 +140,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks spt_entries
         let mod_breaks = case modBreaks of
              Nothing -> Nothing
              Just mb -> Just mb{ modBreaks_breakInfo = breakInfo }
-        cbc <- assembleBCOs interp profile proto_bcos tycs stringPtrs mod_breaks spt_entries
+        cbc <- assembleBCOs interp profile proto_bcos tycs strings mod_breaks spt_entries
 
         -- Squash space leaks in the CompiledByteCode.  This is really
         -- important, because when loading a set of modules into GHCi
@@ -147,26 +150,26 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks spt_entries
         -- modules.
         evaluate (seqCompiledByteCode cbc)
 
-        return cbc
+        cbc' <- case cbc of
+          CompiledByteCode{bc_itbls, bc_ffis = [], bc_breaks = Nothing, bc_spt_entries = []} | isEmptyNameEnv bc_itbls ->
+            withSystemTempDirectory "foo" $ \tmpdir ->
+            do
+              let fn = tmpdir </> "bar"
+              appendFile "/tmp/test.log" "1"
+              bh0 <- openBinMem (1024*1024)
+              putWithUserData QuietBinIFace NormalCompression bh0 cbc
+              writeBinMem bh0 fn
+              bh1 <- readBinMem fn
+              getWithUserData (hsc_NC hsc_env) bh1
+          _ -> pure cbc
+
+        return cbc'
 
   where dflags  = hsc_dflags hsc_env
         logger  = hsc_logger hsc_env
         interp  = hscInterp hsc_env
         profile = targetProfile dflags
 
--- | see Note [Generating code for top-level string literal bindings]
-allocateTopStrings
-  :: Interp
-  -> [(Id, ByteString)]
-  -> IO AddrEnv
-allocateTopStrings interp topStrings = do
-  let !(bndrs, strings) = unzip topStrings
-  ptrs <- interpCmd interp $ MallocStrings strings
-  return $ mkNameEnv (zipWith mk_entry bndrs ptrs)
-  where
-    mk_entry bndr ptr = let nm = getName bndr
-                        in (nm, (nm, AddrPtr ptr))
-
 {- Note [Generating code for top-level string literal bindings]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 As described in Note [Compilation plan for top-level string literals]


=====================================
compiler/GHC/Types/Name.hs
=====================================
@@ -51,7 +51,7 @@ module GHC.Types.Name (
         mkExternalName, mkWiredInName,
 
         -- ** Manipulating and deconstructing 'Name's
-        nameUnique, setNameUnique,
+        nameUnique, setNameExternalModule, setNameUnique,
         nameOccName, nameNameSpace, nameModule, nameModule_maybe,
         setNameLoc,
         tidyNameOcc,
@@ -563,6 +563,9 @@ mkFCallName :: Unique -> FastString -> Name
 mkFCallName uniq str = mkInternalName uniq (mkVarOccFS str) noSrcSpan
    -- The encoded string completely describes the ccall
 
+setNameExternalModule :: Name -> Module -> Name
+setNameExternalModule name mod = name {n_sort = External mod}
+
 -- When we renumber/rename things, we need to be
 -- able to change a Name's Unique to match the cached
 -- one in the thing it's the name of.  If you know what I mean.


=====================================
compiler/ghc.cabal.in
=====================================
@@ -226,6 +226,7 @@ Library
         GHC.ByteCode.InfoTable
         GHC.ByteCode.Instr
         GHC.ByteCode.Linker
+        GHC.ByteCode.Serialize
         GHC.ByteCode.Types
         GHC.Cmm
         GHC.Cmm.BlockId


=====================================
libraries/ghc-internal/src/GHC/Internal/Control/Arrow.hs
=====================================
@@ -131,10 +131,10 @@ class Category a => Arrow a where
     --   The default definition may be overridden with a more efficient
     --   version if desired.
     --
-    -- >   b ╭─────╮ b'
+    -- >   b ╭─────╮ c
     -- > >───┼─ f ─┼───>
     -- > >───┼─ g ─┼───>
-    -- >   c ╰─────╯ c'
+    -- >   b'╰─────╯ c'
     (***) :: a b c -> a b' c' -> a (b,b') (c,c')
     f *** g = first f >>> arr swap >>> first g >>> arr swap
       where swap ~(x,y) = (y,x)


=====================================
rts/StgCRunAsm.S
=====================================
@@ -69,7 +69,7 @@ StgReturn:
 
 	.section	.note.GNU-stack,"", at progbits
 # else // Not ELF v2
-# error Only ELF v2 supported.
+       // ELF v1 is in StgCrun.c
 # endif
 
 #elif defined(powerpc_HOST_ARCH)



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d5a7ece3fb0c2734f9829a1f567adc49c871b905...94e3652f700fcdfd2356b6954f46318e9159d601

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d5a7ece3fb0c2734f9829a1f567adc49c871b905...94e3652f700fcdfd2356b6954f46318e9159d601
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250203/3546aedb/attachment-0001.html>


More information about the ghc-commits mailing list