[Git][ghc/ghc][wip/js-staging] 3 commits: Refactor symbol generation.
Sylvain Henry (@hsyl20)
gitlab at gitlab.haskell.org
Mon Oct 17 13:25:16 UTC 2022
Sylvain Henry pushed to branch wip/js-staging at Glasgow Haskell Compiler / GHC
Commits:
e5e5fa36 by Sylvain Henry at 2022-10-17T13:30:43+02:00
Refactor symbol generation.
Avoid concatenating Strings or FastStrings. Concatenate ByteString
instead.
- - - - -
fb2482e8 by Sylvain Henry at 2022-10-17T14:49:37+02:00
Enhance Label symbol generation
- - - - -
660ebb57 by Sylvain Henry at 2022-10-17T15:27:02+02:00
Rename fresh idents with their FastString unique
Reduce by 2x the size of the generated file (on Cabal's Setup).
- - - - -
10 changed files:
- compiler/GHC/StgToJS/CodeGen.hs
- compiler/GHC/StgToJS/Ids.hs
- compiler/GHC/StgToJS/Linker/Linker.hs
- compiler/GHC/StgToJS/Literal.hs
- compiler/GHC/StgToJS/Printer.hs
- compiler/GHC/StgToJS/Profiling.hs
- + compiler/GHC/StgToJS/Symbols.hs
- − compiler/GHC/StgToJS/UnitUtils.hs
- compiler/GHC/Types/Unique.hs
- compiler/ghc.cabal.in
Changes:
=====================================
compiler/GHC/StgToJS/CodeGen.hs
=====================================
@@ -30,7 +30,7 @@ import GHC.StgToJS.Monad
import GHC.StgToJS.Profiling
import GHC.StgToJS.Regs
import GHC.StgToJS.StaticPtr
-import GHC.StgToJS.UnitUtils
+import GHC.StgToJS.Symbols
import GHC.StgToJS.Stack
import GHC.StgToJS.Ids
=====================================
compiler/GHC/StgToJS/Ids.hs
=====================================
@@ -41,7 +41,7 @@ import GHC.Prelude
import GHC.StgToJS.Types
import GHC.StgToJS.Monad
import GHC.StgToJS.CoreUtils
-import GHC.StgToJS.UnitUtils
+import GHC.StgToJS.Symbols
import GHC.JS.Syntax
import GHC.JS.Make
@@ -52,7 +52,6 @@ import GHC.Types.Unique
import GHC.Types.Unique.FM
import GHC.Types.Name
import GHC.Unit.Module
-import GHC.Utils.Encoding (zEncodeString)
import GHC.Data.FastString
import GHC.Data.FastMutInt
@@ -61,6 +60,7 @@ import Control.Monad.IO.Class
import qualified Control.Monad.Trans.State.Strict as State
import qualified Data.Map as M
import Data.Maybe
+import qualified Data.ByteString.Char8 as BSC
-- | Get fresh unique number
freshUnique :: G Int
@@ -77,20 +77,10 @@ freshIdent :: G Ident
freshIdent = do
i <- freshUnique
mod <- State.gets gsModule
- let !name = mkFastString $ mconcat
- [ "h$$"
- , zEncodeString (unitModuleString mod)
- , "_"
- , encodeUnique i
- ]
+ let !name = mkFreshJsSymbol mod i
return (TxtI name)
-
--- | Encode a Unique number as a base-62 String
-encodeUnique :: Int -> String
-encodeUnique = reverse . iToBase62 -- reversed is more compressible
-
-- | Generate unique Ident for the given ID (uncached!)
--
-- The ident has the following forms:
@@ -113,24 +103,35 @@ makeIdentForId i num id_type current_module = TxtI ident
where
exported = isExportedId i
name = getName i
- !ident = mkFastString $ mconcat
- [ "h$"
- , if exported then "" else "$"
- , zEncodeString $ unitModuleString $ case exported of
- True | Just m <- nameModule_maybe name -> m
- _ -> current_module
- , zEncodeString "."
- , zString (zEncodeFS (occNameFS (nameOccName name)))
+ mod
+ | exported
+ , Just m <- nameModule_maybe name
+ = m
+ | otherwise
+ = current_module
+
+ !ident = mkFastStringByteString $ mconcat
+ [ mkJsSymbolBS exported mod (occNameFS (nameOccName name))
+
+ -------------
+ -- suffixes
+
+ -- suffix for Ids represented with more than one JS var ("_0", "_1", etc.)
, case num of
- Nothing -> ""
- Just v -> "_" ++ show v
+ Nothing -> mempty
+ Just v -> mconcat [BSC.pack "_", intBS v]
+
+ -- suffix for entry and constructor entry
, case id_type of
- IdPlain -> ""
- IdEntry -> "_e"
- IdConEntry -> "_con_e"
+ IdPlain -> mempty
+ IdEntry -> BSC.pack "_e"
+ IdConEntry -> BSC.pack "_con_e"
+
+ -- unique suffix for non-exported Ids
, if exported
- then ""
- else "_" ++ encodeUnique (getKey (getUnique i))
+ then mempty
+ else let (c,u) = unpkUnique (getUnique i)
+ in mconcat [BSC.pack ['_',c,'_'], intBS u]
]
-- | Retrieve the cached Ident for the given Id if there is one. Otherwise make
=====================================
compiler/GHC/StgToJS/Linker/Linker.hs
=====================================
@@ -45,7 +45,7 @@ import GHC.StgToJS.Linker.Utils
import GHC.StgToJS.Rts.Rts
import GHC.StgToJS.Object
import GHC.StgToJS.Types hiding (LinkableUnit)
-import GHC.StgToJS.UnitUtils
+import GHC.StgToJS.Symbols
import GHC.StgToJS.Printer
import GHC.StgToJS.Arg
import GHC.StgToJS.Closure
@@ -56,7 +56,6 @@ import GHC.Unit.Home
import GHC.Unit.Types
import GHC.Unit.Module (moduleStableString)
-import GHC.Utils.Encoding
import GHC.Utils.Outputable hiding ((<>))
import GHC.Utils.Panic
import GHC.Utils.Error
@@ -701,16 +700,7 @@ mkExportedFuns :: UnitId -> FastString -> [FastString] -> [ExportedFun]
mkExportedFuns uid mod_name symbols = map mk_fun symbols
where
mod = mkModule (RealUnit (Definite uid)) (mkModuleNameFS mod_name)
- mk_fun sym = ExportedFun mod (LexicalFastString (mkJsSymbol mod sym))
-
--- | Make JS symbol corresponding to the given Haskell symbol in the given
--- module
-mkJsSymbol :: Module -> FastString -> FastString
-mkJsSymbol mod s = mkFastString $ mconcat
- [ "h$"
- , zEncodeString (unitModuleString mod <> ".")
- , zString (zEncodeFS s)
- ]
+ mk_fun sym = ExportedFun mod (LexicalFastString (mkJsSymbol True mod sym))
-- | read all dependency data from the to-be-linked files
loadObjDeps :: [LinkedObj] -- ^ object files to link
=====================================
compiler/GHC/StgToJS/Literal.hs
=====================================
@@ -15,6 +15,7 @@ import GHC.JS.Make
import GHC.StgToJS.Types
import GHC.StgToJS.Monad
import GHC.StgToJS.Ids
+import GHC.StgToJS.Symbols
import GHC.Data.FastString
import GHC.Types.Literal
@@ -59,10 +60,10 @@ genLit = \case
LitDouble r -> return [ toJExpr (r2d r) ]
LitLabel name _size fod
| fod == IsFunction -> return [ ApplExpr (var "h$mkFunctionPtr")
- [var (mkFastString $ "h$" ++ unpackFS name)]
+ [var (mkRawSymbol True name)]
, ValExpr (JInt 0)
]
- | otherwise -> return [ toJExpr (TxtI . mkFastString $ "h$" ++ unpackFS name)
+ | otherwise -> return [ toJExpr (TxtI (mkRawSymbol True name))
, ValExpr (JInt 0)
]
LitRubbish _rep -> return [ null_ ]
@@ -89,7 +90,7 @@ genStaticLit = \case
LitNumBigNat -> panic "genStaticLit: unexpected BigNat that should have been removed in CorePrep"
LitFloat r -> return [ DoubleLit . SaneDouble . r2f $ r ]
LitDouble r -> return [ DoubleLit . SaneDouble . r2d $ r ]
- LitLabel name _size fod -> return [ LabelLit (fod == IsFunction) (mkFastString $ "h$" ++ unpackFS name)
+ LitLabel name _size fod -> return [ LabelLit (fod == IsFunction) (mkRawSymbol True name)
, IntLit 0 ]
l -> pprPanic "genStaticLit" (ppr l)
=====================================
compiler/GHC/StgToJS/Printer.hs
=====================================
@@ -1,4 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MagicHash #-}
-----------------------------------------------------------------------------
-- |
@@ -24,7 +26,8 @@ module GHC.StgToJS.Printer
where
import GHC.Prelude
-
+import GHC.Int
+import GHC.Exts
import GHC.JS.Syntax
import GHC.JS.Ppr
@@ -34,15 +37,59 @@ import GHC.Data.FastString
import GHC.Types.Unique.Map
import Data.List (sortOn)
-import Data.Char (isAlpha,isDigit)
+import Data.Char (isAlpha,isDigit,ord)
+import qualified Data.ByteString.Short as SBS
pretty :: JStat -> Doc
pretty = jsToDocR ghcjsRenderJs
ghcjsRenderJs :: RenderJs
-ghcjsRenderJs = defaultRenderJs { renderJsV = ghcjsRenderJsV
- , renderJsS = ghcjsRenderJsS
- }
+ghcjsRenderJs = defaultRenderJs
+ { renderJsV = ghcjsRenderJsV
+ , renderJsS = ghcjsRenderJsS
+ , renderJsI = ghcjsRenderJsI
+ }
+
+hdd :: SBS.ShortByteString
+hdd = SBS.pack (map (fromIntegral . ord) "h$$")
+
+ghcjsRenderJsI :: RenderJs -> Ident -> Doc
+ghcjsRenderJsI _ (TxtI fs)
+ -- Fresh symbols are prefixed with "h$$". They aren't explicitly referred by
+ -- name in user code, only in compiled code. Hence we can rename them if we do
+ -- it consistently in all the linked code.
+ --
+ -- These symbols are usually very large because their name includes the
+ -- unit-id, the module name, and some unique number. So we rename these
+ -- symbols with a much shorter globally unique number.
+ --
+ -- Here we reuse their FastString unique for this purpose! Note that it only
+ -- works if we pretty-print all the JS code linked together at once, which we
+ -- currently do. GHCJS used to maintain a CompactorState to support
+ -- incremental linking: it contained the mapping between original symbols and
+ -- their renaming.
+ | hdd `SBS.isPrefixOf` fastStringToShortByteString fs
+ , u <- uniqueOfFS fs
+ = text "h$$" <> hexDoc (fromIntegral u)
+ | otherwise
+ = ftext fs
+
+-- | Render as an hexadecimal number in reversed order (because it's faster and we
+-- don't care about the actual value).
+hexDoc :: Word -> Doc
+hexDoc 0 = char '0'
+hexDoc v = text $ go v
+ where
+ sym (I# i) = C# (indexCharOffAddr# chars i)
+ chars = "0123456789abcdef"#
+ go = \case
+ 0 -> []
+ n -> sym (fromIntegral (n .&. 0x0F))
+ : sym (fromIntegral ((n .&. 0xF0) `shiftR` 4))
+ : go (n `shiftR` 8)
+
+
+
-- attempt to resugar some of the common constructs
ghcjsRenderJsS :: RenderJs -> JStat -> Doc
=====================================
compiler/GHC/StgToJS/Profiling.hs
=====================================
@@ -31,7 +31,7 @@ import GHC.JS.Make
import GHC.StgToJS.Regs
import GHC.StgToJS.Types
-import GHC.StgToJS.UnitUtils
+import GHC.StgToJS.Symbols
import GHC.StgToJS.Monad
import GHC.Types.CostCentre
=====================================
compiler/GHC/StgToJS/Symbols.hs
=====================================
@@ -0,0 +1,89 @@
+
+-- | JS symbol generation
+module GHC.StgToJS.Symbols
+ ( moduleGlobalSymbol
+ , moduleExportsSymbol
+ , mkJsSymbol
+ , mkJsSymbolBS
+ , mkFreshJsSymbol
+ , mkRawSymbol
+ , intBS
+ ) where
+
+import GHC.Prelude
+
+import GHC.Data.FastString
+import GHC.Unit.Module
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Char8 as BSC
+import qualified Data.ByteString.Builder as BSB
+import qualified Data.ByteString.Lazy as BSL
+
+-- | Hexadecimal representation of an int
+--
+-- Used for uniques. We could use base-62 as GHC usually does but this is likely
+-- faster.
+intBS :: Int -> ByteString
+intBS = BSL.toStrict . BSB.toLazyByteString . BSB.wordHex . fromIntegral
+
+-- | Return z-encoded unit:module
+unitModuleStringZ :: Module -> ByteString
+unitModuleStringZ mod = mconcat
+ [ fastZStringToByteString (zEncodeFS (unitIdFS (moduleUnitId mod)))
+ , BSC.pack "ZC" -- z-encoding for ":"
+ , fastZStringToByteString (zEncodeFS (moduleNameFS (moduleName mod)))
+ ]
+
+-- | the global linkable unit of a module exports this symbol, depend on it to
+-- include that unit (used for cost centres)
+moduleGlobalSymbol :: Module -> FastString
+moduleGlobalSymbol m = mkFastStringByteString $ mconcat
+ [ hd
+ , unitModuleStringZ m
+ , BSC.pack "_<global>"
+ ]
+
+moduleExportsSymbol :: Module -> FastString
+moduleExportsSymbol m = mkFastStringByteString $ mconcat
+ [ hd
+ , unitModuleStringZ m
+ , BSC.pack "_<exports>"
+ ]
+
+-- | Make JS symbol corresponding to the given Haskell symbol in the given
+-- module
+mkJsSymbolBS :: Bool -> Module -> FastString -> ByteString
+mkJsSymbolBS exported mod s = mconcat
+ [ if exported then hd else hdd
+ , unitModuleStringZ mod
+ , BSC.pack "zi" -- z-encoding of "."
+ , fastZStringToByteString (zEncodeFS s)
+ ]
+
+-- | Make JS symbol corresponding to the given Haskell symbol in the given
+-- module
+mkJsSymbol :: Bool -> Module -> FastString -> FastString
+mkJsSymbol exported mod s = mkFastStringByteString (mkJsSymbolBS exported mod s)
+
+-- | Make JS symbol for given module and unique.
+mkFreshJsSymbol :: Module -> Int -> FastString
+mkFreshJsSymbol mod i = mkFastStringByteString $ mconcat
+ [ hdd
+ , unitModuleStringZ mod
+ , BSC.pack "_"
+ , intBS i
+ ]
+
+-- | Make symbol "h$XYZ" or "h$$XYZ"
+mkRawSymbol :: Bool -> FastString -> FastString
+mkRawSymbol exported fs
+ | exported = mkFastStringByteString $ mconcat [ hd, bytesFS fs ]
+ | otherwise = mkFastStringByteString $ mconcat [ hdd, bytesFS fs ]
+
+-- | "h$$" constant string
+hdd :: ByteString
+hdd = BSC.pack "h$$"
+
+-- | "h$" constant string
+hd :: ByteString
+hd = BSC.take 2 hdd
=====================================
compiler/GHC/StgToJS/UnitUtils.hs deleted
=====================================
@@ -1,36 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-
-module GHC.StgToJS.UnitUtils
- ( unitModuleString
- , moduleGlobalSymbol
- , moduleExportsSymbol
- ) where
-
-import GHC.Prelude
-
-import GHC.Data.FastString
-import GHC.Unit.Module
-import GHC.Utils.Encoding
-
-unitModuleString :: Module -> String
-unitModuleString mod = mconcat
- [ unitIdString (moduleUnitId mod)
- , ":"
- , moduleNameString (moduleName mod)
- ]
-
--- | the global linkable unit of a module exports this symbol, depend on it to
--- include that unit (used for cost centres)
-moduleGlobalSymbol :: Module -> FastString
-moduleGlobalSymbol m = mconcat -- TODO: Is there a better way to concatenate FastStrings?
- [ "h$"
- , mkFastString (zEncodeString $ unitModuleString m)
- , "_<global>"
- ]
-
-moduleExportsSymbol :: Module -> FastString
-moduleExportsSymbol m = mconcat -- TODO: Is there a better way to concatenate FastStrings?
- [ "h$"
- , mkFastString (zEncodeString $ unitModuleString m)
- , "_<exports>"
- ]
=====================================
compiler/GHC/Types/Unique.hs
=====================================
@@ -40,8 +40,6 @@ module GHC.Types.Unique (
nonDetCmpUnique,
isValidKnownKeyUnique,
- iToBase62,
-
-- ** Local uniques
-- | These are exposed exclusively for use by 'GHC.Types.Var.Env.uniqAway', which
-- has rather peculiar needs. See Note [Local uniques].
=====================================
compiler/ghc.cabal.in
=====================================
@@ -659,8 +659,8 @@ Library
GHC.StgToJS.Stack
GHC.StgToJS.StaticPtr
GHC.StgToJS.StgUtils
+ GHC.StgToJS.Symbols
GHC.StgToJS.Types
- GHC.StgToJS.UnitUtils
GHC.StgToJS.Utils
GHC.StgToJS.Linker.Linker
GHC.StgToJS.Linker.Types
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d049e360c124ab632408637e0d30e291b99aa20f...660ebb578b4fce1d9a0ceb6beee36d23c332590e
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d049e360c124ab632408637e0d30e291b99aa20f...660ebb578b4fce1d9a0ceb6beee36d23c332590e
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221017/20cccfd1/attachment-0001.html>
More information about the ghc-commits
mailing list