[Git][ghc/ghc][wip/js-staging] 3 commits: Refactor symbol generation.

Sylvain Henry (@hsyl20) gitlab at gitlab.haskell.org
Mon Oct 17 13:25:16 UTC 2022



Sylvain Henry pushed to branch wip/js-staging at Glasgow Haskell Compiler / GHC


Commits:
e5e5fa36 by Sylvain Henry at 2022-10-17T13:30:43+02:00
Refactor symbol generation.

Avoid concatenating Strings or FastStrings. Concatenate ByteString
instead.

- - - - -
fb2482e8 by Sylvain Henry at 2022-10-17T14:49:37+02:00
Enhance Label symbol generation

- - - - -
660ebb57 by Sylvain Henry at 2022-10-17T15:27:02+02:00
Rename fresh idents with their FastString unique

Reduce by 2x the size of the generated file (on Cabal's Setup).

- - - - -


10 changed files:

- compiler/GHC/StgToJS/CodeGen.hs
- compiler/GHC/StgToJS/Ids.hs
- compiler/GHC/StgToJS/Linker/Linker.hs
- compiler/GHC/StgToJS/Literal.hs
- compiler/GHC/StgToJS/Printer.hs
- compiler/GHC/StgToJS/Profiling.hs
- + compiler/GHC/StgToJS/Symbols.hs
- − compiler/GHC/StgToJS/UnitUtils.hs
- compiler/GHC/Types/Unique.hs
- compiler/ghc.cabal.in


Changes:

=====================================
compiler/GHC/StgToJS/CodeGen.hs
=====================================
@@ -30,7 +30,7 @@ import GHC.StgToJS.Monad
 import GHC.StgToJS.Profiling
 import GHC.StgToJS.Regs
 import GHC.StgToJS.StaticPtr
-import GHC.StgToJS.UnitUtils
+import GHC.StgToJS.Symbols
 import GHC.StgToJS.Stack
 import GHC.StgToJS.Ids
 


=====================================
compiler/GHC/StgToJS/Ids.hs
=====================================
@@ -41,7 +41,7 @@ import GHC.Prelude
 import GHC.StgToJS.Types
 import GHC.StgToJS.Monad
 import GHC.StgToJS.CoreUtils
-import GHC.StgToJS.UnitUtils
+import GHC.StgToJS.Symbols
 
 import GHC.JS.Syntax
 import GHC.JS.Make
@@ -52,7 +52,6 @@ import GHC.Types.Unique
 import GHC.Types.Unique.FM
 import GHC.Types.Name
 import GHC.Unit.Module
-import GHC.Utils.Encoding (zEncodeString)
 import GHC.Data.FastString
 import GHC.Data.FastMutInt
 
@@ -61,6 +60,7 @@ import Control.Monad.IO.Class
 import qualified Control.Monad.Trans.State.Strict as State
 import qualified Data.Map  as M
 import Data.Maybe
+import qualified Data.ByteString.Char8 as BSC
 
 -- | Get fresh unique number
 freshUnique :: G Int
@@ -77,20 +77,10 @@ freshIdent :: G Ident
 freshIdent = do
   i <- freshUnique
   mod <- State.gets gsModule
-  let !name = mkFastString $ mconcat
-                [ "h$$"
-                , zEncodeString (unitModuleString mod)
-                , "_"
-                , encodeUnique i
-                ]
+  let !name = mkFreshJsSymbol mod i
   return (TxtI name)
 
 
-
--- | Encode a Unique number as a base-62 String
-encodeUnique :: Int -> String
-encodeUnique = reverse . iToBase62  -- reversed is more compressible
-
 -- | Generate unique Ident for the given ID (uncached!)
 --
 -- The ident has the following forms:
@@ -113,24 +103,35 @@ makeIdentForId i num id_type current_module = TxtI ident
   where
     exported = isExportedId i
     name     = getName i
-    !ident   = mkFastString $ mconcat
-      [ "h$"
-      , if exported then "" else "$"
-      , zEncodeString $ unitModuleString $ case exported of
-          True | Just m <- nameModule_maybe name -> m
-          _                                      -> current_module
-      , zEncodeString "."
-      , zString (zEncodeFS (occNameFS (nameOccName name)))
+    mod
+      | exported
+      , Just m <- nameModule_maybe name
+      = m
+      | otherwise
+      = current_module
+
+    !ident   = mkFastStringByteString $ mconcat
+      [ mkJsSymbolBS exported mod (occNameFS (nameOccName name))
+
+        -------------
+        -- suffixes
+
+        -- suffix for Ids represented with more than one JS var ("_0", "_1", etc.)
       , case num of
-          Nothing -> ""
-          Just v  -> "_" ++ show v
+          Nothing -> mempty
+          Just v  -> mconcat [BSC.pack "_", intBS v]
+
+        -- suffix for entry and constructor entry
       , case id_type of
-          IdPlain    -> ""
-          IdEntry    -> "_e"
-          IdConEntry -> "_con_e"
+          IdPlain    -> mempty
+          IdEntry    -> BSC.pack "_e"
+          IdConEntry -> BSC.pack "_con_e"
+
+        -- unique suffix for non-exported Ids
       , if exported
-          then ""
-          else "_" ++ encodeUnique (getKey (getUnique i))
+          then mempty
+          else let (c,u) = unpkUnique (getUnique i)
+               in mconcat [BSC.pack ['_',c,'_'], intBS u]
       ]
 
 -- | Retrieve the cached Ident for the given Id if there is one. Otherwise make


=====================================
compiler/GHC/StgToJS/Linker/Linker.hs
=====================================
@@ -45,7 +45,7 @@ import GHC.StgToJS.Linker.Utils
 import GHC.StgToJS.Rts.Rts
 import GHC.StgToJS.Object
 import GHC.StgToJS.Types hiding (LinkableUnit)
-import GHC.StgToJS.UnitUtils
+import GHC.StgToJS.Symbols
 import GHC.StgToJS.Printer
 import GHC.StgToJS.Arg
 import GHC.StgToJS.Closure
@@ -56,7 +56,6 @@ import GHC.Unit.Home
 import GHC.Unit.Types
 import GHC.Unit.Module (moduleStableString)
 
-import GHC.Utils.Encoding
 import GHC.Utils.Outputable hiding ((<>))
 import GHC.Utils.Panic
 import GHC.Utils.Error
@@ -701,16 +700,7 @@ mkExportedFuns :: UnitId -> FastString -> [FastString] -> [ExportedFun]
 mkExportedFuns uid mod_name symbols = map mk_fun symbols
   where
     mod        = mkModule (RealUnit (Definite uid)) (mkModuleNameFS mod_name)
-    mk_fun sym = ExportedFun mod (LexicalFastString (mkJsSymbol mod sym))
-
--- | Make JS symbol corresponding to the given Haskell symbol in the given
--- module
-mkJsSymbol :: Module -> FastString -> FastString
-mkJsSymbol mod s = mkFastString $ mconcat
-  [ "h$"
-  , zEncodeString (unitModuleString mod <> ".")
-  , zString (zEncodeFS s)
-  ]
+    mk_fun sym = ExportedFun mod (LexicalFastString (mkJsSymbol True mod sym))
 
 -- | read all dependency data from the to-be-linked files
 loadObjDeps :: [LinkedObj] -- ^ object files to link


=====================================
compiler/GHC/StgToJS/Literal.hs
=====================================
@@ -15,6 +15,7 @@ import GHC.JS.Make
 import GHC.StgToJS.Types
 import GHC.StgToJS.Monad
 import GHC.StgToJS.Ids
+import GHC.StgToJS.Symbols
 
 import GHC.Data.FastString
 import GHC.Types.Literal
@@ -59,10 +60,10 @@ genLit = \case
   LitDouble r              -> return [ toJExpr (r2d r) ]
   LitLabel name _size fod
     | fod == IsFunction      -> return [ ApplExpr (var "h$mkFunctionPtr")
-                                                  [var (mkFastString $ "h$" ++ unpackFS name)]
+                                                  [var (mkRawSymbol True name)]
                                        , ValExpr (JInt 0)
                                        ]
-    | otherwise              -> return [ toJExpr (TxtI . mkFastString $ "h$" ++ unpackFS name)
+    | otherwise              -> return [ toJExpr (TxtI (mkRawSymbol True name))
                                        , ValExpr (JInt 0)
                                        ]
   LitRubbish _rep -> return [ null_ ]
@@ -89,7 +90,7 @@ genStaticLit = \case
     LitNumBigNat  -> panic "genStaticLit: unexpected BigNat that should have been removed in CorePrep"
   LitFloat r               -> return [ DoubleLit . SaneDouble . r2f $ r ]
   LitDouble r              -> return [ DoubleLit . SaneDouble . r2d $ r ]
-  LitLabel name _size fod  -> return [ LabelLit (fod == IsFunction) (mkFastString $ "h$" ++ unpackFS name)
+  LitLabel name _size fod  -> return [ LabelLit (fod == IsFunction) (mkRawSymbol True name)
                                      , IntLit 0 ]
   l -> pprPanic "genStaticLit" (ppr l)
 


=====================================
compiler/GHC/StgToJS/Printer.hs
=====================================
@@ -1,4 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MagicHash #-}
 
 -----------------------------------------------------------------------------
 -- |
@@ -24,7 +26,8 @@ module GHC.StgToJS.Printer
 where
 
 import GHC.Prelude
-
+import GHC.Int
+import GHC.Exts
 
 import GHC.JS.Syntax
 import GHC.JS.Ppr
@@ -34,15 +37,59 @@ import GHC.Data.FastString
 import GHC.Types.Unique.Map
 
 import Data.List (sortOn)
-import Data.Char (isAlpha,isDigit)
+import Data.Char (isAlpha,isDigit,ord)
+import qualified Data.ByteString.Short as SBS
 
 pretty :: JStat -> Doc
 pretty = jsToDocR ghcjsRenderJs
 
 ghcjsRenderJs :: RenderJs
-ghcjsRenderJs = defaultRenderJs { renderJsV = ghcjsRenderJsV
-                                , renderJsS = ghcjsRenderJsS
-                                }
+ghcjsRenderJs = defaultRenderJs
+  { renderJsV = ghcjsRenderJsV
+  , renderJsS = ghcjsRenderJsS
+  , renderJsI = ghcjsRenderJsI
+  }
+
+hdd :: SBS.ShortByteString
+hdd = SBS.pack (map (fromIntegral . ord) "h$$")
+
+ghcjsRenderJsI :: RenderJs -> Ident -> Doc
+ghcjsRenderJsI _ (TxtI fs)
+  -- Fresh symbols are prefixed with "h$$". They aren't explicitly referred by
+  -- name in user code, only in compiled code. Hence we can rename them if we do
+  -- it consistently in all the linked code.
+  --
+  -- These symbols are usually very large because their name includes the
+  -- unit-id, the module name, and some unique number. So we rename these
+  -- symbols with a much shorter globally unique number.
+  --
+  -- Here we reuse their FastString unique for this purpose! Note that it only
+  -- works if we pretty-print all the JS code linked together at once, which we
+  -- currently do. GHCJS used to maintain a CompactorState to support
+  -- incremental linking: it contained the mapping between original symbols and
+  -- their renaming.
+  | hdd `SBS.isPrefixOf` fastStringToShortByteString fs
+  , u <- uniqueOfFS fs
+  = text "h$$" <> hexDoc (fromIntegral u)
+  | otherwise
+  = ftext fs
+
+-- | Render as an hexadecimal number in reversed order (because it's faster and we
+-- don't care about the actual value).
+hexDoc :: Word -> Doc
+hexDoc 0 = char '0'
+hexDoc v = text $ go v
+  where
+    sym (I# i) = C# (indexCharOffAddr# chars i)
+    chars = "0123456789abcdef"#
+    go = \case
+      0 -> []
+      n -> sym (fromIntegral (n .&. 0x0F))
+           : sym (fromIntegral ((n .&. 0xF0) `shiftR` 4))
+           : go (n `shiftR` 8)
+
+
+
 
 -- attempt to resugar some of the common constructs
 ghcjsRenderJsS :: RenderJs -> JStat -> Doc


=====================================
compiler/GHC/StgToJS/Profiling.hs
=====================================
@@ -31,7 +31,7 @@ import GHC.JS.Make
 
 import GHC.StgToJS.Regs
 import GHC.StgToJS.Types
-import GHC.StgToJS.UnitUtils
+import GHC.StgToJS.Symbols
 import GHC.StgToJS.Monad
 
 import GHC.Types.CostCentre


=====================================
compiler/GHC/StgToJS/Symbols.hs
=====================================
@@ -0,0 +1,89 @@
+
+-- | JS symbol generation
+module GHC.StgToJS.Symbols
+  ( moduleGlobalSymbol
+  , moduleExportsSymbol
+  , mkJsSymbol
+  , mkJsSymbolBS
+  , mkFreshJsSymbol
+  , mkRawSymbol
+  , intBS
+  ) where
+
+import GHC.Prelude
+
+import GHC.Data.FastString
+import GHC.Unit.Module
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Char8   as BSC
+import qualified Data.ByteString.Builder as BSB
+import qualified Data.ByteString.Lazy    as BSL
+
+-- | Hexadecimal representation of an int
+--
+-- Used for uniques. We could use base-62 as GHC usually does but this is likely
+-- faster.
+intBS :: Int -> ByteString
+intBS = BSL.toStrict . BSB.toLazyByteString . BSB.wordHex . fromIntegral
+
+-- | Return z-encoded unit:module
+unitModuleStringZ :: Module -> ByteString
+unitModuleStringZ mod = mconcat
+  [ fastZStringToByteString (zEncodeFS (unitIdFS (moduleUnitId mod)))
+  , BSC.pack "ZC" -- z-encoding for ":"
+  , fastZStringToByteString (zEncodeFS (moduleNameFS (moduleName mod)))
+  ]
+
+-- | the global linkable unit of a module exports this symbol, depend on it to
+--   include that unit (used for cost centres)
+moduleGlobalSymbol :: Module -> FastString
+moduleGlobalSymbol m = mkFastStringByteString $ mconcat
+  [ hd
+  , unitModuleStringZ m
+  , BSC.pack "_<global>"
+  ]
+
+moduleExportsSymbol :: Module -> FastString
+moduleExportsSymbol m = mkFastStringByteString $ mconcat
+  [ hd
+  , unitModuleStringZ m
+  , BSC.pack "_<exports>"
+  ]
+
+-- | Make JS symbol corresponding to the given Haskell symbol in the given
+-- module
+mkJsSymbolBS :: Bool -> Module -> FastString -> ByteString
+mkJsSymbolBS exported mod s = mconcat
+  [ if exported then hd else hdd
+  , unitModuleStringZ mod
+  , BSC.pack "zi" -- z-encoding of "."
+  , fastZStringToByteString (zEncodeFS s)
+  ]
+
+-- | Make JS symbol corresponding to the given Haskell symbol in the given
+-- module
+mkJsSymbol :: Bool -> Module -> FastString -> FastString
+mkJsSymbol exported mod s = mkFastStringByteString (mkJsSymbolBS exported mod s)
+
+-- | Make JS symbol for given module and unique.
+mkFreshJsSymbol :: Module -> Int -> FastString
+mkFreshJsSymbol mod i = mkFastStringByteString $ mconcat
+  [ hdd
+  , unitModuleStringZ mod
+  , BSC.pack "_"
+  , intBS i
+  ]
+
+-- | Make symbol "h$XYZ" or "h$$XYZ"
+mkRawSymbol :: Bool -> FastString -> FastString
+mkRawSymbol exported fs
+  | exported  = mkFastStringByteString $ mconcat [ hd,  bytesFS fs ]
+  | otherwise = mkFastStringByteString $ mconcat [ hdd, bytesFS fs ]
+
+-- | "h$$" constant string
+hdd :: ByteString
+hdd = BSC.pack "h$$"
+
+-- | "h$" constant string
+hd :: ByteString
+hd = BSC.take 2 hdd


=====================================
compiler/GHC/StgToJS/UnitUtils.hs deleted
=====================================
@@ -1,36 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-
-module GHC.StgToJS.UnitUtils
-  ( unitModuleString
-  , moduleGlobalSymbol
-  , moduleExportsSymbol
-  ) where
-
-import GHC.Prelude
-
-import GHC.Data.FastString
-import GHC.Unit.Module
-import GHC.Utils.Encoding
-
-unitModuleString :: Module -> String
-unitModuleString mod = mconcat
-  [ unitIdString (moduleUnitId mod)
-  , ":"
-  , moduleNameString (moduleName mod)
-  ]
-
--- | the global linkable unit of a module exports this symbol, depend on it to
---   include that unit (used for cost centres)
-moduleGlobalSymbol :: Module -> FastString
-moduleGlobalSymbol m = mconcat -- TODO: Is there a better way to concatenate FastStrings?
-  [ "h$"
-  , mkFastString (zEncodeString $ unitModuleString m)
-  , "_<global>"
-  ]
-
-moduleExportsSymbol :: Module -> FastString
-moduleExportsSymbol m = mconcat -- TODO: Is there a better way to concatenate FastStrings?
-  [ "h$"
-  , mkFastString (zEncodeString $ unitModuleString m)
-  , "_<exports>"
-  ]


=====================================
compiler/GHC/Types/Unique.hs
=====================================
@@ -40,8 +40,6 @@ module GHC.Types.Unique (
         nonDetCmpUnique,
         isValidKnownKeyUnique,
 
-        iToBase62,
-
         -- ** Local uniques
         -- | These are exposed exclusively for use by 'GHC.Types.Var.Env.uniqAway', which
         -- has rather peculiar needs. See Note [Local uniques].


=====================================
compiler/ghc.cabal.in
=====================================
@@ -659,8 +659,8 @@ Library
         GHC.StgToJS.Stack
         GHC.StgToJS.StaticPtr
         GHC.StgToJS.StgUtils
+        GHC.StgToJS.Symbols
         GHC.StgToJS.Types
-        GHC.StgToJS.UnitUtils
         GHC.StgToJS.Utils
         GHC.StgToJS.Linker.Linker
         GHC.StgToJS.Linker.Types



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d049e360c124ab632408637e0d30e291b99aa20f...660ebb578b4fce1d9a0ceb6beee36d23c332590e

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d049e360c124ab632408637e0d30e291b99aa20f...660ebb578b4fce1d9a0ceb6beee36d23c332590e
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221017/20cccfd1/attachment-0001.html>


More information about the ghc-commits mailing list