[Git][ghc/ghc][wip/T23479] JS: Re-add optimization for literal strings in genApp (fixes #23479)
Serge S. Gulin (@gulin.serge)
gitlab at gitlab.haskell.org
Tue Oct 15 06:36:22 UTC 2024
Serge S. Gulin pushed to branch wip/T23479 at Glasgow Haskell Compiler / GHC
Commits:
0cc2c9e9 by Serge S. Gulin at 2024-10-15T09:35:51+03:00
JS: Re-add optimization for literal strings in genApp (fixes #23479)
Based on https://gitlab.haskell.org/ghc/ghc/-/merge_requests/10588/
Co-authored-by: Sylvain Henry <sylvain at haskus.fr>
Co-authored-by: Andrei Borzenkov <root at sandwitch.dev>
Co-authored-by: Danil Berestov <goosedb at yandex.ru>
-------------------------
Metric Decrease:
size_hello_artifact
size_hello_unicode
-------------------------
- - - - -
24 changed files:
- compiler/GHC/Builtin/Names.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/StgToJS/Apply.hs
- compiler/GHC/StgToJS/CodeGen.hs
- compiler/GHC/StgToJS/Linker/Linker.hs
- compiler/GHC/StgToJS/Literal.hs
- compiler/GHC/StgToJS/Monad.hs
- + compiler/GHC/StgToJS/Sinker/Collect.hs
- compiler/GHC/StgToJS/Sinker.hs → compiler/GHC/StgToJS/Sinker/Sinker.hs
- + compiler/GHC/StgToJS/Sinker/StringsUnfloat.hs
- compiler/GHC/StgToJS/Symbols.hs
- compiler/GHC/StgToJS/Types.hs
- compiler/GHC/Types/ForeignCall.hs
- compiler/ghc.cabal.in
- docs/users_guide/debugging.rst
- testsuite/tests/javascript/Makefile
- + testsuite/tests/javascript/T23479_1.hs
- + testsuite/tests/javascript/T23479_1.stdout
- + testsuite/tests/javascript/T23479_2.hs
- + testsuite/tests/javascript/T23479_2.stdout
- testsuite/tests/javascript/T24495.hs
- testsuite/tests/javascript/T24495.stdout
- testsuite/tests/javascript/all.T
Changes:
=====================================
compiler/GHC/Builtin/Names.hs
=====================================
@@ -522,6 +522,8 @@ basicKnownKeyNames
, unsafeEqualityTyConName
, unsafeReflDataConName
, unsafeCoercePrimName
+
+ , unsafeUnpackJSStringUtf8ShShName
]
genericTyConNames :: [Name]
@@ -590,7 +592,8 @@ gHC_INTERNAL_BASE, gHC_INTERNAL_ENUM,
gHC_INTERNAL_ARROW, gHC_INTERNAL_DESUGAR, gHC_INTERNAL_RANDOM, gHC_INTERNAL_EXTS, gHC_INTERNAL_IS_LIST,
gHC_INTERNAL_CONTROL_EXCEPTION_BASE, gHC_INTERNAL_TYPEERROR, gHC_INTERNAL_TYPELITS, gHC_INTERNAL_TYPELITS_INTERNAL,
gHC_INTERNAL_TYPENATS, gHC_INTERNAL_TYPENATS_INTERNAL,
- gHC_INTERNAL_DATA_COERCE, gHC_INTERNAL_DEBUG_TRACE, gHC_INTERNAL_UNSAFE_COERCE, gHC_INTERNAL_FOREIGN_C_CONSTPTR :: Module
+ gHC_INTERNAL_DATA_COERCE, gHC_INTERNAL_DEBUG_TRACE, gHC_INTERNAL_UNSAFE_COERCE, gHC_INTERNAL_FOREIGN_C_CONSTPTR,
+ gHC_INTERNAL_JS_PRIM, gHC_INTERNAL_WASM_PRIM_TYPES :: Module
gHC_INTERNAL_BASE = mkGhcInternalModule (fsLit "GHC.Internal.Base")
gHC_INTERNAL_ENUM = mkGhcInternalModule (fsLit "GHC.Internal.Enum")
gHC_INTERNAL_GHCI = mkGhcInternalModule (fsLit "GHC.Internal.GHCi")
@@ -633,7 +636,7 @@ gHC_INTERNAL_RANDOM = mkGhcInternalModule (fsLit "GHC.Internal.S
gHC_INTERNAL_EXTS = mkGhcInternalModule (fsLit "GHC.Internal.Exts")
gHC_INTERNAL_IS_LIST = mkGhcInternalModule (fsLit "GHC.Internal.IsList")
gHC_INTERNAL_CONTROL_EXCEPTION_BASE = mkGhcInternalModule (fsLit "GHC.Internal.Control.Exception.Base")
-gHC_INTERNAL_EXCEPTION_CONTEXT = mkGhcInternalModule (fsLit "GHC.Internal.Exception.Context")
+gHC_INTERNAL_EXCEPTION_CONTEXT = mkGhcInternalModule (fsLit "GHC.Internal.Exception.Context")
gHC_INTERNAL_GENERICS = mkGhcInternalModule (fsLit "GHC.Internal.Generics")
gHC_INTERNAL_TYPEERROR = mkGhcInternalModule (fsLit "GHC.Internal.TypeError")
gHC_INTERNAL_TYPELITS = mkGhcInternalModule (fsLit "GHC.Internal.TypeLits")
@@ -644,6 +647,8 @@ gHC_INTERNAL_DATA_COERCE = mkGhcInternalModule (fsLit "GHC.Internal.D
gHC_INTERNAL_DEBUG_TRACE = mkGhcInternalModule (fsLit "GHC.Internal.Debug.Trace")
gHC_INTERNAL_UNSAFE_COERCE = mkGhcInternalModule (fsLit "GHC.Internal.Unsafe.Coerce")
gHC_INTERNAL_FOREIGN_C_CONSTPTR = mkGhcInternalModule (fsLit "GHC.Internal.Foreign.C.ConstPtr")
+gHC_INTERNAL_JS_PRIM = mkGhcInternalModule (fsLit "GHC.Internal.JS.Prim")
+gHC_INTERNAL_WASM_PRIM_TYPES = mkGhcInternalModule (fsLit "GHC.Internal.Wasm.Prim.Types")
gHC_INTERNAL_SRCLOC :: Module
gHC_INTERNAL_SRCLOC = mkGhcInternalModule (fsLit "GHC.Internal.SrcLoc")
@@ -1676,7 +1681,10 @@ constPtrConName =
tcQual gHC_INTERNAL_FOREIGN_C_CONSTPTR (fsLit "ConstPtr") constPtrTyConKey
jsvalTyConName :: Name
-jsvalTyConName = tcQual (mkGhcInternalModule (fsLit "GHC.Internal.Wasm.Prim.Types")) (fsLit "JSVal") jsvalTyConKey
+jsvalTyConName = tcQual gHC_INTERNAL_WASM_PRIM_TYPES (fsLit "JSVal") jsvalTyConKey
+
+unsafeUnpackJSStringUtf8ShShName :: Name
+unsafeUnpackJSStringUtf8ShShName = varQual gHC_INTERNAL_JS_PRIM (fsLit "unsafeUnpackJSStringUtf8##") unsafeUnpackJSStringUtf8ShShKey
{-
************************************************************************
@@ -2082,6 +2090,7 @@ typeSymbolKindConNameKey, typeCharKindConNameKey,
, typeNatLogTyFamNameKey
, typeConsSymbolTyFamNameKey, typeUnconsSymbolTyFamNameKey
, typeCharToNatTyFamNameKey, typeNatToCharTyFamNameKey
+ , exceptionContextTyConKey, unsafeUnpackJSStringUtf8ShShKey
:: Unique
typeSymbolKindConNameKey = mkPreludeTyConUnique 400
typeCharKindConNameKey = mkPreludeTyConUnique 401
@@ -2104,9 +2113,10 @@ constPtrTyConKey = mkPreludeTyConUnique 417
jsvalTyConKey = mkPreludeTyConUnique 418
-exceptionContextTyConKey :: Unique
exceptionContextTyConKey = mkPreludeTyConUnique 420
+unsafeUnpackJSStringUtf8ShShKey = mkPreludeMiscIdUnique 805
+
{-
************************************************************************
* *
=====================================
compiler/GHC/Driver/Flags.hs
=====================================
@@ -470,6 +470,7 @@ data DumpFlag
| Opt_D_dump_stg_cg -- ^ STG (after stg2stg)
| Opt_D_dump_stg_tags -- ^ Result of tag inference analysis.
| Opt_D_dump_stg_final -- ^ Final STG (before cmm gen)
+ | Opt_D_dump_stg_from_js_sinker -- ^ STG after JS sinker
| Opt_D_dump_call_arity
| Opt_D_dump_exitify
| Opt_D_dump_dmdanal
=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -1517,6 +1517,8 @@ dynamic_flags_deps = [
"Use `-ddump-stg-from-core` or `-ddump-stg-final` instead"
, make_ord_flag defGhcFlag "ddump-stg-tags"
(setDumpFlag Opt_D_dump_stg_tags)
+ , make_ord_flag defGhcFlag "ddump-stg-from-js-sinker"
+ (setDumpFlag Opt_D_dump_stg_from_js_sinker)
, make_ord_flag defGhcFlag "ddump-call-arity"
(setDumpFlag Opt_D_dump_call_arity)
, make_ord_flag defGhcFlag "ddump-exitify"
=====================================
compiler/GHC/StgToJS/Apply.hs
=====================================
@@ -1,6 +1,7 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BlockArguments #-}
+{-# LANGUAGE ViewPatterns #-}
-----------------------------------------------------------------------------
-- |
@@ -46,11 +47,13 @@ import GHC.StgToJS.Stack
import GHC.StgToJS.Symbols
import GHC.StgToJS.Types
import GHC.StgToJS.Utils
+import GHC.StgToJS.Linker.Utils (decodeModifiedUTF8)
import GHC.Types.Id
import GHC.Types.Id.Info
import GHC.Types.CostCentre
import GHC.Types.RepType (mightBeFunTy)
+import GHC.Types.Literal
import GHC.Stg.Syntax
@@ -86,7 +89,6 @@ rtsApply cfg = jBlock
, moveRegs2
]
-
-- | Generate an application of some args to an Id.
--
-- The case where args is null is common as it's used to generate the evaluation
@@ -98,6 +100,51 @@ genApp
-> [StgArg]
-> G (JStgStat, ExprResult)
genApp ctx i args
+ -- Test case T23479_2
+ -- See: https://github.com/ghcjs/ghcjs/blob/b7711fbca7c3f43a61f1dba526e6f2a2656ef44c/src/Gen2/Generator.hs#L876
+ -- Comment by Luite Stegeman <luite.stegeman at iohk.io>
+ -- Special cases for JSString literals.
+ -- We could handle unpackNBytes# here, but that's probably not common
+ -- enough to warrant a special case.
+ -- See: https://gitlab.haskell.org/ghc/ghc/-/merge_requests/10588/#note_503978
+ -- Comment by Jeffrey Young <jeffrey.young at iohk.io>
+ -- We detect if the Id is unsafeUnpackJSStringUtf8## applied to a string literal,
+ -- if so then we convert the unsafeUnpack to a call to h$decode.
+ | [StgVarArg v] <- args
+ , idName i == unsafeUnpackJSStringUtf8ShShName
+ -- See: https://gitlab.haskell.org/ghc/ghc/-/merge_requests/10588
+ -- Comment by Josh Meredith <josh.meredith at iohk.io>
+ -- `typex_expr` can throw an error for certain bindings so it's important
+ -- that this condition comes after matching on the function name
+ , [top] <- concatMap typex_expr (ctxTarget ctx)
+ = (,ExprInline) . (|=) top . app hdDecodeUtf8Z <$> varsForId v
+
+ -- Test case T23479_1
+ | [StgLitArg (LitString bs)] <- args
+ , Just d <- decodeModifiedUTF8 bs
+ , idName i == unsafeUnpackJSStringUtf8ShShName
+ , [top] <- concatMap typex_expr (ctxTarget ctx)
+ = return . (,ExprInline) $ top |= toJExpr d
+
+ -- Test case T24495 with single occurrence at -02 and third occurrence at -01
+ -- Moved back from removal at https://gitlab.haskell.org/ghc/ghc/-/merge_requests/12308
+ -- See commit hash b36ee57bfbecc628b7f0919e1e59b7066495034f
+ --
+ -- Case: unpackCStringAppend# "some string"# str
+ --
+ -- Generates h$appendToHsStringA(str, "some string"), which has a faster
+ -- decoding loop.
+ | [StgLitArg (LitString bs), x] <- args
+ , Just d <- decodeModifiedUTF8 bs
+ , getUnique i == unpackCStringAppendIdKey
+ , [top] <- concatMap typex_expr (ctxTarget ctx)
+ = do
+ prof <- csProf <$> getSettings
+ let profArg = if prof then [jCafCCS] else []
+ a <- genArg x
+ return ( top |= app "h$appendToHsStringA" (toJExpr d : a ++ profArg)
+ , ExprInline
+ )
-- let-no-escape
| Just n <- ctxLneBindingStackSize ctx i
=====================================
compiler/GHC/StgToJS/CodeGen.hs
=====================================
@@ -11,7 +11,7 @@ where
import GHC.Prelude
-import GHC.Driver.Flags (DumpFlag (Opt_D_dump_js))
+import GHC.Driver.Flags (DumpFlag (Opt_D_dump_js, Opt_D_dump_stg_from_js_sinker))
import GHC.JS.Ppr
import GHC.JS.JStg.Syntax
@@ -21,7 +21,7 @@ import GHC.JS.Transform
import GHC.JS.Optimizer
import GHC.StgToJS.Arg
-import GHC.StgToJS.Sinker
+import GHC.StgToJS.Sinker.Sinker
import GHC.StgToJS.Types
import qualified GHC.StgToJS.Object as Object
import GHC.StgToJS.Utils
@@ -81,7 +81,8 @@ stgToJS logger config stg_binds0 this_mod spt_entries foreign_stubs cccs output_
-- TODO: avoid top level lifting in core-2-core when the JS backend is
-- enabled instead of undoing it here
- -- TODO: add dump pass for optimized STG ast for JS
+ putDumpFileMaybe logger Opt_D_dump_stg_from_js_sinker "STG Optimized JS Sinker:" FormatSTG
+ (pprGenStgTopBindings (StgPprOpts False) stg_binds)
(deps,lus) <- runG config this_mod unfloated_binds $ do
ifProfilingM $ initCostCentres cccs
=====================================
compiler/GHC/StgToJS/Linker/Linker.hs
=====================================
@@ -548,6 +548,16 @@ data ModuleCode = ModuleCode
, mc_frefs :: ![ForeignJSRef]
}
+instance Outputable ModuleCode where
+ ppr m = hang (text "ModuleCode") 2 $ vcat
+ [ hcat [text "Module: ", ppr (mc_module m)]
+ , hcat [text "JS Code:", pretty True (mc_js_code m)]
+ , hcat [text "JS Exports:", pprHsBytes (mc_exports m)]
+ , hang (text "JS Closures::") 2 (vcat (fmap (text . show) (mc_closures m)))
+ , hang (text "JS Statics::") 2 (vcat (fmap (text . show) (mc_statics m)))
+ , hang (text "JS ForeignRefs::") 2 (vcat (fmap (text . show) (mc_frefs m)))
+ ]
+
-- | ModuleCode after link with other modules.
--
-- It contains less information than ModuleCode because they have been commoned
=====================================
compiler/GHC/StgToJS/Literal.hs
=====================================
@@ -18,8 +18,8 @@ import GHC.StgToJS.Ids
import GHC.StgToJS.Monad
import GHC.StgToJS.Symbols
import GHC.StgToJS.Types
+import GHC.StgToJS.Linker.Utils (decodeModifiedUTF8)
-import GHC.Data.FastString
import GHC.Types.Literal
import GHC.Types.Basic
import GHC.Types.RepType
@@ -95,9 +95,10 @@ genLit = \case
genStaticLit :: Literal -> G [StaticLit]
genStaticLit = \case
LitChar c -> return [ IntLit (fromIntegral $ ord c) ]
- LitString str
- | True -> return [ StringLit (mkFastStringByteString str), IntLit 0]
- -- \| invalid UTF8 -> return [ BinLit str, IntLit 0]
+ LitString str -> case decodeModifiedUTF8 str of
+ Just t -> return [ StringLit t, IntLit 0]
+ -- invalid UTF8
+ Nothing -> return [ BinLit str, IntLit 0]
LitNullAddr -> return [ NullLit, IntLit 0 ]
LitNumber nt v -> case nt of
LitNumInt -> return [ IntLit v ]
=====================================
compiler/GHC/StgToJS/Monad.hs
=====================================
@@ -31,6 +31,7 @@ import GHC.JS.Transform
import GHC.StgToJS.Types
import GHC.Unit.Module
+import GHC.Utils.Outputable
import GHC.Stg.Syntax
import GHC.Types.SrcLoc
@@ -159,6 +160,13 @@ data GlobalOcc = GlobalOcc
, global_count :: !Word
}
+instance Outputable GlobalOcc where
+ ppr g = hang (text "GlobalOcc") 2 $ vcat
+ [ hcat [text "Ident: ", ppr (global_ident g)]
+ , hcat [text "Id:", ppr (global_id g)]
+ , hcat [text "Count:", ppr (global_count g)]
+ ]
+
-- | Return number of occurrences of every global id used in the given JStgStat.
-- Sort by increasing occurrence count.
globalOccs :: JStgStat -> G [GlobalOcc]
=====================================
compiler/GHC/StgToJS/Sinker/Collect.hs
=====================================
@@ -0,0 +1,72 @@
+{-# LANGUAGE LambdaCase #-}
+
+module GHC.StgToJS.Sinker.Collect
+ ( collectArgsTop
+ , collectArgs
+ , selectUsedOnce
+ )
+ where
+
+import GHC.Prelude
+import GHC.Types.Unique.Set
+import GHC.Stg.Syntax
+import GHC.Types.Id
+import GHC.Types.Unique
+
+-- | fold over all id in StgArg used at the top level in an StgRhsCon
+collectArgsTop :: CgStgBinding -> [Id]
+collectArgsTop = \case
+ StgNonRec _b r -> collectArgsTopRhs r
+ StgRec bs -> concatMap (collectArgsTopRhs . snd) bs
+ where
+ collectArgsTopRhs :: CgStgRhs -> [Id]
+ collectArgsTopRhs = \case
+ StgRhsCon _ccs _dc _mu _ticks args _typ -> concatMap collectArgsA args
+ StgRhsClosure {} -> []
+
+-- | fold over all Id in StgArg in the AST
+collectArgs :: CgStgBinding -> [Id]
+collectArgs = \case
+ StgNonRec _b r -> collectArgsR r
+ StgRec bs -> concatMap (collectArgsR . snd) bs
+ where
+ collectArgsR :: CgStgRhs -> [Id]
+ collectArgsR = \case
+ StgRhsClosure _x0 _x1 _x2 _x3 e _typ -> collectArgsE e
+ StgRhsCon _ccs _con _mu _ticks args _typ -> concatMap collectArgsA args
+
+ collectArgsAlt :: CgStgAlt -> [Id]
+ collectArgsAlt alt = collectArgsE (alt_rhs alt)
+
+ collectArgsE :: CgStgExpr -> [Id]
+ collectArgsE = \case
+ StgApp x args
+ -> x : concatMap collectArgsA args
+ StgConApp _con _mn args _ts
+ -> concatMap collectArgsA args
+ StgOpApp _x args _t
+ -> concatMap collectArgsA args
+ StgCase e _b _a alts
+ -> collectArgsE e ++ concatMap collectArgsAlt alts
+ StgLet _x b e
+ -> collectArgs b ++ collectArgsE e
+ StgLetNoEscape _x b e
+ -> collectArgs b ++ collectArgsE e
+ StgTick _i e
+ -> collectArgsE e
+ StgLit _
+ -> []
+
+collectArgsA :: StgArg -> [Id]
+collectArgsA = \case
+ StgVarArg i -> [i]
+ StgLitArg _ -> []
+
+selectUsedOnce :: (Foldable t, Uniquable a) => t a -> UniqSet a
+selectUsedOnce = fst . foldr g (emptyUniqSet, emptyUniqSet)
+ where
+ g i t@(once, mult)
+ | i `elementOfUniqSet` mult = t
+ | i `elementOfUniqSet` once
+ = (delOneFromUniqSet once i, addOneToUniqSet mult i)
+ | otherwise = (addOneToUniqSet once i, mult)
=====================================
compiler/GHC/StgToJS/Sinker.hs → compiler/GHC/StgToJS/Sinker/Sinker.hs
=====================================
@@ -2,7 +2,7 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE LambdaCase #-}
-module GHC.StgToJS.Sinker (sinkPgm) where
+module GHC.StgToJS.Sinker.Sinker (sinkPgm) where
import GHC.Prelude
import GHC.Types.Unique.Set
@@ -14,6 +14,8 @@ import GHC.Types.Name
import GHC.Unit.Module
import GHC.Types.Literal
import GHC.Data.Graph.Directed
+import GHC.StgToJS.Sinker.Collect
+import GHC.StgToJS.Sinker.StringsUnfloat
import GHC.Utils.Misc (partitionWith)
import GHC.StgToJS.Utils
@@ -21,7 +23,7 @@ import GHC.StgToJS.Utils
import Data.Char
import Data.List (partition)
import Data.Maybe
-
+import Data.ByteString (ByteString)
-- | Unfloat some top-level unexported things
--
@@ -34,27 +36,43 @@ import Data.Maybe
sinkPgm :: Module
-> [CgStgTopBinding]
-> (UniqFM Id CgStgExpr, [CgStgTopBinding])
-sinkPgm m pgm = (sunk, map StgTopLifted pgm'' ++ stringLits)
+sinkPgm m pgm
+ = (sunk, map StgTopLifted pgm''' ++ stringLits)
where
- selectLifted (StgTopLifted b) = Left b
- selectLifted x = Right x
- (pgm', stringLits) = partitionWith selectLifted pgm
- (sunk, pgm'') = sinkPgm' m pgm'
+ selectLifted :: CgStgTopBinding -> Either CgStgBinding (Id, ByteString)
+ selectLifted (StgTopLifted b) = Left b
+ selectLifted (StgTopStringLit i b) = Right (i, b)
+
+ (pgm', allStringLits) = partitionWith selectLifted pgm
+ usedOnceIds = selectUsedOnce $ concatMap collectArgs pgm'
+
+ stringLitsUFM = listToUFM $ (\(i, b) -> (idName i, (i, b))) <$> allStringLits
+ (pgm'', _actuallyUnfloatedStringLitNames) =
+ unfloatStringLits
+ (idName `mapUniqSet` usedOnceIds)
+ (snd `mapUFM` stringLitsUFM)
+ pgm'
+
+ stringLits = uncurry StgTopStringLit <$> allStringLits
+
+ (sunk, pgm''') = sinkPgm' m usedOnceIds pgm''
sinkPgm'
:: Module
-- ^ the module, since we treat definitions from the current module
-- differently
+ -> IdSet
+ -- ^ the set of used once ids
-> [CgStgBinding]
-- ^ the bindings
-> (UniqFM Id CgStgExpr, [CgStgBinding])
-- ^ a map with sunken replacements for nodes, for where the replacement
-- does not fit in the 'StgBinding' AST and the new bindings
-sinkPgm' m pgm =
- let usedOnce = collectUsedOnce pgm
+sinkPgm' m usedOnceIds pgm =
+ let usedOnce = collectTopLevelUsedOnce usedOnceIds pgm
sinkables = listToUFM $
concatMap alwaysSinkable pgm ++
- filter ((`elementOfUniqSet` usedOnce) . fst) (concatMap (onceSinkable m) pgm)
+ concatMap (filter ((`elementOfUniqSet` usedOnce) . fst) . onceSinkable m) pgm
isSunkBind (StgNonRec b _e) | elemUFM b sinkables = True
isSunkBind _ = False
in (sinkables, filter (not . isSunkBind) $ topSortDecls m pgm)
@@ -95,66 +113,10 @@ onceSinkable _ _ = []
-- | collect all idents used only once in an argument at the top level
-- and never anywhere else
-collectUsedOnce :: [CgStgBinding] -> IdSet
-collectUsedOnce binds = intersectUniqSets (usedOnce args) (usedOnce top_args)
+collectTopLevelUsedOnce :: IdSet -> [CgStgBinding] -> IdSet
+collectTopLevelUsedOnce usedOnceIds binds = intersectUniqSets usedOnceIds (selectUsedOnce top_args)
where
top_args = concatMap collectArgsTop binds
- args = concatMap collectArgs binds
- usedOnce = fst . foldr g (emptyUniqSet, emptyUniqSet)
- g i t@(once, mult)
- | i `elementOfUniqSet` mult = t
- | i `elementOfUniqSet` once
- = (delOneFromUniqSet once i, addOneToUniqSet mult i)
- | otherwise = (addOneToUniqSet once i, mult)
-
--- | fold over all id in StgArg used at the top level in an StgRhsCon
-collectArgsTop :: CgStgBinding -> [Id]
-collectArgsTop = \case
- StgNonRec _b r -> collectArgsTopRhs r
- StgRec bs -> concatMap (collectArgsTopRhs . snd) bs
-
-collectArgsTopRhs :: CgStgRhs -> [Id]
-collectArgsTopRhs = \case
- StgRhsCon _ccs _dc _mu _ticks args _typ -> concatMap collectArgsA args
- StgRhsClosure {} -> []
-
--- | fold over all Id in StgArg in the AST
-collectArgs :: CgStgBinding -> [Id]
-collectArgs = \case
- StgNonRec _b r -> collectArgsR r
- StgRec bs -> concatMap (collectArgsR . snd) bs
-
-collectArgsR :: CgStgRhs -> [Id]
-collectArgsR = \case
- StgRhsClosure _x0 _x1 _x2 _x3 e _typ -> collectArgsE e
- StgRhsCon _ccs _con _mu _ticks args _typ -> concatMap collectArgsA args
-
-collectArgsAlt :: CgStgAlt -> [Id]
-collectArgsAlt alt = collectArgsE (alt_rhs alt)
-
-collectArgsE :: CgStgExpr -> [Id]
-collectArgsE = \case
- StgApp x args
- -> x : concatMap collectArgsA args
- StgConApp _con _mn args _ts
- -> concatMap collectArgsA args
- StgOpApp _x args _t
- -> concatMap collectArgsA args
- StgCase e _b _a alts
- -> collectArgsE e ++ concatMap collectArgsAlt alts
- StgLet _x b e
- -> collectArgs b ++ collectArgsE e
- StgLetNoEscape _x b e
- -> collectArgs b ++ collectArgsE e
- StgTick _i e
- -> collectArgsE e
- StgLit _
- -> []
-
-collectArgsA :: StgArg -> [Id]
-collectArgsA = \case
- StgVarArg i -> [i]
- StgLitArg _ -> []
isLocal :: Id -> Bool
isLocal i = isNothing (nameModule_maybe . idName $ i) && not (isExportedId i)
=====================================
compiler/GHC/StgToJS/Sinker/StringsUnfloat.hs
=====================================
@@ -0,0 +1,156 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE PatternSynonyms #-}
+
+module GHC.StgToJS.Sinker.StringsUnfloat
+ ( unfloatStringLits
+ )
+ where
+
+import GHC.Prelude
+import GHC.Types.Unique.Set
+import GHC.Types.Unique.FM
+import GHC.Stg.Syntax
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Types.Literal
+import GHC.Utils.Misc (partitionWith)
+
+import Data.ByteString qualified as BS
+import Data.ByteString (ByteString)
+import Data.Bifunctor (Bifunctor (..))
+
+-- | We suppose that every string shorter than 80 symbols is safe for sink.
+-- Sinker is working on per module. It means that ALL locally defined strings
+-- in a module shorter 80 symbols will be unfloated back.
+pattern STRING_LIT_MAX_LENGTH :: Int
+pattern STRING_LIT_MAX_LENGTH = 80
+
+unfloatStringLits
+ :: UniqSet Name
+ -> UniqFM Name ByteString
+ -> [CgStgBinding]
+ -> ([CgStgBinding], UniqSet Name)
+unfloatStringLits usedOnceStringLits stringLits =
+ unfloatStringLits' (selectStringLitsForUnfloat usedOnceStringLits stringLits)
+
+-- | We are doing attempts to unfloat string literals back to
+-- the call site. Further special JS optimizations
+-- can generate more performant operations over them.
+unfloatStringLits' :: UniqFM Name ByteString -> [CgStgBinding] -> ([CgStgBinding], UniqSet Name)
+unfloatStringLits' stringLits allBindings = (binderWithoutChanges ++ binderWithUnfloatedStringLit, actuallyUsedStringLitNames)
+ where
+ (binderWithoutChanges, binderWithUnfloatedStringLitPairs) = partitionWith substituteStringLit allBindings
+
+ binderWithUnfloatedStringLit = fst <$> binderWithUnfloatedStringLitPairs
+ actuallyUsedStringLitNames = unionManyUniqSets (snd <$> binderWithUnfloatedStringLitPairs)
+
+ substituteStringLit :: CgStgBinding -> Either CgStgBinding (CgStgBinding, UniqSet Name)
+ substituteStringLit x@(StgRec bnds)
+ | isEmptyUniqSet names = Left x
+ | otherwise = Right (StgRec bnds', names)
+ where
+ (bnds', names) = extractNames id $ do
+ (i, rhs) <- bnds
+ pure $ case processStgRhs rhs of
+ Nothing -> Left (i, rhs)
+ Just (rhs', names) -> Right ((i, rhs'), names)
+ substituteStringLit x@(StgNonRec binder rhs)
+ = maybe (Left x)
+ (\(body', names) -> Right (StgNonRec binder body', names))
+ (processStgRhs rhs)
+
+ processStgRhs :: CgStgRhs -> Maybe (CgStgRhs, UniqSet Name)
+ processStgRhs (StgRhsCon ccs dataCon mu ticks args typ)
+ | isEmptyUniqSet names = Nothing
+ | otherwise = Just (StgRhsCon ccs dataCon mu ticks unified typ, names)
+ where
+ (unified, names) = substituteArgWithNames args
+ processStgRhs (StgRhsClosure fvs ccs upd bndrs body typ)
+ = (\(body', names) -> (StgRhsClosure fvs ccs upd bndrs body' typ, names)) <$>
+ processStgExpr body
+
+ -- Recursive expressions
+ processStgExpr :: CgStgExpr -> Maybe (CgStgExpr, UniqSet Name)
+ processStgExpr (StgLit _) = Nothing
+ processStgExpr (StgTick _ _) = Nothing
+ processStgExpr (StgLet n b e) =
+ case (substituteStringLit b, processStgExpr e) of
+ (Left _, Nothing) -> Nothing
+ (Right (b', names), Nothing) -> Just (StgLet n b' e, names)
+ (Left _, Just (e', names)) -> Just (StgLet n b e', names)
+ (Right (b', names), Just (e', names')) -> Just (StgLet n b' e', names `unionUniqSets` names')
+ processStgExpr (StgLetNoEscape n b e) =
+ case (substituteStringLit b, processStgExpr e) of
+ (Left _, Nothing) -> Nothing
+ (Right (b', names), Nothing) -> Just (StgLetNoEscape n b' e, names)
+ (Left _, Just (e', names)) -> Just (StgLetNoEscape n b e', names)
+ (Right (b', names), Just (e', names')) -> Just (StgLetNoEscape n b' e', names `unionUniqSets` names')
+ -- We should keep the order: See Note [Case expression invariants]
+ processStgExpr (StgCase e bndr alt_type alts) =
+ case (isEmptyUniqSet names, processStgExpr e) of
+ (True, Nothing) -> Nothing
+ (True, Just (e', names')) -> Just (StgCase e' bndr alt_type alts, names')
+ (False, Nothing) -> Just (StgCase e bndr alt_type unified, names)
+ (False, Just (e', names')) -> Just (StgCase e' bndr alt_type unified, names `unionUniqSets` names')
+ where
+ (unified, names) = extractNames splitAlts alts
+
+ splitAlts :: CgStgAlt -> Either CgStgAlt (CgStgAlt, UniqSet Name)
+ splitAlts alt@(GenStgAlt con bndrs rhs) =
+ case processStgExpr rhs of
+ Nothing -> Left alt
+ Just (alt', names) -> Right (GenStgAlt con bndrs alt', names)
+
+ -- No args
+ processStgExpr (StgApp _ []) = Nothing
+ processStgExpr (StgConApp _ _ [] _) = Nothing
+ processStgExpr (StgOpApp _ [] _) = Nothing
+
+ -- Main targets. Preserving the order of args is important
+ processStgExpr (StgApp fn args@(_:_))
+ | isEmptyUniqSet names = Nothing
+ | otherwise = Just (StgApp fn unified, names)
+ where
+ (unified, names) = substituteArgWithNames args
+ processStgExpr (StgConApp dc n args@(_:_) tys)
+ | isEmptyUniqSet names = Nothing
+ | otherwise = Just (StgConApp dc n unified tys, names)
+ where
+ (unified, names) = substituteArgWithNames args
+ processStgExpr (StgOpApp op args@(_:_) tys)
+ | isEmptyUniqSet names = Nothing
+ | otherwise = Just (StgOpApp op unified tys, names)
+ where
+ (unified, names) = substituteArgWithNames args
+
+ substituteArg :: StgArg -> Either StgArg (StgArg, Name)
+ substituteArg a@(StgLitArg _) = Left a
+ substituteArg a@(StgVarArg i) =
+ let name = idName i
+ in case lookupUFM stringLits name of
+ Nothing -> Left a
+ Just b -> Right (StgLitArg $ LitString b, name)
+
+ substituteArgWithNames = extractNames (second (second unitUniqSet) . substituteArg)
+
+ extractNames :: (a -> Either x (x, UniqSet Name)) -> [a] -> ([x], UniqSet Name)
+ extractNames splitter target =
+ let
+ splitted = splitter <$> target
+ combined = either (, emptyUniqSet) id <$> splitted
+ unified = fst <$> combined
+ names = unionManyUniqSets (snd <$> combined)
+ in (unified, names)
+
+selectStringLitsForUnfloat :: UniqSet Name -> UniqFM Name ByteString -> UniqFM Name ByteString
+selectStringLitsForUnfloat usedOnceStringLits stringLits = alwaysUnfloat `plusUFM` usedOnceUnfloat
+ where
+ alwaysUnfloat = alwaysUnfloatStringLits stringLits
+ usedOnceUnfloat = selectUsedOnceStringLits usedOnceStringLits stringLits
+
+ alwaysUnfloatStringLits :: UniqFM Name ByteString -> UniqFM Name ByteString
+ alwaysUnfloatStringLits = filterUFM $ \b -> BS.length b < STRING_LIT_MAX_LENGTH
+
+ selectUsedOnceStringLits :: UniqSet Name -> UniqFM Name ByteString -> UniqFM Name ByteString
+ selectUsedOnceStringLits usedOnceStringLits stringLits =
+ stringLits `intersectUFM` getUniqSet usedOnceStringLits
=====================================
compiler/GHC/StgToJS/Symbols.hs
=====================================
@@ -1215,3 +1215,7 @@ hdStiStr = fsLit "h$sti"
hdStrStr :: FastString
hdStrStr = fsLit "h$str"
+------------------------------ Pack/Unpack --------------------------------------------
+
+hdDecodeUtf8Z :: FastString
+hdDecodeUtf8Z = fsLit "h$decodeUtf8z"
=====================================
compiler/GHC/StgToJS/Types.hs
=====================================
@@ -284,8 +284,8 @@ instance ToJExpr StaticLit where
toJExpr (IntLit i) = toJExpr i
toJExpr NullLit = null_
toJExpr (DoubleLit d) = toJExpr (unSaneDouble d)
- toJExpr (StringLit t) = app hdStrStr [toJExpr t]
- toJExpr (BinLit b) = app hdRawStr [toJExpr (map toInteger (BS.unpack b))]
+ toJExpr (StringLit t) = app hdEncodeModifiedUtf8Str [toJExpr t]
+ toJExpr (BinLit b) = app hdRawStringDataStr [toJExpr (map toInteger (BS.unpack b))]
toJExpr (LabelLit _isFun lbl) = global lbl
-- | A foreign reference to some JS code
@@ -297,6 +297,7 @@ data ForeignJSRef = ForeignJSRef
, foreignRefArgs :: ![FastString]
, foreignRefResult :: !FastString
}
+ deriving (Show)
-- | data used to generate one ObjBlock in our object file
data LinkableUnit = LinkableUnit
=====================================
compiler/GHC/Types/ForeignCall.hs
=====================================
@@ -156,7 +156,7 @@ data CCallConv
| StdCallConv
| PrimCallConv
| JavaScriptCallConv
- deriving (Eq, Data, Enum)
+ deriving (Show, Eq, Data, Enum)
instance Outputable CCallConv where
ppr StdCallConv = text "stdcall"
=====================================
compiler/ghc.cabal.in
=====================================
@@ -765,7 +765,9 @@ Library
GHC.StgToJS.Regs
GHC.StgToJS.Rts.Types
GHC.StgToJS.Rts.Rts
- GHC.StgToJS.Sinker
+ GHC.StgToJS.Sinker.Collect
+ GHC.StgToJS.Sinker.StringsUnfloat
+ GHC.StgToJS.Sinker.Sinker
GHC.StgToJS.Stack
GHC.StgToJS.StaticPtr
GHC.StgToJS.Symbols
=====================================
docs/users_guide/debugging.rst
=====================================
@@ -560,6 +560,11 @@ These flags dump various phases of GHC's STG pipeline.
Alias for :ghc-flag:`-ddump-stg-from-core`. Deprecated in favor of more explicit
flags: :ghc-flag:`-ddump-stg-from-core`, :ghc-flag:`-ddump-stg-final`, etc.
+.. ghc-flag:: -ddump-stg-from-js-sinker
+ :shortdesc: Show JavaScript sinker output
+ :type: dynamic
+
+ Show the output of JavaScript Sinker pass.
C-\\- representation
~~~~~~~~~~~~~~~~~~~~
=====================================
testsuite/tests/javascript/Makefile
=====================================
@@ -5,5 +5,28 @@ include $(TOP)/mk/test.mk
T24495:
'$(TEST_HC)' $(TEST_HC_OPTS) T24495.hs -v0 -O1 -dsuppress-uniques -ddump-js -ddump-to-file
./T24495
- # check that the optimization occurred
+ # check that the optimization occurred for -01 3 times (2 for cases + 1 for unfloated lits)
+ grep -c appendToHsStringA T24495.dump-js
+
+ '$(TEST_HC)' $(TEST_HC_OPTS) T24495.hs -v0 -O2 -dsuppress-uniques -ddump-js -ddump-to-file
+ ./T24495
+ # check that the optimization occurred for -02 1 time (1 for unfloated lits)
grep -c appendToHsStringA T24495.dump-js
+
+T23479_1:
+ '$(TEST_HC)' $(TEST_HC_OPTS) T23479_1.hs -v0 -O2 -dsuppress-uniques -ddump-js -ddump-to-file
+ ./T23479_1
+ # check that the optimization occurred
+ grep -c "h\$$r1 = \"test_val_1\"" T23479_1.dump-js
+ grep -c "h\$$r1 = \"test_val_2\"" T23479_1.dump-js
+ grep -c "h\$$r1 = \"test_val_3\"" T23479_1.dump-js
+ grep -c "h\$$r1 = \"test_val_80_local" T23479_1.dump-js
+ grep -c "h\$$r1 = \"test_val_80_global" T23479_1.dump-js || true
+
+T23479_2:
+ '$(TEST_HC)' $(TEST_HC_OPTS) T23479_2.hs -v0 -O2 -dsuppress-uniques -ddump-js -ddump-to-file
+ ./T23479_2
+ grep -c "h\$$r1 = \"test_val_1\"" T23479_2.dump-js
+ grep -c "h\$$r1 = \"test_val_80_local_once" T23479_2.dump-js
+ # check that the optimization occurred
+ grep -c "h\$$r1 = h\$$decodeUtf8z" T23479_2.dump-js
=====================================
testsuite/tests/javascript/T23479_1.hs
=====================================
@@ -0,0 +1,33 @@
+{-# LANGUAGE MagicHash #-}
+module Main where
+
+import GHC.Prim
+import GHC.JS.Prim
+
+foreign import javascript "((x) => { console.log(x); })"
+ js_log1 :: JSVal -> IO ()
+
+test_val_2 :: String
+test_val_2 = "test_val_2"
+
+test_val_80_global :: String
+test_val_80_global = "test_val_80_globaltest_val_80_globaltest_val_80_globaltest_val_80_globaltest_val"
+
+main :: IO ()
+main = do
+ -- Direct usage
+ js_log1 (JSVal (unsafeUnpackJSStringUtf8## "test_val_1"#))
+ -- Requires string sinker hit for strings shorter 80 symbols
+ js_log1 (toJSString test_val_2)
+ -- Requires rewrite hit "toJSString/literal"
+ js_log1 (toJSString test_val_3)
+ -- Locally defined strings become unfloatted at any length
+ js_log1 (toJSString test_val_80_local)
+ -- Globally defined strings with length >= 80 should not be unfloatted
+ js_log1 (toJSString test_val_80_global)
+ where
+ test_val_3 :: String
+ test_val_3 = "test_val_3"
+
+ test_val_80_local :: String
+ test_val_80_local = "test_val_80_localtest_val_80_localtest_val_80_localtest_val_80_localtest_val_80_"
=====================================
testsuite/tests/javascript/T23479_1.stdout
=====================================
@@ -0,0 +1,10 @@
+test_val_1
+test_val_2
+test_val_3
+test_val_80_localtest_val_80_localtest_val_80_localtest_val_80_localtest_val_80_
+test_val_80_globaltest_val_80_globaltest_val_80_globaltest_val_80_globaltest_val
+1
+1
+1
+1
+0
=====================================
testsuite/tests/javascript/T23479_2.hs
=====================================
@@ -0,0 +1,37 @@
+module Main where
+
+import GHC.Prim
+import GHC.JS.Prim
+
+foreign import javascript "((x) => { console.log(x); })"
+ js_log1 :: JSVal -> IO ()
+
+main :: IO ()
+main = do
+ -- When long string (>= 80) used once it is unfloatted
+ js_log1 (toJSString test_val_80_local_once)
+
+ -- When long string (>= 80) used more than once no unfloatting happened
+ js_log1 (toJSString test_val_80_local)
+ js_log1 (toJSString (testFn80 "testFn80:"))
+
+ -- Even if short string used more than once it is unfloatted anyway
+ js_log1 (toJSString test_val_1)
+ js_log1 (toJSString (testFn "testFn:"))
+ where
+ test_val_80_local_once :: String
+ test_val_80_local_once = "test_val_80_local_oncetest_val_80_local_oncetest_val_80_local_oncetest_val_80_lo"
+
+ test_val_80_local :: String
+ test_val_80_local = "test_val_80_localtest_val_80_localtest_val_80_localtest_val_80_localtest_val_80_"
+
+ testFn80 s = s ++ test_val_80_local
+ -- We should mark this function as NOINLINE to prevent deeper optimizations for the specific test case
+ {-# NOINLINE testFn80 #-}
+
+ test_val_1 :: String
+ test_val_1 = "test_val_1"
+
+ testFn s = s ++ test_val_1
+ -- We should mark this function as NOINLINE to prevent deeper optimizations for the specific test case
+ {-# NOINLINE testFn #-}
=====================================
testsuite/tests/javascript/T23479_2.stdout
=====================================
@@ -0,0 +1,8 @@
+test_val_80_local_oncetest_val_80_local_oncetest_val_80_local_oncetest_val_80_lo
+test_val_80_localtest_val_80_localtest_val_80_localtest_val_80_localtest_val_80_
+testFn80:test_val_80_localtest_val_80_localtest_val_80_localtest_val_80_localtest_val_80_
+test_val_1
+testFn:test_val_1
+1
+1
+1
=====================================
testsuite/tests/javascript/T24495.hs
=====================================
@@ -1,6 +1,6 @@
{-# LANGUAGE MagicHash #-}
-{-# OPTIONS_GHC -O1 #-}
-- -O1 required to make "rest" thunk SingleEntry
+-- -O2 shows that it still do one optimization
module Main where
=====================================
testsuite/tests/javascript/T24495.stdout
=====================================
@@ -1,2 +1,4 @@
2 ab bd
-2
+3
+2 ab bd
+1
=====================================
testsuite/tests/javascript/all.T
=====================================
@@ -22,3 +22,6 @@ test('T23346', normal, compile_and_run, [''])
test('T22455', normal, compile_and_run, ['-ddisable-js-minifier'])
test('T23565', normal, compile_and_run, [''])
test('T24495', normal, makefile_test, ['T24495'])
+
+test('T23479_1', normal, makefile_test, ['T23479_1'])
+test('T23479_2', normal, makefile_test, ['T23479_2'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0cc2c9e9ab8428e6182bd9d387f5eb887cff2c6c
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0cc2c9e9ab8428e6182bd9d387f5eb887cff2c6c
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241015/66825ec7/attachment-0001.html>
More information about the ghc-commits
mailing list