[Git][ghc/ghc][wip/T23479] JS: Re-add optimization for literal strings in genApp (fixes #23479)

Serge S. Gulin (@gulin.serge) gitlab at gitlab.haskell.org
Tue Oct 15 06:36:22 UTC 2024



Serge S. Gulin pushed to branch wip/T23479 at Glasgow Haskell Compiler / GHC


Commits:
0cc2c9e9 by Serge S. Gulin at 2024-10-15T09:35:51+03:00
JS: Re-add optimization for literal strings in genApp (fixes #23479)

Based on https://gitlab.haskell.org/ghc/ghc/-/merge_requests/10588/

Co-authored-by: Sylvain Henry <sylvain at haskus.fr>
Co-authored-by: Andrei Borzenkov <root at sandwitch.dev>
Co-authored-by: Danil Berestov <goosedb at yandex.ru>

-------------------------
Metric Decrease:
    size_hello_artifact
    size_hello_unicode
-------------------------

- - - - -


24 changed files:

- compiler/GHC/Builtin/Names.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/StgToJS/Apply.hs
- compiler/GHC/StgToJS/CodeGen.hs
- compiler/GHC/StgToJS/Linker/Linker.hs
- compiler/GHC/StgToJS/Literal.hs
- compiler/GHC/StgToJS/Monad.hs
- + compiler/GHC/StgToJS/Sinker/Collect.hs
- compiler/GHC/StgToJS/Sinker.hs → compiler/GHC/StgToJS/Sinker/Sinker.hs
- + compiler/GHC/StgToJS/Sinker/StringsUnfloat.hs
- compiler/GHC/StgToJS/Symbols.hs
- compiler/GHC/StgToJS/Types.hs
- compiler/GHC/Types/ForeignCall.hs
- compiler/ghc.cabal.in
- docs/users_guide/debugging.rst
- testsuite/tests/javascript/Makefile
- + testsuite/tests/javascript/T23479_1.hs
- + testsuite/tests/javascript/T23479_1.stdout
- + testsuite/tests/javascript/T23479_2.hs
- + testsuite/tests/javascript/T23479_2.stdout
- testsuite/tests/javascript/T24495.hs
- testsuite/tests/javascript/T24495.stdout
- testsuite/tests/javascript/all.T


Changes:

=====================================
compiler/GHC/Builtin/Names.hs
=====================================
@@ -522,6 +522,8 @@ basicKnownKeyNames
         , unsafeEqualityTyConName
         , unsafeReflDataConName
         , unsafeCoercePrimName
+
+        , unsafeUnpackJSStringUtf8ShShName
     ]
 
 genericTyConNames :: [Name]
@@ -590,7 +592,8 @@ gHC_INTERNAL_BASE, gHC_INTERNAL_ENUM,
     gHC_INTERNAL_ARROW, gHC_INTERNAL_DESUGAR, gHC_INTERNAL_RANDOM, gHC_INTERNAL_EXTS, gHC_INTERNAL_IS_LIST,
     gHC_INTERNAL_CONTROL_EXCEPTION_BASE, gHC_INTERNAL_TYPEERROR, gHC_INTERNAL_TYPELITS, gHC_INTERNAL_TYPELITS_INTERNAL,
     gHC_INTERNAL_TYPENATS, gHC_INTERNAL_TYPENATS_INTERNAL,
-    gHC_INTERNAL_DATA_COERCE, gHC_INTERNAL_DEBUG_TRACE, gHC_INTERNAL_UNSAFE_COERCE, gHC_INTERNAL_FOREIGN_C_CONSTPTR :: Module
+    gHC_INTERNAL_DATA_COERCE, gHC_INTERNAL_DEBUG_TRACE, gHC_INTERNAL_UNSAFE_COERCE, gHC_INTERNAL_FOREIGN_C_CONSTPTR,
+    gHC_INTERNAL_JS_PRIM, gHC_INTERNAL_WASM_PRIM_TYPES :: Module
 gHC_INTERNAL_BASE                   = mkGhcInternalModule (fsLit "GHC.Internal.Base")
 gHC_INTERNAL_ENUM                   = mkGhcInternalModule (fsLit "GHC.Internal.Enum")
 gHC_INTERNAL_GHCI                   = mkGhcInternalModule (fsLit "GHC.Internal.GHCi")
@@ -633,7 +636,7 @@ gHC_INTERNAL_RANDOM                 = mkGhcInternalModule (fsLit "GHC.Internal.S
 gHC_INTERNAL_EXTS                   = mkGhcInternalModule (fsLit "GHC.Internal.Exts")
 gHC_INTERNAL_IS_LIST                = mkGhcInternalModule (fsLit "GHC.Internal.IsList")
 gHC_INTERNAL_CONTROL_EXCEPTION_BASE = mkGhcInternalModule (fsLit "GHC.Internal.Control.Exception.Base")
-gHC_INTERNAL_EXCEPTION_CONTEXT = mkGhcInternalModule (fsLit "GHC.Internal.Exception.Context")
+gHC_INTERNAL_EXCEPTION_CONTEXT      = mkGhcInternalModule (fsLit "GHC.Internal.Exception.Context")
 gHC_INTERNAL_GENERICS               = mkGhcInternalModule (fsLit "GHC.Internal.Generics")
 gHC_INTERNAL_TYPEERROR              = mkGhcInternalModule (fsLit "GHC.Internal.TypeError")
 gHC_INTERNAL_TYPELITS               = mkGhcInternalModule (fsLit "GHC.Internal.TypeLits")
@@ -644,6 +647,8 @@ gHC_INTERNAL_DATA_COERCE            = mkGhcInternalModule (fsLit "GHC.Internal.D
 gHC_INTERNAL_DEBUG_TRACE            = mkGhcInternalModule (fsLit "GHC.Internal.Debug.Trace")
 gHC_INTERNAL_UNSAFE_COERCE          = mkGhcInternalModule (fsLit "GHC.Internal.Unsafe.Coerce")
 gHC_INTERNAL_FOREIGN_C_CONSTPTR     = mkGhcInternalModule (fsLit "GHC.Internal.Foreign.C.ConstPtr")
+gHC_INTERNAL_JS_PRIM                = mkGhcInternalModule (fsLit "GHC.Internal.JS.Prim")
+gHC_INTERNAL_WASM_PRIM_TYPES        = mkGhcInternalModule (fsLit "GHC.Internal.Wasm.Prim.Types")
 
 gHC_INTERNAL_SRCLOC :: Module
 gHC_INTERNAL_SRCLOC = mkGhcInternalModule (fsLit "GHC.Internal.SrcLoc")
@@ -1676,7 +1681,10 @@ constPtrConName =
     tcQual gHC_INTERNAL_FOREIGN_C_CONSTPTR (fsLit "ConstPtr") constPtrTyConKey
 
 jsvalTyConName :: Name
-jsvalTyConName = tcQual (mkGhcInternalModule (fsLit "GHC.Internal.Wasm.Prim.Types")) (fsLit "JSVal") jsvalTyConKey
+jsvalTyConName = tcQual gHC_INTERNAL_WASM_PRIM_TYPES (fsLit "JSVal") jsvalTyConKey
+
+unsafeUnpackJSStringUtf8ShShName :: Name
+unsafeUnpackJSStringUtf8ShShName = varQual gHC_INTERNAL_JS_PRIM (fsLit "unsafeUnpackJSStringUtf8##") unsafeUnpackJSStringUtf8ShShKey
 
 {-
 ************************************************************************
@@ -2082,6 +2090,7 @@ typeSymbolKindConNameKey, typeCharKindConNameKey,
   , typeNatLogTyFamNameKey
   , typeConsSymbolTyFamNameKey, typeUnconsSymbolTyFamNameKey
   , typeCharToNatTyFamNameKey, typeNatToCharTyFamNameKey
+  , exceptionContextTyConKey, unsafeUnpackJSStringUtf8ShShKey
   :: Unique
 typeSymbolKindConNameKey  = mkPreludeTyConUnique 400
 typeCharKindConNameKey    = mkPreludeTyConUnique 401
@@ -2104,9 +2113,10 @@ constPtrTyConKey = mkPreludeTyConUnique 417
 
 jsvalTyConKey = mkPreludeTyConUnique 418
 
-exceptionContextTyConKey :: Unique
 exceptionContextTyConKey = mkPreludeTyConUnique 420
 
+unsafeUnpackJSStringUtf8ShShKey  = mkPreludeMiscIdUnique 805
+
 {-
 ************************************************************************
 *                                                                      *


=====================================
compiler/GHC/Driver/Flags.hs
=====================================
@@ -470,6 +470,7 @@ data DumpFlag
    | Opt_D_dump_stg_cg        -- ^ STG (after stg2stg)
    | Opt_D_dump_stg_tags      -- ^ Result of tag inference analysis.
    | Opt_D_dump_stg_final     -- ^ Final STG (before cmm gen)
+   | Opt_D_dump_stg_from_js_sinker -- ^ STG after JS sinker
    | Opt_D_dump_call_arity
    | Opt_D_dump_exitify
    | Opt_D_dump_dmdanal


=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -1517,6 +1517,8 @@ dynamic_flags_deps = [
         "Use `-ddump-stg-from-core` or `-ddump-stg-final` instead"
   , make_ord_flag defGhcFlag "ddump-stg-tags"
         (setDumpFlag Opt_D_dump_stg_tags)
+  , make_ord_flag defGhcFlag "ddump-stg-from-js-sinker"
+        (setDumpFlag Opt_D_dump_stg_from_js_sinker)
   , make_ord_flag defGhcFlag "ddump-call-arity"
         (setDumpFlag Opt_D_dump_call_arity)
   , make_ord_flag defGhcFlag "ddump-exitify"


=====================================
compiler/GHC/StgToJS/Apply.hs
=====================================
@@ -1,6 +1,7 @@
 {-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE BlockArguments #-}
+{-# LANGUAGE ViewPatterns #-}
 
 -----------------------------------------------------------------------------
 -- |
@@ -46,11 +47,13 @@ import GHC.StgToJS.Stack
 import GHC.StgToJS.Symbols
 import GHC.StgToJS.Types
 import GHC.StgToJS.Utils
+import GHC.StgToJS.Linker.Utils (decodeModifiedUTF8)
 
 import GHC.Types.Id
 import GHC.Types.Id.Info
 import GHC.Types.CostCentre
 import GHC.Types.RepType (mightBeFunTy)
+import GHC.Types.Literal
 
 import GHC.Stg.Syntax
 
@@ -86,7 +89,6 @@ rtsApply cfg = jBlock
      , moveRegs2
      ]
 
-
 -- | Generate an application of some args to an Id.
 --
 -- The case where args is null is common as it's used to generate the evaluation
@@ -98,6 +100,51 @@ genApp
   -> [StgArg]
   -> G (JStgStat, ExprResult)
 genApp ctx i args
+    -- Test case T23479_2
+    -- See: https://github.com/ghcjs/ghcjs/blob/b7711fbca7c3f43a61f1dba526e6f2a2656ef44c/src/Gen2/Generator.hs#L876
+    -- Comment by Luite Stegeman <luite.stegeman at iohk.io>
+    -- Special cases for JSString literals.
+    -- We could handle unpackNBytes# here, but that's probably not common
+    -- enough to warrant a special case.
+    -- See: https://gitlab.haskell.org/ghc/ghc/-/merge_requests/10588/#note_503978
+    -- Comment by Jeffrey Young  <jeffrey.young at iohk.io>
+    -- We detect if the Id is unsafeUnpackJSStringUtf8## applied to a string literal,
+    -- if so then we convert the unsafeUnpack to a call to h$decode.
+    | [StgVarArg v] <- args
+    , idName i == unsafeUnpackJSStringUtf8ShShName
+    -- See: https://gitlab.haskell.org/ghc/ghc/-/merge_requests/10588
+    -- Comment by Josh Meredith  <josh.meredith at iohk.io>
+    -- `typex_expr` can throw an error for certain bindings so it's important
+    -- that this condition comes after matching on the function name
+    , [top] <- concatMap typex_expr (ctxTarget ctx)
+    = (,ExprInline) . (|=) top . app hdDecodeUtf8Z <$> varsForId v
+
+    -- Test case T23479_1
+    | [StgLitArg (LitString bs)] <- args
+    , Just d <- decodeModifiedUTF8 bs
+    , idName i == unsafeUnpackJSStringUtf8ShShName
+    , [top] <- concatMap typex_expr (ctxTarget ctx)
+    = return . (,ExprInline) $ top |= toJExpr d
+
+    -- Test case T24495 with single occurrence at -02 and third occurrence at -01
+    -- Moved back from removal at https://gitlab.haskell.org/ghc/ghc/-/merge_requests/12308
+    -- See commit hash b36ee57bfbecc628b7f0919e1e59b7066495034f
+    --
+    -- Case: unpackCStringAppend# "some string"# str
+    --
+    -- Generates h$appendToHsStringA(str, "some string"), which has a faster
+    -- decoding loop.
+    | [StgLitArg (LitString bs), x] <- args
+    , Just d <- decodeModifiedUTF8 bs
+    , getUnique i == unpackCStringAppendIdKey
+    , [top] <- concatMap typex_expr (ctxTarget ctx)
+    = do
+        prof <- csProf <$> getSettings
+        let profArg = if prof then [jCafCCS] else []
+        a <- genArg x
+        return ( top |= app "h$appendToHsStringA" (toJExpr d : a ++ profArg)
+               , ExprInline
+               )
 
     -- let-no-escape
     | Just n <- ctxLneBindingStackSize ctx i


=====================================
compiler/GHC/StgToJS/CodeGen.hs
=====================================
@@ -11,7 +11,7 @@ where
 
 import GHC.Prelude
 
-import GHC.Driver.Flags (DumpFlag (Opt_D_dump_js))
+import GHC.Driver.Flags (DumpFlag (Opt_D_dump_js, Opt_D_dump_stg_from_js_sinker))
 
 import GHC.JS.Ppr
 import GHC.JS.JStg.Syntax
@@ -21,7 +21,7 @@ import GHC.JS.Transform
 import GHC.JS.Optimizer
 
 import GHC.StgToJS.Arg
-import GHC.StgToJS.Sinker
+import GHC.StgToJS.Sinker.Sinker
 import GHC.StgToJS.Types
 import qualified GHC.StgToJS.Object as Object
 import GHC.StgToJS.Utils
@@ -81,7 +81,8 @@ stgToJS logger config stg_binds0 this_mod spt_entries foreign_stubs cccs output_
     -- TODO: avoid top level lifting in core-2-core when the JS backend is
     -- enabled instead of undoing it here
 
-    -- TODO: add dump pass for optimized STG ast for JS
+  putDumpFileMaybe logger Opt_D_dump_stg_from_js_sinker "STG Optimized JS Sinker:" FormatSTG
+    (pprGenStgTopBindings (StgPprOpts False) stg_binds)
 
   (deps,lus) <- runG config this_mod unfloated_binds $ do
     ifProfilingM $ initCostCentres cccs


=====================================
compiler/GHC/StgToJS/Linker/Linker.hs
=====================================
@@ -548,6 +548,16 @@ data ModuleCode = ModuleCode
   , mc_frefs    :: ![ForeignJSRef]
   }
 
+instance Outputable ModuleCode where
+  ppr m = hang (text "ModuleCode") 2 $ vcat
+            [ hcat [text "Module: ", ppr (mc_module m)]
+            , hcat [text "JS Code:", pretty True (mc_js_code m)]
+            , hcat [text "JS Exports:", pprHsBytes (mc_exports m)]
+            , hang (text "JS Closures::") 2 (vcat (fmap (text . show) (mc_closures m)))
+            , hang (text "JS Statics::") 2 (vcat (fmap (text . show) (mc_statics m)))
+            , hang (text "JS ForeignRefs::") 2 (vcat (fmap (text . show) (mc_frefs m)))
+            ]
+
 -- | ModuleCode after link with other modules.
 --
 -- It contains less information than ModuleCode because they have been commoned


=====================================
compiler/GHC/StgToJS/Literal.hs
=====================================
@@ -18,8 +18,8 @@ import GHC.StgToJS.Ids
 import GHC.StgToJS.Monad
 import GHC.StgToJS.Symbols
 import GHC.StgToJS.Types
+import GHC.StgToJS.Linker.Utils (decodeModifiedUTF8)
 
-import GHC.Data.FastString
 import GHC.Types.Literal
 import GHC.Types.Basic
 import GHC.Types.RepType
@@ -95,9 +95,10 @@ genLit = \case
 genStaticLit :: Literal -> G [StaticLit]
 genStaticLit = \case
   LitChar c                -> return [ IntLit (fromIntegral $ ord c) ]
-  LitString str
-    | True                 -> return [ StringLit (mkFastStringByteString str), IntLit 0]
-    -- \|  invalid UTF8         -> return [ BinLit str, IntLit 0]
+  LitString str -> case decodeModifiedUTF8 str of
+    Just t                 -> return [ StringLit t, IntLit 0]
+    -- invalid UTF8
+    Nothing                -> return [ BinLit str, IntLit 0]
   LitNullAddr              -> return [ NullLit, IntLit 0 ]
   LitNumber nt v           -> case nt of
     LitNumInt     -> return [ IntLit v ]


=====================================
compiler/GHC/StgToJS/Monad.hs
=====================================
@@ -31,6 +31,7 @@ import GHC.JS.Transform
 import GHC.StgToJS.Types
 
 import GHC.Unit.Module
+import GHC.Utils.Outputable
 import GHC.Stg.Syntax
 
 import GHC.Types.SrcLoc
@@ -159,6 +160,13 @@ data GlobalOcc = GlobalOcc
   , global_count :: !Word
   }
 
+instance Outputable GlobalOcc where
+  ppr g = hang (text "GlobalOcc") 2 $ vcat
+            [ hcat [text "Ident: ", ppr (global_ident g)]
+            , hcat [text "Id:", ppr (global_id g)]
+            , hcat [text "Count:", ppr (global_count g)]
+            ]
+
 -- | Return number of occurrences of every global id used in the given JStgStat.
 -- Sort by increasing occurrence count.
 globalOccs :: JStgStat -> G [GlobalOcc]


=====================================
compiler/GHC/StgToJS/Sinker/Collect.hs
=====================================
@@ -0,0 +1,72 @@
+{-# LANGUAGE LambdaCase #-}
+
+module GHC.StgToJS.Sinker.Collect
+  ( collectArgsTop
+  , collectArgs
+  , selectUsedOnce
+  )
+  where
+
+import GHC.Prelude
+import GHC.Types.Unique.Set
+import GHC.Stg.Syntax
+import GHC.Types.Id
+import GHC.Types.Unique
+
+-- | fold over all id in StgArg used at the top level in an StgRhsCon
+collectArgsTop :: CgStgBinding -> [Id]
+collectArgsTop = \case
+  StgNonRec _b r -> collectArgsTopRhs r
+  StgRec bs      -> concatMap (collectArgsTopRhs . snd) bs
+  where
+    collectArgsTopRhs :: CgStgRhs -> [Id]
+    collectArgsTopRhs = \case
+      StgRhsCon _ccs _dc _mu _ticks args _typ -> concatMap collectArgsA args
+      StgRhsClosure {}                        -> []
+
+-- | fold over all Id in StgArg in the AST
+collectArgs :: CgStgBinding -> [Id]
+collectArgs = \case
+  StgNonRec _b r -> collectArgsR r
+  StgRec bs      -> concatMap (collectArgsR . snd) bs
+  where
+    collectArgsR :: CgStgRhs -> [Id]
+    collectArgsR = \case
+      StgRhsClosure _x0 _x1 _x2 _x3 e _typ     -> collectArgsE e
+      StgRhsCon _ccs _con _mu _ticks args _typ -> concatMap collectArgsA args
+
+    collectArgsAlt :: CgStgAlt -> [Id]
+    collectArgsAlt alt = collectArgsE (alt_rhs alt)
+
+    collectArgsE :: CgStgExpr -> [Id]
+    collectArgsE = \case
+      StgApp x args
+        -> x : concatMap collectArgsA args
+      StgConApp _con _mn args _ts
+        -> concatMap collectArgsA args
+      StgOpApp _x args _t
+        -> concatMap collectArgsA args
+      StgCase e _b _a alts
+        -> collectArgsE e ++ concatMap collectArgsAlt alts
+      StgLet _x b e
+        -> collectArgs b ++ collectArgsE e
+      StgLetNoEscape _x b e
+        -> collectArgs b ++ collectArgsE e
+      StgTick _i e
+        -> collectArgsE e
+      StgLit _
+        -> []
+
+collectArgsA :: StgArg -> [Id]
+collectArgsA = \case
+  StgVarArg i -> [i]
+  StgLitArg _ -> []
+
+selectUsedOnce :: (Foldable t, Uniquable a) => t a -> UniqSet a
+selectUsedOnce = fst . foldr g (emptyUniqSet, emptyUniqSet)
+  where
+    g i t@(once, mult)
+      | i `elementOfUniqSet` mult = t
+      | i `elementOfUniqSet` once
+        = (delOneFromUniqSet once i, addOneToUniqSet mult i)
+      | otherwise = (addOneToUniqSet once i, mult)


=====================================
compiler/GHC/StgToJS/Sinker.hs → compiler/GHC/StgToJS/Sinker/Sinker.hs
=====================================
@@ -2,7 +2,7 @@
 {-# LANGUAGE RankNTypes #-}
 {-# LANGUAGE LambdaCase #-}
 
-module GHC.StgToJS.Sinker (sinkPgm) where
+module GHC.StgToJS.Sinker.Sinker (sinkPgm) where
 
 import GHC.Prelude
 import GHC.Types.Unique.Set
@@ -14,6 +14,8 @@ import GHC.Types.Name
 import GHC.Unit.Module
 import GHC.Types.Literal
 import GHC.Data.Graph.Directed
+import GHC.StgToJS.Sinker.Collect
+import GHC.StgToJS.Sinker.StringsUnfloat
 
 import GHC.Utils.Misc (partitionWith)
 import GHC.StgToJS.Utils
@@ -21,7 +23,7 @@ import GHC.StgToJS.Utils
 import Data.Char
 import Data.List (partition)
 import Data.Maybe
-
+import Data.ByteString (ByteString)
 
 -- | Unfloat some top-level unexported things
 --
@@ -34,27 +36,43 @@ import Data.Maybe
 sinkPgm :: Module
         -> [CgStgTopBinding]
         -> (UniqFM Id CgStgExpr, [CgStgTopBinding])
-sinkPgm m pgm = (sunk, map StgTopLifted pgm'' ++ stringLits)
+sinkPgm m pgm
+  = (sunk, map StgTopLifted pgm''' ++ stringLits)
   where
-    selectLifted (StgTopLifted b) = Left b
-    selectLifted x                = Right x
-    (pgm', stringLits) = partitionWith selectLifted pgm
-    (sunk, pgm'')      = sinkPgm' m pgm'
+    selectLifted :: CgStgTopBinding -> Either CgStgBinding (Id, ByteString)
+    selectLifted (StgTopLifted b)      = Left b
+    selectLifted (StgTopStringLit i b) = Right (i, b)
+
+    (pgm', allStringLits) = partitionWith selectLifted pgm
+    usedOnceIds = selectUsedOnce $ concatMap collectArgs pgm'
+
+    stringLitsUFM = listToUFM $ (\(i, b) -> (idName i, (i, b))) <$> allStringLits
+    (pgm'', _actuallyUnfloatedStringLitNames) =
+      unfloatStringLits
+        (idName `mapUniqSet` usedOnceIds)
+        (snd `mapUFM` stringLitsUFM)
+        pgm'
+
+    stringLits = uncurry StgTopStringLit <$> allStringLits
+
+    (sunk, pgm''') = sinkPgm' m usedOnceIds pgm''
 
 sinkPgm'
   :: Module
        -- ^ the module, since we treat definitions from the current module
        -- differently
+  -> IdSet
+       -- ^ the set of used once ids
   -> [CgStgBinding]
        -- ^ the bindings
   -> (UniqFM Id CgStgExpr, [CgStgBinding])
        -- ^ a map with sunken replacements for nodes, for where the replacement
        -- does not fit in the 'StgBinding' AST and the new bindings
-sinkPgm' m pgm =
-  let usedOnce = collectUsedOnce pgm
+sinkPgm' m usedOnceIds pgm =
+  let usedOnce = collectTopLevelUsedOnce usedOnceIds pgm
       sinkables = listToUFM $
           concatMap alwaysSinkable pgm ++
-          filter ((`elementOfUniqSet` usedOnce) . fst) (concatMap (onceSinkable m) pgm)
+          concatMap (filter ((`elementOfUniqSet` usedOnce) . fst) . onceSinkable m) pgm
       isSunkBind (StgNonRec b _e) | elemUFM b sinkables = True
       isSunkBind _                                      = False
   in (sinkables, filter (not . isSunkBind) $ topSortDecls m pgm)
@@ -95,66 +113,10 @@ onceSinkable _ _ = []
 
 -- | collect all idents used only once in an argument at the top level
 --   and never anywhere else
-collectUsedOnce :: [CgStgBinding] -> IdSet
-collectUsedOnce binds = intersectUniqSets (usedOnce args) (usedOnce top_args)
+collectTopLevelUsedOnce :: IdSet -> [CgStgBinding] -> IdSet
+collectTopLevelUsedOnce usedOnceIds binds = intersectUniqSets usedOnceIds (selectUsedOnce top_args)
   where
     top_args = concatMap collectArgsTop binds
-    args     = concatMap collectArgs    binds
-    usedOnce = fst . foldr g (emptyUniqSet, emptyUniqSet)
-    g i t@(once, mult)
-      | i `elementOfUniqSet` mult = t
-      | i `elementOfUniqSet` once
-        = (delOneFromUniqSet once i, addOneToUniqSet mult i)
-      | otherwise = (addOneToUniqSet once i, mult)
-
--- | fold over all id in StgArg used at the top level in an StgRhsCon
-collectArgsTop :: CgStgBinding -> [Id]
-collectArgsTop = \case
-  StgNonRec _b r -> collectArgsTopRhs r
-  StgRec bs      -> concatMap (collectArgsTopRhs . snd) bs
-
-collectArgsTopRhs :: CgStgRhs -> [Id]
-collectArgsTopRhs = \case
-  StgRhsCon _ccs _dc _mu _ticks args _typ -> concatMap collectArgsA args
-  StgRhsClosure {}                        -> []
-
--- | fold over all Id in StgArg in the AST
-collectArgs :: CgStgBinding -> [Id]
-collectArgs = \case
-  StgNonRec _b r -> collectArgsR r
-  StgRec bs      -> concatMap (collectArgsR . snd) bs
-
-collectArgsR :: CgStgRhs -> [Id]
-collectArgsR = \case
-  StgRhsClosure _x0 _x1 _x2 _x3 e _typ     -> collectArgsE e
-  StgRhsCon _ccs _con _mu _ticks args _typ -> concatMap collectArgsA args
-
-collectArgsAlt :: CgStgAlt -> [Id]
-collectArgsAlt alt = collectArgsE (alt_rhs alt)
-
-collectArgsE :: CgStgExpr -> [Id]
-collectArgsE = \case
-  StgApp x args
-    -> x : concatMap collectArgsA args
-  StgConApp _con _mn args _ts
-    -> concatMap collectArgsA args
-  StgOpApp _x args _t
-    -> concatMap collectArgsA args
-  StgCase e _b _a alts
-    -> collectArgsE e ++ concatMap collectArgsAlt alts
-  StgLet _x b e
-    -> collectArgs b ++ collectArgsE e
-  StgLetNoEscape _x b e
-    -> collectArgs b ++ collectArgsE e
-  StgTick _i e
-    -> collectArgsE e
-  StgLit _
-    -> []
-
-collectArgsA :: StgArg -> [Id]
-collectArgsA = \case
-  StgVarArg i -> [i]
-  StgLitArg _ -> []
 
 isLocal :: Id -> Bool
 isLocal i = isNothing (nameModule_maybe . idName $ i) && not (isExportedId i)


=====================================
compiler/GHC/StgToJS/Sinker/StringsUnfloat.hs
=====================================
@@ -0,0 +1,156 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE PatternSynonyms #-}
+
+module GHC.StgToJS.Sinker.StringsUnfloat
+  ( unfloatStringLits
+  )
+  where
+
+import GHC.Prelude
+import GHC.Types.Unique.Set
+import GHC.Types.Unique.FM
+import GHC.Stg.Syntax
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Types.Literal
+import GHC.Utils.Misc (partitionWith)
+
+import Data.ByteString qualified as BS
+import Data.ByteString (ByteString)
+import Data.Bifunctor (Bifunctor (..))
+
+-- | We suppose that every string shorter than 80 symbols is safe for sink.
+-- Sinker is working on per module. It means that ALL locally defined strings
+-- in a module shorter 80 symbols will be unfloated back.
+pattern STRING_LIT_MAX_LENGTH :: Int
+pattern STRING_LIT_MAX_LENGTH = 80
+
+unfloatStringLits
+  :: UniqSet Name
+  -> UniqFM Name ByteString
+  -> [CgStgBinding]
+  -> ([CgStgBinding], UniqSet Name)
+unfloatStringLits usedOnceStringLits stringLits =
+  unfloatStringLits' (selectStringLitsForUnfloat usedOnceStringLits stringLits)
+
+-- | We are doing attempts to unfloat string literals back to
+-- the call site. Further special JS optimizations
+-- can generate more performant operations over them.
+unfloatStringLits' :: UniqFM Name ByteString -> [CgStgBinding] -> ([CgStgBinding], UniqSet Name)
+unfloatStringLits' stringLits allBindings = (binderWithoutChanges ++ binderWithUnfloatedStringLit, actuallyUsedStringLitNames)
+  where
+    (binderWithoutChanges, binderWithUnfloatedStringLitPairs) = partitionWith substituteStringLit allBindings
+
+    binderWithUnfloatedStringLit = fst <$> binderWithUnfloatedStringLitPairs
+    actuallyUsedStringLitNames = unionManyUniqSets (snd <$> binderWithUnfloatedStringLitPairs)
+
+    substituteStringLit :: CgStgBinding -> Either CgStgBinding (CgStgBinding, UniqSet Name)
+    substituteStringLit x@(StgRec bnds)
+      | isEmptyUniqSet names = Left x
+      | otherwise = Right (StgRec bnds', names)
+      where
+        (bnds', names) = extractNames id $ do
+          (i, rhs) <- bnds
+          pure $ case processStgRhs rhs of
+            Nothing -> Left (i, rhs)
+            Just (rhs', names) -> Right ((i, rhs'), names)
+    substituteStringLit x@(StgNonRec binder rhs)
+      = maybe (Left x)
+        (\(body', names) -> Right (StgNonRec binder body', names))
+        (processStgRhs rhs)
+
+    processStgRhs :: CgStgRhs -> Maybe (CgStgRhs, UniqSet Name)
+    processStgRhs (StgRhsCon ccs dataCon mu ticks args typ)
+      | isEmptyUniqSet names = Nothing
+      | otherwise = Just (StgRhsCon ccs dataCon mu ticks unified typ, names)
+      where
+        (unified, names) = substituteArgWithNames args
+    processStgRhs (StgRhsClosure fvs ccs upd bndrs body typ)
+      = (\(body', names) -> (StgRhsClosure fvs ccs upd bndrs body' typ, names)) <$>
+        processStgExpr body
+
+    -- Recursive expressions
+    processStgExpr :: CgStgExpr -> Maybe (CgStgExpr, UniqSet Name)
+    processStgExpr (StgLit _) = Nothing
+    processStgExpr (StgTick _ _) = Nothing
+    processStgExpr (StgLet n b e) =
+      case (substituteStringLit b, processStgExpr e) of
+        (Left _, Nothing) -> Nothing
+        (Right (b', names), Nothing) -> Just (StgLet n b' e, names)
+        (Left _, Just (e', names)) -> Just (StgLet n b e', names)
+        (Right (b', names), Just (e', names')) -> Just (StgLet n b' e', names `unionUniqSets` names')
+    processStgExpr (StgLetNoEscape n b e) =
+      case (substituteStringLit b, processStgExpr e) of
+        (Left _, Nothing) -> Nothing
+        (Right (b', names), Nothing) -> Just (StgLetNoEscape n b' e, names)
+        (Left _, Just (e', names)) -> Just (StgLetNoEscape n b e', names)
+        (Right (b', names), Just (e', names')) -> Just (StgLetNoEscape n b' e', names `unionUniqSets` names')
+    -- We should keep the order: See Note [Case expression invariants]
+    processStgExpr (StgCase e bndr alt_type alts) =
+      case (isEmptyUniqSet names, processStgExpr e) of
+        (True, Nothing) -> Nothing
+        (True, Just (e', names')) -> Just (StgCase e' bndr alt_type alts, names')
+        (False, Nothing) -> Just (StgCase e bndr alt_type unified, names)
+        (False, Just (e', names')) -> Just (StgCase e' bndr alt_type unified, names `unionUniqSets` names')
+      where
+        (unified, names) = extractNames splitAlts alts
+
+        splitAlts :: CgStgAlt -> Either CgStgAlt (CgStgAlt, UniqSet Name)
+        splitAlts alt@(GenStgAlt con bndrs rhs) =
+          case processStgExpr rhs of
+            Nothing -> Left alt
+            Just (alt', names) -> Right (GenStgAlt con bndrs alt', names)
+
+    -- No args
+    processStgExpr (StgApp _ []) = Nothing
+    processStgExpr (StgConApp _ _ [] _) = Nothing
+    processStgExpr (StgOpApp _ [] _) = Nothing
+
+    -- Main targets. Preserving the order of args is important
+    processStgExpr (StgApp fn args@(_:_))
+      | isEmptyUniqSet names = Nothing
+      | otherwise = Just (StgApp fn unified, names)
+      where
+        (unified, names) = substituteArgWithNames args
+    processStgExpr (StgConApp dc n args@(_:_) tys)
+      | isEmptyUniqSet names = Nothing
+      | otherwise = Just (StgConApp dc n unified tys, names)
+      where
+        (unified, names) = substituteArgWithNames args
+    processStgExpr (StgOpApp op args@(_:_) tys)
+      | isEmptyUniqSet names = Nothing
+      | otherwise = Just (StgOpApp op unified tys, names)
+      where
+        (unified, names) = substituteArgWithNames args
+
+    substituteArg :: StgArg -> Either StgArg (StgArg, Name)
+    substituteArg a@(StgLitArg _) = Left a
+    substituteArg a@(StgVarArg i) =
+      let name = idName i
+      in case lookupUFM stringLits name of
+        Nothing -> Left a
+        Just b -> Right (StgLitArg $ LitString b, name)
+
+    substituteArgWithNames = extractNames (second (second unitUniqSet) . substituteArg)
+
+    extractNames :: (a -> Either x (x, UniqSet Name)) -> [a] -> ([x], UniqSet Name)
+    extractNames splitter target =
+      let
+        splitted = splitter <$> target
+        combined = either (, emptyUniqSet) id <$> splitted
+        unified = fst <$> combined
+        names = unionManyUniqSets (snd <$> combined)
+      in (unified, names)
+
+selectStringLitsForUnfloat :: UniqSet Name -> UniqFM Name ByteString -> UniqFM Name ByteString
+selectStringLitsForUnfloat usedOnceStringLits stringLits = alwaysUnfloat `plusUFM` usedOnceUnfloat
+  where
+    alwaysUnfloat = alwaysUnfloatStringLits stringLits
+    usedOnceUnfloat = selectUsedOnceStringLits usedOnceStringLits stringLits
+
+    alwaysUnfloatStringLits :: UniqFM Name ByteString -> UniqFM Name ByteString
+    alwaysUnfloatStringLits = filterUFM $ \b -> BS.length b < STRING_LIT_MAX_LENGTH
+
+    selectUsedOnceStringLits :: UniqSet Name -> UniqFM Name ByteString -> UniqFM Name ByteString
+    selectUsedOnceStringLits usedOnceStringLits stringLits =
+      stringLits `intersectUFM` getUniqSet usedOnceStringLits


=====================================
compiler/GHC/StgToJS/Symbols.hs
=====================================
@@ -1215,3 +1215,7 @@ hdStiStr = fsLit "h$sti"
 
 hdStrStr :: FastString
 hdStrStr = fsLit "h$str"
+------------------------------ Pack/Unpack --------------------------------------------
+
+hdDecodeUtf8Z :: FastString
+hdDecodeUtf8Z = fsLit "h$decodeUtf8z"


=====================================
compiler/GHC/StgToJS/Types.hs
=====================================
@@ -284,8 +284,8 @@ instance ToJExpr StaticLit where
   toJExpr (IntLit i)            = toJExpr i
   toJExpr NullLit               = null_
   toJExpr (DoubleLit d)         = toJExpr (unSaneDouble d)
-  toJExpr (StringLit t)         = app hdStrStr [toJExpr t]
-  toJExpr (BinLit b)            = app hdRawStr [toJExpr (map toInteger (BS.unpack b))]
+  toJExpr (StringLit t)         = app hdEncodeModifiedUtf8Str [toJExpr t]
+  toJExpr (BinLit b)            = app hdRawStringDataStr      [toJExpr (map toInteger (BS.unpack b))]
   toJExpr (LabelLit _isFun lbl) = global lbl
 
 -- | A foreign reference to some JS code
@@ -297,6 +297,7 @@ data ForeignJSRef = ForeignJSRef
   , foreignRefArgs     :: ![FastString]
   , foreignRefResult   :: !FastString
   }
+  deriving (Show)
 
 -- | data used to generate one ObjBlock in our object file
 data LinkableUnit = LinkableUnit


=====================================
compiler/GHC/Types/ForeignCall.hs
=====================================
@@ -156,7 +156,7 @@ data CCallConv
   | StdCallConv
   | PrimCallConv
   | JavaScriptCallConv
-  deriving (Eq, Data, Enum)
+  deriving (Show, Eq, Data, Enum)
 
 instance Outputable CCallConv where
   ppr StdCallConv = text "stdcall"


=====================================
compiler/ghc.cabal.in
=====================================
@@ -765,7 +765,9 @@ Library
         GHC.StgToJS.Regs
         GHC.StgToJS.Rts.Types
         GHC.StgToJS.Rts.Rts
-        GHC.StgToJS.Sinker
+        GHC.StgToJS.Sinker.Collect
+        GHC.StgToJS.Sinker.StringsUnfloat
+        GHC.StgToJS.Sinker.Sinker
         GHC.StgToJS.Stack
         GHC.StgToJS.StaticPtr
         GHC.StgToJS.Symbols


=====================================
docs/users_guide/debugging.rst
=====================================
@@ -560,6 +560,11 @@ These flags dump various phases of GHC's STG pipeline.
     Alias for :ghc-flag:`-ddump-stg-from-core`. Deprecated in favor of more explicit
     flags: :ghc-flag:`-ddump-stg-from-core`, :ghc-flag:`-ddump-stg-final`, etc.
 
+.. ghc-flag:: -ddump-stg-from-js-sinker
+    :shortdesc: Show JavaScript sinker output
+    :type: dynamic
+
+    Show the output of JavaScript Sinker pass.
 
 C-\\- representation
 ~~~~~~~~~~~~~~~~~~~~


=====================================
testsuite/tests/javascript/Makefile
=====================================
@@ -5,5 +5,28 @@ include $(TOP)/mk/test.mk
 T24495:
 	'$(TEST_HC)' $(TEST_HC_OPTS) T24495.hs -v0 -O1 -dsuppress-uniques -ddump-js -ddump-to-file
 	./T24495
-	# check that the optimization occurred
+	# check that the optimization occurred for -01 3 times (2 for cases + 1 for unfloated lits)
+	grep -c appendToHsStringA T24495.dump-js
+
+	'$(TEST_HC)' $(TEST_HC_OPTS) T24495.hs -v0 -O2 -dsuppress-uniques -ddump-js -ddump-to-file
+	./T24495
+	# check that the optimization occurred for -02 1 time (1 for unfloated lits)
 	grep -c appendToHsStringA T24495.dump-js
+
+T23479_1:
+	'$(TEST_HC)' $(TEST_HC_OPTS) T23479_1.hs -v0 -O2 -dsuppress-uniques -ddump-js -ddump-to-file
+	./T23479_1
+	# check that the optimization occurred
+	grep -c "h\$$r1 = \"test_val_1\"" T23479_1.dump-js
+	grep -c "h\$$r1 = \"test_val_2\"" T23479_1.dump-js
+	grep -c "h\$$r1 = \"test_val_3\"" T23479_1.dump-js
+	grep -c "h\$$r1 = \"test_val_80_local" T23479_1.dump-js
+	grep -c "h\$$r1 = \"test_val_80_global" T23479_1.dump-js || true
+
+T23479_2:
+	'$(TEST_HC)' $(TEST_HC_OPTS) T23479_2.hs -v0 -O2 -dsuppress-uniques -ddump-js -ddump-to-file
+	./T23479_2
+	grep -c "h\$$r1 = \"test_val_1\"" T23479_2.dump-js
+	grep -c "h\$$r1 = \"test_val_80_local_once" T23479_2.dump-js
+	# check that the optimization occurred
+	grep -c "h\$$r1 = h\$$decodeUtf8z" T23479_2.dump-js


=====================================
testsuite/tests/javascript/T23479_1.hs
=====================================
@@ -0,0 +1,33 @@
+{-# LANGUAGE MagicHash #-}
+module Main where
+
+import GHC.Prim
+import GHC.JS.Prim
+
+foreign import javascript "((x) => { console.log(x); })"
+  js_log1 :: JSVal -> IO ()
+
+test_val_2 :: String
+test_val_2 = "test_val_2"
+
+test_val_80_global :: String
+test_val_80_global = "test_val_80_globaltest_val_80_globaltest_val_80_globaltest_val_80_globaltest_val"
+
+main :: IO ()
+main = do
+  -- Direct usage
+  js_log1 (JSVal (unsafeUnpackJSStringUtf8## "test_val_1"#))
+  -- Requires string sinker hit for strings shorter 80 symbols
+  js_log1 (toJSString test_val_2)
+  -- Requires rewrite hit "toJSString/literal"
+  js_log1 (toJSString test_val_3)
+  -- Locally defined strings become unfloatted at any length
+  js_log1 (toJSString test_val_80_local)
+  -- Globally defined strings with length >= 80 should not be unfloatted
+  js_log1 (toJSString test_val_80_global)
+  where
+    test_val_3 :: String
+    test_val_3 = "test_val_3"
+
+    test_val_80_local :: String
+    test_val_80_local = "test_val_80_localtest_val_80_localtest_val_80_localtest_val_80_localtest_val_80_"


=====================================
testsuite/tests/javascript/T23479_1.stdout
=====================================
@@ -0,0 +1,10 @@
+test_val_1
+test_val_2
+test_val_3
+test_val_80_localtest_val_80_localtest_val_80_localtest_val_80_localtest_val_80_
+test_val_80_globaltest_val_80_globaltest_val_80_globaltest_val_80_globaltest_val
+1
+1
+1
+1
+0


=====================================
testsuite/tests/javascript/T23479_2.hs
=====================================
@@ -0,0 +1,37 @@
+module Main where
+
+import GHC.Prim
+import GHC.JS.Prim
+
+foreign import javascript "((x) => { console.log(x); })"
+  js_log1 :: JSVal -> IO ()
+
+main :: IO ()
+main = do
+  -- When long string (>= 80) used once it is unfloatted
+  js_log1 (toJSString test_val_80_local_once)
+
+  -- When long string (>= 80) used more than once no unfloatting happened
+  js_log1 (toJSString test_val_80_local)
+  js_log1 (toJSString (testFn80 "testFn80:"))
+
+  -- Even if short string used more than once it is unfloatted anyway
+  js_log1 (toJSString test_val_1)
+  js_log1 (toJSString (testFn "testFn:"))
+  where
+    test_val_80_local_once :: String
+    test_val_80_local_once = "test_val_80_local_oncetest_val_80_local_oncetest_val_80_local_oncetest_val_80_lo"
+
+    test_val_80_local :: String
+    test_val_80_local = "test_val_80_localtest_val_80_localtest_val_80_localtest_val_80_localtest_val_80_"
+
+    testFn80 s = s ++ test_val_80_local
+    -- We should mark this function as NOINLINE to prevent deeper optimizations for the specific test case
+    {-# NOINLINE testFn80 #-}
+
+    test_val_1 :: String
+    test_val_1 = "test_val_1"
+
+    testFn s = s ++ test_val_1
+    -- We should mark this function as NOINLINE to prevent deeper optimizations for the specific test case
+    {-# NOINLINE testFn #-}


=====================================
testsuite/tests/javascript/T23479_2.stdout
=====================================
@@ -0,0 +1,8 @@
+test_val_80_local_oncetest_val_80_local_oncetest_val_80_local_oncetest_val_80_lo
+test_val_80_localtest_val_80_localtest_val_80_localtest_val_80_localtest_val_80_
+testFn80:test_val_80_localtest_val_80_localtest_val_80_localtest_val_80_localtest_val_80_
+test_val_1
+testFn:test_val_1
+1
+1
+1


=====================================
testsuite/tests/javascript/T24495.hs
=====================================
@@ -1,6 +1,6 @@
 {-# LANGUAGE MagicHash #-}
-{-# OPTIONS_GHC -O1 #-}
 -- -O1 required to make "rest" thunk SingleEntry
+-- -O2 shows that it still do one optimization
 
 module Main where
 


=====================================
testsuite/tests/javascript/T24495.stdout
=====================================
@@ -1,2 +1,4 @@
 2 ab bd
-2
+3
+2 ab bd
+1


=====================================
testsuite/tests/javascript/all.T
=====================================
@@ -22,3 +22,6 @@ test('T23346', normal, compile_and_run, [''])
 test('T22455', normal, compile_and_run, ['-ddisable-js-minifier'])
 test('T23565', normal, compile_and_run, [''])
 test('T24495', normal, makefile_test, ['T24495'])
+
+test('T23479_1', normal, makefile_test, ['T23479_1'])
+test('T23479_2', normal, makefile_test, ['T23479_2'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0cc2c9e9ab8428e6182bd9d387f5eb887cff2c6c

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0cc2c9e9ab8428e6182bd9d387f5eb887cff2c6c
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241015/66825ec7/attachment-0001.html>


More information about the ghc-commits mailing list