[Git][ghc/ghc][wip/T23479] Add limitations to unfloat string lits

Serge S. Gulin (@gulin.serge) gitlab at gitlab.haskell.org
Tue Sep 17 21:13:42 UTC 2024



Serge S. Gulin pushed to branch wip/T23479 at Glasgow Haskell Compiler / GHC


Commits:
251595d8 by Serge S. Gulin at 2024-09-18T00:13:30+03:00
Add limitations to unfloat string lits

- - - - -


5 changed files:

- compiler/GHC/StgToJS/CodeGen.hs
- + compiler/GHC/StgToJS/Sinker/Collect.hs
- + compiler/GHC/StgToJS/Sinker/Sinker.hs
- compiler/GHC/StgToJS/Sinker.hs → compiler/GHC/StgToJS/Sinker/StringsUnfloat.hs
- compiler/ghc.cabal.in


Changes:

=====================================
compiler/GHC/StgToJS/CodeGen.hs
=====================================
@@ -21,7 +21,7 @@ import GHC.JS.Transform
 import GHC.JS.Optimizer
 
 import GHC.StgToJS.Arg
-import GHC.StgToJS.Sinker
+import GHC.StgToJS.Sinker.Sinker
 import GHC.StgToJS.Types
 import qualified GHC.StgToJS.Object as Object
 import GHC.StgToJS.Utils


=====================================
compiler/GHC/StgToJS/Sinker/Collect.hs
=====================================
@@ -0,0 +1,72 @@
+{-# LANGUAGE LambdaCase #-}
+
+module GHC.StgToJS.Sinker.Collect
+  ( collectArgsTop
+  , collectArgs
+  , selectUsedOnce
+  )
+  where
+
+import GHC.Prelude
+import GHC.Types.Unique.Set
+import GHC.Stg.Syntax
+import GHC.Types.Id
+import GHC.Types.Unique
+
+-- | fold over all id in StgArg used at the top level in an StgRhsCon
+collectArgsTop :: CgStgBinding -> [Id]
+collectArgsTop = \case
+  StgNonRec _b r -> collectArgsTopRhs r
+  StgRec bs      -> concatMap (collectArgsTopRhs . snd) bs
+  where
+    collectArgsTopRhs :: CgStgRhs -> [Id]
+    collectArgsTopRhs = \case
+      StgRhsCon _ccs _dc _mu _ticks args _typ -> concatMap collectArgsA args
+      StgRhsClosure {}                        -> []
+
+-- | fold over all Id in StgArg in the AST
+collectArgs :: CgStgBinding -> [Id]
+collectArgs = \case
+  StgNonRec _b r -> collectArgsR r
+  StgRec bs      -> concatMap (collectArgsR . snd) bs
+  where
+    collectArgsR :: CgStgRhs -> [Id]
+    collectArgsR = \case
+      StgRhsClosure _x0 _x1 _x2 _x3 e _typ     -> collectArgsE e
+      StgRhsCon _ccs _con _mu _ticks args _typ -> concatMap collectArgsA args
+
+    collectArgsAlt :: CgStgAlt -> [Id]
+    collectArgsAlt alt = collectArgsE (alt_rhs alt)
+
+    collectArgsE :: CgStgExpr -> [Id]
+    collectArgsE = \case
+      StgApp x args
+        -> x : concatMap collectArgsA args
+      StgConApp _con _mn args _ts
+        -> concatMap collectArgsA args
+      StgOpApp _x args _t
+        -> concatMap collectArgsA args
+      StgCase e _b _a alts
+        -> collectArgsE e ++ concatMap collectArgsAlt alts
+      StgLet _x b e
+        -> collectArgs b ++ collectArgsE e
+      StgLetNoEscape _x b e
+        -> collectArgs b ++ collectArgsE e
+      StgTick _i e
+        -> collectArgsE e
+      StgLit _
+        -> []
+
+collectArgsA :: StgArg -> [Id]
+collectArgsA = \case
+  StgVarArg i -> [i]
+  StgLitArg _ -> []
+
+selectUsedOnce :: (Foldable t, Uniquable a) => t a -> UniqSet a
+selectUsedOnce = fst . foldr g (emptyUniqSet, emptyUniqSet)
+  where
+    g i t@(once, mult)
+      | i `elementOfUniqSet` mult = t
+      | i `elementOfUniqSet` once
+        = (delOneFromUniqSet once i, addOneToUniqSet mult i)
+      | otherwise = (addOneToUniqSet once i, mult)


=====================================
compiler/GHC/StgToJS/Sinker/Sinker.hs
=====================================
@@ -0,0 +1,158 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE LambdaCase #-}
+
+module GHC.StgToJS.Sinker.Sinker (sinkPgm) where
+
+import GHC.Prelude
+import GHC.Types.Unique.Set
+import GHC.Types.Unique.FM
+import GHC.Types.Var.Set
+import GHC.Stg.Syntax
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Unit.Module
+import GHC.Types.Literal
+import GHC.Data.Graph.Directed
+import GHC.StgToJS.Sinker.Collect
+import GHC.StgToJS.Sinker.StringsUnfloat
+
+import GHC.Utils.Misc (partitionWith)
+import GHC.StgToJS.Utils
+
+import Data.Char
+import Data.List (partition)
+import Data.Maybe
+import Data.Bifunctor (Bifunctor (..))
+import GHC.Utils.Outputable (showSDocUnsafe, showPprUnsafe)
+import qualified GHC.Utils.Trace as DT
+import Data.ByteString (ByteString)
+
+
+-- | Unfloat some top-level unexported things
+--
+-- GHC floats constants to the top level. This is fine in native code, but with JS
+-- they occupy some global variable name. We can unfloat some unexported things:
+--
+-- - global constructors, as long as they're referenced only once by another global
+--      constructor and are not in a recursive binding group
+-- - literals (small literals may also be sunk if they are used more than once)
+sinkPgm :: Module
+        -> [CgStgTopBinding]
+        -> (UniqFM Id CgStgExpr, [CgStgTopBinding])
+sinkPgm m pgm
+  = ( tracePpr "<sunk>" id sunk
+    , traceDoc "<pgm'''>" id (map StgTopLifted pgm''')
+      ++ traceDoc "<stringLits>" id stringLits
+    )
+  where
+    selectLifted :: CgStgTopBinding -> Either CgStgBinding (Id, ByteString)
+    selectLifted (StgTopLifted b)      = Left b
+    selectLifted (StgTopStringLit i b) = Right (i, b)
+
+    (pgm', allStringLits) = partitionWith selectLifted pgm
+    usedOnceIds = selectUsedOnce $ concatMap collectArgs pgm'
+
+    stringLitsUFM = listToUFM $ (\(i, b) -> (idName i, (i, b))) <$> allStringLits
+    (pgm'', usedStringLitNames) =
+      unfloatStringLits
+        (idName `mapUniqSet` usedOnceIds)
+        (snd `mapUFM` stringLitsUFM)
+        pgm'
+
+    stringLitsWithoutUnfolded =
+      nonDetEltsUFM
+      $ delListFromUFM stringLitsUFM
+      $ nonDetEltsUniqSet usedStringLitNames
+
+    stringLits = uncurry StgTopStringLit <$> stringLitsWithoutUnfolded
+
+    (sunk, pgm''') = sinkPgm' m usedOnceIds $ traceDoc "<pgm'>" (map StgTopLifted) pgm''
+
+sinkPgm'
+  :: Module
+       -- ^ the module, since we treat definitions from the current module
+       -- differently
+  -> IdSet
+       -- ^ the set of used once ids
+  -> [CgStgBinding]
+       -- ^ the bindings
+  -> (UniqFM Id CgStgExpr, [CgStgBinding])
+       -- ^ a map with sunken replacements for nodes, for where the replacement
+       -- does not fit in the 'StgBinding' AST and the new bindings
+sinkPgm' m usedOnceIds pgm =
+  let usedOnce = tracePpr "<usedOnce>" id (collectTopLevelUsedOnce usedOnceIds pgm)
+      sinkables = listToUFM $
+          (tracePpr "<alwaysSinkable>" id (concatMap alwaysSinkable pgm)) ++
+          filter ((`elementOfUniqSet` usedOnce) . fst) (concatMap (onceSinkable m) pgm)
+      isSunkBind (StgNonRec b _e) | elemUFM b sinkables = True
+      isSunkBind _                                      = False
+  in (sinkables, filter (not . isSunkBind) $ topSortDecls m pgm)
+
+-- | always sinkable, values that may be duplicated in the generated code (e.g.
+-- small literals)
+alwaysSinkable :: CgStgBinding -> [(Id, CgStgExpr)]
+alwaysSinkable (StgRec {})       = []
+alwaysSinkable (StgNonRec b rhs) = case rhs of
+  StgRhsClosure _ _ _ _ e@(StgLit l) _
+    | isSmallSinkableLit l
+    , isLocal (tracePpr "<StgRhsClosure>" id b)
+    -> [(b,e)]
+  StgRhsCon _ccs dc cnum _ticks as@[StgLitArg l] _typ
+    | isSmallSinkableLit l
+    , isLocal (tracePpr "<StgRhsCon>" id b)
+    , isUnboxableCon dc
+    -> [(b,StgConApp dc cnum as [])]
+  _ -> []
+
+isSmallSinkableLit :: Literal -> Bool
+isSmallSinkableLit (LitChar c)     = ord c < 100000
+isSmallSinkableLit (LitNumber _ i) = abs i < 100000
+isSmallSinkableLit _               = False
+
+
+-- | once sinkable: may be sunk, but duplication is not ok
+onceSinkable :: Module -> CgStgBinding -> [(Id, CgStgExpr)]
+onceSinkable _m (StgNonRec b rhs)
+  | Just e <- getSinkable rhs
+  , isLocal b = [(b,e)]
+  where
+    getSinkable = \case
+      StgRhsCon _ccs dc cnum _ticks args _typ -> Just (StgConApp dc cnum args [])
+      StgRhsClosure _ _ _ _ e@(StgLit{}) _typ -> Just e
+      _                                       -> Nothing
+onceSinkable _ _ = []
+
+-- | collect all idents used only once in an argument at the top level
+--   and never anywhere else
+collectTopLevelUsedOnce :: IdSet -> [CgStgBinding] -> IdSet
+collectTopLevelUsedOnce usedOnceIds binds = intersectUniqSets usedOnceIds (selectUsedOnce top_args)
+  where
+    top_args = concatMap collectArgsTop binds
+
+isLocal :: Id -> Bool
+isLocal i = isNothing (nameModule_maybe . idName $ i) && not (isExportedId i)
+
+-- | since we have sequential initialization, topsort the non-recursive
+-- constructor bindings
+topSortDecls :: Module -> [CgStgBinding] -> [CgStgBinding]
+topSortDecls _m binds = rest ++ nr'
+  where
+    (nr, rest) = partition isNonRec binds
+    isNonRec StgNonRec{} = True
+    isNonRec _           = False
+    vs   = map getV nr
+    keys = mkUniqSet (map node_key vs)
+    getV e@(StgNonRec b _) = DigraphNode e b []
+    getV _                 = error "topSortDecls: getV, unexpected binding"
+    collectDeps (StgNonRec b (StgRhsCon _cc _dc _cnum _ticks args _typ)) =
+      [ (i, b) | StgVarArg i <- args, i `elementOfUniqSet` keys ]
+    collectDeps _ = []
+    g = graphFromVerticesAndAdjacency vs (concatMap collectDeps nr)
+    nr' | (not . null) [()| CyclicSCC _ <- stronglyConnCompG g]
+            = error "topSortDecls: unexpected cycle"
+        | otherwise = map node_payload (topologicalSortG g)
+
+
+traceDoc head m v = (DT.trace . (++) head . showSDocUnsafe . pprGenStgTopBindings shortStgPprOpts) (m v) v
+tracePpr head m v = (DT.trace . (++) head . showPprUnsafe) (m v) v


=====================================
compiler/GHC/StgToJS/Sinker.hs → compiler/GHC/StgToJS/Sinker/StringsUnfloat.hs
=====================================
@@ -1,204 +1,36 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RankNTypes #-}
 {-# LANGUAGE LambdaCase #-}
 
-module GHC.StgToJS.Sinker (sinkPgm) where
+module GHC.StgToJS.Sinker.StringsUnfloat
+  ( unfloatStringLits
+  )
+  where
 
 import GHC.Prelude
 import GHC.Types.Unique.Set
 import GHC.Types.Unique.FM
-import GHC.Types.Var.Set
 import GHC.Stg.Syntax
 import GHC.Types.Id
 import GHC.Types.Name
-import GHC.Unit.Module
 import GHC.Types.Literal
-import GHC.Data.Graph.Directed
-
 import GHC.Utils.Misc (partitionWith)
-import GHC.StgToJS.Utils
 
-import Data.Char
-import Data.List (partition)
-import Data.Maybe
-import Data.Bifunctor (Bifunctor (..))
-import GHC.Utils.Outputable (showSDocUnsafe, showPprUnsafe)
-import qualified GHC.Utils.Trace as DT
+import Data.ByteString qualified as BS
 import Data.ByteString (ByteString)
+import Data.Bifunctor (Bifunctor (..))
 
-
--- | Unfloat some top-level unexported things
---
--- GHC floats constants to the top level. This is fine in native code, but with JS
--- they occupy some global variable name. We can unfloat some unexported things:
---
--- - global constructors, as long as they're referenced only once by another global
---      constructor and are not in a recursive binding group
--- - literals (small literals may also be sunk if they are used more than once)
-sinkPgm :: Module
-        -> [CgStgTopBinding]
-        -> (UniqFM Id CgStgExpr, [CgStgTopBinding])
-sinkPgm m pgm
-  = ( (DT.trace . (++) "<sunk>" . showPprUnsafe) sunk sunk
-    , traceDoc "<pgm'''>" (map StgTopLifted pgm''') (map StgTopLifted pgm''')
-      ++ traceDoc "<stringLits>" stringLits stringLits
-    )
-  where
-    traceDoc head = DT.trace . (++) head . showSDocUnsafe . pprGenStgTopBindings shortStgPprOpts
-
-    selectLifted :: CgStgTopBinding -> Either CgStgBinding (Id, ByteString)
-    selectLifted (StgTopLifted b)      = Left b
-    selectLifted (StgTopStringLit i b) = Right (i, b)
-
-    (pgm', stringLits') = partitionWith selectLifted pgm
-    stringLits = uncurry StgTopStringLit <$> stringLits'
-
-    (pgm'', _usedStringLitNames) = unfloatStringLits (listToUFM $ first idName <$> stringLits') pgm'
-    (sunk, pgm''') = sinkPgm' m $ (DT.trace . (++) "<pgm'>" . showSDocUnsafe . pprGenStgTopBindings shortStgPprOpts) (map StgTopLifted pgm'') pgm''
-
-sinkPgm'
-  :: Module
-       -- ^ the module, since we treat definitions from the current module
-       -- differently
+unfloatStringLits
+  :: UniqSet Name
+  -> UniqFM Name ByteString
   -> [CgStgBinding]
-       -- ^ the bindings
-  -> (UniqFM Id CgStgExpr, [CgStgBinding])
-       -- ^ a map with sunken replacements for nodes, for where the replacement
-       -- does not fit in the 'StgBinding' AST and the new bindings
-sinkPgm' m pgm =
-  let usedOnce = collectUsedOnce pgm
-      sinkables = listToUFM $
-          concatMap alwaysSinkable pgm ++
-          filter ((`elementOfUniqSet` usedOnce) . fst) (concatMap (onceSinkable m) pgm)
-      isSunkBind (StgNonRec b _e) | elemUFM b sinkables = True
-      isSunkBind _                                      = False
-  in (sinkables, filter (not . isSunkBind) $ topSortDecls m pgm)
-
--- | always sinkable, values that may be duplicated in the generated code (e.g.
--- small literals)
-alwaysSinkable :: CgStgBinding -> [(Id, CgStgExpr)]
-alwaysSinkable (StgRec {})       = []
-alwaysSinkable (StgNonRec b rhs) = case rhs of
-  StgRhsClosure _ _ _ _ e@(StgLit l) _
-    | isSmallSinkableLit l
-    , isLocal b
-    -> [(b,e)]
-  StgRhsCon _ccs dc cnum _ticks as@[StgLitArg l] _typ
-    | isSmallSinkableLit l
-    , isLocal b
-    , isUnboxableCon dc
-    -> [(b,StgConApp dc cnum as [])]
-  _ -> []
-
-isSmallSinkableLit :: Literal -> Bool
-isSmallSinkableLit (LitChar c)     = ord c < 100000
-isSmallSinkableLit (LitNumber _ i) = abs i < 100000
-isSmallSinkableLit _               = False
-
-
--- | once sinkable: may be sunk, but duplication is not ok
-onceSinkable :: Module -> CgStgBinding -> [(Id, CgStgExpr)]
-onceSinkable _m (StgNonRec b rhs)
-  | Just e <- getSinkable rhs
-  , isLocal b = [(b,e)]
-  where
-    getSinkable = \case
-      StgRhsCon _ccs dc cnum _ticks args _typ -> Just (StgConApp dc cnum args [])
-      StgRhsClosure _ _ _ _ e@(StgLit{}) _typ -> Just e
-      _                                       -> Nothing
-onceSinkable _ _ = []
-
--- | collect all idents used only once in an argument at the top level
---   and never anywhere else
-collectUsedOnce :: [CgStgBinding] -> IdSet
-collectUsedOnce binds = intersectUniqSets (usedOnce args) (usedOnce top_args)
-  where
-    top_args = concatMap collectArgsTop binds
-    args     = concatMap collectArgs    binds
-    usedOnce = fst . foldr g (emptyUniqSet, emptyUniqSet)
-    g i t@(once, mult)
-      | i `elementOfUniqSet` mult = t
-      | i `elementOfUniqSet` once
-        = (delOneFromUniqSet once i, addOneToUniqSet mult i)
-      | otherwise = (addOneToUniqSet once i, mult)
-
--- | fold over all id in StgArg used at the top level in an StgRhsCon
-collectArgsTop :: CgStgBinding -> [Id]
-collectArgsTop = \case
-  StgNonRec _b r -> collectArgsTopRhs r
-  StgRec bs      -> concatMap (collectArgsTopRhs . snd) bs
-
-collectArgsTopRhs :: CgStgRhs -> [Id]
-collectArgsTopRhs = \case
-  StgRhsCon _ccs _dc _mu _ticks args _typ -> concatMap collectArgsA args
-  StgRhsClosure {}                        -> []
-
--- | fold over all Id in StgArg in the AST
-collectArgs :: CgStgBinding -> [Id]
-collectArgs = \case
-  StgNonRec _b r -> collectArgsR r
-  StgRec bs      -> concatMap (collectArgsR . snd) bs
-
-collectArgsR :: CgStgRhs -> [Id]
-collectArgsR = \case
-  StgRhsClosure _x0 _x1 _x2 _x3 e _typ     -> collectArgsE e
-  StgRhsCon _ccs _con _mu _ticks args _typ -> concatMap collectArgsA args
-
-collectArgsAlt :: CgStgAlt -> [Id]
-collectArgsAlt alt = collectArgsE (alt_rhs alt)
-
-collectArgsE :: CgStgExpr -> [Id]
-collectArgsE = \case
-  StgApp x args
-    -> x : concatMap collectArgsA args
-  StgConApp _con _mn args _ts
-    -> concatMap collectArgsA args
-  StgOpApp _x args _t
-    -> concatMap collectArgsA args
-  StgCase e _b _a alts
-    -> collectArgsE e ++ concatMap collectArgsAlt alts
-  StgLet _x b e
-    -> collectArgs b ++ collectArgsE e
-  StgLetNoEscape _x b e
-    -> collectArgs b ++ collectArgsE e
-  StgTick _i e
-    -> collectArgsE e
-  StgLit _
-    -> []
-
-collectArgsA :: StgArg -> [Id]
-collectArgsA = \case
-  StgVarArg i -> [i]
-  StgLitArg _ -> []
-
-isLocal :: Id -> Bool
-isLocal i = isNothing (nameModule_maybe . idName $ i) && not (isExportedId i)
-
--- | since we have sequential initialization, topsort the non-recursive
--- constructor bindings
-topSortDecls :: Module -> [CgStgBinding] -> [CgStgBinding]
-topSortDecls _m binds = rest ++ nr'
-  where
-    (nr, rest) = partition isNonRec binds
-    isNonRec StgNonRec{} = True
-    isNonRec _           = False
-    vs   = map getV nr
-    keys = mkUniqSet (map node_key vs)
-    getV e@(StgNonRec b _) = DigraphNode e b []
-    getV _                 = error "topSortDecls: getV, unexpected binding"
-    collectDeps (StgNonRec b (StgRhsCon _cc _dc _cnum _ticks args _typ)) =
-      [ (i, b) | StgVarArg i <- args, i `elementOfUniqSet` keys ]
-    collectDeps _ = []
-    g = graphFromVerticesAndAdjacency vs (concatMap collectDeps nr)
-    nr' | (not . null) [()| CyclicSCC _ <- stronglyConnCompG g]
-            = error "topSortDecls: unexpected cycle"
-        | otherwise = map node_payload (topologicalSortG g)
+  -> ([CgStgBinding], UniqSet Name)
+unfloatStringLits usedOnceStringLits stringLits =
+  unfloatStringLits' (selectStringLitsForUnfloat usedOnceStringLits stringLits)
 
 -- | We are doing attempts to unfloat string literals back to
 -- the call site. Further special JS optimizations
 -- can generate more performant operations over them.
-unfloatStringLits :: UniqFM Name ByteString -> [CgStgBinding] -> ([CgStgBinding], UniqSet Name)
-unfloatStringLits stringLits allBindings = (binderWithoutChanges ++ binderWithUnfloatedStringLit, actuallyUsedStringLitNames)
+unfloatStringLits' :: UniqFM Name ByteString -> [CgStgBinding] -> ([CgStgBinding], UniqSet Name)
+unfloatStringLits' stringLits allBindings = (binderWithoutChanges ++ binderWithUnfloatedStringLit, actuallyUsedStringLitNames)
   where
     (binderWithoutChanges, binderWithUnfloatedStringLitPairs) = partitionWith substituteStringLit allBindings
 
@@ -302,3 +134,16 @@ unfloatStringLits stringLits allBindings = (binderWithoutChanges ++ binderWithUn
         unified = fst <$> combined
         names = unionManyUniqSets (snd <$> combined)
       in (unified, names)
+
+selectStringLitsForUnfloat :: UniqSet Name -> UniqFM Name ByteString -> UniqFM Name ByteString
+selectStringLitsForUnfloat usedOnceStringLits stringLits = alwaysUnfloat `plusUFM` usedOnceUnfloat
+  where
+    alwaysUnfloat = alwaysUnfloatStringLits stringLits
+    usedOnceUnfloat = selectUsedOnceStringLits usedOnceStringLits stringLits
+
+    alwaysUnfloatStringLits :: UniqFM Name ByteString -> UniqFM Name ByteString
+    alwaysUnfloatStringLits = filterUFM $ \b -> BS.length b < 3
+
+    selectUsedOnceStringLits :: UniqSet Name -> UniqFM Name ByteString -> UniqFM Name ByteString
+    selectUsedOnceStringLits usedOnceStringLits stringLits =
+      stringLits `intersectUFM` getUniqSet usedOnceStringLits


=====================================
compiler/ghc.cabal.in
=====================================
@@ -761,7 +761,9 @@ Library
         GHC.StgToJS.Regs
         GHC.StgToJS.Rts.Types
         GHC.StgToJS.Rts.Rts
-        GHC.StgToJS.Sinker
+        GHC.StgToJS.Sinker.Collect
+        GHC.StgToJS.Sinker.StringsUnfloat
+        GHC.StgToJS.Sinker.Sinker
         GHC.StgToJS.Stack
         GHC.StgToJS.StaticPtr
         GHC.StgToJS.Symbols



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/251595d8a2cf80a9655f4e3e9d1a49e5434f7656

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/251595d8a2cf80a9655f4e3e9d1a49e5434f7656
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240917/324ca0bb/attachment-0001.html>


More information about the ghc-commits mailing list