[Git][ghc/ghc][wip/T23479] Add limitations to unfloat string lits
Serge S. Gulin (@gulin.serge)
gitlab at gitlab.haskell.org
Tue Sep 17 21:13:42 UTC 2024
Serge S. Gulin pushed to branch wip/T23479 at Glasgow Haskell Compiler / GHC
Commits:
251595d8 by Serge S. Gulin at 2024-09-18T00:13:30+03:00
Add limitations to unfloat string lits
- - - - -
5 changed files:
- compiler/GHC/StgToJS/CodeGen.hs
- + compiler/GHC/StgToJS/Sinker/Collect.hs
- + compiler/GHC/StgToJS/Sinker/Sinker.hs
- compiler/GHC/StgToJS/Sinker.hs → compiler/GHC/StgToJS/Sinker/StringsUnfloat.hs
- compiler/ghc.cabal.in
Changes:
=====================================
compiler/GHC/StgToJS/CodeGen.hs
=====================================
@@ -21,7 +21,7 @@ import GHC.JS.Transform
import GHC.JS.Optimizer
import GHC.StgToJS.Arg
-import GHC.StgToJS.Sinker
+import GHC.StgToJS.Sinker.Sinker
import GHC.StgToJS.Types
import qualified GHC.StgToJS.Object as Object
import GHC.StgToJS.Utils
=====================================
compiler/GHC/StgToJS/Sinker/Collect.hs
=====================================
@@ -0,0 +1,72 @@
+{-# LANGUAGE LambdaCase #-}
+
+module GHC.StgToJS.Sinker.Collect
+ ( collectArgsTop
+ , collectArgs
+ , selectUsedOnce
+ )
+ where
+
+import GHC.Prelude
+import GHC.Types.Unique.Set
+import GHC.Stg.Syntax
+import GHC.Types.Id
+import GHC.Types.Unique
+
+-- | fold over all id in StgArg used at the top level in an StgRhsCon
+collectArgsTop :: CgStgBinding -> [Id]
+collectArgsTop = \case
+ StgNonRec _b r -> collectArgsTopRhs r
+ StgRec bs -> concatMap (collectArgsTopRhs . snd) bs
+ where
+ collectArgsTopRhs :: CgStgRhs -> [Id]
+ collectArgsTopRhs = \case
+ StgRhsCon _ccs _dc _mu _ticks args _typ -> concatMap collectArgsA args
+ StgRhsClosure {} -> []
+
+-- | fold over all Id in StgArg in the AST
+collectArgs :: CgStgBinding -> [Id]
+collectArgs = \case
+ StgNonRec _b r -> collectArgsR r
+ StgRec bs -> concatMap (collectArgsR . snd) bs
+ where
+ collectArgsR :: CgStgRhs -> [Id]
+ collectArgsR = \case
+ StgRhsClosure _x0 _x1 _x2 _x3 e _typ -> collectArgsE e
+ StgRhsCon _ccs _con _mu _ticks args _typ -> concatMap collectArgsA args
+
+ collectArgsAlt :: CgStgAlt -> [Id]
+ collectArgsAlt alt = collectArgsE (alt_rhs alt)
+
+ collectArgsE :: CgStgExpr -> [Id]
+ collectArgsE = \case
+ StgApp x args
+ -> x : concatMap collectArgsA args
+ StgConApp _con _mn args _ts
+ -> concatMap collectArgsA args
+ StgOpApp _x args _t
+ -> concatMap collectArgsA args
+ StgCase e _b _a alts
+ -> collectArgsE e ++ concatMap collectArgsAlt alts
+ StgLet _x b e
+ -> collectArgs b ++ collectArgsE e
+ StgLetNoEscape _x b e
+ -> collectArgs b ++ collectArgsE e
+ StgTick _i e
+ -> collectArgsE e
+ StgLit _
+ -> []
+
+collectArgsA :: StgArg -> [Id]
+collectArgsA = \case
+ StgVarArg i -> [i]
+ StgLitArg _ -> []
+
+selectUsedOnce :: (Foldable t, Uniquable a) => t a -> UniqSet a
+selectUsedOnce = fst . foldr g (emptyUniqSet, emptyUniqSet)
+ where
+ g i t@(once, mult)
+ | i `elementOfUniqSet` mult = t
+ | i `elementOfUniqSet` once
+ = (delOneFromUniqSet once i, addOneToUniqSet mult i)
+ | otherwise = (addOneToUniqSet once i, mult)
=====================================
compiler/GHC/StgToJS/Sinker/Sinker.hs
=====================================
@@ -0,0 +1,158 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE LambdaCase #-}
+
+module GHC.StgToJS.Sinker.Sinker (sinkPgm) where
+
+import GHC.Prelude
+import GHC.Types.Unique.Set
+import GHC.Types.Unique.FM
+import GHC.Types.Var.Set
+import GHC.Stg.Syntax
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Unit.Module
+import GHC.Types.Literal
+import GHC.Data.Graph.Directed
+import GHC.StgToJS.Sinker.Collect
+import GHC.StgToJS.Sinker.StringsUnfloat
+
+import GHC.Utils.Misc (partitionWith)
+import GHC.StgToJS.Utils
+
+import Data.Char
+import Data.List (partition)
+import Data.Maybe
+import Data.Bifunctor (Bifunctor (..))
+import GHC.Utils.Outputable (showSDocUnsafe, showPprUnsafe)
+import qualified GHC.Utils.Trace as DT
+import Data.ByteString (ByteString)
+
+
+-- | Unfloat some top-level unexported things
+--
+-- GHC floats constants to the top level. This is fine in native code, but with JS
+-- they occupy some global variable name. We can unfloat some unexported things:
+--
+-- - global constructors, as long as they're referenced only once by another global
+-- constructor and are not in a recursive binding group
+-- - literals (small literals may also be sunk if they are used more than once)
+sinkPgm :: Module
+ -> [CgStgTopBinding]
+ -> (UniqFM Id CgStgExpr, [CgStgTopBinding])
+sinkPgm m pgm
+ = ( tracePpr "<sunk>" id sunk
+ , traceDoc "<pgm'''>" id (map StgTopLifted pgm''')
+ ++ traceDoc "<stringLits>" id stringLits
+ )
+ where
+ selectLifted :: CgStgTopBinding -> Either CgStgBinding (Id, ByteString)
+ selectLifted (StgTopLifted b) = Left b
+ selectLifted (StgTopStringLit i b) = Right (i, b)
+
+ (pgm', allStringLits) = partitionWith selectLifted pgm
+ usedOnceIds = selectUsedOnce $ concatMap collectArgs pgm'
+
+ stringLitsUFM = listToUFM $ (\(i, b) -> (idName i, (i, b))) <$> allStringLits
+ (pgm'', usedStringLitNames) =
+ unfloatStringLits
+ (idName `mapUniqSet` usedOnceIds)
+ (snd `mapUFM` stringLitsUFM)
+ pgm'
+
+ stringLitsWithoutUnfolded =
+ nonDetEltsUFM
+ $ delListFromUFM stringLitsUFM
+ $ nonDetEltsUniqSet usedStringLitNames
+
+ stringLits = uncurry StgTopStringLit <$> stringLitsWithoutUnfolded
+
+ (sunk, pgm''') = sinkPgm' m usedOnceIds $ traceDoc "<pgm'>" (map StgTopLifted) pgm''
+
+sinkPgm'
+ :: Module
+ -- ^ the module, since we treat definitions from the current module
+ -- differently
+ -> IdSet
+ -- ^ the set of used once ids
+ -> [CgStgBinding]
+ -- ^ the bindings
+ -> (UniqFM Id CgStgExpr, [CgStgBinding])
+ -- ^ a map with sunken replacements for nodes, for where the replacement
+ -- does not fit in the 'StgBinding' AST and the new bindings
+sinkPgm' m usedOnceIds pgm =
+ let usedOnce = tracePpr "<usedOnce>" id (collectTopLevelUsedOnce usedOnceIds pgm)
+ sinkables = listToUFM $
+ (tracePpr "<alwaysSinkable>" id (concatMap alwaysSinkable pgm)) ++
+ filter ((`elementOfUniqSet` usedOnce) . fst) (concatMap (onceSinkable m) pgm)
+ isSunkBind (StgNonRec b _e) | elemUFM b sinkables = True
+ isSunkBind _ = False
+ in (sinkables, filter (not . isSunkBind) $ topSortDecls m pgm)
+
+-- | always sinkable, values that may be duplicated in the generated code (e.g.
+-- small literals)
+alwaysSinkable :: CgStgBinding -> [(Id, CgStgExpr)]
+alwaysSinkable (StgRec {}) = []
+alwaysSinkable (StgNonRec b rhs) = case rhs of
+ StgRhsClosure _ _ _ _ e@(StgLit l) _
+ | isSmallSinkableLit l
+ , isLocal (tracePpr "<StgRhsClosure>" id b)
+ -> [(b,e)]
+ StgRhsCon _ccs dc cnum _ticks as@[StgLitArg l] _typ
+ | isSmallSinkableLit l
+ , isLocal (tracePpr "<StgRhsCon>" id b)
+ , isUnboxableCon dc
+ -> [(b,StgConApp dc cnum as [])]
+ _ -> []
+
+isSmallSinkableLit :: Literal -> Bool
+isSmallSinkableLit (LitChar c) = ord c < 100000
+isSmallSinkableLit (LitNumber _ i) = abs i < 100000
+isSmallSinkableLit _ = False
+
+
+-- | once sinkable: may be sunk, but duplication is not ok
+onceSinkable :: Module -> CgStgBinding -> [(Id, CgStgExpr)]
+onceSinkable _m (StgNonRec b rhs)
+ | Just e <- getSinkable rhs
+ , isLocal b = [(b,e)]
+ where
+ getSinkable = \case
+ StgRhsCon _ccs dc cnum _ticks args _typ -> Just (StgConApp dc cnum args [])
+ StgRhsClosure _ _ _ _ e@(StgLit{}) _typ -> Just e
+ _ -> Nothing
+onceSinkable _ _ = []
+
+-- | collect all idents used only once in an argument at the top level
+-- and never anywhere else
+collectTopLevelUsedOnce :: IdSet -> [CgStgBinding] -> IdSet
+collectTopLevelUsedOnce usedOnceIds binds = intersectUniqSets usedOnceIds (selectUsedOnce top_args)
+ where
+ top_args = concatMap collectArgsTop binds
+
+isLocal :: Id -> Bool
+isLocal i = isNothing (nameModule_maybe . idName $ i) && not (isExportedId i)
+
+-- | since we have sequential initialization, topsort the non-recursive
+-- constructor bindings
+topSortDecls :: Module -> [CgStgBinding] -> [CgStgBinding]
+topSortDecls _m binds = rest ++ nr'
+ where
+ (nr, rest) = partition isNonRec binds
+ isNonRec StgNonRec{} = True
+ isNonRec _ = False
+ vs = map getV nr
+ keys = mkUniqSet (map node_key vs)
+ getV e@(StgNonRec b _) = DigraphNode e b []
+ getV _ = error "topSortDecls: getV, unexpected binding"
+ collectDeps (StgNonRec b (StgRhsCon _cc _dc _cnum _ticks args _typ)) =
+ [ (i, b) | StgVarArg i <- args, i `elementOfUniqSet` keys ]
+ collectDeps _ = []
+ g = graphFromVerticesAndAdjacency vs (concatMap collectDeps nr)
+ nr' | (not . null) [()| CyclicSCC _ <- stronglyConnCompG g]
+ = error "topSortDecls: unexpected cycle"
+ | otherwise = map node_payload (topologicalSortG g)
+
+
+traceDoc head m v = (DT.trace . (++) head . showSDocUnsafe . pprGenStgTopBindings shortStgPprOpts) (m v) v
+tracePpr head m v = (DT.trace . (++) head . showPprUnsafe) (m v) v
=====================================
compiler/GHC/StgToJS/Sinker.hs → compiler/GHC/StgToJS/Sinker/StringsUnfloat.hs
=====================================
@@ -1,204 +1,36 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE LambdaCase #-}
-module GHC.StgToJS.Sinker (sinkPgm) where
+module GHC.StgToJS.Sinker.StringsUnfloat
+ ( unfloatStringLits
+ )
+ where
import GHC.Prelude
import GHC.Types.Unique.Set
import GHC.Types.Unique.FM
-import GHC.Types.Var.Set
import GHC.Stg.Syntax
import GHC.Types.Id
import GHC.Types.Name
-import GHC.Unit.Module
import GHC.Types.Literal
-import GHC.Data.Graph.Directed
-
import GHC.Utils.Misc (partitionWith)
-import GHC.StgToJS.Utils
-import Data.Char
-import Data.List (partition)
-import Data.Maybe
-import Data.Bifunctor (Bifunctor (..))
-import GHC.Utils.Outputable (showSDocUnsafe, showPprUnsafe)
-import qualified GHC.Utils.Trace as DT
+import Data.ByteString qualified as BS
import Data.ByteString (ByteString)
+import Data.Bifunctor (Bifunctor (..))
-
--- | Unfloat some top-level unexported things
---
--- GHC floats constants to the top level. This is fine in native code, but with JS
--- they occupy some global variable name. We can unfloat some unexported things:
---
--- - global constructors, as long as they're referenced only once by another global
--- constructor and are not in a recursive binding group
--- - literals (small literals may also be sunk if they are used more than once)
-sinkPgm :: Module
- -> [CgStgTopBinding]
- -> (UniqFM Id CgStgExpr, [CgStgTopBinding])
-sinkPgm m pgm
- = ( (DT.trace . (++) "<sunk>" . showPprUnsafe) sunk sunk
- , traceDoc "<pgm'''>" (map StgTopLifted pgm''') (map StgTopLifted pgm''')
- ++ traceDoc "<stringLits>" stringLits stringLits
- )
- where
- traceDoc head = DT.trace . (++) head . showSDocUnsafe . pprGenStgTopBindings shortStgPprOpts
-
- selectLifted :: CgStgTopBinding -> Either CgStgBinding (Id, ByteString)
- selectLifted (StgTopLifted b) = Left b
- selectLifted (StgTopStringLit i b) = Right (i, b)
-
- (pgm', stringLits') = partitionWith selectLifted pgm
- stringLits = uncurry StgTopStringLit <$> stringLits'
-
- (pgm'', _usedStringLitNames) = unfloatStringLits (listToUFM $ first idName <$> stringLits') pgm'
- (sunk, pgm''') = sinkPgm' m $ (DT.trace . (++) "<pgm'>" . showSDocUnsafe . pprGenStgTopBindings shortStgPprOpts) (map StgTopLifted pgm'') pgm''
-
-sinkPgm'
- :: Module
- -- ^ the module, since we treat definitions from the current module
- -- differently
+unfloatStringLits
+ :: UniqSet Name
+ -> UniqFM Name ByteString
-> [CgStgBinding]
- -- ^ the bindings
- -> (UniqFM Id CgStgExpr, [CgStgBinding])
- -- ^ a map with sunken replacements for nodes, for where the replacement
- -- does not fit in the 'StgBinding' AST and the new bindings
-sinkPgm' m pgm =
- let usedOnce = collectUsedOnce pgm
- sinkables = listToUFM $
- concatMap alwaysSinkable pgm ++
- filter ((`elementOfUniqSet` usedOnce) . fst) (concatMap (onceSinkable m) pgm)
- isSunkBind (StgNonRec b _e) | elemUFM b sinkables = True
- isSunkBind _ = False
- in (sinkables, filter (not . isSunkBind) $ topSortDecls m pgm)
-
--- | always sinkable, values that may be duplicated in the generated code (e.g.
--- small literals)
-alwaysSinkable :: CgStgBinding -> [(Id, CgStgExpr)]
-alwaysSinkable (StgRec {}) = []
-alwaysSinkable (StgNonRec b rhs) = case rhs of
- StgRhsClosure _ _ _ _ e@(StgLit l) _
- | isSmallSinkableLit l
- , isLocal b
- -> [(b,e)]
- StgRhsCon _ccs dc cnum _ticks as@[StgLitArg l] _typ
- | isSmallSinkableLit l
- , isLocal b
- , isUnboxableCon dc
- -> [(b,StgConApp dc cnum as [])]
- _ -> []
-
-isSmallSinkableLit :: Literal -> Bool
-isSmallSinkableLit (LitChar c) = ord c < 100000
-isSmallSinkableLit (LitNumber _ i) = abs i < 100000
-isSmallSinkableLit _ = False
-
-
--- | once sinkable: may be sunk, but duplication is not ok
-onceSinkable :: Module -> CgStgBinding -> [(Id, CgStgExpr)]
-onceSinkable _m (StgNonRec b rhs)
- | Just e <- getSinkable rhs
- , isLocal b = [(b,e)]
- where
- getSinkable = \case
- StgRhsCon _ccs dc cnum _ticks args _typ -> Just (StgConApp dc cnum args [])
- StgRhsClosure _ _ _ _ e@(StgLit{}) _typ -> Just e
- _ -> Nothing
-onceSinkable _ _ = []
-
--- | collect all idents used only once in an argument at the top level
--- and never anywhere else
-collectUsedOnce :: [CgStgBinding] -> IdSet
-collectUsedOnce binds = intersectUniqSets (usedOnce args) (usedOnce top_args)
- where
- top_args = concatMap collectArgsTop binds
- args = concatMap collectArgs binds
- usedOnce = fst . foldr g (emptyUniqSet, emptyUniqSet)
- g i t@(once, mult)
- | i `elementOfUniqSet` mult = t
- | i `elementOfUniqSet` once
- = (delOneFromUniqSet once i, addOneToUniqSet mult i)
- | otherwise = (addOneToUniqSet once i, mult)
-
--- | fold over all id in StgArg used at the top level in an StgRhsCon
-collectArgsTop :: CgStgBinding -> [Id]
-collectArgsTop = \case
- StgNonRec _b r -> collectArgsTopRhs r
- StgRec bs -> concatMap (collectArgsTopRhs . snd) bs
-
-collectArgsTopRhs :: CgStgRhs -> [Id]
-collectArgsTopRhs = \case
- StgRhsCon _ccs _dc _mu _ticks args _typ -> concatMap collectArgsA args
- StgRhsClosure {} -> []
-
--- | fold over all Id in StgArg in the AST
-collectArgs :: CgStgBinding -> [Id]
-collectArgs = \case
- StgNonRec _b r -> collectArgsR r
- StgRec bs -> concatMap (collectArgsR . snd) bs
-
-collectArgsR :: CgStgRhs -> [Id]
-collectArgsR = \case
- StgRhsClosure _x0 _x1 _x2 _x3 e _typ -> collectArgsE e
- StgRhsCon _ccs _con _mu _ticks args _typ -> concatMap collectArgsA args
-
-collectArgsAlt :: CgStgAlt -> [Id]
-collectArgsAlt alt = collectArgsE (alt_rhs alt)
-
-collectArgsE :: CgStgExpr -> [Id]
-collectArgsE = \case
- StgApp x args
- -> x : concatMap collectArgsA args
- StgConApp _con _mn args _ts
- -> concatMap collectArgsA args
- StgOpApp _x args _t
- -> concatMap collectArgsA args
- StgCase e _b _a alts
- -> collectArgsE e ++ concatMap collectArgsAlt alts
- StgLet _x b e
- -> collectArgs b ++ collectArgsE e
- StgLetNoEscape _x b e
- -> collectArgs b ++ collectArgsE e
- StgTick _i e
- -> collectArgsE e
- StgLit _
- -> []
-
-collectArgsA :: StgArg -> [Id]
-collectArgsA = \case
- StgVarArg i -> [i]
- StgLitArg _ -> []
-
-isLocal :: Id -> Bool
-isLocal i = isNothing (nameModule_maybe . idName $ i) && not (isExportedId i)
-
--- | since we have sequential initialization, topsort the non-recursive
--- constructor bindings
-topSortDecls :: Module -> [CgStgBinding] -> [CgStgBinding]
-topSortDecls _m binds = rest ++ nr'
- where
- (nr, rest) = partition isNonRec binds
- isNonRec StgNonRec{} = True
- isNonRec _ = False
- vs = map getV nr
- keys = mkUniqSet (map node_key vs)
- getV e@(StgNonRec b _) = DigraphNode e b []
- getV _ = error "topSortDecls: getV, unexpected binding"
- collectDeps (StgNonRec b (StgRhsCon _cc _dc _cnum _ticks args _typ)) =
- [ (i, b) | StgVarArg i <- args, i `elementOfUniqSet` keys ]
- collectDeps _ = []
- g = graphFromVerticesAndAdjacency vs (concatMap collectDeps nr)
- nr' | (not . null) [()| CyclicSCC _ <- stronglyConnCompG g]
- = error "topSortDecls: unexpected cycle"
- | otherwise = map node_payload (topologicalSortG g)
+ -> ([CgStgBinding], UniqSet Name)
+unfloatStringLits usedOnceStringLits stringLits =
+ unfloatStringLits' (selectStringLitsForUnfloat usedOnceStringLits stringLits)
-- | We are doing attempts to unfloat string literals back to
-- the call site. Further special JS optimizations
-- can generate more performant operations over them.
-unfloatStringLits :: UniqFM Name ByteString -> [CgStgBinding] -> ([CgStgBinding], UniqSet Name)
-unfloatStringLits stringLits allBindings = (binderWithoutChanges ++ binderWithUnfloatedStringLit, actuallyUsedStringLitNames)
+unfloatStringLits' :: UniqFM Name ByteString -> [CgStgBinding] -> ([CgStgBinding], UniqSet Name)
+unfloatStringLits' stringLits allBindings = (binderWithoutChanges ++ binderWithUnfloatedStringLit, actuallyUsedStringLitNames)
where
(binderWithoutChanges, binderWithUnfloatedStringLitPairs) = partitionWith substituteStringLit allBindings
@@ -302,3 +134,16 @@ unfloatStringLits stringLits allBindings = (binderWithoutChanges ++ binderWithUn
unified = fst <$> combined
names = unionManyUniqSets (snd <$> combined)
in (unified, names)
+
+selectStringLitsForUnfloat :: UniqSet Name -> UniqFM Name ByteString -> UniqFM Name ByteString
+selectStringLitsForUnfloat usedOnceStringLits stringLits = alwaysUnfloat `plusUFM` usedOnceUnfloat
+ where
+ alwaysUnfloat = alwaysUnfloatStringLits stringLits
+ usedOnceUnfloat = selectUsedOnceStringLits usedOnceStringLits stringLits
+
+ alwaysUnfloatStringLits :: UniqFM Name ByteString -> UniqFM Name ByteString
+ alwaysUnfloatStringLits = filterUFM $ \b -> BS.length b < 3
+
+ selectUsedOnceStringLits :: UniqSet Name -> UniqFM Name ByteString -> UniqFM Name ByteString
+ selectUsedOnceStringLits usedOnceStringLits stringLits =
+ stringLits `intersectUFM` getUniqSet usedOnceStringLits
=====================================
compiler/ghc.cabal.in
=====================================
@@ -761,7 +761,9 @@ Library
GHC.StgToJS.Regs
GHC.StgToJS.Rts.Types
GHC.StgToJS.Rts.Rts
- GHC.StgToJS.Sinker
+ GHC.StgToJS.Sinker.Collect
+ GHC.StgToJS.Sinker.StringsUnfloat
+ GHC.StgToJS.Sinker.Sinker
GHC.StgToJS.Stack
GHC.StgToJS.StaticPtr
GHC.StgToJS.Symbols
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/251595d8a2cf80a9655f4e3e9d1a49e5434f7656
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/251595d8a2cf80a9655f4e3e9d1a49e5434f7656
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240917/324ca0bb/attachment-0001.html>
More information about the ghc-commits
mailing list