[Git][ghc/ghc][wip/T24251a] 2 commits: Eliminate redundant cases in CSE
Simon Peyton Jones (@simonpj)
gitlab at gitlab.haskell.org
Sun Mar 24 22:12:06 UTC 2024
Simon Peyton Jones pushed to branch wip/T24251a at Glasgow Haskell Compiler / GHC
Commits:
7fffd774 by Simon Peyton Jones at 2024-03-24T22:11:08+00:00
Eliminate redundant cases in CSE
Addresses programs like this
f xs = xs `seq`
(let t = reverse $ reverse $ reverse $ reverse $ reverse $ reverse xs in
case xs of
[] -> (t,True)
(_:_) -> (t,False))
Also including the case where t is a join point.
Relates to #24251. Test in T24251a.
(And see Simon's GHC Log 13 March.)
Also added a perf test for #21741
- - - - -
b2a487e4 by Simon Peyton Jones at 2024-03-24T22:11:46+00:00
Account for bottoming functions in OccurAnal
This fixes #24582, a small but long-standing bug
- - - - -
14 changed files:
- compiler/GHC/Core/Opt/CSE.hs
- compiler/GHC/Core/Opt/OccurAnal.hs
- compiler/GHC/Core/Opt/Simplify/Env.hs
- compiler/GHC/Core/Opt/Simplify/Iteration.hs
- compiler/GHC/Core/Opt/Simplify/Utils.hs
- compiler/GHC/Types/Id.hs
- + testsuite/tests/perf/compiler/T24582.hs
- testsuite/tests/perf/compiler/all.T
- + testsuite/tests/perf/should_run/T21741.hs
- + testsuite/tests/perf/should_run/T21741.stdout
- testsuite/tests/perf/should_run/all.T
- + testsuite/tests/simplCore/should_compile/T24251a.hs
- + testsuite/tests/simplCore/should_compile/T24251a.stderr
- testsuite/tests/simplCore/should_compile/all.T
Changes:
=====================================
compiler/GHC/Core/Opt/CSE.hs
=====================================
@@ -14,13 +14,15 @@ import GHC.Types.Var.Env ( mkInScopeSet )
import GHC.Types.Id ( Id, idType, idHasRules, zapStableUnfolding
, idInlineActivation, setInlineActivation
, zapIdOccInfo, zapIdUsageInfo, idInlinePragma
- , isJoinId, idJoinPointHood, idUnfolding )
-import GHC.Core.Utils ( mkAltExpr
- , exprIsTickedString
+ , isJoinId, idJoinPointHood, idUnfolding
+ , zapIdUnfolding, isDeadBinder )
+import GHC.Core.Utils ( mkAltExpr, exprIsTickedString
, stripTicksE, stripTicksT, mkTicks )
import GHC.Core.FVs ( exprFreeVars )
import GHC.Core.Type ( tyConAppArgs )
import GHC.Core
+import GHC.Core.Utils ( exprIsTrivial )
+import GHC.Core.Opt.OccurAnal( scrutBinderSwap_maybe )
import GHC.Utils.Outputable
import GHC.Types.Basic
import GHC.Types.Tickish
@@ -714,25 +716,33 @@ cseExpr env (Case e bndr ty alts) = cseCase env e bndr ty alts
cseCase :: CSEnv -> InExpr -> InId -> InType -> [InAlt] -> OutExpr
cseCase env scrut bndr ty alts
- = Case scrut1 bndr3 ty' $
- combineAlts (map cse_alt alts)
+ | Just body <- caseElim scrut bndr alts
+ , -- See Note [Eliminating redundant cases]
+ let zapped_bndr = zapIdUnfolding bndr -- Wrinkle (ERC1)
+ = cseExpr env (Let (NonRec zapped_bndr scrut) body)
+
+ | otherwise
+ = Case scrut' bndr' ty' alts'
+
where
ty' = substTyUnchecked (csEnvSubst env) ty
- (cse_done, scrut1) = try_for_cse env scrut
+ (cse_done, scrut') = try_for_cse env scrut
bndr1 = zapIdOccInfo bndr
-- Zapping the OccInfo is needed because the extendCSEnv
-- in cse_alt may mean that a dead case binder
-- becomes alive, and Lint rejects that
(env1, bndr2) = addBinder env bndr1
- (alt_env, bndr3) = extendCSEnvWithBinding env1 bndr bndr2 scrut1 cse_done
+ (alt_env, bndr') = extendCSEnvWithBinding env1 bndr bndr2 scrut' cse_done
-- extendCSEnvWithBinding: see Note [CSE for case expressions]
+ alts' = combineAlts (map cse_alt alts)
+
con_target :: OutExpr
con_target = lookupSubst alt_env bndr
arg_tys :: [OutType]
- arg_tys = tyConAppArgs (idType bndr3)
+ arg_tys = tyConAppArgs (idType bndr')
-- See Note [CSE for case alternatives]
cse_alt (Alt (DataAlt con) args rhs)
@@ -747,6 +757,45 @@ cseCase env scrut bndr ty alts
where
(env', args') = addBinders alt_env args
+caseElim :: InExpr -> InId -> [InAlt] -> Maybe InExpr
+-- Can we eliminate the case altogether? If so return the body.
+-- Note [Eliminating redundant cases]
+caseElim scrut case_bndr alts
+ | [Alt _ bndrs rhs] <- alts
+ , Just (scrut_var, _) <- scrutBinderSwap_maybe scrut
+ , all isDeadBinder bndrs
+ , isEvaldSoon (scrut_var, case_bndr) rhs
+ = Just rhs
+
+ | otherwise
+ = Nothing
+
+isEvaldSoon :: (OutId, OutId) -> OutExpr -> Bool
+-- (isEvaldSoon (v1,v2) e) is True if either v1 or v2 is evaluated "soon" by e
+isEvaldSoon (v1,v2) expr
+ = go expr
+ where
+ hit :: Var -> Bool
+ hit v = v==v1 || v==v2
+
+ go (Var v) = hit v
+ go (Let _ e) = go e
+ go (Tick _ e) = go e
+ go (Cast e _) = go e
+
+ go (Case scrut cb _ alts)
+ = go scrut ||
+ (exprIsTrivial scrut &&
+ all go_alt alts &&
+ not (hit cb) && -- Check for
+ all ok_alt alts) -- shadowing
+ -- ok_alt only runs if things look good
+
+ go _ = False -- Lit, App, Lam, Coercion, Type
+
+ go_alt (Alt _ _ rhs) = go rhs
+ ok_alt (Alt _ cbs _) = not (any hit cbs)
+
combineAlts :: [OutAlt] -> [OutAlt]
-- See Note [Combine case alternatives]
combineAlts alts
@@ -798,6 +847,64 @@ turning K2 into 'x' increases the number of live variables. But
* The next run of the simplifier will turn 'x' back into K2, so we won't
permanently bloat the free-var count.
+Note [Eliminating redundant cases]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ case x of x' { DEFAULT ->
+ case y of y' { DEFAULT ->
+ let v = <thunk> in
+ case x' of { True -> e1; False -> e2 }
+ body }}}
+
+The initial `seq` of `x` is redundant. But the Simplifier is generally
+careful not to drop that outer `case x` based on its demand-info: see Note
+[Case-to-let for strictly-used binders] in GHC.Core.Opt.Simplify.Iteration.
+
+Instead we peek at the body of the case, using `isEvaldSoon`, to see if `x` is
+evaluated "soon" in the code path that follows. If so we transform the
+`case` to a `let`
+ let x' = x in
+ case y of y' ... etc...
+
+The notion of "soon" is a bit squishy, and is implemented by `isEvaldSoon`.
+We allow interchanging eval's (as in the `case x` vs `case y` above. But
+what about
+ case x of x' { DEFAULT ->
+ case (f y) of y' { DEFAULT ->
+ case x' of { True -> e1; False -> e2 }
+If we drop the `seq` on `x` we fall vicitm of #21741. There is nothing
+wrong /semantically/ with dropping the `seq`, but the case of #21741 it causes
+a big space leak.
+
+So the conditions in `isEvaldSoon` are quite narrow: the two evals are
+separated only by lets and other evals on /variables/.
+
+Wrinkle (ERC1):
+ x' will have an (OtherCon []) unfolding on it. We want to zap that
+ unfolding before turning it into (let x' = x in ...).
+
+Wrinkle (ERC2):
+ You might wonder if case-merging in the Simplifer doesn't cover this.
+ See GHC.Core.Opt.Simplify.Utils.tryCaseMerge. and Note [Merge Nested Cases]
+ in that same module. But no, it is defeated by the 'let v = <thunk>` in our
+ example above, and I didn't want to make it more complicated.
+
+ Mabye case-merging should be made simpler, or even moved outright here into CSE.
+
+Wrinkle (ERC3):
+ Why is this done in CSE? Becuase the "peeking" is tiresome and potentially a bit
+ expensive (quadratic in deep nests) so we don't want too often. CSE runs seldom,
+ it is a pretty simple pass, and it's easy to "drop in" this extra optimisation.
+ Also eliminating redundant cases is a bit like commoning up duplicated work.
+
+Wrinkle (ERC4):
+ You might wonder whether we want to do this "optimisation" /at all/. After all, as
+ Note [Case-to-let for strictly-used binders] point out, dropping the eval is
+ not a huge deal, because the inner eval should just be a multi-way jump (no
+ actual eval). But droppping the eval removes clutter, and I found that not dropping
+ made some functions look a bit bigger, and hence they didn't get inlined.
+
+ This is small beer though: I don't think it's an /important/ transformation.
Note [Combine case alternatives]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
=====================================
compiler/GHC/Core/Opt/OccurAnal.hs
=====================================
@@ -983,7 +983,7 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine
| mb_join@(JoinPoint {}) <- idJoinPointHood bndr
= -- Analyse the RHS and /then/ the body
let -- Analyse the rhs first, generating rhs_uds
- !(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env ire mb_join bndr rhs
+ !(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
rhs_uds = foldr1 orUDs rhs_uds_s -- NB: orUDs. See (W4) of
-- Note [Occurrence analysis for join points]
@@ -1009,7 +1009,7 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine
-- => join arity O of Note [Join arity prediction based on joinRhsArity]
(tagged_bndr, mb_join) = tagNonRecBinder lvl occ bndr
- !(rhs_uds_s, final_bndr, rhs') = occAnalNonRecRhs env ire mb_join tagged_bndr rhs
+ !(rhs_uds_s, final_bndr, rhs') = occAnalNonRecRhs env lvl ire mb_join tagged_bndr rhs
in WUD (foldr andUDs body_uds rhs_uds_s) -- Note `andUDs`
(combine [NonRec final_bndr rhs'] body)
@@ -1024,10 +1024,10 @@ occAnalNonRecBody env bndr thing_inside
in WUD inner_uds (occ, res)
-----------------
-occAnalNonRecRhs :: OccEnv -> ImpRuleEdges -> JoinPointHood
- -> Id -> CoreExpr
+occAnalNonRecRhs :: OccEnv -> TopLevelFlag -> ImpRuleEdges
+ -> JoinPointHood -> Id -> CoreExpr
-> ([UsageDetails], Id, CoreExpr)
-occAnalNonRecRhs !env imp_rule_edges mb_join bndr rhs
+occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs
| null rules, null imp_rule_infos
= -- Fast path for common case of no rules. This is only worth
-- 0.1% perf on average, but it's also only a line or two of code
@@ -1046,7 +1046,7 @@ occAnalNonRecRhs !env imp_rule_edges mb_join bndr rhs
-- j will never be scrutinised.
env1 | is_join_point = setTailCtxt env
| otherwise = setNonTailCtxt rhs_ctxt env -- Zap occ_join_points
- rhs_ctxt = mkNonRecRhsCtxt bndr unf
+ rhs_ctxt = mkNonRecRhsCtxt lvl bndr unf
-- See Note [Sources of one-shot information]
rhs_env = addOneShotsFromDmd bndr env1
@@ -1092,9 +1092,9 @@ occAnalNonRecRhs !env imp_rule_edges mb_join bndr rhs
[ l `andUDs` adjustTailArity mb_join r
| (_,l,r) <- rules_w_uds ]
-mkNonRecRhsCtxt :: Id -> Unfolding -> OccEncl
+mkNonRecRhsCtxt :: TopLevelFlag -> Id -> Unfolding -> OccEncl
-- Precondition: Id is not a join point
-mkNonRecRhsCtxt bndr unf
+mkNonRecRhsCtxt lvl bndr unf
| certainly_inline = OccVanilla -- See Note [Cascading inlines]
| otherwise = OccRhs
where
@@ -1103,11 +1103,12 @@ mkNonRecRhsCtxt bndr unf
-- has set the OccInfo for this binder before calling occAnalNonRecRhs
case idOccInfo bndr of
OneOcc { occ_in_lam = NotInsideLam, occ_n_br = 1 }
- -> active && not_stable
+ -> active && not stable_unf && not top_bottoming
_ -> False
active = isAlwaysActive (idInlineActivation bndr)
- not_stable = not (isStableUnfolding unf)
+ stable_unf = isStableUnfolding unf
+ top_bottoming = isTopLevel lvl && isDeadEndId bndr
-----------------
occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
@@ -2410,7 +2411,7 @@ float ==>
This is worse than the slow cascade, so we only want to say "certainly_inline"
if it really is certain. Look at the note with preInlineUnconditionally
-for the various clauses.
+for the various clauses. See #24582 for an example of the two getting out of sync.
************************************************************************
=====================================
compiler/GHC/Core/Opt/Simplify/Env.hs
=====================================
@@ -17,7 +17,7 @@ module GHC.Core.Opt.Simplify.Env (
seDoEtaReduction, seEtaExpand, seFloatEnable, seInline, seNames,
seOptCoercionOpts, sePedanticBottoms, sePhase, sePlatform, sePreInline,
seRuleOpts, seRules, seUnfoldingOpts,
- mkSimplEnv, extendIdSubst,
+ mkSimplEnv, extendIdSubst, extendCvIdSubst,
extendTvSubst, extendCvSubst,
zapSubstEnv, setSubstEnv, bumpCaseDepth,
getInScope, setInScopeFromE, setInScopeFromF,
@@ -550,6 +550,10 @@ extendCvSubst env@(SimplEnv {seCvSubst = csubst}) var co
= assert (isCoVar var) $
env {seCvSubst = extendVarEnv csubst var co}
+extendCvIdSubst :: SimplEnv -> Id -> OutExpr -> SimplEnv
+extendCvIdSubst env bndr (Coercion co) = extendCvSubst env bndr co
+extendCvIdSubst env bndr rhs = extendIdSubst env bndr (DoneEx rhs NotJoinPoint)
+
---------------------
getInScope :: SimplEnv -> InScopeSet
getInScope env = seInScope env
=====================================
compiler/GHC/Core/Opt/Simplify/Iteration.hs
=====================================
@@ -412,9 +412,7 @@ simplAuxBind env bndr new_rhs
-- have no NOLINE pragmas, nor RULEs
| exprIsTrivial new_rhs -- Short-cut for let x = y in ...
= return ( emptyFloats env
- , case new_rhs of
- Coercion co -> extendCvSubst env bndr co
- _ -> extendIdSubst env bndr (DoneEx new_rhs NotJoinPoint) )
+ , extendCvIdSubst env bndr new_rhs ) -- bndr can be a CoVar
| otherwise
= do { -- ANF-ise the RHS
@@ -751,7 +749,7 @@ prepareRhs env top_lvl occ rhs0
| is_expandable = anfise rhs0
| otherwise = return (emptyLetFloats, rhs0)
where
- -- We can' use exprIsExpandable because the WHOLE POINT is that
+ -- We can't use exprIsExpandable because the WHOLE POINT is that
-- we want to treat (K <big>) as expandable, because we are just
-- about "anfise" the <big> expression. exprIsExpandable would
-- just say no!
=====================================
compiler/GHC/Core/Opt/Simplify/Utils.hs
=====================================
@@ -2394,6 +2394,9 @@ the outer case scrutinises the same variable as the outer case. This
transformation is called Case Merging. It avoids that the same
variable is scrutinised multiple times.
+See also Note [Eliminating redundant cases] in GHC.Core.Opt.CSE, especially
+wrinkle (ERC2).
+
Wrinkles
(MC1) `tryCaseMerge` "looks though" an inner single-alternative case-on-variable.
=====================================
compiler/GHC/Types/Id.hs
=====================================
@@ -625,7 +625,7 @@ isImplicitId id
idIsFrom :: Module -> Id -> Bool
idIsFrom mod id = nameIsLocalOrFrom mod (idName id)
-isDeadBinder :: Id -> Bool
+isDeadBinder :: Var -> Bool
isDeadBinder bndr | isId bndr = isDeadOcc (idOccInfo bndr)
| otherwise = False -- TyVars count as not dead
=====================================
testsuite/tests/perf/compiler/T24582.hs
=====================================
@@ -0,0 +1,18 @@
+{-# OPTIONS_GHC -fmax-simplifier-iterations=20 #-}
+-- This module made the Simplifier iterate for ever
+
+module T24582(woo) where
+
+
+foo :: String -> Int -> a
+{-# NOINLINE foo #-}
+foo s _ = error s
+
+f :: (Int->Int) -> Int
+{-# NOINLINE f #-}
+f g = g 3
+
+x :: Int -> a
+x = foo "urk"
+
+woo = f x
=====================================
testsuite/tests/perf/compiler/all.T
=====================================
@@ -716,3 +716,8 @@ test ('LookupFusion',
test('T24471',
[req_th, collect_compiler_stats('all', 5)],
multimod_compile, ['T24471', '-v0 -O'])
+
+test ('T24582',
+ [ collect_compiler_stats('bytes allocated',5) ],
+ compile,
+ ['-O'])
=====================================
testsuite/tests/perf/should_run/T21741.hs
=====================================
@@ -0,0 +1,25 @@
+{-# LANGUAGE BangPatterns #-}
+module Main where
+
+import System.Environment
+
+f :: [Int] -> Int
+f xs = g (length xs) (even $ mySum xs)
+{-# NOINLINE f #-}
+
+g :: Int -> Bool -> Int
+g 0 _ = 0
+g n !b = length xs + mySum xs + if b then 0 else 1
+ where
+ xs = [0..n]
+{-# NOINLINE g #-}
+
+mySum :: [Int] -> Int
+mySum = go 0
+ where
+ go acc (x:xs) = go (x+acc) xs
+ go acc _ = acc
+
+main = do
+ (n:_) <- map read <$> getArgs
+ print $ f [0..n]
=====================================
testsuite/tests/perf/should_run/T21741.stdout
=====================================
@@ -0,0 +1 @@
+50000025000003
=====================================
testsuite/tests/perf/should_run/all.T
=====================================
@@ -413,3 +413,9 @@ test('T21839r',
# perf doesn't regress further, so it is not marked as such.
test('T18964', [collect_stats('bytes allocated', 1), only_ways(['normal'])], compile_and_run, ['-O'])
test('T23021', [collect_stats('bytes allocated', 1), only_ways(['normal'])], compile_and_run, ['-O2'])
+
+# GC bytes copied goes up a lot if the space leak returns
+test('T21741', [collect_stats('copied_bytes', 2),
+ extra_run_opts('10000000'),
+ only_ways(['normal'])],
+ compile_and_run, ['-O'])
=====================================
testsuite/tests/simplCore/should_compile/T24251a.hs
=====================================
@@ -0,0 +1,9 @@
+module T24251a where
+
+f xs = xs `seq`
+ (let t = reverse (reverse (reverse (reverse xs))) in
+ case xs of
+ [] -> (t,True)
+ (_:_) -> (t,False))
+
+-- We start with an eval of xs, but that should disappear.
=====================================
testsuite/tests/simplCore/should_compile/T24251a.stderr
=====================================
@@ -0,0 +1,18 @@
+
+==================== Tidy Core ====================
+Result size of Tidy Core
+ = {terms: 33, types: 60, coercions: 0, joins: 0/1}
+
+$wf
+ = \ @a xs ->
+ let {
+ t = reverse1 (reverse1 (reverse1 (reverse1 xs []) []) []) [] } in
+ case xs of {
+ [] -> (# t, True #);
+ : ds ds1 -> (# t, False #)
+ }
+
+f = \ @a xs -> case $wf xs of { (# ww, ww1 #) -> (ww, ww1) }
+
+
+
=====================================
testsuite/tests/simplCore/should_compile/all.T
=====================================
@@ -515,3 +515,4 @@ test('T24229a', [ grep_errmsg(r'wfoo') ], compile, ['-O2 -ddump-simpl -dno-typea
test('T24229b', [ grep_errmsg(r'wfoo') ], compile, ['-O2 -ddump-simpl -dno-typeable-binds -dsuppress-all -dsuppress-uniques -dppr-cols=99999'])
test('T24370', normal, compile, ['-O'])
test('T24551', normal, compile, ['-O -dcore-lint'])
+test('T24251a', normal, compile, ['-O -ddump-simpl -dsuppress-uniques -dno-typeable-binds -dsuppress-all'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a59e74bd3ca28748e678dd6858eb604c8f011a5e...b2a487e4f6c46069441eed762deae4732203e335
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a59e74bd3ca28748e678dd6858eb604c8f011a5e...b2a487e4f6c46069441eed762deae4732203e335
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240324/0700752a/attachment-0001.html>
More information about the ghc-commits
mailing list