[Git][ghc/ghc][wip/T24251a] 2 commits: Eliminate redundant cases in CSE

Simon Peyton Jones (@simonpj) gitlab at gitlab.haskell.org
Sun Mar 24 22:12:06 UTC 2024



Simon Peyton Jones pushed to branch wip/T24251a at Glasgow Haskell Compiler / GHC


Commits:
7fffd774 by Simon Peyton Jones at 2024-03-24T22:11:08+00:00
Eliminate redundant cases in CSE

Addresses programs like this

  f xs = xs `seq`
         (let t = reverse $ reverse $ reverse $ reverse $ reverse $ reverse xs in
          case xs of
            [] ->  (t,True)
            (_:_) -> (t,False))

Also including the case where t is a join point.

Relates to #24251.  Test in T24251a.

(And see Simon's GHC Log 13 March.)

Also added a perf test for #21741

- - - - -
b2a487e4 by Simon Peyton Jones at 2024-03-24T22:11:46+00:00
Account for bottoming functions in OccurAnal

This fixes #24582, a small but long-standing bug

- - - - -


14 changed files:

- compiler/GHC/Core/Opt/CSE.hs
- compiler/GHC/Core/Opt/OccurAnal.hs
- compiler/GHC/Core/Opt/Simplify/Env.hs
- compiler/GHC/Core/Opt/Simplify/Iteration.hs
- compiler/GHC/Core/Opt/Simplify/Utils.hs
- compiler/GHC/Types/Id.hs
- + testsuite/tests/perf/compiler/T24582.hs
- testsuite/tests/perf/compiler/all.T
- + testsuite/tests/perf/should_run/T21741.hs
- + testsuite/tests/perf/should_run/T21741.stdout
- testsuite/tests/perf/should_run/all.T
- + testsuite/tests/simplCore/should_compile/T24251a.hs
- + testsuite/tests/simplCore/should_compile/T24251a.stderr
- testsuite/tests/simplCore/should_compile/all.T


Changes:

=====================================
compiler/GHC/Core/Opt/CSE.hs
=====================================
@@ -14,13 +14,15 @@ import GHC.Types.Var.Env ( mkInScopeSet )
 import GHC.Types.Id     ( Id, idType, idHasRules, zapStableUnfolding
                         , idInlineActivation, setInlineActivation
                         , zapIdOccInfo, zapIdUsageInfo, idInlinePragma
-                        , isJoinId, idJoinPointHood, idUnfolding )
-import GHC.Core.Utils   ( mkAltExpr
-                        , exprIsTickedString
+                        , isJoinId, idJoinPointHood, idUnfolding
+                        , zapIdUnfolding, isDeadBinder )
+import GHC.Core.Utils   ( mkAltExpr, exprIsTickedString
                         , stripTicksE, stripTicksT, mkTicks )
 import GHC.Core.FVs     ( exprFreeVars )
 import GHC.Core.Type    ( tyConAppArgs )
 import GHC.Core
+import GHC.Core.Utils   ( exprIsTrivial )
+import GHC.Core.Opt.OccurAnal( scrutBinderSwap_maybe )
 import GHC.Utils.Outputable
 import GHC.Types.Basic
 import GHC.Types.Tickish
@@ -714,25 +716,33 @@ cseExpr env (Case e bndr ty alts) = cseCase env e bndr ty alts
 
 cseCase :: CSEnv -> InExpr -> InId -> InType -> [InAlt] -> OutExpr
 cseCase env scrut bndr ty alts
-  = Case scrut1 bndr3 ty' $
-    combineAlts (map cse_alt alts)
+  | Just body <- caseElim scrut bndr alts
+  , -- See Note [Eliminating redundant cases]
+    let zapped_bndr = zapIdUnfolding bndr -- Wrinkle (ERC1)
+  = cseExpr env (Let (NonRec zapped_bndr scrut) body)
+
+  | otherwise
+  = Case scrut' bndr' ty' alts'
+
   where
     ty' = substTyUnchecked (csEnvSubst env) ty
-    (cse_done, scrut1) = try_for_cse env scrut
+    (cse_done, scrut') = try_for_cse env scrut
 
     bndr1 = zapIdOccInfo bndr
       -- Zapping the OccInfo is needed because the extendCSEnv
       -- in cse_alt may mean that a dead case binder
       -- becomes alive, and Lint rejects that
     (env1, bndr2)    = addBinder env bndr1
-    (alt_env, bndr3) = extendCSEnvWithBinding env1 bndr bndr2 scrut1 cse_done
+    (alt_env, bndr') = extendCSEnvWithBinding env1 bndr bndr2 scrut' cse_done
          -- extendCSEnvWithBinding: see Note [CSE for case expressions]
 
+    alts' = combineAlts (map cse_alt alts)
+
     con_target :: OutExpr
     con_target = lookupSubst alt_env bndr
 
     arg_tys :: [OutType]
-    arg_tys = tyConAppArgs (idType bndr3)
+    arg_tys = tyConAppArgs (idType bndr')
 
     -- See Note [CSE for case alternatives]
     cse_alt (Alt (DataAlt con) args rhs)
@@ -747,6 +757,45 @@ cseCase env scrut bndr ty alts
         where
           (env', args') = addBinders alt_env args
 
+caseElim :: InExpr -> InId -> [InAlt] -> Maybe InExpr
+-- Can we eliminate the case altogether?  If so return the body.
+--   Note [Eliminating redundant cases]
+caseElim scrut case_bndr alts
+  | [Alt _ bndrs rhs] <- alts
+  , Just (scrut_var, _) <- scrutBinderSwap_maybe scrut
+  , all isDeadBinder bndrs
+  , isEvaldSoon (scrut_var, case_bndr) rhs
+  = Just rhs
+
+  | otherwise
+  = Nothing
+
+isEvaldSoon :: (OutId, OutId) -> OutExpr -> Bool
+-- (isEvaldSoon (v1,v2) e) is True if either v1 or v2 is evaluated "soon" by e
+isEvaldSoon (v1,v2) expr
+  = go expr
+  where
+    hit :: Var -> Bool
+    hit v = v==v1 || v==v2
+
+    go (Var v)    = hit v
+    go (Let _ e)  = go e
+    go (Tick _ e) = go e
+    go (Cast e _) = go e
+
+    go (Case scrut cb _ alts)
+      = go scrut ||
+        (exprIsTrivial scrut &&
+         all go_alt alts &&
+         not (hit cb)    &&  -- Check for
+         all ok_alt alts)    --   shadowing
+         -- ok_alt only runs if things look good
+
+    go _ = False  -- Lit, App, Lam, Coercion, Type
+
+    go_alt (Alt _ _ rhs) = go rhs
+    ok_alt (Alt _ cbs _) = not (any hit cbs)
+
 combineAlts :: [OutAlt] -> [OutAlt]
 -- See Note [Combine case alternatives]
 combineAlts alts
@@ -798,6 +847,64 @@ turning K2 into 'x' increases the number of live variables.  But
 * The next run of the simplifier will turn 'x' back into K2, so we won't
   permanently bloat the free-var count.
 
+Note [Eliminating redundant cases]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+   case x of x' { DEFAULT ->
+   case y of y' { DEFAULT ->
+   let v = <thunk> in
+   case x' of { True -> e1; False -> e2 }
+   body }}}
+
+The initial `seq` of `x` is redundant.  But the Simplifier is generally
+careful not to drop that outer `case x` based on its demand-info: see Note
+[Case-to-let for strictly-used binders] in GHC.Core.Opt.Simplify.Iteration.
+
+Instead we peek at the body of the case, using `isEvaldSoon`, to see if `x` is
+evaluated "soon" in the code path that follows.  If so we transform the
+`case` to a `let`
+   let x' = x in
+   case y of y' ... etc...
+
+The notion of "soon" is a bit squishy, and is implemented by `isEvaldSoon`.
+We allow interchanging eval's (as in the `case x` vs `case y` above.  But
+what about
+   case x of x' { DEFAULT ->
+   case (f y) of y' { DEFAULT ->
+   case x' of { True -> e1; False -> e2 }
+If we drop the `seq` on `x` we fall vicitm of #21741.  There is nothing
+wrong /semantically/ with dropping the `seq`, but the case of #21741 it causes
+a big space leak.
+
+So the conditions in `isEvaldSoon` are quite narrow: the two evals are
+separated only by lets and other evals on /variables/.
+
+Wrinkle (ERC1):
+   x' will have an (OtherCon []) unfolding on it.  We want to zap that
+   unfolding before turning it into (let x' = x in ...).
+
+Wrinkle (ERC2):
+  You might wonder if case-merging in the Simplifer doesn't cover this.
+  See GHC.Core.Opt.Simplify.Utils.tryCaseMerge. and Note [Merge Nested Cases]
+  in that same module.  But no, it is defeated by the 'let v = <thunk>` in our
+  example above, and I didn't want to make it more complicated.
+
+  Mabye case-merging should be made simpler, or even moved outright here into CSE.
+
+Wrinkle (ERC3):
+  Why is this done in CSE?  Becuase the "peeking" is tiresome and potentially a bit
+  expensive (quadratic in deep nests) so we don't want too often.  CSE runs seldom,
+  it is a pretty simple pass, and it's easy to "drop in" this extra optimisation.
+  Also eliminating redundant cases is a bit like commoning up duplicated work.
+
+Wrinkle (ERC4):
+  You might wonder whether we want to do this "optimisation" /at all/. After all, as
+  Note [Case-to-let for strictly-used binders] point out, dropping the eval is
+  not a huge deal, because the inner eval should just be a multi-way jump (no
+  actual eval).  But droppping the eval removes clutter, and I found that not dropping
+  made some functions look a bit bigger, and hence they didn't get inlined.
+
+  This is small beer though: I don't think it's an /important/ transformation.
 
 Note [Combine case alternatives]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


=====================================
compiler/GHC/Core/Opt/OccurAnal.hs
=====================================
@@ -983,7 +983,7 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine
   | mb_join@(JoinPoint {}) <- idJoinPointHood bndr
   = -- Analyse the RHS and /then/ the body
     let -- Analyse the rhs first, generating rhs_uds
-        !(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env ire mb_join bndr rhs
+        !(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
         rhs_uds = foldr1 orUDs rhs_uds_s   -- NB: orUDs.  See (W4) of
                                            -- Note [Occurrence analysis for join points]
 
@@ -1009,7 +1009,7 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine
         -- => join arity O of Note [Join arity prediction based on joinRhsArity]
         (tagged_bndr, mb_join) = tagNonRecBinder lvl occ bndr
 
-        !(rhs_uds_s, final_bndr, rhs') = occAnalNonRecRhs env ire mb_join tagged_bndr rhs
+        !(rhs_uds_s, final_bndr, rhs') = occAnalNonRecRhs env lvl ire mb_join tagged_bndr rhs
     in WUD (foldr andUDs body_uds rhs_uds_s)      -- Note `andUDs`
            (combine [NonRec final_bndr rhs'] body)
 
@@ -1024,10 +1024,10 @@ occAnalNonRecBody env bndr thing_inside
     in WUD inner_uds (occ, res)
 
 -----------------
-occAnalNonRecRhs :: OccEnv -> ImpRuleEdges -> JoinPointHood
-                 -> Id -> CoreExpr
+occAnalNonRecRhs :: OccEnv -> TopLevelFlag -> ImpRuleEdges
+                -> JoinPointHood -> Id -> CoreExpr
                  -> ([UsageDetails], Id, CoreExpr)
-occAnalNonRecRhs !env imp_rule_edges mb_join bndr rhs
+occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs
   | null rules, null imp_rule_infos
   =  -- Fast path for common case of no rules. This is only worth
      -- 0.1% perf on average, but it's also only a line or two of code
@@ -1046,7 +1046,7 @@ occAnalNonRecRhs !env imp_rule_edges mb_join bndr rhs
     -- j will never be scrutinised.
     env1 | is_join_point = setTailCtxt env
          | otherwise     = setNonTailCtxt rhs_ctxt env  -- Zap occ_join_points
-    rhs_ctxt = mkNonRecRhsCtxt bndr unf
+    rhs_ctxt = mkNonRecRhsCtxt lvl bndr unf
 
     -- See Note [Sources of one-shot information]
     rhs_env = addOneShotsFromDmd bndr env1
@@ -1092,9 +1092,9 @@ occAnalNonRecRhs !env imp_rule_edges mb_join bndr rhs
                    [ l `andUDs` adjustTailArity mb_join r
                    | (_,l,r) <- rules_w_uds ]
 
-mkNonRecRhsCtxt :: Id -> Unfolding -> OccEncl
+mkNonRecRhsCtxt :: TopLevelFlag -> Id -> Unfolding -> OccEncl
 -- Precondition: Id is not a join point
-mkNonRecRhsCtxt bndr unf
+mkNonRecRhsCtxt lvl bndr unf
   | certainly_inline = OccVanilla -- See Note [Cascading inlines]
   | otherwise        = OccRhs
   where
@@ -1103,11 +1103,12 @@ mkNonRecRhsCtxt bndr unf
         -- has set the OccInfo for this binder before calling occAnalNonRecRhs
         case idOccInfo bndr of
           OneOcc { occ_in_lam = NotInsideLam, occ_n_br = 1 }
-            -> active && not_stable
+            -> active && not stable_unf && not top_bottoming
           _ -> False
 
     active     = isAlwaysActive (idInlineActivation bndr)
-    not_stable = not (isStableUnfolding unf)
+    stable_unf = isStableUnfolding unf
+    top_bottoming = isTopLevel lvl && isDeadEndId bndr
 
 -----------------
 occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
@@ -2410,7 +2411,7 @@ float ==>
 
 This is worse than the slow cascade, so we only want to say "certainly_inline"
 if it really is certain.  Look at the note with preInlineUnconditionally
-for the various clauses.
+for the various clauses.  See #24582 for an example of the two getting out of sync.
 
 
 ************************************************************************


=====================================
compiler/GHC/Core/Opt/Simplify/Env.hs
=====================================
@@ -17,7 +17,7 @@ module GHC.Core.Opt.Simplify.Env (
         seDoEtaReduction, seEtaExpand, seFloatEnable, seInline, seNames,
         seOptCoercionOpts, sePedanticBottoms, sePhase, sePlatform, sePreInline,
         seRuleOpts, seRules, seUnfoldingOpts,
-        mkSimplEnv, extendIdSubst,
+        mkSimplEnv, extendIdSubst, extendCvIdSubst,
         extendTvSubst, extendCvSubst,
         zapSubstEnv, setSubstEnv, bumpCaseDepth,
         getInScope, setInScopeFromE, setInScopeFromF,
@@ -550,6 +550,10 @@ extendCvSubst env@(SimplEnv {seCvSubst = csubst}) var co
   = assert (isCoVar var) $
     env {seCvSubst = extendVarEnv csubst var co}
 
+extendCvIdSubst :: SimplEnv -> Id -> OutExpr -> SimplEnv
+extendCvIdSubst env bndr (Coercion co) = extendCvSubst env bndr co
+extendCvIdSubst env bndr rhs           = extendIdSubst env bndr (DoneEx rhs NotJoinPoint)
+
 ---------------------
 getInScope :: SimplEnv -> InScopeSet
 getInScope env = seInScope env


=====================================
compiler/GHC/Core/Opt/Simplify/Iteration.hs
=====================================
@@ -412,9 +412,7 @@ simplAuxBind env bndr new_rhs
   -- have no NOLINE pragmas, nor RULEs
   | exprIsTrivial new_rhs  -- Short-cut for let x = y in ...
   = return ( emptyFloats env
-           , case new_rhs of
-                Coercion co -> extendCvSubst env bndr co
-                _           -> extendIdSubst env bndr (DoneEx new_rhs NotJoinPoint) )
+           , extendCvIdSubst env bndr new_rhs )  -- bndr can be a CoVar
 
   | otherwise
   = do  { -- ANF-ise the RHS
@@ -751,7 +749,7 @@ prepareRhs env top_lvl occ rhs0
   | is_expandable = anfise rhs0
   | otherwise     = return (emptyLetFloats, rhs0)
   where
-    -- We can' use exprIsExpandable because the WHOLE POINT is that
+    -- We can't use exprIsExpandable because the WHOLE POINT is that
     -- we want to treat (K <big>) as expandable, because we are just
     -- about "anfise" the <big> expression.  exprIsExpandable would
     -- just say no!


=====================================
compiler/GHC/Core/Opt/Simplify/Utils.hs
=====================================
@@ -2394,6 +2394,9 @@ the outer case scrutinises the same variable as the outer case. This
 transformation is called Case Merging.  It avoids that the same
 variable is scrutinised multiple times.
 
+See also Note [Eliminating redundant cases] in GHC.Core.Opt.CSE, especially
+wrinkle (ERC2).
+
 Wrinkles
 
 (MC1) `tryCaseMerge` "looks though" an inner single-alternative case-on-variable.


=====================================
compiler/GHC/Types/Id.hs
=====================================
@@ -625,7 +625,7 @@ isImplicitId id
 idIsFrom :: Module -> Id -> Bool
 idIsFrom mod id = nameIsLocalOrFrom mod (idName id)
 
-isDeadBinder :: Id -> Bool
+isDeadBinder :: Var -> Bool
 isDeadBinder bndr | isId bndr = isDeadOcc (idOccInfo bndr)
                   | otherwise = False   -- TyVars count as not dead
 


=====================================
testsuite/tests/perf/compiler/T24582.hs
=====================================
@@ -0,0 +1,18 @@
+{-# OPTIONS_GHC  -fmax-simplifier-iterations=20 #-}
+-- This module made the Simplifier iterate for ever
+
+module T24582(woo) where
+
+
+foo :: String -> Int -> a
+{-# NOINLINE foo #-}
+foo s _ = error s
+
+f :: (Int->Int) -> Int
+{-# NOINLINE f #-}
+f g = g 3
+
+x :: Int -> a
+x = foo "urk"
+
+woo = f x


=====================================
testsuite/tests/perf/compiler/all.T
=====================================
@@ -716,3 +716,8 @@ test ('LookupFusion',
 test('T24471',
      [req_th, collect_compiler_stats('all', 5)],
      multimod_compile, ['T24471', '-v0 -O'])
+
+test ('T24582',
+      [ collect_compiler_stats('bytes allocated',5) ],
+      compile,
+      ['-O'])


=====================================
testsuite/tests/perf/should_run/T21741.hs
=====================================
@@ -0,0 +1,25 @@
+{-# LANGUAGE BangPatterns #-}
+module Main where
+
+import System.Environment
+
+f :: [Int] -> Int
+f xs = g (length xs) (even $ mySum xs)
+{-# NOINLINE f #-}
+
+g :: Int -> Bool -> Int
+g 0 _ = 0
+g n !b = length xs + mySum xs + if b then 0 else 1
+  where
+    xs = [0..n]
+{-# NOINLINE g #-}
+
+mySum :: [Int] -> Int
+mySum = go 0
+  where
+    go acc (x:xs) = go (x+acc) xs
+    go acc _      = acc
+
+main = do
+  (n:_) <- map read <$> getArgs
+  print $ f [0..n]


=====================================
testsuite/tests/perf/should_run/T21741.stdout
=====================================
@@ -0,0 +1 @@
+50000025000003


=====================================
testsuite/tests/perf/should_run/all.T
=====================================
@@ -413,3 +413,9 @@ test('T21839r',
 # perf doesn't regress further, so it is not marked as such.
 test('T18964', [collect_stats('bytes allocated', 1), only_ways(['normal'])], compile_and_run, ['-O'])
 test('T23021', [collect_stats('bytes allocated', 1), only_ways(['normal'])], compile_and_run, ['-O2'])
+
+# GC bytes copied goes up a lot if the space leak returns
+test('T21741', [collect_stats('copied_bytes', 2),
+                extra_run_opts('10000000'),
+                only_ways(['normal'])],
+      compile_and_run, ['-O'])


=====================================
testsuite/tests/simplCore/should_compile/T24251a.hs
=====================================
@@ -0,0 +1,9 @@
+module T24251a where
+
+f xs = xs `seq`
+       (let t = reverse (reverse (reverse (reverse xs))) in
+        case xs of
+          [] ->  (t,True)
+          (_:_) -> (t,False))
+
+-- We start with an eval of xs, but that should disappear.


=====================================
testsuite/tests/simplCore/should_compile/T24251a.stderr
=====================================
@@ -0,0 +1,18 @@
+
+==================== Tidy Core ====================
+Result size of Tidy Core
+  = {terms: 33, types: 60, coercions: 0, joins: 0/1}
+
+$wf
+  = \ @a xs ->
+      let {
+        t = reverse1 (reverse1 (reverse1 (reverse1 xs []) []) []) [] } in
+      case xs of {
+        [] -> (# t, True #);
+        : ds ds1 -> (# t, False #)
+      }
+
+f = \ @a xs -> case $wf xs of { (# ww, ww1 #) -> (ww, ww1) }
+
+
+


=====================================
testsuite/tests/simplCore/should_compile/all.T
=====================================
@@ -515,3 +515,4 @@ test('T24229a', [ grep_errmsg(r'wfoo') ], compile, ['-O2 -ddump-simpl -dno-typea
 test('T24229b', [ grep_errmsg(r'wfoo') ], compile, ['-O2 -ddump-simpl -dno-typeable-binds -dsuppress-all -dsuppress-uniques -dppr-cols=99999'])
 test('T24370', normal, compile, ['-O'])
 test('T24551', normal, compile, ['-O -dcore-lint'])
+test('T24251a', normal, compile, ['-O -ddump-simpl -dsuppress-uniques -dno-typeable-binds -dsuppress-all'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a59e74bd3ca28748e678dd6858eb604c8f011a5e...b2a487e4f6c46069441eed762deae4732203e335

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a59e74bd3ca28748e678dd6858eb604c8f011a5e...b2a487e4f6c46069441eed762deae4732203e335
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240324/0700752a/attachment-0001.html>


More information about the ghc-commits mailing list