[Git][ghc/ghc][wip/T25055] Address #25055, by disabling case-of-runRW# in Gentle phase
Simon Peyton Jones (@simonpj)
gitlab at gitlab.haskell.org
Wed Jul 17 11:49:14 UTC 2024
Simon Peyton Jones pushed to branch wip/T25055 at Glasgow Haskell Compiler / GHC
Commits:
8f09f5e5 by Simon Peyton Jones at 2024-07-17T12:48:34+01:00
Address #25055, by disabling case-of-runRW# in Gentle phase
See Note [Case-of-case and full laziness]
in GHC.Driver.Config.Core.Opt.Simplify
- - - - -
6 changed files:
- compiler/GHC/Core/Opt/Arity.hs
- compiler/GHC/Core/Opt/Simplify/Iteration.hs
- compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
- + testsuite/tests/perf/should_run/T25055.hs
- + testsuite/tests/perf/should_run/T25055.stdout
- testsuite/tests/perf/should_run/all.T
Changes:
=====================================
compiler/GHC/Core/Opt/Arity.hs
=====================================
@@ -860,7 +860,7 @@ data ArityOpts = ArityOpts
-- | The Arity returned is the number of value args the
-- expression can be applied to without doing much work
-exprEtaExpandArity :: ArityOpts -> CoreExpr -> Maybe SafeArityType
+exprEtaExpandArity :: HasDebugCallStack => ArityOpts -> CoreExpr -> Maybe SafeArityType
-- exprEtaExpandArity is used when eta expanding
-- e ==> \xy -> e x y
-- Nothing if the expression has arity 0
=====================================
compiler/GHC/Core/Opt/Simplify/Iteration.hs
=====================================
@@ -2342,34 +2342,44 @@ rebuildCall env (ArgInfo { ai_fun = fun_id, ai_args = rev_args })
(ApplyToVal { sc_arg = arg, sc_env = arg_se
, sc_cont = cont, sc_hole_ty = fun_ty })
| fun_id `hasKey` runRWKey
- , [ TyArg {}, TyArg {} ] <- rev_args
- -- Do this even if (contIsStop cont)
+ , [ TyArg { as_arg_ty = hole_ty }, TyArg {} ] <- rev_args
+ -- Do this even if (contIsStop cont), or if seCaseCase is off.
-- See Note [No eta-expansion in runRW#]
= do { let arg_env = arg_se `setInScopeFromE` env
- ty' = contResultType cont
+
+ overall_res_ty = contResultType cont
+ -- hole_ty is the type of the current runRW# application
+ (outer_cont, new_runrw_res_ty, inner_cont)
+ | seCaseCase env = (mkBoringStop overall_res_ty, overall_res_ty, cont)
+ | otherwise = (cont, hole_ty, mkBoringStop hole_ty)
+ -- Only when case-of-case is on. See GHC.Driver.Config.Core.Opt.Simplify
+ -- Note [Case-of-case and full laziness]
-- If the argument is a literal lambda already, take a short cut
- -- This isn't just efficiency; if we don't do this we get a beta-redex
- -- every time, so the simplifier keeps doing more iterations.
+ -- This isn't just efficiency:
+ -- * If we don't do this we get a beta-redex every time, so the
+ -- simplifier keeps doing more iterations.
+ -- * Even more important: see Note [No eta-expansion in runRW#]
; arg' <- case arg of
Lam s body -> do { (env', s') <- simplBinder arg_env s
- ; body' <- simplExprC env' body cont
+ ; body' <- simplExprC env' body inner_cont
; return (Lam s' body') }
-- Important: do not try to eta-expand this lambda
-- See Note [No eta-expansion in runRW#]
+
_ -> do { s' <- newId (fsLit "s") ManyTy realWorldStatePrimTy
; let (m,_,_) = splitFunTy fun_ty
env' = arg_env `addNewInScopeIds` [s']
cont' = ApplyToVal { sc_dup = Simplified, sc_arg = Var s'
- , sc_env = env', sc_cont = cont
- , sc_hole_ty = mkVisFunTy m realWorldStatePrimTy ty' }
+ , sc_env = env', sc_cont = inner_cont
+ , sc_hole_ty = mkVisFunTy m realWorldStatePrimTy new_runrw_res_ty }
-- cont' applies to s', then K
; body' <- simplExprC env' arg cont'
; return (Lam s' body') }
- ; let rr' = getRuntimeRep ty'
- call' = mkApps (Var fun_id) [mkTyArg rr', mkTyArg ty', arg']
- ; return (emptyFloats env, call') }
+ ; let rr' = getRuntimeRep new_runrw_res_ty
+ call' = mkApps (Var fun_id) [mkTyArg rr', mkTyArg new_runrw_res_ty, arg']
+ ; rebuild env call' outer_cont }
---------- Simplify value arguments --------------------
rebuildCall env fun_info
@@ -2382,7 +2392,8 @@ rebuildCall env fun_info
-- Strict arguments
| isStrictArgInfo fun_info
- , seCaseCase env
+ , seCaseCase env -- Only when case-of-case is on. See GHC.Driver.Config.Core.Opt.Simplify
+ -- Note [Case-of-case and full laziness]
= -- pprTrace "Strict Arg" (ppr arg $$ ppr (seIdSubst env) $$ ppr (seInScope env)) $
simplExprF (arg_se `setInScopeFromE` env) arg
(StrictArg { sc_fun = fun_info, sc_fun_ty = fun_ty
@@ -3195,7 +3206,9 @@ doCaseToLet scrut case_bndr
--------------------------------------------------
reallyRebuildCase env scrut case_bndr alts cont
- | not (seCaseCase env)
+ | not (seCaseCase env) -- Only when case-of-case is on.
+ -- See GHC.Driver.Config.Core.Opt.Simplify
+ -- Note [Case-of-case and full laziness]
= do { case_expr <- simplAlts env scrut case_bndr alts
(mkBoringStop (contHoleType cont))
; rebuild env case_expr cont }
=====================================
compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
=====================================
@@ -80,6 +80,7 @@ initGentleSimplMode :: DynFlags -> SimplMode
initGentleSimplMode dflags = (initSimplMode dflags InitialPhase "Gentle")
{ -- Don't do case-of-case transformations.
-- This makes full laziness work better
+ -- See Note [Case-of-case and full laziness]
sm_case_case = False
}
@@ -89,3 +90,37 @@ floatEnable dflags =
(True, True) -> FloatEnabled
(True, False)-> FloatNestedOnly
(False, _) -> FloatDisabled
+
+
+{- Note [Case-of-case and full laziness]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Case-of-case can hide opportunities for let-floating (full laziness).
+For example
+ rec { f = \y. case (expensive x) of (a,b) -> blah }
+We might hope to float the (expensive x) out of the \y-loop.
+But if we inline `expensive` we might get
+ \y. case (case x of I# x' -> body) of (a,b) -> blah
+Now if we do case-of-case we get
+ \y. case x if I# x2 ->
+ case body of (a,b) -> blah
+
+Sadly, at this point `body` mentions `x2`, so we can't float it out of the
+\y-loop.
+
+Solution: don't do case-of-case in the "gentle" simplification phase that
+precedes the first float-out transformation. Implementation:
+
+ * `sm_case_case` field in SimplMode
+
+ * Consult `sm_case_case` (via `seCaseCase`) before doing case-of-case
+ in GHC.Core.Opt.Simplify.Iteration.rebuildCall.
+
+Wrinkles
+
+* This applies equally to the case-of-runRW# transformation:
+ case (runRW# (\s. body)) of (a,b) -> blah
+ --->
+ runRW# (\s. case body of (a,b) -> blah)
+ Again, don't do this when `sm_case_case` is off. See #25055 for
+ a motivating example.
+-}
=====================================
testsuite/tests/perf/should_run/T25055.hs
=====================================
@@ -0,0 +1,54 @@
+{-# OPTIONS_GHC -Wall #-}
+-- based on https://byorgey.github.io/blog/posts/2024/06/21/cpih-product-divisors.html
+
+module Main( main ) where
+
+import Control.Monad
+import Control.Monad.ST
+import Data.Array.ST
+import Data.Array.Unboxed
+import Data.Foldable
+
+smallest :: Int -> UArray Int Int
+smallest maxN = runSTUArray $ do
+ arr <- newGenArray (2,maxN) initA
+ for_ [5, 7 .. maxN] $ \k -> do
+ k' <- readArray arr k
+ when (k == k') $ do
+ for_ [k*k, k*(k+2) .. maxN] $ \oddMultipleOfK -> do
+ modifyArray' arr oddMultipleOfK (min k)
+ return arr
+ where
+ initA i
+ | even i = return 2
+ | i `rem` 3 == 0 = return 3
+ | otherwise = return i
+
+factor :: STUArray s Int Int -> Int -> Int -> ST s ()
+-- With #25055 the program ran slow as it appear below, but
+-- fast if you (a) comment out 'let p = smallest maxN ! m'
+-- (b) un-comment the commented-out bindings for p and sm
+factor countsArr maxN n = go n
+ where
+ -- sm = smallest maxN
+
+ go 1 = return ()
+ go m = do
+ -- let p = sm ! m
+ let p = smallest maxN ! m
+ modifyArray' countsArr p (+1)
+ go (m `div` p)
+
+
+counts :: Int -> [Int] -> UArray Int Int
+counts maxN ns = runSTUArray $ do
+ cs <- newArray (2,maxN) 0
+ for_ ns (factor cs maxN)
+ return cs
+
+solve :: [Int] -> Int
+solve = product . map (+ 1) . elems . counts 1000000
+
+main :: IO ()
+main =
+ print $ solve [1..100]
=====================================
testsuite/tests/perf/should_run/T25055.stdout
=====================================
@@ -0,0 +1 @@
+39001250856960000
=====================================
testsuite/tests/perf/should_run/all.T
=====================================
@@ -413,3 +413,4 @@ test('T21839r',
# perf doesn't regress further, so it is not marked as such.
test('T18964', [collect_stats('bytes allocated', 1), only_ways(['normal'])], compile_and_run, ['-O'])
test('T23021', [collect_stats('bytes allocated', 1), only_ways(['normal'])], compile_and_run, ['-O2'])
+test('T25055', [collect_stats('bytes allocated', 2), only_ways(['normal'])], compile_and_run, ['-O2'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8f09f5e501163e80271fcd1406d34569b8ea83f7
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8f09f5e501163e80271fcd1406d34569b8ea83f7
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240717/4268ad9b/attachment-0001.html>
More information about the ghc-commits
mailing list