[Git][ghc/ghc][master] Address #25055, by disabling case-of-runRW# in Gentle phase

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Wed Jul 24 16:23:41 UTC 2024



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
de5d9852 by Simon Peyton Jones at 2024-07-24T12:23:22-04:00
Address #25055, by disabling case-of-runRW# in Gentle phase

See Note [Case-of-case and full laziness]
in GHC.Driver.Config.Core.Opt.Simplify

- - - - -


6 changed files:

- compiler/GHC/Core/Opt/Arity.hs
- compiler/GHC/Core/Opt/Simplify/Iteration.hs
- compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
- + testsuite/tests/perf/should_run/T25055.hs
- + testsuite/tests/perf/should_run/T25055.stdout
- testsuite/tests/perf/should_run/all.T


Changes:

=====================================
compiler/GHC/Core/Opt/Arity.hs
=====================================
@@ -860,7 +860,7 @@ data ArityOpts = ArityOpts
 
 -- | The Arity returned is the number of value args the
 -- expression can be applied to without doing much work
-exprEtaExpandArity :: ArityOpts -> CoreExpr -> Maybe SafeArityType
+exprEtaExpandArity :: HasDebugCallStack => ArityOpts -> CoreExpr -> Maybe SafeArityType
 -- exprEtaExpandArity is used when eta expanding
 --      e  ==>  \xy -> e x y
 -- Nothing if the expression has arity 0


=====================================
compiler/GHC/Core/Opt/Simplify/Iteration.hs
=====================================
@@ -2342,34 +2342,44 @@ rebuildCall env (ArgInfo { ai_fun = fun_id, ai_args = rev_args })
             (ApplyToVal { sc_arg = arg, sc_env = arg_se
                         , sc_cont = cont, sc_hole_ty = fun_ty })
   | fun_id `hasKey` runRWKey
-  , [ TyArg {}, TyArg {} ] <- rev_args
-  -- Do this even if (contIsStop cont)
+  , [ TyArg { as_arg_ty = hole_ty }, TyArg {} ] <- rev_args
+  -- Do this even if (contIsStop cont), or if seCaseCase is off.
   -- See Note [No eta-expansion in runRW#]
   = do { let arg_env = arg_se `setInScopeFromE` env
-             ty'   = contResultType cont
+
+             overall_res_ty  = contResultType cont
+             -- hole_ty is the type of the current runRW# application
+             (outer_cont, new_runrw_res_ty, inner_cont)
+                | seCaseCase env = (mkBoringStop overall_res_ty, overall_res_ty, cont)
+                | otherwise      = (cont, hole_ty, mkBoringStop hole_ty)
+                -- Only when case-of-case is on. See GHC.Driver.Config.Core.Opt.Simplify
+                --    Note [Case-of-case and full laziness]
 
        -- If the argument is a literal lambda already, take a short cut
-       -- This isn't just efficiency; if we don't do this we get a beta-redex
-       -- every time, so the simplifier keeps doing more iterations.
+       -- This isn't just efficiency:
+       --    * If we don't do this we get a beta-redex every time, so the
+       --      simplifier keeps doing more iterations.
+       --    * Even more important: see Note [No eta-expansion in runRW#]
        ; arg' <- case arg of
            Lam s body -> do { (env', s') <- simplBinder arg_env s
-                            ; body' <- simplExprC env' body cont
+                            ; body' <- simplExprC env' body inner_cont
                             ; return (Lam s' body') }
                             -- Important: do not try to eta-expand this lambda
                             -- See Note [No eta-expansion in runRW#]
+
            _ -> do { s' <- newId (fsLit "s") ManyTy realWorldStatePrimTy
                    ; let (m,_,_) = splitFunTy fun_ty
                          env'  = arg_env `addNewInScopeIds` [s']
                          cont' = ApplyToVal { sc_dup = Simplified, sc_arg = Var s'
-                                            , sc_env = env', sc_cont = cont
-                                            , sc_hole_ty = mkVisFunTy m realWorldStatePrimTy ty' }
+                                            , sc_env = env', sc_cont = inner_cont
+                                            , sc_hole_ty = mkVisFunTy m realWorldStatePrimTy new_runrw_res_ty }
                                 -- cont' applies to s', then K
                    ; body' <- simplExprC env' arg cont'
                    ; return (Lam s' body') }
 
-       ; let rr'   = getRuntimeRep ty'
-             call' = mkApps (Var fun_id) [mkTyArg rr', mkTyArg ty', arg']
-       ; return (emptyFloats env, call') }
+       ; let rr'   = getRuntimeRep new_runrw_res_ty
+             call' = mkApps (Var fun_id) [mkTyArg rr', mkTyArg new_runrw_res_ty, arg']
+       ; rebuild env call' outer_cont }
 
 ---------- Simplify value arguments --------------------
 rebuildCall env fun_info
@@ -2382,7 +2392,8 @@ rebuildCall env fun_info
 
   -- Strict arguments
   | isStrictArgInfo fun_info
-  , seCaseCase env
+  , seCaseCase env    -- Only when case-of-case is on. See GHC.Driver.Config.Core.Opt.Simplify
+                      --    Note [Case-of-case and full laziness]
   = -- pprTrace "Strict Arg" (ppr arg $$ ppr (seIdSubst env) $$ ppr (seInScope env)) $
     simplExprF (arg_se `setInScopeFromE` env) arg
                (StrictArg { sc_fun = fun_info, sc_fun_ty = fun_ty
@@ -3195,7 +3206,9 @@ doCaseToLet scrut case_bndr
 --------------------------------------------------
 
 reallyRebuildCase env scrut case_bndr alts cont
-  | not (seCaseCase env)
+  | not (seCaseCase env)    -- Only when case-of-case is on.
+                            -- See GHC.Driver.Config.Core.Opt.Simplify
+                            --    Note [Case-of-case and full laziness]
   = do { case_expr <- simplAlts env scrut case_bndr alts
                                 (mkBoringStop (contHoleType cont))
        ; rebuild env case_expr cont }


=====================================
compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
=====================================
@@ -80,6 +80,7 @@ initGentleSimplMode :: DynFlags -> SimplMode
 initGentleSimplMode dflags = (initSimplMode dflags InitialPhase "Gentle")
   { -- Don't do case-of-case transformations.
     -- This makes full laziness work better
+    -- See Note [Case-of-case and full laziness]
     sm_case_case = False
   }
 
@@ -89,3 +90,37 @@ floatEnable dflags =
     (True, True) -> FloatEnabled
     (True, False)-> FloatNestedOnly
     (False, _)   -> FloatDisabled
+
+
+{- Note [Case-of-case and full laziness]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Case-of-case can hide opportunities for let-floating (full laziness).
+For example
+   rec { f = \y. case (expensive x) of (a,b) -> blah }
+We might hope to float the (expensive x) out of the \y-loop.
+But if we inline `expensive` we might get
+   \y. case (case x of I# x' -> body) of (a,b) -> blah
+Now if we do case-of-case we get
+   \y. case x if I# x2 ->
+       case body of (a,b) -> blah
+
+Sadly, at this point `body` mentions `x2`, so we can't float it out of the
+\y-loop.
+
+Solution: don't do case-of-case in the "gentle" simplification phase that
+precedes the first float-out transformation.  Implementation:
+
+  * `sm_case_case` field in SimplMode
+
+  * Consult `sm_case_case` (via `seCaseCase`) before doing case-of-case
+    in GHC.Core.Opt.Simplify.Iteration.rebuildCall.
+
+Wrinkles
+
+* This applies equally to the case-of-runRW# transformation:
+    case (runRW# (\s. body)) of (a,b) -> blah
+    --->
+    runRW# (\s. case body of (a,b) -> blah)
+  Again, don't do this when `sm_case_case` is off.  See #25055 for
+  a motivating example.
+-}


=====================================
testsuite/tests/perf/should_run/T25055.hs
=====================================
@@ -0,0 +1,62 @@
+{-# OPTIONS_GHC -Wall  #-}
+-- based on https://byorgey.github.io/blog/posts/2024/06/21/cpih-product-divisors.html
+
+
+import Control.Monad
+import Control.Monad.ST
+import Data.Array.ST
+import Data.Array.Unboxed
+import Data.Foldable
+
+-- This repro code turned out to be delicate wrt integer overflow
+-- See comments in #25055
+-- So, for reproducibility we use Int32, to make sure the code works on
+--    32 bit machines with no overflow issues
+import GHC.Int
+
+smallest :: Int32 -> UArray Int32 Int32
+smallest maxN = runSTUArray $ do
+  arr <- newGenArray (2,maxN) initA
+  for_ [5, 7 .. maxN] $ \k -> do
+      k' <- readArray arr k
+      when (k == k') $ do
+        -- for type Int32 when k = 46349, k * k is negative
+        -- for_ [k*k, k*(k+2) .. maxN] $ \oddMultipleOfK -> do
+        for_ [k, k + 2 .. maxN] $ \oddMultipleOfK -> do
+          modifyArray' arr oddMultipleOfK (min k)
+  return arr
+    where
+      initA i
+        | even i          = return 2
+        | i `rem` 3 == 0  = return 3
+        | otherwise       = return i
+
+factor :: STUArray s Int32 Int32 -> Int32 -> Int32 -> ST s ()
+-- With #25055 the program ran slow as it appear below, but
+-- fast if you (a) comment out 'let p = smallest maxN ! m'
+--             (b) un-comment the commented-out bindings for p and sm
+factor countsArr maxN n  = go n
+  where
+    -- sm = smallest maxN
+
+    go 1 = return ()
+    go m = do
+      -- let p = sm ! m
+      let p = smallest maxN ! m
+      modifyArray' countsArr p (+1)
+      go (m `div` p)
+
+
+counts :: Int32 -> [Int32] ->  UArray Int32 Int32
+counts maxN ns  = runSTUArray $ do
+  cs <- newArray (2,maxN) 0
+  for_ ns (factor cs maxN)
+  return cs
+
+solve :: [Int32] -> Int32
+solve = product . map (+ 1) . elems . counts 1000000
+
+main :: IO ()
+main =
+  -- print $ maximum $ elems $ smallest 1000000
+  print $ solve [1..100]


=====================================
testsuite/tests/perf/should_run/T25055.stdout
=====================================
@@ -0,0 +1 @@
+1188495


=====================================
testsuite/tests/perf/should_run/all.T
=====================================
@@ -413,3 +413,4 @@ test('T21839r',
 # perf doesn't regress further, so it is not marked as such.
 test('T18964', [collect_stats('bytes allocated', 1), only_ways(['normal'])], compile_and_run, ['-O'])
 test('T23021', [collect_stats('bytes allocated', 1), only_ways(['normal'])], compile_and_run, ['-O2'])
+test('T25055', [collect_stats('bytes allocated', 2), only_ways(['normal'])], compile_and_run, ['-O2'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/de5d9852dbdd367611bf9e45e69c723d26351992

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/de5d9852dbdd367611bf9e45e69c723d26351992
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240724/5ccf7042/attachment-0001.html>


More information about the ghc-commits mailing list