[Git][ghc/ghc][master] Add flags for switching off speculative evaluation.

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Thu Jan 9 22:29:54 UTC 2025



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
23099752 by Luite Stegeman at 2025-01-08T00:33:33+01:00
Add flags for switching off speculative evaluation.

We found that speculative evaluation can increase the amount of
allocations in some circumstances. This patch adds new flags for
selectively disabling speculative evaluation, allowing us to
test the effect of the optimization.

The new flags are:

  -fspec-eval
     globally enable speculative evaluation

  -fspec-eval-dictfun
     enable speculative evaluation for dictionary functions (no effect
     if speculative evaluation is globally disabled)

The new flags are on by default for all optimisation levels.

See #25284

- - - - -


12 changed files:

- compiler/GHC/CoreToStg/Prep.hs
- compiler/GHC/Driver/Config/CoreToStg/Prep.hs
- compiler/GHC/Driver/DynFlags.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/Driver/Session.hs
- docs/users_guide/using-optimisation.rst
- + testsuite/tests/core-to-stg/T25284/A.hs
- + testsuite/tests/core-to-stg/T25284/B.hs
- + testsuite/tests/core-to-stg/T25284/Cls.hs
- + testsuite/tests/core-to-stg/T25284/Main.hs
- + testsuite/tests/core-to-stg/T25284/T25284.stdout
- + testsuite/tests/core-to-stg/T25284/all.T


Changes:

=====================================
compiler/GHC/CoreToStg/Prep.hs
=====================================
@@ -2051,6 +2051,16 @@ conceptually.
 See also Note [Floats and FloatDecision] for how we maintain whole groups of
 floats and how far they go.
 
+Note [Controlling Speculative Evaluation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Most of the time, speculative evaluation has a positive effect on performance,
+but we have found a case where speculative evaluation of dictionary functions
+leads to a performance regression #25284.
+
+Therefore we have some flags to control it. See the optimization section in
+the User's Guide for the description of these flags and when to use them.
+
 Note [Floats and FloatDecision]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We have a special datatype `Floats` for modelling a telescope of `FloatingBind`
@@ -2275,7 +2285,15 @@ mkNonRecFloat env lev bndr rhs
       }
 
     is_hnf      = exprIsHNF rhs
-    ok_for_spec = exprOkForSpecEval (not . is_rec_call) rhs
+    cfg         = cpe_config env
+
+    ok_for_spec = exprOkForSpecEval call_ok_for_spec rhs
+    -- See Note [Controlling Speculative Evaluation]
+    call_ok_for_spec x
+      | is_rec_call x                           = False
+      | not (cp_specEval cfg)                   = False
+      | not (cp_specEvalDFun cfg) && isDFunId x = False
+      | otherwise                               = True
     is_rec_call = (`elemUnVarSet` cpe_rec_ids env)
 
     -- See Note [Pin evaluatedness on floats]
@@ -2517,6 +2535,11 @@ data CorePrepConfig = CorePrepConfig
   -- ^ Configuration for arity analysis ('exprEtaExpandArity').
   -- See Note [Eta expansion of arguments in CorePrep]
   -- When 'Nothing' (e.g., -O0, -O1), use the cheaper 'exprArity' instead
+  , cp_specEval                :: !Bool
+  -- ^ Whether to perform speculative evaluation
+  -- See Note [Controlling Speculative Evaluation]
+  , cp_specEvalDFun            :: !Bool
+  -- ^ Whether to perform speculative evaluation on DFuns
   }
 
 data CorePrepEnv


=====================================
compiler/GHC/Driver/Config/CoreToStg/Prep.hs
=====================================
@@ -24,6 +24,8 @@ initCorePrepConfig hsc_env = do
       , cp_arityOpts = if gopt Opt_DoCleverArgEtaExpansion dflags
                        then Just (initArityOpts dflags)
                        else Nothing
+      , cp_specEval  = gopt Opt_SpecEval dflags
+      , cp_specEvalDFun = gopt Opt_SpecEvalDictFun dflags
       }
 
 initCorePrepPgmConfig :: DynFlags -> [Var] -> CorePrepPgmConfig


=====================================
compiler/GHC/Driver/DynFlags.hs
=====================================
@@ -1287,6 +1287,8 @@ optLevelFlags -- see Note [Documenting optimisation flags]
 --   RegsGraph suffers performance regression. See #7679
 --  , ([2],     Opt_StaticArgumentTransformation)
 --   Static Argument Transformation needs investigation. See #9374
+    , ([0,1,2], Opt_SpecEval)
+    , ([0,1,2], Opt_SpecEvalDictFun)
     ]
 
 


=====================================
compiler/GHC/Driver/Flags.hs
=====================================
@@ -674,6 +674,9 @@ data GeneralFlag
    | Opt_NumConstantFolding
    | Opt_CoreConstantFolding
    | Opt_FastPAPCalls                  -- #6084
+   | Opt_SpecEval
+   | Opt_SpecEvalDictFun   -- See Note [Controlling Speculative Evaluation]
+
 
    -- Inference flags
    | Opt_DoTagInferenceChecks
@@ -912,6 +915,8 @@ optimisationFlags = EnumSet.fromList
    , Opt_WorkerWrapper
    , Opt_WorkerWrapperUnlift
    , Opt_SolveConstantDicts
+   , Opt_SpecEval
+   , Opt_SpecEvalDictFun
    ]
 
 -- | The set of flags which affect code generation and can change a program's


=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -2544,6 +2544,8 @@ fFlagsDeps = [
   flagSpec "num-constant-folding"             Opt_NumConstantFolding,
   flagSpec "core-constant-folding"            Opt_CoreConstantFolding,
   flagSpec "fast-pap-calls"                   Opt_FastPAPCalls,
+  flagSpec "spec-eval"                        Opt_SpecEval,
+  flagSpec "spec-eval-dictfun"                Opt_SpecEvalDictFun,
   flagSpec "cmm-control-flow"                 Opt_CmmControlFlow,
   flagSpec "show-warning-groups"              Opt_ShowWarnGroups,
   flagSpec "hide-source-paths"                Opt_HideSourcePaths,


=====================================
docs/users_guide/using-optimisation.rst
=====================================
@@ -405,6 +405,55 @@ as such you shouldn't need to set any of them explicitly. A flag
     intermediate language, where it is able to common up some subexpressions
     that differ in their types, but not their representation.
 
+.. ghc-flag:: -fspec-eval
+    :shortdesc: Enables speculative evaluation.
+    :type: dynamic
+    :category:
+    :reverse: -fno-spec-eval
+
+    :default: on
+    :since: 9.14.1
+
+    Enables speculative evaluation which usually results in fewer allocations.
+    Enabling speculative evaluation should not cause performance regressions.
+    If you encounter any, please open a ticket.
+
+    Note that disabling this flag will switch off speculative evaluation
+    completely, causing :ghc-flag:`-fspec-eval-dictfun` to have
+    no effect.
+
+.. ghc-flag:: -fspec-eval-dictfun
+    :shortdesc: Enables speculative evaluation of dictionary functions.
+    :type: dynamic
+    :category:
+    :reverse: -fno-spec-eval-dictfun
+
+    :default: on
+    :since: 9.14.1
+
+    Enables speculative (strict) evaluation of dictionary functions.
+
+    This is best explained with an example ::
+
+        instance C a => D a where ...
+
+        g :: D a => a -> Int
+        g x = ...
+
+        f :: C a => a -> Int
+        f x = g x
+
+    Function `f` has to pass a `D a` dictionary to `g`, and uses a dictionary
+    function `C a => D a` to compute it. If speculative evaluation for
+    dictionary functions is enabled, this dictionary is computed
+    strictly.
+
+    Speculative evalation of dictionary functions can lead to slightly better
+    performance, because a thunk is avoided. However, it results in unnecessary
+    computation and allocation if the dictionary goes unused. This causes
+    a significant increase in allocation if the dictionary is large.
+    See (:ghc-ticket:`25284`).
+
 .. ghc-flag:: -fdicts-cheap
     :shortdesc: Make dictionary-valued expressions seem cheap to the optimiser.
     :type: dynamic


=====================================
testsuite/tests/core-to-stg/T25284/A.hs
=====================================
@@ -0,0 +1,8 @@
+{-# OPTIONS_GHC -fspec-eval-dictfun #-}
+module A (testX) where
+
+import qualified Cls
+
+-- this creates the big dictionary strictly because of speculative evaluation
+testX :: (Show a, Cls.HasConst a) => a -> Int -> IO ()
+testX a b = Cls.printConst a b


=====================================
testsuite/tests/core-to-stg/T25284/B.hs
=====================================
@@ -0,0 +1,8 @@
+{-# OPTIONS_GHC -fno-spec-eval-dictfun #-}
+module B (testX) where
+
+import qualified Cls
+
+-- this creates the big dictionary lazily
+testX :: (Show a, Cls.HasConst a) => a -> Int -> IO ()
+testX a b = Cls.printConst a b


=====================================
testsuite/tests/core-to-stg/T25284/Cls.hs
=====================================
@@ -0,0 +1,40 @@
+{-# LANGUAGE UndecidableInstances #-}
+
+module Cls where
+
+class HasConst a where constVal :: a
+
+instance Cls.HasConst Word where constVal = 123
+
+instance Cls.HasConst Int where constVal = 456
+
+-- this class has a big dictionary
+class HasConst10 a where
+  constA :: a
+  constInt1 :: a -> Int
+  constInt1 _ = 1
+  constInt2 :: a -> Int
+  constInt2 _ = 2
+  constInt3 :: a -> Int
+  constInt3 _ = 3
+  constInt4 :: a -> Int
+  constInt4 _ = 4
+  constInt5 :: a -> Int
+  constInt5 _ = 5
+  constInt6 :: a -> Int
+  constInt6 _ = 6
+  constInt7 :: a -> Int
+  constInt7 _ = 7
+  constInt8 :: a -> Int
+  constInt8 _ = 8
+  constInt9 :: a -> Int
+  constInt9 _ = 9
+
+instance HasConst a => HasConst10 a where
+    constA = constVal
+
+-- this doesn't use the big dictionary most of the time
+printConst :: forall a. (Show a, HasConst10 a)
+           => a -> Int -> IO ()
+printConst x 5000  = print @a constA >> print (constInt8 x)
+printConst _  _    = pure ()


=====================================
testsuite/tests/core-to-stg/T25284/Main.hs
=====================================
@@ -0,0 +1,57 @@
+{-
+
+  This tests that speculative evaluation for dictionary functions works as
+  expected, with a large dictionary that goes unused.
+
+   - Module A: dictfun speculative evaluation enabled
+   - Module B: dictfun speculative evaluation disabled
+
+  Speculative evaluation causes the unused large dictionary to be allocated
+  strictly in module A, so we expect more allocations than in module B.
+
+ -}
+module Main where
+
+import qualified A
+import qualified B
+import qualified Cls
+
+import Data.Word
+import System.Mem (performGC)
+import GHC.Stats
+import Control.Monad
+
+{-# NOINLINE getAllocated #-}
+getAllocated :: IO Word64
+getAllocated = do
+  performGC
+  allocated_bytes <$> getRTSStats
+
+main :: IO ()
+main = do
+    -- warm up (just in case)
+    _       <- testMain A.testX
+    _       <- testMain B.testX
+
+    -- for real
+    a_alloc <- testMain A.testX
+    b_alloc <- testMain B.testX
+
+    -- expect B to allocate less than A
+    let alloc_ratio :: Double
+        alloc_ratio = fromIntegral b_alloc / fromIntegral a_alloc
+    putStrLn ("expected alloc: " ++ show (alloc_ratio < 0.7))
+
+iter :: (Int -> IO ()) -> Int -> Int -> IO ()
+iter m !i !j
+  | i < j = m i >> iter m (i+1) j
+  | otherwise = pure ()
+
+{-# NOINLINE testMain #-}
+testMain :: (forall b. (Show b, Cls.HasConst b) => b -> Int -> IO ())
+         -> IO Word64
+testMain f = do
+  alloc0 <- getAllocated
+  iter (\i -> f (0::Int) i >> f (0::Word) i) 1 100000
+  alloc1 <- getAllocated
+  pure (alloc1 - alloc0)


=====================================
testsuite/tests/core-to-stg/T25284/T25284.stdout
=====================================
@@ -0,0 +1,17 @@
+456
+8
+123
+8
+456
+8
+123
+8
+456
+8
+123
+8
+456
+8
+123
+8
+expected alloc: True


=====================================
testsuite/tests/core-to-stg/T25284/all.T
=====================================
@@ -0,0 +1,6 @@
+test('T25284',
+  [js_skip, # allocation counters aren't available on the JS backend
+   extra_files(['Main.hs', 'A.hs', 'B.hs', 'Cls.hs']),
+   extra_run_opts('+RTS -T -RTS')],
+  multimod_compile_and_run,
+  ['Main', ''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/2309975247543a4f77009ea5c3c7a8ebe06dc60b

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/2309975247543a4f77009ea5c3c7a8ebe06dc60b
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250109/7abc912c/attachment-0001.html>


More information about the ghc-commits mailing list