[Git][ghc/ghc][wip/21611-move-corem] 5 commits: Removed CoreDoNothing and CoreDoPasses
Dominik Peteler (@mmhat)
gitlab at gitlab.haskell.org
Sat Aug 13 00:07:05 UTC 2022
Dominik Peteler pushed to branch wip/21611-move-corem at Glasgow Haskell Compiler / GHC
Commits:
11c0b085 by Dominik Peteler at 2022-08-13T02:06:16+02:00
Removed CoreDoNothing and CoreDoPasses
Rewrote the getCoreToDo function using a Writer monad. This makes these
data constructors superfluous.
- - - - -
6507925f by Dominik Peteler at 2022-08-13T02:06:26+02:00
Renamed endPassIO to endPass
- - - - -
8de857c4 by Dominik Peteler at 2022-08-13T02:06:27+02:00
Renamed hscSimplify/hscSimplify' to optimizeCoreIO/optimizeCoreHsc
- - - - -
5f7bcd6d by Dominik Peteler at 2022-08-13T02:06:27+02:00
Run simplifyPgm in SimplCountM
- - - - -
dd0a2286 by Dominik Peteler at 2022-08-13T02:06:28+02:00
Added note on the architecture of the Core optimizer
- - - - -
12 changed files:
- compiler/GHC.hs
- compiler/GHC/Core/EndPass.hs
- compiler/GHC/Core/Lint.hs
- compiler/GHC/Core/Opt.hs
- compiler/GHC/Core/Opt/Config.hs
- compiler/GHC/Core/Opt/Simplify.hs
- compiler/GHC/CoreToStg/Prep.hs
- compiler/GHC/Driver/Config/Core/EndPass.hs
- compiler/GHC/Driver/Config/Core/Opt.hs
- compiler/GHC/Driver/Core/Opt.hs
- compiler/GHC/Driver/Main.hs
- compiler/GHC/HsToCore.hs
Changes:
=====================================
compiler/GHC.hs
=====================================
@@ -1285,7 +1285,7 @@ compileCore simplify fn = do
hsc_env <- getSession
simpl_guts <- liftIO $ do
plugins <- readIORef (tcg_th_coreplugins tcg)
- hscSimplify hsc_env plugins mod_guts
+ optimizeCoreIO hsc_env plugins mod_guts
tidy_guts <- liftIO $ hscTidy hsc_env simpl_guts
return $ Left tidy_guts
else
=====================================
compiler/GHC/Core/EndPass.hs
=====================================
@@ -10,7 +10,7 @@ compilation pass that returns Core. Heavily leverages `GHC.Core.Lint`.
module GHC.Core.EndPass (
EndPassConfig (..),
- endPassIO,
+ endPass,
dumpPassResult
) where
@@ -57,12 +57,14 @@ data EndPassConfig = EndPassConfig
, ep_passDetails :: !SDoc
}
-endPassIO :: Logger
- -> EndPassConfig
- -> CoreProgram -> [CoreRule]
- -> IO ()
--- Used by the IO-is CorePrep too
-endPassIO logger cfg binds rules
+-- | Check the correctness of a Core program after running an optimization pass.
+-- Used by CorePrep too.
+-- See Note [The architecture of the Core optimizer].
+endPass :: Logger
+ -> EndPassConfig
+ -> CoreProgram -> [CoreRule]
+ -> IO ()
+endPass logger cfg binds rules
= do { dumpPassResult logger (ep_dumpCoreSizes cfg) (ep_printUnqual cfg) mb_flag
(renderWithContext defaultSDocContext (ep_prettyPass cfg))
(ep_passDetails cfg) binds rules
=====================================
compiler/GHC/Core/Lint.hs
=====================================
@@ -9,6 +9,7 @@
A ``lint'' pass to check for Core correctness.
See Note [Core Lint guarantee].
+See Note [The architecture of the Core optimizer].
-}
module GHC.Core.Lint (
=====================================
compiler/GHC/Core/Opt.hs
=====================================
@@ -1,7 +1,7 @@
{-
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
-\section[SimplCore]{Driver for simplifying @Core@ programs}
+\section[GHC.Core.Opt]{Driver for optimizing @Core@ programs}
-}
{-# LANGUAGE CPP #-}
@@ -11,7 +11,7 @@ module GHC.Core.Opt ( CoreOptEnv (..), runCorePasses ) where
import GHC.Prelude
import GHC.Core
-import GHC.Core.EndPass ( EndPassConfig, endPassIO )
+import GHC.Core.EndPass ( EndPassConfig, endPass )
import GHC.Core.Opt.CSE ( cseProgram )
import GHC.Core.Ppr ( pprCoreBindings )
import GHC.Core.Lint ( LintAnnotationsConfig, DebugSetting(..), lintAnnots )
@@ -30,7 +30,7 @@ import GHC.Core.Opt.Simplify.Monad
import GHC.Core.Opt.SpecConstr ( specConstrProgram )
import GHC.Core.Opt.Specialise ( specProgram )
import GHC.Core.Opt.StaticArgs ( doStaticArgs )
-import GHC.Core.Opt.Stats ( SimplCountM, addCounts )
+import GHC.Core.Opt.Stats ( SimplCountM )
import GHC.Core.Opt.WorkWrap ( wwTopBinds )
import GHC.Core.LateCC ( addLateCostCentresMG )
import GHC.Core.Rules ( extendRuleBaseList, extendRuleEnv )
@@ -87,6 +87,7 @@ data CoreOptEnv = CoreOptEnv
-- creation of the '[CoreToDo]') happens in
-- 'GHC.Driver.Config.Core.Opt'. Then this function "executes" that
-- plan.
+-- See Note [The architecture of the Core optimizer].
runCorePasses :: CoreOptEnv
-> [CoreToDo]
-> ModGuts
@@ -95,8 +96,6 @@ runCorePasses env passes guts
= foldM do_pass guts passes
where
do_pass :: ModGuts -> CoreToDo -> SimplCountM ModGuts
- do_pass res CoreDoNothing = return res
- do_pass guts (CoreDoPasses ps) = runCorePasses env ps guts
do_pass guts pass = do
let end_pass_cfg = co_endPassCfg env pass
let lint_anno_cfg = co_lintAnnotationsCfg env pass
@@ -106,7 +105,7 @@ runCorePasses env passes guts
withTiming (co_logger env) (ppr pass <+> brackets (ppr this_mod)) (const ()) $ do
guts' <- lintAnnots (co_logger env) lint_anno_cfg doCorePassWithoutDebug guts
- liftIO $ endPassIO (co_logger env) end_pass_cfg (mg_binds guts') (mg_rules guts')
+ liftIO $ endPass (co_logger env) end_pass_cfg (mg_binds guts') (mg_rules guts')
return guts'
this_mod = mg_module guts
@@ -128,10 +127,8 @@ doCorePass env pass guts = do
let !read_ruleenv = readRuleEnv env guts
case pass of
- CoreDoSimplify opts -> {-# SCC "Simplify" #-} do
- (guts', sc) <- liftIO $ simplifyPgm (co_logger env) read_ruleenv (co_unitEnv env) opts guts
- addCounts sc
- return guts'
+ CoreDoSimplify opts -> {-# SCC "Simplify" #-}
+ simplifyPgm (co_logger env) read_ruleenv (co_unitEnv env) opts guts
CoreCSE -> {-# SCC "CommonSubExpr" #-}
updateBinds cseProgram
@@ -183,10 +180,6 @@ doCorePass env pass guts = do
CoreDoRuleCheck opts -> {-# SCC "RuleCheck" #-}
liftIO $ ruleCheckPass (co_logger env) opts (co_hptRuleBase env) (co_visOrphans env) guts
- CoreDoNothing -> return guts
-
- CoreDoPasses passes -> runCorePasses env passes guts
-
CoreDoPluginPass _ p -> {-# SCC "Plugin" #-}
co_liftCoreM env (co_debugSetting env) guts $ p guts
where
=====================================
compiler/GHC/Core/Opt/Config.hs
=====================================
@@ -34,10 +34,10 @@ import GHC.Utils.Outputable as Outputable
-- | A description of the plugin pass itself
type CorePluginPass = ModGuts -> CoreM ModGuts
-data CoreToDo -- These are diff core-to-core passes,
- -- which may be invoked in any order,
- -- as many times as you like.
-
+-- | These are diff core-to-core passes, which may be invoked in any order, as
+-- many times as you like.
+-- See Note [The architecture of the Core optimizer].
+data CoreToDo
= -- | The core-to-core simplifier.
CoreDoSimplify !SimplifyOpts
| CoreDoPluginPass String CorePluginPass
@@ -55,11 +55,6 @@ data CoreToDo -- These are diff core-to-core passes,
| CoreDoSpecConstr !SpecConstrOpts
| CoreCSE
| CoreDoRuleCheck !RuleCheckOpts
- | -- | Useful when building up
- CoreDoNothing
- | -- | lists of these things
- CoreDoPasses [CoreToDo]
-
| CoreAddCallerCcs !CallerCCOpts
| CoreAddLateCcs !Bool -- ^ '-fprof-count-entries'
@@ -82,8 +77,6 @@ instance Outputable CoreToDo where
ppr (CoreAddLateCcs _) = text "Add late core cost-centres"
ppr CoreDoPrintCore = text "Print core"
ppr (CoreDoRuleCheck {}) = text "Rule check"
- ppr CoreDoNothing = text "CoreDoNothing"
- ppr (CoreDoPasses passes) = text "CoreDoPasses" <+> ppr passes
pprPassDetails :: CoreToDo -> SDoc
pprPassDetails (CoreDoSimplify cfg) = vcat [ text "Max iterations =" <+> int n
=====================================
compiler/GHC/Core/Opt/Simplify.hs
=====================================
@@ -21,7 +21,7 @@ import GHC.Core.Opt.Simplify.Iteration ( simplTopBinds, simplExpr, simplImpRules
import GHC.Core.Opt.Simplify.Utils ( activeRule, activeUnfolding )
import GHC.Core.Opt.Simplify.Env
import GHC.Core.Opt.Simplify.Monad
-import GHC.Core.Opt.Stats ( simplCountN )
+import GHC.Core.Opt.Stats ( SimplCountM, addCounts, simplCountN )
import GHC.Core.FamInstEnv
import GHC.Utils.Error ( withTiming )
@@ -44,6 +44,7 @@ import GHC.Types.Unique.FM
import GHC.Types.Name.Ppr
import Control.Monad
+import Control.Monad.IO.Class ( liftIO )
import Data.Foldable ( for_ )
#if __GLASGOW_HASKELL__ <= 810
@@ -144,9 +145,20 @@ simplifyPgm :: Logger
-> UnitEnv
-> SimplifyOpts
-> ModGuts
- -> IO (ModGuts, SimplCount) -- New bindings
-
-simplifyPgm logger read_ruleenv unit_env opts
+ -> SimplCountM ModGuts -- New bindings
+simplifyPgm logger read_ruleenv unit_env opts guts = do
+ (nguts, sc) <- liftIO $ simplifyPgmIO logger read_ruleenv unit_env opts guts
+ addCounts sc
+ return nguts
+
+simplifyPgmIO :: Logger
+ -> IO RuleEnv -- ^ Action to get the current RuleEnv
+ -> UnitEnv
+ -> SimplifyOpts
+ -> ModGuts
+ -> IO (ModGuts, SimplCount) -- New bindings
+
+simplifyPgmIO logger read_ruleenv unit_env opts
guts@(ModGuts { mg_module = this_mod
, mg_rdr_env = rdr_env
, mg_binds = binds, mg_rules = rules
=====================================
compiler/GHC/CoreToStg/Prep.hs
=====================================
@@ -32,7 +32,7 @@ import GHC.Builtin.Types
import GHC.Core.Utils
import GHC.Core.Opt.Arity
-import GHC.Core.EndPass ( EndPassConfig(..), endPassIO )
+import GHC.Core.EndPass ( EndPassConfig(..), endPass )
import GHC.Core
import GHC.Core.Make hiding( FloatBind(..) ) -- We use our own FloatBind here
import GHC.Core.Type
@@ -258,8 +258,7 @@ corePrepPgm logger cp_cfg pgm_cfg
floats2 <- corePrepTopBinds initialCorePrepEnv implicit_binds
return (deFloatTop (floats1 `appendFloats` floats2))
- endPassIO logger (cpPgm_endPassConfig pgm_cfg)
- binds_out []
+ endPass logger (cpPgm_endPassConfig pgm_cfg) binds_out []
return binds_out
corePrepExpr :: Logger -> CorePrepConfig -> CoreExpr -> IO CoreExpr
=====================================
compiler/GHC/Driver/Config/Core/EndPass.hs
=====================================
@@ -46,5 +46,3 @@ coreDumpFlag (CoreAddCallerCcs {}) = Nothing
coreDumpFlag (CoreAddLateCcs {}) = Nothing
coreDumpFlag CoreDoPrintCore = Nothing
coreDumpFlag (CoreDoRuleCheck {}) = Nothing
-coreDumpFlag CoreDoNothing = Nothing
-coreDumpFlag (CoreDoPasses {}) = Nothing
=====================================
compiler/GHC/Driver/Config/Core/Opt.hs
=====================================
@@ -1,7 +1,7 @@
{-
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
-\section[SimplCore]{Configuration of the driver for simplifying @Core@ programs}
+\section[GHC.Driver.Config.Core.Opt]{Configuration of the driver for optimizing @Core@ programs}
-}
{-# LANGUAGE CPP #-}
@@ -29,6 +29,10 @@ import GHC.Types.Var ( Var )
import qualified GHC.LanguageExtensions as LangExt
+import Control.Monad
+import Control.Monad.Trans.Writer.Strict ( Writer, execWriter, tell )
+import Data.Foldable
+
{-
************************************************************************
* *
@@ -37,9 +41,187 @@ import qualified GHC.LanguageExtensions as LangExt
************************************************************************
-}
+-- | Construct the main optimisation pipeline from the driver's session state.
+-- See Note [The architecture of the Core optimizer].
getCoreToDo :: DynFlags -> [Var] -> [CoreToDo]
-getCoreToDo dflags extra_vars
- = flatten_todos core_todo
+getCoreToDo dflags extra_vars = execWriter $ do
+ -- We want to do the static argument transform before full laziness as it
+ -- may expose extra opportunities to float things outwards. However, to fix
+ -- up the output of the transformation we need at do at least one simplify
+ -- after this before anything else
+ when static_args $ do
+ simpl_gently
+ enqueue CoreDoStaticArgs
+
+ -- initial simplify: make specialiser happy: minimum effort please
+ when do_presimplify $
+ simpl_gently
+
+ -- Specialisation is best done before full laziness
+ -- so that overloaded functions have all their dictionary lambdas manifest
+ when do_specialise $
+ enqueue $ coreDoSpecialising dflags
+
+ if full_laziness then
+ -- Was: gentleFloatOutSwitches
+ --
+ -- I have no idea why, but not floating constants to
+ -- top level is very bad in some cases.
+ --
+ -- Notably: p_ident in spectral/rewrite
+ -- Changing from "gentle" to "constantsOnly"
+ -- improved rewrite's allocation by 19%, and
+ -- made 0.0% difference to any other nofib
+ -- benchmark
+ --
+ -- Not doing floatOutOverSatApps yet, we'll do
+ -- that later on when we've had a chance to get more
+ -- accurate arity information. In fact it makes no
+ -- difference at all to performance if we do it here,
+ -- but maybe we save some unnecessary to-and-fro in
+ -- the simplifier.
+ enqueue $ CoreDoFloatOutwards FloatOutSwitches
+ { floatOutLambdas = Just 0
+ , floatOutConstants = True
+ , floatOutOverSatApps = False
+ , floatToTopLevelOnly = False
+ }
+
+ else
+ -- Even with full laziness turned off, we still need to float static
+ -- forms to the top level. See Note [Grand plan for static forms] in
+ -- GHC.Iface.Tidy.StaticPtrTable.
+ --
+ when static_ptrs $ do
+ -- Float Out can't handle type lets (sometimes created
+ -- by simpleOptPgm via mkParallelBindings)
+ simpl_gently
+ -- Static forms are moved to the top level with the FloatOut pass.
+ -- See Note [Grand plan for static forms] in GHC.Iface.Tidy.StaticPtrTable.
+ enqueue $ CoreDoFloatOutwards FloatOutSwitches
+ { floatOutLambdas = Just 0
+ , floatOutConstants = True
+ , floatOutOverSatApps = False
+ , floatToTopLevelOnly = True
+ }
+
+ -- Run the simplier phases 2,1,0 to allow rewrite rules to fire
+ when do_simpl3 $ do
+ for_ [phases, phases-1 .. 1] $ \phase ->
+ simpl_phase (Phase phase) "main" max_iter
+
+ -- Phase 0: allow all Ids to be inlined now
+ -- This gets foldr inlined before strictness analysis
+
+ -- At least 3 iterations because otherwise we land up with
+ -- huge dead expressions because of an infelicity in the
+ -- simplifier.
+ -- let k = BIG in foldr k z xs
+ -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
+ -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
+ -- Don't stop now!
+ simpl_phase (Phase 0) "main" (max max_iter 3)
+
+ -- Run float-inwards immediately before the strictness analyser
+ -- Doing so pushes bindings nearer their use site and hence makes
+ -- them more likely to be strict. These bindings might only show
+ -- up after the inlining from simplification. Example in fulsom,
+ -- Csg.calc, where an arg of timesDouble thereby becomes strict.
+ when do_float_in $
+ enqueue $ CoreDoFloatInwards platform
+
+ when call_arity $ do
+ enqueue CoreDoCallArity
+ simplify "post-call-arity"
+
+ -- Strictness analysis
+ when strictness $ do
+ dmd_cpr_ww
+ simplify "post-worker-wrapper"
+
+ -- See Note [Placement of the exitification pass]
+ when exitification $
+ enqueue CoreDoExitify
+
+ when full_laziness $
+ enqueue $ CoreDoFloatOutwards FloatOutSwitches
+ { floatOutLambdas = floatLamArgs dflags
+ , floatOutConstants = True
+ , floatOutOverSatApps = True
+ , floatToTopLevelOnly = False
+ }
+ -- nofib/spectral/hartel/wang doubles in speed if you
+ -- do full laziness late in the day. It only happens
+ -- after fusion and other stuff, so the early pass doesn't
+ -- catch it. For the record, the redex is
+ -- f_el22 (f_el21 r_midblock)
+
+ -- We want CSE to follow the final full-laziness pass, because it may
+ -- succeed in commoning up things floated out by full laziness.
+ -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
+ when cse $
+ enqueue CoreCSE
+
+ when do_float_in $
+ enqueue $ CoreDoFloatInwards platform
+
+ -- Final tidy-up
+ simplify "final"
+
+ maybe_rule_check FinalPhase
+
+ -------- After this we have -O2 passes -----------------
+ -- None of them run with -O
+
+ -- Case-liberation for -O2. This should be after
+ -- strictness analysis and the simplification which follows it.
+ when liberate_case $ do
+ enqueue $ CoreLiberateCase (initLiberateCaseOpts dflags)
+ -- Run the simplifier after LiberateCase to vastly
+ -- reduce the possibility of shadowing
+ -- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr
+ simplify "post-liberate-case"
+
+ when spec_constr $ do
+ enqueue $ CoreDoSpecConstr (initSpecConstrOpts dflags)
+ -- See Note [Simplify after SpecConstr]
+ simplify "post-spec-constr"
+
+ maybe_rule_check FinalPhase
+
+ when late_specialise $ do
+ enqueue $ coreDoSpecialising dflags
+ simplify "post-late-spec"
+
+ -- LiberateCase can yield new CSE opportunities because it peels
+ -- off one layer of a recursive function (concretely, I saw this
+ -- in wheel-sieve1), and I'm guessing that SpecConstr can too
+ -- And CSE is a very cheap pass. So it seems worth doing here.
+ when ((liberate_case || spec_constr) && cse) $ do
+ enqueue CoreCSE
+ simplify "post-final-cse"
+
+ --------- End of -O2 passes --------------
+
+ when late_dmd_anal $ do
+ dmd_cpr_ww
+ simplify "post-late-ww"
+
+ -- Final run of the demand_analyser, ensures that one-shot thunks are
+ -- really really one-shot thunks. Only needed if the demand analyser
+ -- has run at all. See Note [Final Demand Analyser run] in GHC.Core.Opt.DmdAnal
+ -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
+ -- can become /exponentially/ more expensive. See #11731, #12996.
+ when (strictness || late_dmd_anal) $
+ enqueue $ coreDoDemand dflags
+
+ maybe_rule_check FinalPhase
+
+ when profiling $ do
+ when (not (null $ callerCcFilters dflags)) $
+ enqueue $ CoreAddCallerCcs (initCallerCCOpts dflags)
+ when (gopt Opt_ProfLateInlineCcs dflags) $
+ enqueue $ CoreAddLateCcs (gopt Opt_ProfCountEntries dflags)
where
phases = simplPhases dflags
max_iter = maxSimplIterations dflags
@@ -66,228 +248,39 @@ getCoreToDo dflags extra_vars
do_presimplify = do_specialise -- TODO: any other optimizations benefit from pre-simplification?
do_simpl3 = const_fold || rules_on -- TODO: any other optimizations benefit from three-phase simplification?
- maybe_rule_check phase = runMaybe rule_check $
- CoreDoRuleCheck . initRuleCheckOpts dflags phase
+ maybe_rule_check phase = for_ rule_check $
+ enqueue . CoreDoRuleCheck . initRuleCheckOpts dflags phase
maybe_strictness_before (Phase phase)
- | phase `elem` strictnessBefore dflags = coreDoDemand dflags
- maybe_strictness_before _
- = CoreDoNothing
+ | phase `elem` strictnessBefore dflags = enqueue $ coreDoDemand dflags
+ maybe_strictness_before _ = return ()
- simpl_phase phase name iter
- = CoreDoPasses
- $ [ maybe_strictness_before phase
- , CoreDoSimplify $ initSimplifyOpts dflags extra_vars iter
- (initSimplMode dflags phase name)
- , maybe_rule_check phase ]
+ simpl_phase phase name iter = do
+ maybe_strictness_before phase
+ enqueue $ CoreDoSimplify $ initSimplifyOpts dflags extra_vars iter
+ (initSimplMode dflags phase name)
+ maybe_rule_check phase
-- Run GHC's internal simplification phase, after all rules have run.
-- See Note [Compiler phases] in GHC.Types.Basic
simplify name = simpl_phase FinalPhase name max_iter
- -- initial simplify: mk specialiser happy: minimum effort please
+ -- initial simplify: make specialiser happy: minimum effort please
-- See Note [Inline in InitialPhase]
-- See Note [RULEs enabled in InitialPhase]
- simpl_gently = CoreDoSimplify $ initSimplifyOpts dflags extra_vars max_iter
- (initGentleSimplMode dflags)
-
- dmd_cpr_ww = [coreDoDemand dflags, CoreDoCpr] ++
- if ww_on then [CoreDoWorkerWrapper (initWorkWrapOpts dflags)]
- else []
-
-
- demand_analyser = (CoreDoPasses (
- dmd_cpr_ww ++
- [simplify "post-worker-wrapper"]
- ))
-
- -- Static forms are moved to the top level with the FloatOut pass.
- -- See Note [Grand plan for static forms] in GHC.Iface.Tidy.StaticPtrTable.
- static_ptrs_float_outwards =
- runWhen static_ptrs $ CoreDoPasses
- [ simpl_gently -- Float Out can't handle type lets (sometimes created
- -- by simpleOptPgm via mkParallelBindings)
- , CoreDoFloatOutwards FloatOutSwitches
- { floatOutLambdas = Just 0
- , floatOutConstants = True
- , floatOutOverSatApps = False
- , floatToTopLevelOnly = True
- }
- ]
-
- add_caller_ccs =
- runWhen (profiling && not (null $ callerCcFilters dflags)) $
- CoreAddCallerCcs (initCallerCCOpts dflags)
-
- add_late_ccs =
- runWhen (profiling && gopt Opt_ProfLateInlineCcs dflags) $
- CoreAddLateCcs (gopt Opt_ProfCountEntries dflags)
-
- core_todo =
- [
- -- We want to do the static argument transform before full laziness as it
- -- may expose extra opportunities to float things outwards. However, to fix
- -- up the output of the transformation we need at do at least one simplify
- -- after this before anything else
- runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
-
- -- initial simplify: mk specialiser happy: minimum effort please
- runWhen do_presimplify simpl_gently,
-
- -- Specialisation is best done before full laziness
- -- so that overloaded functions have all their dictionary lambdas manifest
- runWhen do_specialise $ coreDoSpecialising dflags,
-
- if full_laziness then
- CoreDoFloatOutwards FloatOutSwitches {
- floatOutLambdas = Just 0,
- floatOutConstants = True,
- floatOutOverSatApps = False,
- floatToTopLevelOnly = False }
- -- Was: gentleFloatOutSwitches
- --
- -- I have no idea why, but not floating constants to
- -- top level is very bad in some cases.
- --
- -- Notably: p_ident in spectral/rewrite
- -- Changing from "gentle" to "constantsOnly"
- -- improved rewrite's allocation by 19%, and
- -- made 0.0% difference to any other nofib
- -- benchmark
- --
- -- Not doing floatOutOverSatApps yet, we'll do
- -- that later on when we've had a chance to get more
- -- accurate arity information. In fact it makes no
- -- difference at all to performance if we do it here,
- -- but maybe we save some unnecessary to-and-fro in
- -- the simplifier.
- else
- -- Even with full laziness turned off, we still need to float static
- -- forms to the top level. See Note [Grand plan for static forms] in
- -- GHC.Iface.Tidy.StaticPtrTable.
- static_ptrs_float_outwards,
-
- -- Run the simplier phases 2,1,0 to allow rewrite rules to fire
- runWhen do_simpl3
- (CoreDoPasses $ [ simpl_phase (Phase phase) "main" max_iter
- | phase <- [phases, phases-1 .. 1] ] ++
- [ simpl_phase (Phase 0) "main" (max max_iter 3) ]),
- -- Phase 0: allow all Ids to be inlined now
- -- This gets foldr inlined before strictness analysis
-
- -- At least 3 iterations because otherwise we land up with
- -- huge dead expressions because of an infelicity in the
- -- simplifier.
- -- let k = BIG in foldr k z xs
- -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
- -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
- -- Don't stop now!
-
- runWhen do_float_in (CoreDoFloatInwards platform),
- -- Run float-inwards immediately before the strictness analyser
- -- Doing so pushes bindings nearer their use site and hence makes
- -- them more likely to be strict. These bindings might only show
- -- up after the inlining from simplification. Example in fulsom,
- -- Csg.calc, where an arg of timesDouble thereby becomes strict.
-
- runWhen call_arity $ CoreDoPasses
- [ CoreDoCallArity
- , simplify "post-call-arity"
- ],
-
- -- Strictness analysis
- runWhen strictness demand_analyser,
-
- runWhen exitification CoreDoExitify,
- -- See Note [Placement of the exitification pass]
-
- runWhen full_laziness $
- CoreDoFloatOutwards FloatOutSwitches {
- floatOutLambdas = floatLamArgs dflags,
- floatOutConstants = True,
- floatOutOverSatApps = True,
- floatToTopLevelOnly = False },
- -- nofib/spectral/hartel/wang doubles in speed if you
- -- do full laziness late in the day. It only happens
- -- after fusion and other stuff, so the early pass doesn't
- -- catch it. For the record, the redex is
- -- f_el22 (f_el21 r_midblock)
-
-
- runWhen cse CoreCSE,
- -- We want CSE to follow the final full-laziness pass, because it may
- -- succeed in commoning up things floated out by full laziness.
- -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
-
- runWhen do_float_in (CoreDoFloatInwards platform),
-
- simplify "final", -- Final tidy-up
-
- maybe_rule_check FinalPhase,
-
- -------- After this we have -O2 passes -----------------
- -- None of them run with -O
-
- -- Case-liberation for -O2. This should be after
- -- strictness analysis and the simplification which follows it.
- runWhen liberate_case $ CoreDoPasses
- [ CoreLiberateCase (initLiberateCaseOpts dflags)
- , simplify "post-liberate-case" ],
- -- Run the simplifier after LiberateCase to vastly
- -- reduce the possibility of shadowing
- -- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr
-
- runWhen spec_constr $ CoreDoPasses
- [ CoreDoSpecConstr (initSpecConstrOpts dflags)
- , simplify "post-spec-constr"],
- -- See Note [Simplify after SpecConstr]
-
- maybe_rule_check FinalPhase,
-
- runWhen late_specialise $ CoreDoPasses
- [ coreDoSpecialising dflags, simplify "post-late-spec"],
-
- -- LiberateCase can yield new CSE opportunities because it peels
- -- off one layer of a recursive function (concretely, I saw this
- -- in wheel-sieve1), and I'm guessing that SpecConstr can too
- -- And CSE is a very cheap pass. So it seems worth doing here.
- runWhen ((liberate_case || spec_constr) && cse) $ CoreDoPasses
- [ CoreCSE, simplify "post-final-cse" ],
-
- --------- End of -O2 passes --------------
-
- runWhen late_dmd_anal $ CoreDoPasses (
- dmd_cpr_ww ++ [simplify "post-late-ww"]
- ),
-
- -- Final run of the demand_analyser, ensures that one-shot thunks are
- -- really really one-shot thunks. Only needed if the demand analyser
- -- has run at all. See Note [Final Demand Analyser run] in GHC.Core.Opt.DmdAnal
- -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
- -- can become /exponentially/ more expensive. See #11731, #12996.
- runWhen (strictness || late_dmd_anal) $ coreDoDemand dflags,
-
- maybe_rule_check FinalPhase,
-
- add_caller_ccs,
- add_late_ccs
- ]
-
- -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
- flatten_todos [] = []
- flatten_todos (CoreDoNothing : rest) = flatten_todos rest
- flatten_todos (CoreDoPasses passes : rest) =
- flatten_todos passes ++ flatten_todos rest
- flatten_todos (todo : rest) = todo : flatten_todos rest
-
--- The core-to-core pass ordering is derived from the DynFlags:
-runWhen :: Bool -> CoreToDo -> CoreToDo
-runWhen True do_this = do_this
-runWhen False _ = CoreDoNothing
-
-runMaybe :: Maybe a -> (a -> CoreToDo) -> CoreToDo
-runMaybe (Just x) f = f x
-runMaybe Nothing _ = CoreDoNothing
+ simpl_gently = enqueue $ CoreDoSimplify $
+ initSimplifyOpts dflags extra_vars max_iter (initGentleSimplMode dflags)
+
+ dmd_cpr_ww = do
+ enqueue $ coreDoDemand dflags
+ enqueue CoreDoCpr
+ when ww_on $
+ enqueue $ CoreDoWorkerWrapper (initWorkWrapOpts dflags)
+
+
+
+enqueue :: CoreToDo -> Writer [CoreToDo] ()
+enqueue pass = tell [pass]
coreDoDemand :: DynFlags -> CoreToDo
coreDoDemand dflags = CoreDoDemand $ initDmdAnalOpts dflags
@@ -295,7 +288,7 @@ coreDoDemand dflags = CoreDoDemand $ initDmdAnalOpts dflags
coreDoSpecialising :: DynFlags -> CoreToDo
coreDoSpecialising dflags = CoreDoSpecialising (initSpecialiseOpts dflags simplMask)
--- TODO DEDUp!!!!
+-- TODO: Deduplication
simplMask :: Char
simplMask = 's'
=====================================
compiler/GHC/Driver/Core/Opt.hs
=====================================
@@ -1,4 +1,4 @@
-module GHC.Driver.Core.Opt ( hscSimplify, hscSimplify' ) where
+module GHC.Driver.Core.Opt ( optimizeCoreHsc, optimizeCoreIO ) where
import GHC.Prelude
@@ -34,27 +34,27 @@ import GHC.Utils.Logger as Logger
import Control.Monad.IO.Class
--------------------------------------------------------------
--- Simplifiers
+-- Core optimization entrypoints
--------------------------------------------------------------
--- | Run Core2Core simplifier. The list of String is a list of (Core) plugin
+-- | Run Core optimizer. The list of String is a list of (Core) plugin
-- module names added via TH (cf 'addCorePlugin').
-hscSimplify :: HscEnv -> [String] -> ModGuts -> IO ModGuts
-hscSimplify hsc_env plugins modguts =
- runHsc hsc_env $ hscSimplify' plugins modguts
-
--- | Run Core2Core simplifier. The list of String is a list of (Core) plugin
--- module names added via TH (cf 'addCorePlugin').
-hscSimplify' :: [String] -> ModGuts -> Hsc ModGuts
-hscSimplify' plugins ds_result = do
- hsc_env <- getHscEnv
+optimizeCoreIO :: HscEnv -> [String] -> ModGuts -> IO ModGuts
+optimizeCoreIO hsc_env plugins guts = do
hsc_env_with_plugins <- if null plugins -- fast path
then return hsc_env
- else liftIO $ initializePlugins
- $ hscUpdateFlags (\dflags -> foldr addPluginModuleName dflags plugins)
- hsc_env
+ else initializePlugins
+ $ hscUpdateFlags (\dflags -> foldr addPluginModuleName dflags plugins)
+ hsc_env
{-# SCC "Core2Core" #-}
- liftIO $ core2core hsc_env_with_plugins ds_result
+ core2core hsc_env_with_plugins guts
+
+-- | Run Core optimizer. The list of String is a list of (Core) plugin
+-- module names added via TH (cf 'addCorePlugin').
+optimizeCoreHsc :: [String] -> ModGuts -> Hsc ModGuts
+optimizeCoreHsc plugins guts = do
+ hsc_env <- getHscEnv
+ liftIO $ optimizeCoreIO hsc_env plugins guts
core2core :: HscEnv -> ModGuts -> IO ModGuts
core2core hsc_env guts@(ModGuts { mg_module = mod
@@ -84,11 +84,6 @@ core2core hsc_env guts@(ModGuts { mg_module = mod
, gwib_isBoot = NotBoot })
hpt_rule_base = mkRuleBase home_pkg_rules
- -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
- -- This is very convienent for the users of the monad (e.g. plugins do not have to
- -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
- -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
- -- would mean our cached value would go out of date.
env = CoreOptEnv
{ co_logger = logger
, co_debugSetting = InheritDebugLevel
@@ -111,6 +106,11 @@ liftCoreMToSimplCountM hsc_env debug_settings guts m = do
return a
where
mod = mg_module guts
+ -- mod: get the module out of the ModGuts so we can retrieve it from the monad.
+ -- This is very convienent for the users of the monad (e.g. plugins do not have to
+ -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
+ -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
+ -- would mean our cached value would go out of date.
loc = mg_loc guts
orph_mods = mkModuleSet (mod : dep_orphs (mg_deps guts))
gwib = GWIB { gwib_mod = moduleName mod, gwib_isBoot = NotBoot }
@@ -122,3 +122,56 @@ liftCoreMToSimplCountM hsc_env debug_settings guts m = do
NoDebugging -> let
dflags' = (hsc_dflags hsc_env) { debugLevel = 0 }
in hsc_env { hsc_dflags = dflags' }
+
+{-
+Note [The architecture of the Core optimizer]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Conceptually the Core optimizer consists of two stages:
+
+ 1. The planning stage.
+ 2. The execution stage.
+
+This division is mirrored in the interface of the different optimizations. For
+each of those optimzations we have
+
+ 1. a configuration record bundeling the options for a particular optimization
+ pass.
+ 2. an initialization function used to obtain such a configuration from
+ `DynFlags`. This function typically lives in a module named after the pass
+ in the `GHC.Driver.Config.Core.Opt` namespace and is used in the planning
+ stage.
+ 3. the actual optimization pass itself, with an entrypoint that takes the
+ configuration of the pass along with the execution context as arguments.
+ This entrypoint is called in the execution stage.
+
+The plan that is the result of the first stage is constructed by the
+`getCoreToDo` function found in the `GHC.Driver.Config.Core.Opt` module. This
+function determines the sequence of optimization passes run on the module in
+question and derives the configuration for each pass from the session's state
+(`DynFlags`) using the aforementioned initialization functions. The `CoreToDo`
+type that is finally used to wrap this configuration value is a sum type
+enumerating all the optimizations available in GHC.
+
+The entrypoint of the second stage are the `optimizeCore*` functions found in
+GHC.Driver.Core.Opt. These functions is part of the Application Layer and
+utilize the `runCorePasses` function from `GHC.Core.Opt` which is the
+counterpart of these functions in the Domain Layer. In other words, while the
+`optimizeCore*` know about `HscEnv` and are therefore bound to a concrete
+driver, `runCorePasses` is more independent as it is a component of its own.
+
+`runCorePasses` is essentially an interpreter for the `CoreToDo`s constructed in
+the planning phase. It calls the entrypoints of the passes with their respective
+configurations as arguments as well as some execution context like the unit
+environment, the rules and the type family instance in scope, and most notably
+the module we wish to compile (`ModGuts`).
+
+A similar split in functionality is done for the Core Linting: After each pass
+we may check the sanity of the resulting Core running a so-called EndPass check.
+The entrypoint for this check is the `endPass` function found in
+GHC.Core.EndPass. It comes as well with a configuration record and a
+corresponding initialization function for it in GHC.Driver.Core.EndPass. The
+definition of what actually is a correct Core program is defined by the linting
+functions in GHC.Core.Lint. These are used by the EndPass to check the program.
+
+-}
=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -59,7 +59,7 @@ module GHC.Driver.Main
, hscTypecheckAndGetWarnings
, hscDesugar
, makeSimpleDetails
- , hscSimplify -- ToDo, shouldn't really export this
+ , optimizeCoreIO -- TODO: shouldn't really export this
, hscDesugarAndSimplify
-- * Safe Haskell
@@ -88,7 +88,7 @@ module GHC.Driver.Main
, hscCompileCoreExpr'
-- We want to make sure that we export enough to be able to redefine
-- hsc_typecheck in client code
- , hscParse', hscSimplify', hscDesugar', tcRnModule', doCodeGen
+ , hscParse', optimizeCoreHsc, hscDesugar', tcRnModule', doCodeGen
, getHscEnv
, hscSimpleIface'
, oneShotMsg
@@ -123,7 +123,7 @@ import GHC.Driver.Config.Stg.Ppr (initStgPprOpts)
import GHC.Driver.Config.Stg.Pipeline (initStgPipelineOpts)
import GHC.Driver.Config.StgToCmm (initStgToCmmConfig)
import GHC.Driver.Config.Cmm (initCmmConfig)
-import GHC.Driver.Core.Opt ( hscSimplify, hscSimplify' )
+import GHC.Driver.Core.Opt ( optimizeCoreHsc, optimizeCoreIO )
import GHC.Driver.Config.Diagnostic
import GHC.Driver.Config.Tidy
import GHC.Driver.Hooks
@@ -158,7 +158,7 @@ import GHC.Iface.Ext.Binary ( readHieFile, writeHieFile , hie_file_result)
import GHC.Iface.Ext.Debug ( diffFile, validateScopes )
import GHC.Core
-import GHC.Core.EndPass ( EndPassConfig(..), endPassIO )
+import GHC.Core.EndPass ( EndPassConfig(..), endPass )
import GHC.Core.Lint ( LintFlags(..), StaticPtrCheck(..) )
import GHC.Core.Lint.Interactive ( interactiveInScope )
import GHC.Core.Tidy ( tidyExpr )
@@ -1008,7 +1008,7 @@ hscDesugarAndSimplify summary (FrontendTypecheck tc_result) tc_warnings mb_old_h
-- Just cause we desugared doesn't mean we are generating code, see above.
Just desugared_guts | backendGeneratesCode bcknd -> do
plugins <- liftIO $ readIORef (tcg_th_coreplugins tc_result)
- simplified_guts <- hscSimplify' plugins desugared_guts
+ simplified_guts <- optimizeCoreHsc plugins desugared_guts
(cg_guts, details) <-
liftIO $ hscTidy hsc_env simplified_guts
@@ -2098,7 +2098,7 @@ hscParsedDecls hsc_env decls = runInteractiveHsc hsc_env $ do
{- Simplify -}
simpl_mg <- liftIO $ do
plugins <- readIORef (tcg_th_coreplugins tc_gblenv)
- hscSimplify hsc_env plugins ds_result
+ optimizeCoreIO hsc_env plugins ds_result
{- Tidy -}
(tidy_cg, mod_details) <- liftIO $ hscTidy hsc_env simpl_mg
@@ -2317,7 +2317,7 @@ hscTidy hsc_env guts = do
, ep_prettyPass = tidy_ppr
, ep_passDetails = empty
}
- endPassIO logger tidy_cfg all_tidy_binds tidy_rules
+ endPass logger tidy_cfg all_tidy_binds tidy_rules
-- If the endPass didn't print the rules, but ddump-rules is
-- on, print now
=====================================
compiler/GHC/HsToCore.hs
=====================================
@@ -48,7 +48,7 @@ import GHC.Core.Type
import GHC.Core.TyCon ( tyConDataCons )
import GHC.Core
import GHC.Core.FVs ( exprsSomeFreeVarsList )
-import GHC.Core.EndPass ( EndPassConfig(..), endPassIO )
+import GHC.Core.EndPass ( EndPassConfig(..), endPass )
import GHC.Core.Lint ( LintFlags(..) )
import GHC.Core.Lint.Interactive ( interactiveInScope )
import GHC.Core.SimpleOpt ( simpleOptPgm, simpleOptExpr )
@@ -240,7 +240,7 @@ deSugar hsc_env
, ep_prettyPass = desugar_before_ppr
, ep_passDetails = empty
}
- ; endPassIO (hsc_logger hsc_env) desugar_before_cfg final_pgm rules_for_imps
+ ; endPass (hsc_logger hsc_env) desugar_before_cfg final_pgm rules_for_imps
; let simpl_opts = initSimpleOpts dflags
; let (ds_binds, ds_rules_for_imps, occ_anald_binds)
@@ -267,7 +267,7 @@ deSugar hsc_env
, ep_prettyPass = desugar_after_ppr
, ep_passDetails = empty
}
- ; endPassIO (hsc_logger hsc_env) desugar_after_cfg ds_binds ds_rules_for_imps
+ ; endPass (hsc_logger hsc_env) desugar_after_cfg ds_binds ds_rules_for_imps
; let used_names = mkUsedNames tcg_env
pluginModules = map lpModule (loadedPlugins (hsc_plugins hsc_env))
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/5af6eb1172804ac05d7f5b0b0f7151e64df14fb9...dd0a2286a2a9abb93e4984e2441c31967f4e57a5
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/5af6eb1172804ac05d7f5b0b0f7151e64df14fb9...dd0a2286a2a9abb93e4984e2441c31967f4e57a5
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20220812/87188510/attachment-0001.html>
More information about the ghc-commits
mailing list