[Git][ghc/ghc][wip/21611-move-corem] 5 commits: Removed CoreDoNothing and CoreDoPasses

Dominik Peteler (@mmhat) gitlab at gitlab.haskell.org
Sat Aug 13 00:07:05 UTC 2022



Dominik Peteler pushed to branch wip/21611-move-corem at Glasgow Haskell Compiler / GHC


Commits:
11c0b085 by Dominik Peteler at 2022-08-13T02:06:16+02:00
Removed CoreDoNothing and CoreDoPasses

Rewrote the getCoreToDo function using a Writer monad. This makes these
data constructors superfluous.

- - - - -
6507925f by Dominik Peteler at 2022-08-13T02:06:26+02:00
Renamed endPassIO to endPass

- - - - -
8de857c4 by Dominik Peteler at 2022-08-13T02:06:27+02:00
Renamed hscSimplify/hscSimplify' to optimizeCoreIO/optimizeCoreHsc

- - - - -
5f7bcd6d by Dominik Peteler at 2022-08-13T02:06:27+02:00
Run simplifyPgm in SimplCountM

- - - - -
dd0a2286 by Dominik Peteler at 2022-08-13T02:06:28+02:00
Added note on the architecture of the Core optimizer

- - - - -


12 changed files:

- compiler/GHC.hs
- compiler/GHC/Core/EndPass.hs
- compiler/GHC/Core/Lint.hs
- compiler/GHC/Core/Opt.hs
- compiler/GHC/Core/Opt/Config.hs
- compiler/GHC/Core/Opt/Simplify.hs
- compiler/GHC/CoreToStg/Prep.hs
- compiler/GHC/Driver/Config/Core/EndPass.hs
- compiler/GHC/Driver/Config/Core/Opt.hs
- compiler/GHC/Driver/Core/Opt.hs
- compiler/GHC/Driver/Main.hs
- compiler/GHC/HsToCore.hs


Changes:

=====================================
compiler/GHC.hs
=====================================
@@ -1285,7 +1285,7 @@ compileCore simplify fn = do
              hsc_env <- getSession
              simpl_guts <- liftIO $ do
                plugins <- readIORef (tcg_th_coreplugins tcg)
-               hscSimplify hsc_env plugins mod_guts
+               optimizeCoreIO hsc_env plugins mod_guts
              tidy_guts <- liftIO $ hscTidy hsc_env simpl_guts
              return $ Left tidy_guts
           else


=====================================
compiler/GHC/Core/EndPass.hs
=====================================
@@ -10,7 +10,7 @@ compilation pass that returns Core. Heavily leverages `GHC.Core.Lint`.
 
 module GHC.Core.EndPass (
     EndPassConfig (..),
-    endPassIO,
+    endPass,
     dumpPassResult
  ) where
 
@@ -57,12 +57,14 @@ data EndPassConfig = EndPassConfig
   , ep_passDetails :: !SDoc
   }
 
-endPassIO :: Logger
-          -> EndPassConfig
-          -> CoreProgram -> [CoreRule]
-          -> IO ()
--- Used by the IO-is CorePrep too
-endPassIO logger cfg binds rules
+-- | Check the correctness of a Core program after running an optimization pass.
+-- Used by CorePrep too.
+-- See Note [The architecture of the Core optimizer].
+endPass :: Logger
+        -> EndPassConfig
+        -> CoreProgram -> [CoreRule]
+        -> IO ()
+endPass logger cfg binds rules
   = do { dumpPassResult logger (ep_dumpCoreSizes cfg) (ep_printUnqual cfg) mb_flag
                         (renderWithContext defaultSDocContext (ep_prettyPass cfg))
                         (ep_passDetails cfg) binds rules


=====================================
compiler/GHC/Core/Lint.hs
=====================================
@@ -9,6 +9,7 @@
 
 A ``lint'' pass to check for Core correctness.
 See Note [Core Lint guarantee].
+See Note [The architecture of the Core optimizer].
 -}
 
 module GHC.Core.Lint (


=====================================
compiler/GHC/Core/Opt.hs
=====================================
@@ -1,7 +1,7 @@
 {-
 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
 
-\section[SimplCore]{Driver for simplifying @Core@ programs}
+\section[GHC.Core.Opt]{Driver for optimizing @Core@ programs}
 -}
 
 {-# LANGUAGE CPP #-}
@@ -11,7 +11,7 @@ module GHC.Core.Opt ( CoreOptEnv (..), runCorePasses ) where
 import GHC.Prelude
 
 import GHC.Core
-import GHC.Core.EndPass  ( EndPassConfig, endPassIO )
+import GHC.Core.EndPass  ( EndPassConfig, endPass )
 import GHC.Core.Opt.CSE  ( cseProgram )
 import GHC.Core.Ppr     ( pprCoreBindings )
 import GHC.Core.Lint    ( LintAnnotationsConfig, DebugSetting(..), lintAnnots )
@@ -30,7 +30,7 @@ import GHC.Core.Opt.Simplify.Monad
 import GHC.Core.Opt.SpecConstr   ( specConstrProgram )
 import GHC.Core.Opt.Specialise   ( specProgram )
 import GHC.Core.Opt.StaticArgs   ( doStaticArgs )
-import GHC.Core.Opt.Stats        ( SimplCountM, addCounts )
+import GHC.Core.Opt.Stats        ( SimplCountM )
 import GHC.Core.Opt.WorkWrap     ( wwTopBinds )
 import GHC.Core.LateCC           ( addLateCostCentresMG )
 import GHC.Core.Rules            ( extendRuleBaseList, extendRuleEnv )
@@ -87,6 +87,7 @@ data CoreOptEnv = CoreOptEnv
 -- creation of the '[CoreToDo]') happens in
 -- 'GHC.Driver.Config.Core.Opt'. Then this function "executes" that
 -- plan.
+-- See Note [The architecture of the Core optimizer].
 runCorePasses :: CoreOptEnv
               -> [CoreToDo]
               -> ModGuts
@@ -95,8 +96,6 @@ runCorePasses env passes guts
   = foldM do_pass guts passes
   where
     do_pass :: ModGuts -> CoreToDo -> SimplCountM ModGuts
-    do_pass res CoreDoNothing = return res
-    do_pass guts (CoreDoPasses ps) = runCorePasses env ps guts
     do_pass guts pass = do
       let end_pass_cfg = co_endPassCfg env  pass
       let lint_anno_cfg = co_lintAnnotationsCfg env pass
@@ -106,7 +105,7 @@ runCorePasses env passes guts
 
       withTiming (co_logger env) (ppr pass <+> brackets (ppr this_mod)) (const ()) $ do
         guts' <- lintAnnots (co_logger env) lint_anno_cfg doCorePassWithoutDebug guts
-        liftIO $ endPassIO (co_logger env) end_pass_cfg (mg_binds guts') (mg_rules guts')
+        liftIO $ endPass (co_logger env) end_pass_cfg (mg_binds guts') (mg_rules guts')
         return guts'
 
     this_mod = mg_module guts
@@ -128,10 +127,8 @@ doCorePass env pass guts = do
   let !read_ruleenv = readRuleEnv env guts
 
   case pass of
-    CoreDoSimplify opts       -> {-# SCC "Simplify" #-} do
-                                 (guts', sc) <- liftIO $ simplifyPgm (co_logger env) read_ruleenv (co_unitEnv env) opts guts
-                                 addCounts sc
-                                 return guts'
+    CoreDoSimplify opts       -> {-# SCC "Simplify" #-}
+                                 simplifyPgm (co_logger env) read_ruleenv (co_unitEnv env) opts guts
 
     CoreCSE                   -> {-# SCC "CommonSubExpr" #-}
                                  updateBinds cseProgram
@@ -183,10 +180,6 @@ doCorePass env pass guts = do
     CoreDoRuleCheck opts      -> {-# SCC "RuleCheck" #-}
                                  liftIO $ ruleCheckPass (co_logger env) opts (co_hptRuleBase env) (co_visOrphans env) guts
 
-    CoreDoNothing             -> return guts
-
-    CoreDoPasses passes       -> runCorePasses env passes guts
-
     CoreDoPluginPass _ p      -> {-# SCC "Plugin" #-}
                                  co_liftCoreM env (co_debugSetting env) guts $ p guts
   where


=====================================
compiler/GHC/Core/Opt/Config.hs
=====================================
@@ -34,10 +34,10 @@ import GHC.Utils.Outputable as Outputable
 -- | A description of the plugin pass itself
 type CorePluginPass = ModGuts -> CoreM ModGuts
 
-data CoreToDo           -- These are diff core-to-core passes,
-                        -- which may be invoked in any order,
-                        -- as many times as you like.
-
+-- | These are diff core-to-core passes, which may be invoked in any order, as
+-- many times as you like.
+-- See Note [The architecture of the Core optimizer].
+data CoreToDo
   = -- | The core-to-core simplifier.
     CoreDoSimplify !SimplifyOpts
   | CoreDoPluginPass String CorePluginPass
@@ -55,11 +55,6 @@ data CoreToDo           -- These are diff core-to-core passes,
   | CoreDoSpecConstr !SpecConstrOpts
   | CoreCSE
   | CoreDoRuleCheck !RuleCheckOpts
-  | -- | Useful when building up
-    CoreDoNothing
-  | -- | lists of these things
-    CoreDoPasses [CoreToDo]
-
   | CoreAddCallerCcs !CallerCCOpts
   | CoreAddLateCcs !Bool -- ^ '-fprof-count-entries'
 
@@ -82,8 +77,6 @@ instance Outputable CoreToDo where
   ppr (CoreAddLateCcs _)       = text "Add late core cost-centres"
   ppr CoreDoPrintCore          = text "Print core"
   ppr (CoreDoRuleCheck {})     = text "Rule check"
-  ppr CoreDoNothing            = text "CoreDoNothing"
-  ppr (CoreDoPasses passes)    = text "CoreDoPasses" <+> ppr passes
 
 pprPassDetails :: CoreToDo -> SDoc
 pprPassDetails (CoreDoSimplify cfg) = vcat [ text "Max iterations =" <+> int n


=====================================
compiler/GHC/Core/Opt/Simplify.hs
=====================================
@@ -21,7 +21,7 @@ import GHC.Core.Opt.Simplify.Iteration ( simplTopBinds, simplExpr, simplImpRules
 import GHC.Core.Opt.Simplify.Utils ( activeRule, activeUnfolding )
 import GHC.Core.Opt.Simplify.Env
 import GHC.Core.Opt.Simplify.Monad
-import GHC.Core.Opt.Stats ( simplCountN )
+import GHC.Core.Opt.Stats ( SimplCountM, addCounts, simplCountN )
 import GHC.Core.FamInstEnv
 
 import GHC.Utils.Error  ( withTiming )
@@ -44,6 +44,7 @@ import GHC.Types.Unique.FM
 import GHC.Types.Name.Ppr
 
 import Control.Monad
+import Control.Monad.IO.Class ( liftIO )
 import Data.Foldable ( for_ )
 
 #if __GLASGOW_HASKELL__ <= 810
@@ -144,9 +145,20 @@ simplifyPgm :: Logger
             -> UnitEnv
             -> SimplifyOpts
             -> ModGuts
-            -> IO (ModGuts, SimplCount)  -- New bindings
-
-simplifyPgm logger read_ruleenv unit_env opts
+            -> SimplCountM ModGuts  -- New bindings
+simplifyPgm logger read_ruleenv unit_env opts guts = do
+    (nguts, sc) <- liftIO $ simplifyPgmIO logger read_ruleenv unit_env opts guts
+    addCounts sc
+    return nguts
+
+simplifyPgmIO :: Logger
+              -> IO RuleEnv -- ^ Action to get the current RuleEnv
+              -> UnitEnv
+              -> SimplifyOpts
+              -> ModGuts
+              -> IO (ModGuts, SimplCount)  -- New bindings
+
+simplifyPgmIO logger read_ruleenv unit_env opts
             guts@(ModGuts { mg_module = this_mod
                           , mg_rdr_env = rdr_env
                           , mg_binds = binds, mg_rules = rules


=====================================
compiler/GHC/CoreToStg/Prep.hs
=====================================
@@ -32,7 +32,7 @@ import GHC.Builtin.Types
 
 import GHC.Core.Utils
 import GHC.Core.Opt.Arity
-import GHC.Core.EndPass    ( EndPassConfig(..), endPassIO )
+import GHC.Core.EndPass    ( EndPassConfig(..), endPass )
 import GHC.Core
 import GHC.Core.Make hiding( FloatBind(..) )   -- We use our own FloatBind here
 import GHC.Core.Type
@@ -258,8 +258,7 @@ corePrepPgm logger cp_cfg pgm_cfg
                       floats2 <- corePrepTopBinds initialCorePrepEnv implicit_binds
                       return (deFloatTop (floats1 `appendFloats` floats2))
 
-    endPassIO logger (cpPgm_endPassConfig pgm_cfg)
-              binds_out []
+    endPass logger (cpPgm_endPassConfig pgm_cfg) binds_out []
     return binds_out
 
 corePrepExpr :: Logger -> CorePrepConfig -> CoreExpr -> IO CoreExpr


=====================================
compiler/GHC/Driver/Config/Core/EndPass.hs
=====================================
@@ -46,5 +46,3 @@ coreDumpFlag (CoreAddCallerCcs {})    = Nothing
 coreDumpFlag (CoreAddLateCcs {})      = Nothing
 coreDumpFlag CoreDoPrintCore          = Nothing
 coreDumpFlag (CoreDoRuleCheck {})     = Nothing
-coreDumpFlag CoreDoNothing            = Nothing
-coreDumpFlag (CoreDoPasses {})        = Nothing


=====================================
compiler/GHC/Driver/Config/Core/Opt.hs
=====================================
@@ -1,7 +1,7 @@
 {-
 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
 
-\section[SimplCore]{Configuration of the driver for simplifying @Core@ programs}
+\section[GHC.Driver.Config.Core.Opt]{Configuration of the driver for optimizing @Core@ programs}
 -}
 
 {-# LANGUAGE CPP #-}
@@ -29,6 +29,10 @@ import GHC.Types.Var ( Var )
 
 import qualified GHC.LanguageExtensions as LangExt
 
+import Control.Monad
+import Control.Monad.Trans.Writer.Strict ( Writer, execWriter, tell )
+import Data.Foldable
+
 {-
 ************************************************************************
 *                                                                      *
@@ -37,9 +41,187 @@ import qualified GHC.LanguageExtensions as LangExt
 ************************************************************************
 -}
 
+-- | Construct the main optimisation pipeline from the driver's session state.
+-- See Note [The architecture of the Core optimizer].
 getCoreToDo :: DynFlags -> [Var] -> [CoreToDo]
-getCoreToDo dflags extra_vars
-  = flatten_todos core_todo
+getCoreToDo dflags extra_vars = execWriter $ do
+  -- We want to do the static argument transform before full laziness as it
+  -- may expose extra opportunities to float things outwards. However, to fix
+  -- up the output of the transformation we need at do at least one simplify
+  -- after this before anything else
+  when static_args $ do
+    simpl_gently
+    enqueue CoreDoStaticArgs
+
+  -- initial simplify: make specialiser happy: minimum effort please
+  when do_presimplify $
+    simpl_gently
+
+  -- Specialisation is best done before full laziness
+  -- so that overloaded functions have all their dictionary lambdas manifest
+  when do_specialise $
+    enqueue $ coreDoSpecialising dflags
+
+  if full_laziness then
+    -- Was: gentleFloatOutSwitches
+    --
+    -- I have no idea why, but not floating constants to
+    -- top level is very bad in some cases.
+    --
+    -- Notably: p_ident in spectral/rewrite
+    --          Changing from "gentle" to "constantsOnly"
+    --          improved rewrite's allocation by 19%, and
+    --          made 0.0% difference to any other nofib
+    --          benchmark
+    --
+    -- Not doing floatOutOverSatApps yet, we'll do
+    -- that later on when we've had a chance to get more
+    -- accurate arity information.  In fact it makes no
+    -- difference at all to performance if we do it here,
+    -- but maybe we save some unnecessary to-and-fro in
+    -- the simplifier.
+    enqueue $ CoreDoFloatOutwards FloatOutSwitches
+      { floatOutLambdas   = Just 0
+      , floatOutConstants = True
+      , floatOutOverSatApps = False
+      , floatToTopLevelOnly = False
+      }
+
+  else
+    -- Even with full laziness turned off, we still need to float static
+    -- forms to the top level. See Note [Grand plan for static forms] in
+    -- GHC.Iface.Tidy.StaticPtrTable.
+    --
+    when static_ptrs $ do
+      -- Float Out can't handle type lets (sometimes created
+      -- by simpleOptPgm via mkParallelBindings)
+      simpl_gently
+      -- Static forms are moved to the top level with the FloatOut pass.
+      -- See Note [Grand plan for static forms] in GHC.Iface.Tidy.StaticPtrTable.
+      enqueue $ CoreDoFloatOutwards FloatOutSwitches
+        { floatOutLambdas   = Just 0
+        , floatOutConstants = True
+        , floatOutOverSatApps = False
+        , floatToTopLevelOnly = True
+        }
+
+  -- Run the simplier phases 2,1,0 to allow rewrite rules to fire
+  when do_simpl3 $ do
+    for_ [phases, phases-1 .. 1] $ \phase ->
+      simpl_phase (Phase phase) "main" max_iter
+
+    -- Phase 0: allow all Ids to be inlined now
+    -- This gets foldr inlined before strictness analysis
+
+    -- At least 3 iterations because otherwise we land up with
+    -- huge dead expressions because of an infelicity in the
+    -- simplifier.
+    --      let k = BIG in foldr k z xs
+    -- ==>  let k = BIG in letrec go = \xs -> ...(k x).... in go xs
+    -- ==>  let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
+    -- Don't stop now!
+    simpl_phase (Phase 0) "main" (max max_iter 3)
+
+  -- Run float-inwards immediately before the strictness analyser
+  -- Doing so pushes bindings nearer their use site and hence makes
+  -- them more likely to be strict. These bindings might only show
+  -- up after the inlining from simplification.  Example in fulsom,
+  -- Csg.calc, where an arg of timesDouble thereby becomes strict.
+  when do_float_in $
+    enqueue $ CoreDoFloatInwards platform
+
+  when call_arity $ do
+    enqueue CoreDoCallArity
+    simplify "post-call-arity"
+
+  -- Strictness analysis
+  when strictness $ do
+    dmd_cpr_ww
+    simplify "post-worker-wrapper"
+
+  -- See Note [Placement of the exitification pass]
+  when exitification $
+    enqueue CoreDoExitify
+
+  when full_laziness $
+    enqueue $ CoreDoFloatOutwards FloatOutSwitches
+      { floatOutLambdas     = floatLamArgs dflags
+      , floatOutConstants   = True
+      , floatOutOverSatApps = True
+      , floatToTopLevelOnly = False
+      }
+      -- nofib/spectral/hartel/wang doubles in speed if you
+      -- do full laziness late in the day.  It only happens
+      -- after fusion and other stuff, so the early pass doesn't
+      -- catch it.  For the record, the redex is
+      --        f_el22 (f_el21 r_midblock)
+
+  -- We want CSE to follow the final full-laziness pass, because it may
+  -- succeed in commoning up things floated out by full laziness.
+  -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
+  when cse $
+    enqueue CoreCSE
+
+  when do_float_in $
+    enqueue $ CoreDoFloatInwards platform
+
+  -- Final tidy-up
+  simplify "final"
+
+  maybe_rule_check FinalPhase
+
+  --------  After this we have -O2 passes -----------------
+  -- None of them run with -O
+
+  -- Case-liberation for -O2.  This should be after
+  -- strictness analysis and the simplification which follows it.
+  when liberate_case $ do
+    enqueue $ CoreLiberateCase (initLiberateCaseOpts dflags)
+    -- Run the simplifier after LiberateCase to vastly
+    -- reduce the possibility of shadowing
+    -- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr
+    simplify "post-liberate-case"
+
+  when spec_constr $ do
+    enqueue $ CoreDoSpecConstr (initSpecConstrOpts dflags)
+    -- See Note [Simplify after SpecConstr]
+    simplify "post-spec-constr"
+
+  maybe_rule_check FinalPhase
+
+  when late_specialise $ do
+    enqueue $ coreDoSpecialising dflags
+    simplify "post-late-spec"
+
+  -- LiberateCase can yield new CSE opportunities because it peels
+  -- off one layer of a recursive function (concretely, I saw this
+  -- in wheel-sieve1), and I'm guessing that SpecConstr can too
+  -- And CSE is a very cheap pass. So it seems worth doing here.
+  when ((liberate_case || spec_constr) && cse) $ do
+    enqueue CoreCSE
+    simplify "post-final-cse"
+
+  ---------  End of -O2 passes --------------
+
+  when late_dmd_anal $ do
+    dmd_cpr_ww
+    simplify "post-late-ww"
+
+  -- Final run of the demand_analyser, ensures that one-shot thunks are
+  -- really really one-shot thunks. Only needed if the demand analyser
+  -- has run at all. See Note [Final Demand Analyser run] in GHC.Core.Opt.DmdAnal
+  -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
+  -- can become /exponentially/ more expensive. See #11731, #12996.
+  when (strictness || late_dmd_anal) $
+    enqueue $ coreDoDemand dflags
+
+  maybe_rule_check FinalPhase
+
+  when profiling $ do
+    when (not (null $ callerCcFilters dflags)) $
+      enqueue $ CoreAddCallerCcs (initCallerCCOpts dflags)
+    when (gopt Opt_ProfLateInlineCcs dflags) $
+      enqueue $ CoreAddLateCcs (gopt Opt_ProfCountEntries dflags)
   where
     phases        = simplPhases        dflags
     max_iter      = maxSimplIterations dflags
@@ -66,228 +248,39 @@ getCoreToDo dflags extra_vars
     do_presimplify = do_specialise -- TODO: any other optimizations benefit from pre-simplification?
     do_simpl3      = const_fold || rules_on -- TODO: any other optimizations benefit from three-phase simplification?
 
-    maybe_rule_check phase = runMaybe rule_check $
-      CoreDoRuleCheck . initRuleCheckOpts dflags phase
+    maybe_rule_check phase = for_ rule_check $
+      enqueue . CoreDoRuleCheck . initRuleCheckOpts dflags phase
 
     maybe_strictness_before (Phase phase)
-      | phase `elem` strictnessBefore dflags = coreDoDemand dflags
-    maybe_strictness_before _
-      = CoreDoNothing
+      | phase `elem` strictnessBefore dflags = enqueue $ coreDoDemand dflags
+    maybe_strictness_before _ = return ()
 
-    simpl_phase phase name iter
-      = CoreDoPasses
-      $   [ maybe_strictness_before phase
-          , CoreDoSimplify $ initSimplifyOpts dflags extra_vars iter
-                             (initSimplMode dflags phase name)
-          , maybe_rule_check phase ]
+    simpl_phase phase name iter = do
+      maybe_strictness_before phase
+      enqueue $ CoreDoSimplify $ initSimplifyOpts dflags extra_vars iter
+                                 (initSimplMode dflags phase name)
+      maybe_rule_check phase
 
     -- Run GHC's internal simplification phase, after all rules have run.
     -- See Note [Compiler phases] in GHC.Types.Basic
     simplify name = simpl_phase FinalPhase name max_iter
 
-    -- initial simplify: mk specialiser happy: minimum effort please
+    -- initial simplify: make specialiser happy: minimum effort please
     -- See Note [Inline in InitialPhase]
     -- See Note [RULEs enabled in InitialPhase]
-    simpl_gently = CoreDoSimplify $ initSimplifyOpts dflags extra_vars max_iter
-                                    (initGentleSimplMode dflags)
-
-    dmd_cpr_ww = [coreDoDemand dflags, CoreDoCpr] ++
-      if ww_on then [CoreDoWorkerWrapper (initWorkWrapOpts dflags)]
-               else []
-
-
-    demand_analyser = (CoreDoPasses (
-                           dmd_cpr_ww ++
-                           [simplify "post-worker-wrapper"]
-                           ))
-
-    -- Static forms are moved to the top level with the FloatOut pass.
-    -- See Note [Grand plan for static forms] in GHC.Iface.Tidy.StaticPtrTable.
-    static_ptrs_float_outwards =
-      runWhen static_ptrs $ CoreDoPasses
-        [ simpl_gently -- Float Out can't handle type lets (sometimes created
-                       -- by simpleOptPgm via mkParallelBindings)
-        , CoreDoFloatOutwards FloatOutSwitches
-          { floatOutLambdas   = Just 0
-          , floatOutConstants = True
-          , floatOutOverSatApps = False
-          , floatToTopLevelOnly = True
-          }
-        ]
-
-    add_caller_ccs =
-        runWhen (profiling && not (null $ callerCcFilters dflags)) $
-          CoreAddCallerCcs (initCallerCCOpts dflags)
-
-    add_late_ccs =
-        runWhen (profiling && gopt Opt_ProfLateInlineCcs dflags) $
-          CoreAddLateCcs (gopt Opt_ProfCountEntries dflags)
-
-    core_todo =
-     [
-    -- We want to do the static argument transform before full laziness as it
-    -- may expose extra opportunities to float things outwards. However, to fix
-    -- up the output of the transformation we need at do at least one simplify
-    -- after this before anything else
-        runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
-
-        -- initial simplify: mk specialiser happy: minimum effort please
-        runWhen do_presimplify simpl_gently,
-
-        -- Specialisation is best done before full laziness
-        -- so that overloaded functions have all their dictionary lambdas manifest
-        runWhen do_specialise $ coreDoSpecialising dflags,
-
-        if full_laziness then
-           CoreDoFloatOutwards FloatOutSwitches {
-                                 floatOutLambdas   = Just 0,
-                                 floatOutConstants = True,
-                                 floatOutOverSatApps = False,
-                                 floatToTopLevelOnly = False }
-                -- Was: gentleFloatOutSwitches
-                --
-                -- I have no idea why, but not floating constants to
-                -- top level is very bad in some cases.
-                --
-                -- Notably: p_ident in spectral/rewrite
-                --          Changing from "gentle" to "constantsOnly"
-                --          improved rewrite's allocation by 19%, and
-                --          made 0.0% difference to any other nofib
-                --          benchmark
-                --
-                -- Not doing floatOutOverSatApps yet, we'll do
-                -- that later on when we've had a chance to get more
-                -- accurate arity information.  In fact it makes no
-                -- difference at all to performance if we do it here,
-                -- but maybe we save some unnecessary to-and-fro in
-                -- the simplifier.
-        else
-           -- Even with full laziness turned off, we still need to float static
-           -- forms to the top level. See Note [Grand plan for static forms] in
-           -- GHC.Iface.Tidy.StaticPtrTable.
-           static_ptrs_float_outwards,
-
-        -- Run the simplier phases 2,1,0 to allow rewrite rules to fire
-        runWhen do_simpl3
-            (CoreDoPasses $ [ simpl_phase (Phase phase) "main" max_iter
-                            | phase <- [phases, phases-1 .. 1] ] ++
-                            [ simpl_phase (Phase 0) "main" (max max_iter 3) ]),
-                -- Phase 0: allow all Ids to be inlined now
-                -- This gets foldr inlined before strictness analysis
-
-                -- At least 3 iterations because otherwise we land up with
-                -- huge dead expressions because of an infelicity in the
-                -- simplifier.
-                --      let k = BIG in foldr k z xs
-                -- ==>  let k = BIG in letrec go = \xs -> ...(k x).... in go xs
-                -- ==>  let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
-                -- Don't stop now!
-
-        runWhen do_float_in (CoreDoFloatInwards platform),
-            -- Run float-inwards immediately before the strictness analyser
-            -- Doing so pushes bindings nearer their use site and hence makes
-            -- them more likely to be strict. These bindings might only show
-            -- up after the inlining from simplification.  Example in fulsom,
-            -- Csg.calc, where an arg of timesDouble thereby becomes strict.
-
-        runWhen call_arity $ CoreDoPasses
-            [ CoreDoCallArity
-            , simplify "post-call-arity"
-            ],
-
-        -- Strictness analysis
-        runWhen strictness demand_analyser,
-
-        runWhen exitification CoreDoExitify,
-            -- See Note [Placement of the exitification pass]
-
-        runWhen full_laziness $
-           CoreDoFloatOutwards FloatOutSwitches {
-                                 floatOutLambdas     = floatLamArgs dflags,
-                                 floatOutConstants   = True,
-                                 floatOutOverSatApps = True,
-                                 floatToTopLevelOnly = False },
-                -- nofib/spectral/hartel/wang doubles in speed if you
-                -- do full laziness late in the day.  It only happens
-                -- after fusion and other stuff, so the early pass doesn't
-                -- catch it.  For the record, the redex is
-                --        f_el22 (f_el21 r_midblock)
-
-
-        runWhen cse CoreCSE,
-                -- We want CSE to follow the final full-laziness pass, because it may
-                -- succeed in commoning up things floated out by full laziness.
-                -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
-
-        runWhen do_float_in (CoreDoFloatInwards platform),
-
-        simplify "final",  -- Final tidy-up
-
-        maybe_rule_check FinalPhase,
-
-        --------  After this we have -O2 passes -----------------
-        -- None of them run with -O
-
-                -- Case-liberation for -O2.  This should be after
-                -- strictness analysis and the simplification which follows it.
-        runWhen liberate_case $ CoreDoPasses
-           [ CoreLiberateCase (initLiberateCaseOpts dflags)
-           , simplify "post-liberate-case" ],
-           -- Run the simplifier after LiberateCase to vastly
-           -- reduce the possibility of shadowing
-           -- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr
-
-        runWhen spec_constr $ CoreDoPasses
-           [ CoreDoSpecConstr (initSpecConstrOpts dflags)
-           , simplify "post-spec-constr"],
-           -- See Note [Simplify after SpecConstr]
-
-        maybe_rule_check FinalPhase,
-
-        runWhen late_specialise $ CoreDoPasses
-           [ coreDoSpecialising dflags, simplify "post-late-spec"],
-
-        -- LiberateCase can yield new CSE opportunities because it peels
-        -- off one layer of a recursive function (concretely, I saw this
-        -- in wheel-sieve1), and I'm guessing that SpecConstr can too
-        -- And CSE is a very cheap pass. So it seems worth doing here.
-        runWhen ((liberate_case || spec_constr) && cse) $ CoreDoPasses
-           [ CoreCSE, simplify "post-final-cse" ],
-
-        ---------  End of -O2 passes --------------
-
-        runWhen late_dmd_anal $ CoreDoPasses (
-            dmd_cpr_ww ++ [simplify "post-late-ww"]
-          ),
-
-        -- Final run of the demand_analyser, ensures that one-shot thunks are
-        -- really really one-shot thunks. Only needed if the demand analyser
-        -- has run at all. See Note [Final Demand Analyser run] in GHC.Core.Opt.DmdAnal
-        -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
-        -- can become /exponentially/ more expensive. See #11731, #12996.
-        runWhen (strictness || late_dmd_anal) $ coreDoDemand dflags,
-
-        maybe_rule_check FinalPhase,
-
-        add_caller_ccs,
-        add_late_ccs
-     ]
-
-    -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
-    flatten_todos [] = []
-    flatten_todos (CoreDoNothing : rest) = flatten_todos rest
-    flatten_todos (CoreDoPasses passes : rest) =
-      flatten_todos passes ++ flatten_todos rest
-    flatten_todos (todo : rest) = todo : flatten_todos rest
-
--- The core-to-core pass ordering is derived from the DynFlags:
-runWhen :: Bool -> CoreToDo -> CoreToDo
-runWhen True  do_this = do_this
-runWhen False _       = CoreDoNothing
-
-runMaybe :: Maybe a -> (a -> CoreToDo) -> CoreToDo
-runMaybe (Just x) f = f x
-runMaybe Nothing  _ = CoreDoNothing
+    simpl_gently = enqueue $ CoreDoSimplify $
+      initSimplifyOpts dflags extra_vars max_iter (initGentleSimplMode dflags)
+
+    dmd_cpr_ww = do
+      enqueue $ coreDoDemand dflags
+      enqueue CoreDoCpr
+      when ww_on $
+        enqueue $ CoreDoWorkerWrapper (initWorkWrapOpts dflags)
+
+
+
+enqueue :: CoreToDo -> Writer [CoreToDo] ()
+enqueue pass = tell [pass]
 
 coreDoDemand :: DynFlags -> CoreToDo
 coreDoDemand dflags = CoreDoDemand $ initDmdAnalOpts dflags
@@ -295,7 +288,7 @@ coreDoDemand dflags = CoreDoDemand $ initDmdAnalOpts dflags
 coreDoSpecialising :: DynFlags -> CoreToDo
 coreDoSpecialising dflags = CoreDoSpecialising (initSpecialiseOpts dflags simplMask)
 
--- TODO DEDUp!!!!
+-- TODO: Deduplication
 simplMask :: Char
 simplMask = 's'
 


=====================================
compiler/GHC/Driver/Core/Opt.hs
=====================================
@@ -1,4 +1,4 @@
-module GHC.Driver.Core.Opt ( hscSimplify, hscSimplify' ) where
+module GHC.Driver.Core.Opt ( optimizeCoreHsc, optimizeCoreIO ) where
 
 import GHC.Prelude
 
@@ -34,27 +34,27 @@ import GHC.Utils.Logger as Logger
 import Control.Monad.IO.Class
 
 --------------------------------------------------------------
--- Simplifiers
+-- Core optimization entrypoints
 --------------------------------------------------------------
 
--- | Run Core2Core simplifier. The list of String is a list of (Core) plugin
+-- | Run Core optimizer. The list of String is a list of (Core) plugin
 -- module names added via TH (cf 'addCorePlugin').
-hscSimplify :: HscEnv -> [String] -> ModGuts -> IO ModGuts
-hscSimplify hsc_env plugins modguts =
-    runHsc hsc_env $ hscSimplify' plugins modguts
-
--- | Run Core2Core simplifier. The list of String is a list of (Core) plugin
--- module names added via TH (cf 'addCorePlugin').
-hscSimplify' :: [String] -> ModGuts -> Hsc ModGuts
-hscSimplify' plugins ds_result = do
-    hsc_env <- getHscEnv
+optimizeCoreIO :: HscEnv -> [String] -> ModGuts -> IO ModGuts
+optimizeCoreIO hsc_env plugins guts = do
     hsc_env_with_plugins <- if null plugins -- fast path
         then return hsc_env
-        else liftIO $ initializePlugins
-                    $ hscUpdateFlags (\dflags -> foldr addPluginModuleName dflags plugins)
-                      hsc_env
+        else initializePlugins
+             $ hscUpdateFlags (\dflags -> foldr addPluginModuleName dflags plugins)
+             hsc_env
     {-# SCC "Core2Core" #-}
-      liftIO $ core2core hsc_env_with_plugins ds_result
+      core2core hsc_env_with_plugins guts
+
+-- | Run Core optimizer. The list of String is a list of (Core) plugin
+-- module names added via TH (cf 'addCorePlugin').
+optimizeCoreHsc :: [String] -> ModGuts -> Hsc ModGuts
+optimizeCoreHsc plugins guts = do
+    hsc_env <- getHscEnv
+    liftIO $ optimizeCoreIO hsc_env plugins guts
 
 core2core :: HscEnv -> ModGuts -> IO ModGuts
 core2core hsc_env guts@(ModGuts { mg_module  = mod
@@ -84,11 +84,6 @@ core2core hsc_env guts@(ModGuts { mg_module  = mod
                                                                  , gwib_isBoot = NotBoot })
     hpt_rule_base  = mkRuleBase home_pkg_rules
 
-    -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
-    -- This is very convienent for the users of the monad (e.g. plugins do not have to
-    -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
-    -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
-    -- would mean our cached value would go out of date.
     env = CoreOptEnv
       { co_logger        = logger
       , co_debugSetting  = InheritDebugLevel
@@ -111,6 +106,11 @@ liftCoreMToSimplCountM hsc_env debug_settings guts m = do
   return a
   where
     mod = mg_module guts
+    -- mod: get the module out of the ModGuts so we can retrieve it from the monad.
+    -- This is very convienent for the users of the monad (e.g. plugins do not have to
+    -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
+    -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
+    -- would mean our cached value would go out of date.
     loc = mg_loc guts
     orph_mods = mkModuleSet (mod : dep_orphs (mg_deps guts))
     gwib = GWIB { gwib_mod = moduleName mod, gwib_isBoot = NotBoot }
@@ -122,3 +122,56 @@ liftCoreMToSimplCountM hsc_env debug_settings guts m = do
       NoDebugging -> let
         dflags' = (hsc_dflags hsc_env) { debugLevel = 0 }
         in hsc_env { hsc_dflags = dflags' }
+
+{-
+Note [The architecture of the Core optimizer]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Conceptually the Core optimizer consists of two stages:
+
+ 1. The planning stage.
+ 2. The execution stage.
+
+This division is mirrored in the interface of the different optimizations. For
+each of those optimzations we have
+
+ 1. a configuration record bundeling the options for a particular optimization
+    pass.
+ 2. an initialization function used to obtain such a configuration from
+    `DynFlags`. This function typically lives in a module named after the pass
+    in the `GHC.Driver.Config.Core.Opt` namespace and is used in the planning
+    stage.
+ 3. the actual optimization pass itself, with an entrypoint that takes the
+    configuration of the pass along with the execution context as arguments.
+    This entrypoint is called in the execution stage.
+
+The plan that is the result of the first stage is constructed by the
+`getCoreToDo` function found in the `GHC.Driver.Config.Core.Opt` module. This
+function determines the sequence of optimization passes run on the module in
+question and derives the configuration for each pass from the session's state
+(`DynFlags`) using the aforementioned initialization functions. The `CoreToDo`
+type that is finally used to wrap this configuration value is a sum type
+enumerating all the optimizations available in GHC.
+
+The entrypoint of the second stage are the `optimizeCore*` functions found in
+GHC.Driver.Core.Opt. These functions is part of the Application Layer and
+utilize the `runCorePasses` function from `GHC.Core.Opt` which is the
+counterpart of these functions in the Domain Layer. In other words, while the
+`optimizeCore*` know about `HscEnv` and are therefore bound to a concrete
+driver, `runCorePasses` is more independent as it is a component of its own.
+
+`runCorePasses` is essentially an interpreter for the `CoreToDo`s constructed in
+the planning phase. It calls the entrypoints of the passes with their respective
+configurations as arguments as well as some execution context like the unit
+environment, the rules and the type family instance in scope, and most notably
+the module we wish to compile (`ModGuts`).
+
+A similar split in functionality is done for the Core Linting: After each pass
+we may check the sanity of the resulting Core running a so-called EndPass check.
+The entrypoint for this check is the `endPass` function found in
+GHC.Core.EndPass. It comes as well with a configuration record and a
+corresponding initialization function for it in GHC.Driver.Core.EndPass. The
+definition of what actually is a correct Core program is defined by the linting
+functions in GHC.Core.Lint. These are used by the EndPass to check the program.
+
+-}


=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -59,7 +59,7 @@ module GHC.Driver.Main
     , hscTypecheckAndGetWarnings
     , hscDesugar
     , makeSimpleDetails
-    , hscSimplify -- ToDo, shouldn't really export this
+    , optimizeCoreIO -- TODO: shouldn't really export this
     , hscDesugarAndSimplify
 
     -- * Safe Haskell
@@ -88,7 +88,7 @@ module GHC.Driver.Main
     , hscCompileCoreExpr'
       -- We want to make sure that we export enough to be able to redefine
       -- hsc_typecheck in client code
-    , hscParse', hscSimplify', hscDesugar', tcRnModule', doCodeGen
+    , hscParse', optimizeCoreHsc, hscDesugar', tcRnModule', doCodeGen
     , getHscEnv
     , hscSimpleIface'
     , oneShotMsg
@@ -123,7 +123,7 @@ import GHC.Driver.Config.Stg.Ppr  (initStgPprOpts)
 import GHC.Driver.Config.Stg.Pipeline (initStgPipelineOpts)
 import GHC.Driver.Config.StgToCmm  (initStgToCmmConfig)
 import GHC.Driver.Config.Cmm       (initCmmConfig)
-import GHC.Driver.Core.Opt         ( hscSimplify, hscSimplify' )
+import GHC.Driver.Core.Opt         ( optimizeCoreHsc, optimizeCoreIO )
 import GHC.Driver.Config.Diagnostic
 import GHC.Driver.Config.Tidy
 import GHC.Driver.Hooks
@@ -158,7 +158,7 @@ import GHC.Iface.Ext.Binary ( readHieFile, writeHieFile , hie_file_result)
 import GHC.Iface.Ext.Debug  ( diffFile, validateScopes )
 
 import GHC.Core
-import GHC.Core.EndPass ( EndPassConfig(..), endPassIO )
+import GHC.Core.EndPass ( EndPassConfig(..), endPass )
 import GHC.Core.Lint ( LintFlags(..), StaticPtrCheck(..) )
 import GHC.Core.Lint.Interactive ( interactiveInScope )
 import GHC.Core.Tidy           ( tidyExpr )
@@ -1008,7 +1008,7 @@ hscDesugarAndSimplify summary (FrontendTypecheck tc_result) tc_warnings mb_old_h
       -- Just cause we desugared doesn't mean we are generating code, see above.
       Just desugared_guts | backendGeneratesCode bcknd -> do
           plugins <- liftIO $ readIORef (tcg_th_coreplugins tc_result)
-          simplified_guts <- hscSimplify' plugins desugared_guts
+          simplified_guts <- optimizeCoreHsc plugins desugared_guts
 
           (cg_guts, details) <-
               liftIO $ hscTidy hsc_env simplified_guts
@@ -2098,7 +2098,7 @@ hscParsedDecls hsc_env decls = runInteractiveHsc hsc_env $ do
     {- Simplify -}
     simpl_mg <- liftIO $ do
       plugins <- readIORef (tcg_th_coreplugins tc_gblenv)
-      hscSimplify hsc_env plugins ds_result
+      optimizeCoreIO hsc_env plugins ds_result
 
     {- Tidy -}
     (tidy_cg, mod_details) <- liftIO $ hscTidy hsc_env simpl_mg
@@ -2317,7 +2317,7 @@ hscTidy hsc_env guts = do
         , ep_prettyPass = tidy_ppr
         , ep_passDetails = empty
         }
-  endPassIO logger tidy_cfg all_tidy_binds tidy_rules
+  endPass logger tidy_cfg all_tidy_binds tidy_rules
 
   -- If the endPass didn't print the rules, but ddump-rules is
   -- on, print now


=====================================
compiler/GHC/HsToCore.hs
=====================================
@@ -48,7 +48,7 @@ import GHC.Core.Type
 import GHC.Core.TyCon     ( tyConDataCons )
 import GHC.Core
 import GHC.Core.FVs       ( exprsSomeFreeVarsList )
-import GHC.Core.EndPass   ( EndPassConfig(..), endPassIO )
+import GHC.Core.EndPass   ( EndPassConfig(..), endPass )
 import GHC.Core.Lint      ( LintFlags(..) )
 import GHC.Core.Lint.Interactive ( interactiveInScope )
 import GHC.Core.SimpleOpt ( simpleOptPgm, simpleOptExpr )
@@ -240,7 +240,7 @@ deSugar hsc_env
                 , ep_prettyPass = desugar_before_ppr
                 , ep_passDetails = empty
                 }
-        ; endPassIO (hsc_logger hsc_env) desugar_before_cfg final_pgm rules_for_imps
+        ; endPass (hsc_logger hsc_env) desugar_before_cfg final_pgm rules_for_imps
 
         ; let simpl_opts = initSimpleOpts dflags
         ; let (ds_binds, ds_rules_for_imps, occ_anald_binds)
@@ -267,7 +267,7 @@ deSugar hsc_env
                 , ep_prettyPass = desugar_after_ppr
                 , ep_passDetails = empty
                 }
-        ; endPassIO (hsc_logger hsc_env) desugar_after_cfg ds_binds ds_rules_for_imps
+        ; endPass (hsc_logger hsc_env) desugar_after_cfg ds_binds ds_rules_for_imps
 
         ; let used_names = mkUsedNames tcg_env
               pluginModules = map lpModule (loadedPlugins (hsc_plugins hsc_env))



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/5af6eb1172804ac05d7f5b0b0f7151e64df14fb9...dd0a2286a2a9abb93e4984e2441c31967f4e57a5

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/5af6eb1172804ac05d7f5b0b0f7151e64df14fb9...dd0a2286a2a9abb93e4984e2441c31967f4e57a5
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20220812/87188510/attachment-0001.html>


More information about the ghc-commits mailing list