[Git][ghc/ghc][wip/spj-unf-size] More -- now it compiles

Simon Peyton Jones (@simonpj) gitlab at gitlab.haskell.org
Sun Oct 22 21:30:09 UTC 2023



Simon Peyton Jones pushed to branch wip/spj-unf-size at Glasgow Haskell Compiler / GHC


Commits:
97fac893 by Simon Peyton Jones at 2023-10-22T22:29:25+01:00
More -- now it compiles

- - - - -


15 changed files:

- compiler/GHC/Core.hs
- compiler/GHC/Core/Opt/LiberateCase.hs
- compiler/GHC/Core/Opt/OccurAnal.hs
- compiler/GHC/Core/Opt/Pipeline.hs
- compiler/GHC/Core/Opt/Simplify/Env.hs
- compiler/GHC/Core/Opt/Simplify/Inline.hs
- compiler/GHC/Core/Opt/Simplify/Iteration.hs
- compiler/GHC/Core/Opt/Simplify/Utils.hs
- compiler/GHC/Core/Opt/SpecConstr.hs
- compiler/GHC/Core/Ppr.hs
- compiler/GHC/Core/Seq.hs
- compiler/GHC/Core/Unfold.hs
- compiler/GHC/Core/Unfold/Make.hs
- compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
- compiler/GHC/Types/Id.hs


Changes:

=====================================
compiler/GHC/Core.hs
=====================================
@@ -1390,6 +1390,8 @@ data UnfoldingGuidance
   | UnfIfGoodArgs {     -- Arose from a normal Id
       ug_args :: [Var],      -- Arguments
       ug_tree :: ExprTree    -- Abstraction of the body
+      -- Invariant: free vars of ug_tree are the ug_args, plus variables
+      --            in scope at the binding site of the function definition
     }
 
   | UnfNever        -- The RHS is big, so don't inline it
@@ -1397,13 +1399,14 @@ data UnfoldingGuidance
 data ExprTree
   = TooBig
   | SizeIs { et_size  :: {-# UNPACK #-} !Int
-           , et_cases :: Bag CaseTree
            , et_ret   :: {-# UNPACK #-} !Int
                 -- ^ Discount when result is scrutinised
+           , et_cases :: Bag CaseTree
     }
 
 data CaseTree
   = CaseOf Id          -- Abstracts a case expression on this Id
+           Id          -- Case binder
            [AltTree]   -- Always non-empty, but not worth making NonEmpty;
                        -- nothing relies on non-empty-ness
   | ScrutOf Id Int     -- If this Id is bound to a value, apply this discount


=====================================
compiler/GHC/Core/Opt/LiberateCase.hs
=====================================
@@ -14,7 +14,6 @@ import GHC.Prelude
 
 import GHC.Core
 import GHC.Core.Unfold
-import GHC.Core.Opt.Simplify.Inline
 import GHC.Builtin.Types ( unitDataConId )
 import GHC.Types.Id
 import GHC.Types.Var.Env


=====================================
compiler/GHC/Core/Opt/OccurAnal.hs
=====================================
@@ -1919,20 +1919,17 @@ nodeScore !env new_bndr lb_deps
     -- is_lb: see Note [Loop breakers, node scoring, and stability]
     is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
 
-    old_unf = realIdUnfolding old_bndr
+    old_unf    = realIdUnfolding old_bndr
     can_unfold = canUnfold old_unf
     rhs        = case old_unf of
                    CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
                      | isStableSource src
                      -> unf_rhs
                    _ -> bind_rhs
-       -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
-    rhs_size = case old_unf of
-                 CoreUnfolding { uf_guidance = guidance }
-                    | UnfIfGoodArgs { ug_size = size } <- guidance
-                    -> size
-                 _  -> cheapExprSize rhs
+        -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
 
+    rhs_size = cheapExprSize rhs
+        -- ToDo: could exploit pre-computed unfolding size?
 
         -- Checking for a constructor application
         -- Cheap and cheerful; the simplifier moves casts out of the way


=====================================
compiler/GHC/Core/Opt/Pipeline.hs
=====================================
@@ -76,7 +76,7 @@ core2core :: HscEnv -> ModGuts -> IO ModGuts
 core2core hsc_env guts@(ModGuts { mg_module  = mod
                                 , mg_loc     = loc
                                 , mg_rdr_env = rdr_env })
-  = do { let builtin_passes = getCoreToDo dflags hpt_rule_base extra_vars
+  = do { let builtin_passes = getCoreToDo dflags mod hpt_rule_base extra_vars
              uniq_tag = 's'
 
        ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base uniq_tag mod
@@ -116,9 +116,9 @@ core2core hsc_env guts@(ModGuts { mg_module  = mod
 ************************************************************************
 -}
 
-getCoreToDo :: DynFlags -> RuleBase -> [Var] -> [CoreToDo]
+getCoreToDo :: DynFlags -> Module -> RuleBase -> [Var] -> [CoreToDo]
 -- This function builds the pipeline of optimisations
-getCoreToDo dflags hpt_rule_base extra_vars
+getCoreToDo dflags mod hpt_rule_base extra_vars
   = flatten_todos core_todo
   where
     phases        = simplPhases        dflags
@@ -156,7 +156,7 @@ getCoreToDo dflags hpt_rule_base extra_vars
       = CoreDoPasses
       $   [ maybe_strictness_before phase
           , CoreDoSimplify $ initSimplifyOpts dflags extra_vars iter
-                             (initSimplMode dflags phase name) hpt_rule_base
+                             (initSimplMode dflags mod phase name) hpt_rule_base
           , maybe_rule_check phase ]
 
     -- Run GHC's internal simplification phase, after all rules have run.
@@ -167,7 +167,7 @@ getCoreToDo dflags hpt_rule_base extra_vars
     -- See Note [Inline in InitialPhase]
     -- See Note [RULEs enabled in InitialPhase]
     simpl_gently = CoreDoSimplify $ initSimplifyOpts dflags extra_vars max_iter
-                                    (initGentleSimplMode dflags) hpt_rule_base
+                                    (initGentleSimplMode dflags mod) hpt_rule_base
 
     dmd_cpr_ww = if ww_on then [CoreDoDemand True,CoreDoCpr,CoreDoWorkerWrapper]
                           else [CoreDoDemand False] -- NB: No CPR! See Note [Don't change boxity without worker/wrapper]


=====================================
compiler/GHC/Core/Opt/Simplify/Env.hs
=====================================
@@ -75,6 +75,8 @@ import GHC.Types.Id as Id
 import GHC.Types.Basic
 import GHC.Types.Unique.FM      ( pprUniqFM )
 
+import GHC.Unit.Types( Module )
+
 import GHC.Data.OrdList
 import GHC.Data.Graph.UnVar
 
@@ -242,7 +244,8 @@ seUnfoldingOpts env = sm_uf_opts (seMode env)
 
 -- See Note [The environments of the Simplify pass]
 data SimplMode = SimplMode -- See comments in GHC.Core.Opt.Simplify.Monad
-  { sm_phase        :: !CompilerPhase
+  { sm_module       ::  !Module
+  , sm_phase        :: !CompilerPhase
   , sm_names        :: ![String]      -- ^ Name(s) of the phase
   , sm_rules        :: !Bool          -- ^ Whether RULES are enabled
   , sm_inline       :: !Bool          -- ^ Whether inlining is enabled
@@ -910,12 +913,12 @@ So we want to look up the inner X.g_34 in the substitution, where we'll
 find that it has been substituted by b.  (Or conceivably cloned.)
 -}
 
-substId :: SimplEnv -> InId -> SimplSR
+substId :: HasDebugCallStack => SimplEnv -> InId -> SimplSR
 -- Returns DoneEx only on a non-Var expression
-substId (SimplEnv { seInScope = in_scope, seIdSubst = ids }) v
+substId (SimplEnv { seMode = mode, seInScope = in_scope, seIdSubst = ids }) v
   = case lookupVarEnv ids v of  -- Note [Global Ids in the substitution]
-        Nothing               -> DoneId (refineFromInScope in_scope v)
-        Just (DoneId v)       -> DoneId (refineFromInScope in_scope v)
+        Nothing               -> DoneId (refineFromInScope mode in_scope v)
+        Just (DoneId v)       -> DoneId (refineFromInScope mode in_scope v)
         Just res              -> res    -- DoneEx non-var, or ContEx
 
         -- Get the most up-to-date thing from the in-scope set
@@ -924,22 +927,24 @@ substId (SimplEnv { seInScope = in_scope, seIdSubst = ids }) v
         --
         -- See also Note [In-scope set as a substitution] in GHC.Core.Opt.Simplify.
 
-refineFromInScope :: InScopeSet -> Var -> Var
-refineFromInScope in_scope v
+refineFromInScope :: HasDebugCallStack => SimplMode -> InScopeSet -> Var -> Var
+refineFromInScope mode in_scope v
   | isLocalId v = case lookupInScope in_scope v of
                   Just v' -> v'
-                  Nothing -> pprPanic "refineFromInScope" (ppr in_scope $$ ppr v)
+                  Nothing -> -- pprPanic "refineFromInScope" (ppr in_scope $$ ppr v)
+                             pprTrace "refineFromInScope"
+                                 (ppr (sm_module mode) <+> ppr v) v
                              -- c.f #19074 for a subtle place where this went wrong
   | otherwise = v
 
 lookupRecBndr :: SimplEnv -> InId -> OutId
 -- Look up an Id which has been put into the envt by simplRecBndrs,
 -- but where we have not yet done its RHS
-lookupRecBndr (SimplEnv { seInScope = in_scope, seIdSubst = ids }) v
+lookupRecBndr (SimplEnv { seMode = mode, seInScope = in_scope, seIdSubst = ids }) v
   = case lookupVarEnv ids v of
         Just (DoneId v) -> v
         Just _ -> pprPanic "lookupRecBndr" (ppr v)
-        Nothing -> refineFromInScope in_scope v
+        Nothing -> refineFromInScope mode in_scope v
 
 {-
 ************************************************************************


=====================================
compiler/GHC/Core/Opt/Simplify/Inline.hs
=====================================
@@ -9,12 +9,10 @@ This module contains inlining logic used by the simplifier.
 {-# LANGUAGE BangPatterns #-}
 
 module GHC.Core.Opt.Simplify.Inline (
-        -- * Cheap and cheerful inlining checks.
-        couldBeSmallEnoughToInline,
-        smallEnoughToInline,
-
         -- * The smart inlining decisions are made by callSiteInline
         callSiteInline, CallCtxt(..),
+
+        exprSummary
     ) where
 
 import GHC.Prelude
@@ -25,47 +23,20 @@ import GHC.Core.Opt.Simplify.Env
 import GHC.Core.Opt.Simplify.Utils
 import GHC.Core
 import GHC.Core.Unfold
+
 import GHC.Types.Id
+import GHC.Types.Literal ( isLitRubbish )
 import GHC.Types.Basic  ( Arity, RecFlag(..) )
+import GHC.Types.Name
+import GHC.Types.Var.Env
+
 import GHC.Utils.Logger
 import GHC.Utils.Misc
 import GHC.Utils.Outputable
-import GHC.Types.Name
+import GHC.Utils.Panic
 
 import Data.List (isPrefixOf)
 
-{-
-************************************************************************
-*                                                                      *
-\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
-*                                                                      *
-************************************************************************
-
-We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
-we ``couldn't possibly use'' on the other side.  Can be overridden w/
-flaggery.  Just the same as smallEnoughToInline, except that it has no
-actual arguments.
--}
-
-couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool
-couldBeSmallEnoughToInline opts threshold rhs
-  = case sizeExpr opts threshold [] body of
-       TooBig -> False
-       _      -> True
-  where
-    (_, body) = collectBinders rhs
-
-----------------
-smallEnoughToInline :: UnfoldingOpts -> Unfolding -> Bool
-smallEnoughToInline opts (CoreUnfolding {uf_guidance = guidance})
-  = case guidance of
-       UnfIfGoodArgs {ug_tree = et}
-         -> exprTreeSize emptyVarEnv False et `ltSize` unfoldingUseThreshold opts
-       UnfWhen {} -> True
-       UnfNever   -> False
-smallEnoughToInline _ _
-  = False
-
 {-
 ************************************************************************
 *                                                                      *
@@ -90,44 +61,36 @@ them inlining is to give them a NOINLINE pragma, which we do in
 StrictAnal.addStrictnessInfoToTopId
 -}
 
-callSiteInline :: Logger
-               -> UnfoldingOpts
-               -> Int                   -- Case depth
-               -> Id                    -- The Id
-               -> Bool                  -- True <=> unfolding is active
-               -> Bool                  -- True if there are no arguments at all (incl type args)
-               -> [ArgSummary]          -- One for each value arg; True if it is interesting
-               -> CallCtxt              -- True <=> continuation is interesting
+callSiteInline :: Logger -> SimplEnv
+               -> Id -> SimplCont
                -> Maybe CoreExpr        -- Unfolding, if any
-callSiteInline logger opts !case_depth id active_unfolding lone_variable arg_infos cont_info
-  = case idUnfolding id of
+callSiteInline logger env fn cont
+  = case idUnfolding fn of
       -- idUnfolding checks for loop-breakers, returning NoUnfolding
       -- Things with an INLINE pragma may have an unfolding *and*
       -- be a loop breaker  (maybe the knot is not yet untied)
         CoreUnfolding { uf_tmpl = unf_template
                       , uf_cache = unf_cache
                       , uf_guidance = guidance }
-          | active_unf -> tryUnfolding logger opts case_depth id lone_variable
-                                    arg_infos cont_info unf_template
-                                    unf_cache guidance
-          | otherwise -> traceInline logger opts id "Inactive unfolding:" (ppr id) Nothing
+          | active_unf -> tryUnfolding logger env fn cont unf_template unf_cache guidance
+          | otherwise  -> traceInline logger env fn "Inactive unfolding:" (ppr fn) Nothing
         NoUnfolding      -> Nothing
         BootUnfolding    -> Nothing
         OtherCon {}      -> Nothing
         DFunUnfolding {} -> Nothing     -- Never unfold a DFun
   where
-    active_unf = activeUnfolding (seMode env) var
-
+    active_unf = activeUnfolding (seMode env) fn
 
 -- | Report the inlining of an identifier's RHS to the user, if requested.
-traceInline :: Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
-traceInline logger opts inline_id str doc result
+traceInline :: Logger -> SimplEnv -> Id -> String -> SDoc -> a -> a
+traceInline logger env inline_id str doc result
   -- We take care to ensure that doc is used in only one branch, ensuring that
   -- the simplifier can push its allocation into the branch. See Note [INLINE
   -- conditional tracing utilities].
   | enable    = logTraceMsg logger str doc result
   | otherwise = result
   where
+    opts = seUnfoldingOpts env
     enable
       | logHasDumpFlag logger Opt_D_dump_verbose_inlinings
       = True
@@ -233,39 +196,32 @@ needed on a per-module basis.
 
 -}
 
-tryUnfolding :: Logger -> UnfoldingOpts -> Int
-             -> Id -> Bool -> [ArgSummary] -> CallCtxt
+tryUnfolding :: Logger -> SimplEnv -> Id -> SimplCont
              -> CoreExpr -> UnfoldingCache -> UnfoldingGuidance
              -> Maybe CoreExpr
-tryUnfolding logger opts !case_depth id lone_variable arg_infos
-             cont_info unf_template unf_cache guidance
+tryUnfolding logger env fn cont unf_template unf_cache guidance
  = case guidance of
-     UnfNever -> traceInline logger opts id str (text "UnfNever") Nothing
+     UnfNever -> traceInline logger env fn str (text "UnfNever") Nothing
 
      UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok }
         | enough_args && (boring_ok || some_benefit || unfoldingVeryAggressive opts)
                 -- See Note [INLINE for small functions] (3)
-        -> traceInline logger opts id str (mk_doc some_benefit empty True) (Just unf_template)
+        -> traceInline logger env fn str (mk_doc some_benefit empty True) (Just unf_template)
         | otherwise
-        -> traceInline logger opts id str (mk_doc some_benefit empty False) Nothing
+        -> traceInline logger env fn str (mk_doc some_benefit empty False) Nothing
         where
           some_benefit = calc_some_benefit uf_arity
           enough_args  = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0)
 
      UnfIfGoodArgs { ug_args = arg_bndrs, ug_tree = expr_tree }
         | unfoldingVeryAggressive opts
-        -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
+        -> traceInline logger env fn str (mk_doc some_benefit extra_doc True) (Just unf_template)
         | is_wf && some_benefit && small_enough
-        -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
+        -> traceInline logger env fn str (mk_doc some_benefit extra_doc True) (Just unf_template)
         | otherwise
-        -> traceInline logger opts id str (mk_doc some_benefit extra_doc False) Nothing
+        -> traceInline logger env fn str (mk_doc some_benefit extra_doc False) Nothing
         where
-          some_benefit = calc_some_benefit (length arg_discounts)
-          -- See Note [Avoid inlining into deeply nested cases]
-          depth_treshold = unfoldingCaseThreshold opts
-          depth_scaling = unfoldingCaseScaling opts
-          depth_penalty | case_depth <= depth_treshold = 0
-                        | otherwise       = (size * (case_depth - depth_treshold)) `div` depth_scaling
+          some_benefit = calc_some_benefit (length arg_bndrs)
 
           want_result
              | LT <- arg_bndrs `compareLength` arg_infos
@@ -274,16 +230,48 @@ tryUnfolding logger opts !case_depth id lone_variable arg_infos
                               BoringCtxt -> False
                               _          -> True
 
-          context = IC { ic_bound = mkVarEnv (arg_bnds `zip` arg_infos)
-                       , ic_free  = xx
+          zapped_env = zapSubstEnv env
+          context = IC { ic_bound    = mkVarEnv (arg_bndrs `zip` arg_infos)
+                       , ic_free     = getFreeSummary zapped_env
                        , ic_want_res = want_result }
-          size = depth_penalty `addSizeN` exprTreeSize context expr_tree
-          small_enough = adjusted_size `leqSize` unfoldingUseThreshold opts
+          size :: Size
+          size = exprTreeSize context expr_tree
+
+          getFreeSummary :: SimplEnv -> Id -> ArgSummary
+          -- Get the ArgSummary of a free variable
+          getFreeSummary env x
+            = case lookupInScope (seInScope env) x of
+                Just x' | warnPprTrace (not (isId x')) "GFS" (vcat
+                            [ ppr fn <+> equals <+> ppr unf_template
+                            , text "expr_tree:" <+> ppr expr_tree
+                            , ppr x <+> ppr x'
+                           ]) True
+                        , Just expr <- maybeUnfoldingTemplate (idUnfolding x')
+                        -> exprSummary env expr
+                _ -> ArgNoInfo
+
+
+          -- Adjust by the depth scaling
+          -- See Note [Avoid inlining into deeply nested cases]
+          depth_threshold = unfoldingCaseThreshold opts
+          depth_scaling   = unfoldingCaseScaling opts
+
+          add_depth_penalty size = size + (size * (case_depth - depth_threshold))
+                                                  `div` depth_scaling
+          final_size | case_depth <= depth_threshold = size
+                     | otherwise = adjustSize add_depth_penalty size
 
-          extra_doc = vcat [ text "case depth =" <+> int case_depth
-                           , text "depth based penalty =" <+> int depth_penalty
-                           , text "discounted size =" <+> int adjusted_size ]
+          small_enough = final_size `leqSize` unfoldingUseThreshold opts
+
+          extra_doc = vcat [ text "size =" <+> ppr size
+                           , text "case depth =" <+> int case_depth
+                           , text "final_size =" <+> ppr final_size ]
   where
+    (lone_variable, arg_infos, call_cont) = contArgs cont
+    cont_info  = interestingCallContext env call_cont
+    case_depth = seCaseDepth env
+    opts       = seUnfoldingOpts env
+
     -- Unpack the UnfoldingCache lazily because it may not be needed, and all
     -- its fields are strict; so evaluating unf_cache at all forces all the
     -- isWorkFree etc computations to take place.  That risks wasting effort for
@@ -302,7 +290,7 @@ tryUnfolding logger opts !case_depth id lone_variable arg_infos
              , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
 
     ctx = log_default_dump_context (logFlags logger)
-    str = "Considering inlining: " ++ showSDocOneLine ctx (ppr id)
+    str = "Considering inlining: " ++ showSDocOneLine ctx (ppr fn)
     n_val_args = length arg_infos
 
            -- some_benefit is used when the RHS is small enough
@@ -320,7 +308,7 @@ tryUnfolding logger opts !case_depth id lone_variable arg_infos
       where
         saturated      = n_val_args >= uf_arity
         over_saturated = n_val_args > uf_arity
-        interesting_args = any nonTriv arg_infos
+        interesting_args = any hasArgInfo arg_infos
                 -- NB: (any nonTriv arg_infos) looks at the
                 -- over-saturated args too which is "wrong";
                 -- but if over-saturated we inline anyway.
@@ -338,7 +326,6 @@ tryUnfolding logger opts !case_depth id lone_variable arg_infos
                           -> uf_arity > 0  -- See Note [RHS of lets]
               _other      -> False         -- See Note [Nested functions]
 
-
 {- Note [RHS of lets]
 ~~~~~~~~~~~~~~~~~~~~~
 When the call is the argument of a function with a RULE, or the RHS of a let,
@@ -527,6 +514,7 @@ which Roman did.
 
 -}
 
+{-
 computeDiscount :: [Int] -> Int -> [ArgSummary] -> CallCtxt
                 -> Int
 computeDiscount arg_discounts res_discount arg_infos cont_info
@@ -570,3 +558,111 @@ computeDiscount arg_discounts res_discount arg_infos cont_info
                 -- Otherwise we, rather arbitrarily, threshold it.  Yuk.
                 -- But we want to avoid inlining large functions that return
                 -- constructors into contexts that are simply "interesting"
+-}
+
+{- Note [Interesting arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+An argument is interesting if it deserves a discount for unfoldings
+with a discount in that argument position.  The idea is to avoid
+unfolding a function that is applied only to variables that have no
+unfolding (i.e. they are probably lambda bound): f x y z There is
+little point in inlining f here.
+
+Generally, *values* (like (C a b) and (\x.e)) deserve discounts.  But
+we must look through lets, eg (let x = e in C a b), because the let will
+float, exposing the value, if we inline.  That makes it different to
+exprIsHNF.
+
+Before 2009 we said it was interesting if the argument had *any* structure
+at all; i.e. (hasSomeUnfolding v).  But does too much inlining; see #3016.
+
+But we don't regard (f x y) as interesting, unless f is unsaturated.
+If it's saturated and f hasn't inlined, then it's probably not going
+to now!
+
+Note [Conlike is interesting]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+        f d = ...((*) d x y)...
+        ... f (df d')...
+where df is con-like. Then we'd really like to inline 'f' so that the
+rule for (*) (df d) can fire.  To do this
+  a) we give a discount for being an argument of a class-op (eg (*) d)
+  b) we say that a con-like argument (eg (df d)) is interesting
+-}
+
+-------------------
+contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
+-- Summarises value args, discards type args and coercions
+-- The returned continuation of the call is only used to
+-- answer questions like "are you interesting?"
+contArgs cont
+  | lone cont = (True, [], cont)
+  | otherwise = go [] cont
+  where
+    lone (ApplyToTy  {}) = False  -- See Note [Lone variables] in GHC.Core.Unfold
+    lone (ApplyToVal {}) = False  -- NB: even a type application or cast
+    lone (CastIt {})     = False  --     stops it being "lone"
+    lone _               = True
+
+    go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
+                                        = go (exprSummary se arg : args) k
+    go args (ApplyToTy { sc_cont = k }) = go args k
+    go args (CastIt _ k)                = go args k
+    go args k                           = (False, reverse args, k)
+
+------------------------------
+exprSummary :: SimplEnv -> CoreExpr -> ArgSummary
+-- Very simple version of exprIsConApp_maybe
+-- But /do/ take the SimplEnv into account.  We must:
+-- (a) Apply the substitution.  E.g
+--       (\x. ...(f x)...) (a,b)
+---    We may have x:->(a,b) in the substitution, and we want to see that
+--     (a,b) when we are deciding whether or not to inline f
+-- (b) Refine using the in-scope set. E.g
+--       \x. ....case x of { (a,b) -> ...f x... }....
+--     We want to see that x is (a,b) at the call site of f
+exprSummary env e = go env e []
+  where
+    go :: SimplEnv -> CoreExpr -> [CoreExpr] -> ArgSummary
+    go env (Cast e _) as = go env e as
+    go env (Tick _ e) as = go env e as
+    go env (App f a)  as = go env f (a:as)
+    go env (Let b e)  as = go env' e as
+      where
+        env' = env `addNewInScopeIds` bindersOf b
+
+    go env (Var v)    as
+       = -- Simplify.Env.substId Looks up in substitution
+         -- /and/ refines from the InScopeset
+         case substId env v of
+           DoneId v'            -> go_var env v' as
+           DoneEx e _           -> go (zapSubstEnv env)             e as
+           ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) e as
+
+    go _ (Lit l) as
+       | isLitRubbish l = ArgNoInfo -- Leads to unproductive inlining in WWRec, #20035
+       | otherwise      = assertPpr (null as) (ppr as) $
+                          ArgIsCon (LitAlt l) []
+
+    go env (Lam b e)  as
+      | null as = if isRuntimeVar b
+                  then ArgIsLam
+                  else go env' e []
+      where
+        env' = modifyInScope env b  -- Tricky corner here
+
+    go _ _ _ = ArgNoInfo
+
+    go_var env f args
+      | Just con <- isDataConWorkId_maybe f
+      = ArgIsCon (DataAlt con) (map (exprSummary env) args)
+
+      | Just rhs <- expandUnfolding_maybe (idUnfolding f)
+      = go (zapSubstEnv env) rhs args
+
+      | idArity f > valArgCount args
+      = ArgIsLam
+
+      | otherwise
+      = ArgNoInfo


=====================================
compiler/GHC/Core/Opt/Simplify/Iteration.hs
=====================================
@@ -2302,8 +2302,7 @@ rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args }) cont
 -----------------------------------
 tryInlining :: SimplEnv -> Logger -> OutId -> SimplCont -> SimplM (Maybe OutExpr)
 tryInlining env logger var cont
-  | Just expr <- callSiteInline logger uf_opts case_depth var active_unf
-                                lone_variable arg_infos interesting_cont
+  | Just expr <- callSiteInline logger env var cont
   = do { dump_inline expr cont
        ; return (Just expr) }
 
@@ -2311,12 +2310,6 @@ tryInlining env logger var cont
   = return Nothing
 
   where
-    uf_opts    = seUnfoldingOpts env
-    case_depth = seCaseDepth env
-    (lone_variable, arg_infos, call_cont) = contArgs cont
-    interesting_cont = interestingCallContext env call_cont
-    active_unf       = activeUnfolding (seMode env) var
-
     log_inlining doc
       = liftIO $ logDumpFile logger (mkDumpStyle alwaysQualify)
            Opt_D_dump_inlinings


=====================================
compiler/GHC/Core/Opt/Simplify/Utils.hs
=====================================
@@ -24,7 +24,7 @@ module GHC.Core.Opt.Simplify.Utils (
         SimplCont(..), DupFlag(..), FromWhat(..), StaticEnv,
         isSimplified, contIsStop,
         contIsDupable, contResultType, contHoleType, contHoleScaling,
-        contIsTrivial, contArgs, contIsRhs,
+        contIsTrivial, contIsRhs,
         countArgs,
         mkBoringStop, mkRhsStop, mkLazyArgStop,
         interestingCallContext,
@@ -46,9 +46,7 @@ import GHC.Prelude hiding (head, init, last, tail)
 import qualified GHC.Prelude as Partial (head)
 
 import GHC.Core
-import GHC.Types.Literal ( isLitRubbish )
 import GHC.Core.Opt.Simplify.Env
--- import GHC.Core.Opt.Simplify.Inline
 import GHC.Core.Opt.Stats ( Tick(..) )
 import qualified GHC.Core.Subst
 import GHC.Core.Ppr
@@ -558,29 +556,6 @@ countValArgs (CastIt _ cont)                 = countValArgs cont
 countValArgs _                               = 0
 
 -------------------
-contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
--- Summarises value args, discards type args and coercions
--- The returned continuation of the call is only used to
--- answer questions like "are you interesting?"
-contArgs cont
-  | lone cont = (True, [], cont)
-  | otherwise = go [] cont
-  where
-    lone (ApplyToTy  {}) = False  -- See Note [Lone variables] in GHC.Core.Unfold
-    lone (ApplyToVal {}) = False  -- NB: even a type application or cast
-    lone (CastIt {})     = False  --     stops it being "lone"
-    lone _               = True
-
-    go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
-                                        = go (is_interesting arg se : args) k
-    go args (ApplyToTy { sc_cont = k }) = go args k
-    go args (CastIt _ k)                = go args k
-    go args k                           = (False, reverse args, k)
-
-    is_interesting arg se = exprSummary se arg
-       -- Do *not* use short-cutting substitution here
-       -- because we want to get as much IdInfo as possible
-
 -- | Describes how the 'SimplCont' will evaluate the hole as a 'SubDemand'.
 -- This can be more insightful than the limited syntactic context that
 -- 'SimplCont' provides, because the 'Stop' constructor might carry a useful
@@ -593,10 +568,10 @@ contArgs cont
 -- about what to do then and no call sites so far seem to care.
 contEvalContext :: SimplCont -> SubDemand
 contEvalContext k = case k of
-  (Stop _ _ sd)              -> sd
-  (TickIt _ k)               -> contEvalContext k
-  (CastIt _ k)               -> contEvalContext k
-  ApplyToTy{sc_cont=k}       -> contEvalContext k
+  Stop _ _ sd          -> sd
+  TickIt _ k           -> contEvalContext k
+  CastIt _ k           -> contEvalContext k
+  ApplyToTy{sc_cont=k} -> contEvalContext k
     --  ApplyToVal{sc_cont=k}      -> mkCalledOnceDmd $ contEvalContext k
     -- Not 100% sure that's correct, . Here's an example:
     --   f (e x) and f :: <SC(S,C(1,L))>
@@ -638,9 +613,11 @@ mkArgInfo env rule_base fun cont
     vanilla_discounts, arg_discounts :: [Int]
     vanilla_discounts = repeat 0
     arg_discounts = case idUnfolding fun of
-                        CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
-                              -> discounts ++ vanilla_discounts
+                        CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = _discounts}}
+                              -> {- discounts ++ -} vanilla_discounts
                         _     -> vanilla_discounts
+         -- ToDo: with the New Plan it's harder to know which arguments
+         -- attract a discount.  For now, let's just drop this and see.
 
     vanilla_dmds, arg_dmds :: [Demand]
     vanilla_dmds  = repeat topDmd
@@ -928,118 +905,6 @@ contHasRules cont
     go (StrictBind {})                 = False      -- ??
     go (Stop _ _ _)                    = False
 
-{- Note [Interesting arguments]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-An argument is interesting if it deserves a discount for unfoldings
-with a discount in that argument position.  The idea is to avoid
-unfolding a function that is applied only to variables that have no
-unfolding (i.e. they are probably lambda bound): f x y z There is
-little point in inlining f here.
-
-Generally, *values* (like (C a b) and (\x.e)) deserve discounts.  But
-we must look through lets, eg (let x = e in C a b), because the let will
-float, exposing the value, if we inline.  That makes it different to
-exprIsHNF.
-
-Before 2009 we said it was interesting if the argument had *any* structure
-at all; i.e. (hasSomeUnfolding v).  But does too much inlining; see #3016.
-
-But we don't regard (f x y) as interesting, unless f is unsaturated.
-If it's saturated and f hasn't inlined, then it's probably not going
-to now!
-
-Note [Conlike is interesting]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider
-        f d = ...((*) d x y)...
-        ... f (df d')...
-where df is con-like. Then we'd really like to inline 'f' so that the
-rule for (*) (df d) can fire.  To do this
-  a) we give a discount for being an argument of a class-op (eg (*) d)
-  b) we say that a con-like argument (eg (df d)) is interesting
--}
-
-interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
--- See Note [Interesting arguments]
-interestingArg env e = go env 0 e
-  where
-    -- n is # value args to which the expression is applied
-    go env n (Var v)
-       = case substId env v of
-           DoneId v'            -> go_var n v'
-           DoneEx e _           -> go (zapSubstEnv env)             n e
-           ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e
-
-    go _   _ (Lit l)
-       | isLitRubbish l        = TrivArg -- Leads to unproductive inlining in WWRec, #20035
-       | otherwise             = ValueArg
-    go _   _ (Type _)          = TrivArg
-    go _   _ (Coercion _)      = TrivArg
-    go env n (App fn (Type _)) = go env n fn
-    go env n (App fn _)        = go env (n+1) fn
-    go env n (Tick _ a)        = go env n a
-    go env n (Cast e _)        = go env n e
-    go env n (Lam v e)
-       | isTyVar v             = go env n e
-       | n>0                   = NonTrivArg     -- (\x.b) e   is NonTriv
-       | otherwise             = ValueArg
-    go _ _ (Case {})           = NonTrivArg
-    go env n (Let b e)         = case go env' n e of
-                                   ValueArg -> ValueArg
-                                   _        -> NonTrivArg
-                               where
-                                 env' = env `addNewInScopeIds` bindersOf b
-
-    go_var n v
-       | isConLikeId v     = ValueArg   -- Experimenting with 'conlike' rather that
-                                        --    data constructors here
-       | idArity v > n     = ValueArg   -- Catches (eg) primops with arity but no unfolding
-       | n > 0             = NonTrivArg -- Saturated or unknown call
-       | conlike_unfolding = ValueArg   -- n==0; look for an interesting unfolding
-                                        -- See Note [Conlike is interesting]
-       | otherwise         = TrivArg    -- n==0, no useful unfolding
-       where
-         conlike_unfolding = isConLikeUnfolding (idUnfolding v)
-
-------------------------------
-idSummary :: SimplEnv -> Id -> ArgSummary
-idSummary env bndr
-  = case idUnfolding bndr of
-      OtherCon cs -> ScrutIsNot cs
-      DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args }
-        | null bndrs
-        -> ScrutIsCon (DataAlt con) (map exprSummary args)
-        | otherwise
-        -> ScrutNoInfo
-      CoreUnfolding { uf_tmpl = e }
-        -> exprSummary e
-      NoUnfolding   -> ScrutNoInfo
-      BootUnfolding -> ScrutNoInfo
-
-exprSummary :: SimplEnv -> CoreExpr -> ArgSummary
--- Very simple version of exprIsConApp_maybe
-exprSummary env e = go e []
-  where
-    go (Cast e _) as = go e as
-    go (Tick _ e) as = go e as
-    go (Let _ e)  as = go e as
-    go (App f a)  as = go f (a:as)
-    go (Lit l)    as = assertPpr (null as) (ppr as) $
-                       ScrutIsCon (LitAlt l) []
-    go (Var v)    as = go_var v as
-    go (Lam b e)  as
-      | null as = if isRuntimeVar b
-                  then ScrutIsLam
-                  else go e []
-    go _ _ = ScrutNoInfo
-
-    go_var v as
-      | Just con <- isDataConWorkId_maybe v
-      = ScrutIsCon (DataAlt con) (map (exprSummary env) as)
-      | Just rhs <- expandUnfolding_maybe (idUnfolding v)
-      = go rhs as
-      | otherwise
-      = ScrutNoInfo
 
 {-
 ************************************************************************
@@ -1590,7 +1455,7 @@ postInlineUnconditionally env bind_cxt bndr occ_info rhs
 
         -> n_br < 100  -- See Note [Suppress exponential blowup]
 
-           && smallEnoughToInline uf_opts unfolding     -- Small enough to dup
+           && smallEnoughToInline env unfolding     -- Small enough to dup
                         -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
                         --
                         -- NB: Do NOT inline arbitrarily big things, even if occ_n_br=1
@@ -1636,11 +1501,23 @@ postInlineUnconditionally env bind_cxt bndr occ_info rhs
 
   where
     unfolding = idUnfolding bndr
-    uf_opts   = seUnfoldingOpts env
     phase     = sePhase env
     active    = isActive phase (idInlineActivation bndr)
         -- See Note [pre/postInlineUnconditionally in gentle mode]
 
+smallEnoughToInline :: SimplEnv -> Unfolding -> Bool
+smallEnoughToInline env unfolding
+  | CoreUnfolding {uf_guidance = guidance} <- unfolding
+  = case guidance of
+       UnfIfGoodArgs {ug_tree = et} -> exprTreeWillInline limit et
+       UnfWhen {}                   -> True
+       UnfNever                     -> False
+  | otherwise
+  = False
+  where
+    uf_opts = seUnfoldingOpts env
+    limit   = unfoldingUseThreshold uf_opts
+
 {- Note [Inline small things to avoid creating a thunk]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The point of examining occ_info here is that for *non-values* that


=====================================
compiler/GHC/Core/Opt/SpecConstr.hs
=====================================
@@ -32,7 +32,6 @@ import GHC.Core
 import GHC.Core.Subst
 import GHC.Core.Utils
 import GHC.Core.Unfold
-import GHC.Core.Opt.Simplify.Inline
 import GHC.Core.FVs     ( exprsFreeVarsList, exprFreeVars )
 import GHC.Core.Opt.Monad
 import GHC.Core.Opt.WorkWrap.Utils


=====================================
compiler/GHC/Core/Ppr.hs
=====================================
@@ -634,11 +634,11 @@ instance Outputable ExprTree where
 
 instance Outputable CaseTree where
   ppr (ScrutOf x n)   = ppr x <> colon <> int n
-  ppr (CaseOf x alts) = text "case" <+> ppr x
-                        <+> brackets (sep (map ppr alts))
+  ppr (CaseOf x b alts) = sep [ text "case" <+> ppr x <+> ppr b
+                              , nest 2 $ braces $ sep $ map ppr alts ]
 
 instance Outputable AltTree where
-  ppr (AltTree con bs rhs) = ppr con <+> ppr bs <+> text "->" <+> ppr rhs
+  ppr (AltTree con bs rhs) = sep [ppr con <+> ppr bs <+> text "->", nest 2 (ppr rhs)]
 
 instance Outputable Unfolding where
   ppr NoUnfolding                = text "No unfolding"


=====================================
compiler/GHC/Core/Seq.hs
=====================================
@@ -132,7 +132,7 @@ seqET (SizeIs { et_size = size, et_cases = cases, et_ret = ret })
 
 seqCT :: CaseTree -> ()
 seqCT (ScrutOf x i) = x `seq` i `seq` ()
-seqCT (CaseOf x alts) = x `seq` seqList seqAT alts
+seqCT (CaseOf x y alts) = x `seq` y `seq` seqList seqAT alts
 
 seqAT :: AltTree -> ()
 seqAT (AltTree con bs e) = con `seq` seqBndrs bs `seq` seqET e


=====================================
compiler/GHC/Core/Unfold.hs
=====================================
@@ -22,8 +22,10 @@ module GHC.Core.Unfold (
         Unfolding, UnfoldingGuidance,   -- Abstract types
 
         ExprTree, exprTree, exprTreeSize,
-        ArgSummary(..), CallCtxt(..),
-        Size, leqSize, addSizeN,
+        exprTreeWillInline, couldBeSmallEnoughToInline,
+        ArgSummary(..), CallCtxt(..), hasArgInfo,
+        Size, leqSize, addSizeN, adjustSize,
+        InlineContext(..),
 
         UnfoldingOpts (..), defaultUnfoldingOpts,
         updateCreationThreshold, updateUseThreshold,
@@ -56,6 +58,7 @@ import GHC.Builtin.PrimOps
 
 import GHC.Utils.Misc
 import GHC.Utils.Outputable
+import GHC.Utils.Panic
 
 import GHC.Data.Bag
 
@@ -164,10 +167,14 @@ updateReportPrefix n opts = opts { unfoldingReportPrefix = n }
 ********************************************************************* -}
 
 data ArgSummary = ArgNoInfo
-                | ArgIsCon AltCon [ArgSummary]
+                | ArgIsCon AltCon [ArgSummary]  -- Includes type args
                 | ArgIsNot [AltCon]
                 | ArgIsLam
 
+hasArgInfo :: ArgSummary -> Bool
+hasArgInfo ArgNoInfo = False
+hasArgInfo _         = True
+
 instance Outputable ArgSummary where
   ppr ArgNoInfo       = text "ArgNoInfo"
   ppr ArgIsLam        = text "ArgIsLam"
@@ -265,10 +272,11 @@ calcUnfoldingGuidance opts is_top_bottoming (Tick t expr)
   | not (tickishIsCode t)  -- non-code ticks don't matter for unfolding
   = calcUnfoldingGuidance opts is_top_bottoming expr
 calcUnfoldingGuidance opts is_top_bottoming expr
-  = case exprTree opts bOMB_OUT_SIZE (mkVarSet val_bndrs) body of
+  = case exprTree opts val_bndrs body of
       TooBig -> UnfNever
-      et@(SizeIs { et_size = size })
-        | uncondInline expr n_val_bndrs size
+      et@(SizeIs { et_size = size, et_cases = cases })
+        | not (any is_case cases)
+        , uncondInline expr n_val_bndrs size
         -> UnfWhen { ug_unsat_ok  = unSaturatedOk
                    , ug_boring_ok =  boringCxtOk
                    , ug_arity     = n_val_bndrs }   -- Note [INLINE for small functions]
@@ -281,11 +289,28 @@ calcUnfoldingGuidance opts is_top_bottoming expr
 
   where
     (bndrs, body) = collectBinders expr
-    bOMB_OUT_SIZE = unfoldingCreationThreshold opts
-           -- Bomb out if size gets bigger than this
     val_bndrs   = filter isId bndrs
     n_val_bndrs = length val_bndrs
 
+    is_case (CaseOf {})  = True
+    is_case (ScrutOf {}) = False
+
+{- We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
+   we ``couldn't possibly use'' on the other side.  Can be overridden w/
+   flaggery.  Just the same as smallEnoughToInline, except that it has no
+   actual arguments.
+-}
+
+couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool
+couldBeSmallEnoughToInline opts threshold rhs
+  = exprTreeWillInline threshold $
+    exprTree opts [] body
+  where
+    (_, body) = collectBinders rhs
+
+----------------
+
+
 {- Note [Inline unsafeCoerce]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We really want to inline unsafeCoerce, even when applied to boring
@@ -439,56 +464,99 @@ uncondInline rhs arity size
 *                                                                      *
 ********************************************************************* -}
 
-exprTree :: UnfoldingOpts
-         -> Int             -- Bomb out if it gets bigger than this
-         -> VarSet          -- Record scrutiny of these variables
-         -> CoreExpr
-         -> ExprTree
+{- Note [Constructing an ExprTree]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We maintain:
+* avs: argument variables, or variables bound by a case on an
+       argument variable.
+
+  We record a CaseOf or ScrutOf for the `avs`
+
+* lvs: variables bound by lambda and lets in the body; and by
+       case expressions that scrutinise one of the `lvs`, or
+       a non-variable.
+
+  We never record a CaseOf or ScrutOf for one of the `lvs`.
+
+* We record a CaseOf, but not ScrutOf, for other variables; that is,
+  variables free in the entire function definition.  For example:
+        let  f x = case y of
+                     A -> True
+                     B -> <big>
+        in
+        case y of
+          A -> ....f 3....f 4....
+          B -> blah
+  At the calls site of `f` we know that the free var `y` is equal to A, so
+  f should definitely inline.
+
+  But consider instead this example
+        let f x = y x 3 <big>
+        in  ...(f 3)...
+  There nothing we will learn about the free `y` that will make the inining of
+  `f` more attractive.  Hence we don't record ScrutOf for y.
+
+  This is IMPORTANT, because even a call like (reverse xs) would otherwise record
+  a ScrutOf for `reverse` which is very silly.
+-}
+
+type ETVars = (VarSet,VarSet)  -- (avs, lvs)
+              -- See Note [Constructing an ExprTree]
 
+exprTree :: UnfoldingOpts -> [Var] -> CoreExpr -> ExprTree
 -- Note [Computing the size of an expression]
 
--- Forcing bOMB_OUT_SIZE early prevents repeated
--- unboxing of the Int argument.
-exprTree opts !bOMB_OUT_SIZE svars expr
-  = size_up expr
+exprTree opts args expr
+  = go (mkVarSet args, emptyVarSet) expr
   where
+    !bOMB_OUT_SIZE = unfoldingCreationThreshold opts
+       -- Bomb out if size gets bigger than this
+       -- Forcing bOMB_OUT_SIZE early prevents repeated
+       -- unboxing of the Int argument.
+
     et_add     = etAdd bOMB_OUT_SIZE
     et_add_alt = etAddAlt bOMB_OUT_SIZE
 
-    size_up :: CoreExpr -> ExprTree
-    size_up (Cast e _)   = size_up e
-    size_up (Tick _ e)   = size_up e
-    size_up (Type _)     = exprTreeN 0
-    size_up (Coercion _) = exprTreeN 0
-    size_up (Lit lit)    = exprTreeN (litSize lit)
+    go :: ETVars -> CoreExpr -> ExprTree
+          -- (avs,lvs): see Note [Constructing an ExprTree]
+    go vs (Cast e _)   = go vs e
+    go vs (Tick _ e)   = go vs e
+    go _  (Type _)     = exprTreeN 0
+    go _  (Coercion _) = exprTreeN 0
+    go _  (Lit lit)    = exprTreeN (litSize lit)
 
-    size_up (Lam b e)
-      | isId b, not (id_is_free b) = size_up e `et_add` lamSize opts
-      | otherwise                  = size_up e
+    go vs (Lam b e)
+      | isId b, not (id_is_free b) = go vs' e `et_add` lamSize opts
+      | otherwise                  = go vs' e
+      where
+        vs' = vs `add_lv` b
 
-    size_up (Let (NonRec binder rhs) body)
-      = size_up_bind (binder, rhs)  `et_add` size_up body
+    go vs (Let (NonRec binder rhs) body)
+      = go_bind vs (binder, rhs)  `et_add`
+        go (vs `add_lv` binder) body
 
-    size_up (Let (Rec pairs) body)
-      = foldr (et_add . size_up_bind) (size_up body) pairs
+    go vs (Let (Rec pairs) body)
+      = foldr (et_add . go_bind vs') (go vs' body) pairs
+      where
+        vs' = vs `add_lvs` map fst pairs
 
-    size_up e@(App {}) = size_up_app e []
+    go vs e@(App {}) = go_app vs e []
 
-    size_up (Var f) | id_is_free f = exprTreeN 0
+    go vs (Var f) | id_is_free f = exprTreeN 0
                     -- Use calLSize to ensure we get constructor
                     -- discounts even on nullary constructors
-                    | otherwise    = callTree opts svars f []
+                    | otherwise  = callTree opts vs f []
 
-    size_up (Case e _ _ alts) = size_up_case e alts
+    go vs (Case e b _ alts) = go_case vs e b alts
 
     -----------------------------
-    size_up_bind (bndr, rhs)
+    go_bind vs (bndr, rhs)
       | JoinPoint join_arity <- idJoinPointHood bndr
-      , (_bndrs, body) <- collectNBinders join_arity rhs
+      , (bndrs, body) <- collectNBinders join_arity rhs
                           -- Skip arguments to join point
-      = size_up body
+      = go (vs `add_lvs` bndrs) body
       | otherwise
-      = size_up_alloc bndr `etAddN` size_up rhs
+      = size_up_alloc bndr `etAddN` go vs rhs
 
     -- Cost to allocate binding with given binder
     size_up_alloc bndr
@@ -501,94 +569,109 @@ exprTree opts !bOMB_OUT_SIZE svars expr
 
     -----------------------------
     -- size_up_app is used when there's ONE OR MORE value args
-    size_up_app :: CoreExpr -> [CoreExpr] -> ExprTree
+    go_app :: ETVars -> CoreExpr -> [CoreExpr] -> ExprTree
                    -- args are the non-void value args
-    size_up_app (App fun arg) args
-                 | arg_is_free arg = size_up_app fun args
-                 | otherwise       = size_up arg  `et_add`
-                                     size_up_app fun (arg:args)
-    size_up_app (Var fun)     args = callTree opts svars fun args
-    size_up_app (Tick _ expr) args = size_up_app expr args
-    size_up_app (Cast expr _) args = size_up_app expr args
-    size_up_app other         args = vanillaCallSize (length args) `etAddN`
-                                     size_up other
+    go_app vs (App fun arg) args
+               | arg_is_free arg = go_app vs fun args
+               | otherwise       = go vs arg `et_add`
+                                   go_app vs fun (arg:args)
+    go_app vs (Var fun)     args = callTree opts vs fun args
+    go_app vs (Tick _ expr) args = go_app vs expr args
+    go_app vs (Cast expr _) args = go_app vs expr args
+    go_app vs other         args = vanillaCallSize (length args) `etAddN`
+                                   go vs other
        -- if the lhs is not an App or a Var, or an invisible thing like a
        -- Tick or Cast, then we should charge for a complete call plus the
        -- size of the lhs itself.
 
     -----------------------------
-    size_up_case scrut [] = size_up scrut
+    -- Empty case
+    go_case vs scrut _ [] = go vs scrut
          -- case e of {} never returns, so take size of scrutinee
 
-    size_up_case scrut alts                    -- Now alts is non-empty
-        | Just v <- interesting_id svars scrut -- We are scrutinising an argument variable
-        = size_up scrut `et_add`
-          etZero { et_cases = unitBag (CaseOf v (map alt_alt_tree alts)) }
+    -- Record a CaseOf
+    go_case vs@(avs,lvs) scrut b alts                 -- Now alts is non-empty
+      | Just v <- recordCaseOf vs scrut
+      = -- pprTrace "recordCaseOf" (ppr v $$ ppr lvs $$ ppr scrut $$ ppr alts) $
+        go vs scrut `et_add`
+        etZero { et_cases = unitBag (CaseOf v b (map (alt_alt_tree v) alts)) }
+      where
+        alt_alt_tree :: Id -> Alt Var -> AltTree
+        alt_alt_tree v (Alt con bs rhs)
+          = AltTree con bs (10 `etAddN` go (add_alt_bndrs v bs) rhs)
+
+        add_alt_bndrs v bs
+          | v `elemVarSet` avs = (avs `extendVarSetList` (b:bs), lvs)
+                                 -- Don't forget to add the case binder, b
+          | otherwise = vs
+
+    -- Don't record a CaseOf
+    go_case vs scrut b alts    -- alts is non-empty
+      = caseSize scrut alts `etAddN`   -- A bit odd that this is only in one branch
+        go vs scrut         `et_add`
+        foldr1 et_add_alt (map alt_expr_tree alts)
+      where
+        alt_expr_tree :: Alt Var -> ExprTree
+        alt_expr_tree (Alt _con bs rhs)
+          = 10 `etAddN` go (vs `add_lvs` (b:bs)) rhs
+            -- Don't charge for bndrs, so that wrappers look cheap
+            -- (See comments about wrappers with Case)
+            -- Don't forget to add the case binder, b, to lvs.
+            --
+            -- IMPORTANT: *do* charge 10 for the alternative, else we
+            -- find that giant case nests are treated as practically free
+            -- A good example is Foreign.C.Error.errnoToIOError
+
+caseSize :: CoreExpr -> [CoreAlt] -> Int
+caseSize scrut alts
+  | is_inline_scrut scrut, lengthAtMost alts 1 = -10
+  | otherwise                                  = 0
+              -- Normally we don't charge for the case itself, but
+              -- we charge one per alternative (see size_up_alt,
+              -- below) to account for the cost of the info table
+              -- and comparisons.
+              --
+              -- However, in certain cases (see is_inline_scrut
+              -- below), no code is generated for the case unless
+              -- there are multiple alts.  In these cases we
+              -- subtract one, making the first alt free.
+              -- e.g. case x# +# y# of _ -> ...   should cost 1
+              --      case touch# x# of _ -> ...  should cost 0
+              -- (see #4978)
+              --
+              -- I would like to not have the "lengthAtMost alts 1"
+              -- condition above, but without that some programs got worse
+              -- (spectral/hartel/event and spectral/para).  I don't fully
+              -- understand why. (SDM 24/5/11)
 
+              -- Unboxed variables, inline primops and unsafe foreign calls
+              -- are all "inline" things:
+  where
+    is_inline_scrut (Var v) =
+      isUnliftedType (idType v)
+        -- isUnliftedType is OK here: scrutinees have a fixed RuntimeRep (search for FRRCase)
+    is_inline_scrut scrut
+        | (Var f, _) <- collectArgs scrut
+          = case idDetails f of
+              FCallId fc    -> not (isSafeForeignCall fc)
+              PrimOpId op _ -> not (primOpOutOfLine op)
+              _other        -> False
         | otherwise
-        = case_size     `etAddN`   -- A bit odd that this is only in one branch
-          size_up scrut `et_add`
-          foldr1 et_add_alt (map alt_expr_tree alts)
-
-        where
-          alt_alt_tree :: Alt Var -> AltTree
-          alt_alt_tree (Alt con bs rhs)
-            = AltTree con bs (exprTree opts bOMB_OUT_SIZE svars' rhs)
-            where
-              svars' = svars `extendVarSetList` bs
-
-          alt_expr_tree :: Alt Var -> ExprTree
-          alt_expr_tree (Alt _con _bndrs rhs) = 10 `etAddN` size_up rhs
-              -- Don't charge for bndrs, so that wrappers look cheap
-              -- (See comments about wrappers with Case)
-              --
-              -- IMPORTANT: *do* charge 10 for the alternative, else we
-              -- find that giant case nests are treated as practically free
-              -- A good example is Foreign.C.Error.errnoToIOError
-
-          case_size
-           | is_inline_scrut scrut, lengthAtMost alts 1 = -10
-           | otherwise                                  = 0
-                -- Normally we don't charge for the case itself, but
-                -- we charge one per alternative (see size_up_alt,
-                -- below) to account for the cost of the info table
-                -- and comparisons.
-                --
-                -- However, in certain cases (see is_inline_scrut
-                -- below), no code is generated for the case unless
-                -- there are multiple alts.  In these cases we
-                -- subtract one, making the first alt free.
-                -- e.g. case x# +# y# of _ -> ...   should cost 1
-                --      case touch# x# of _ -> ...  should cost 0
-                -- (see #4978)
-                --
-                -- I would like to not have the "lengthAtMost alts 1"
-                -- condition above, but without that some programs got worse
-                -- (spectral/hartel/event and spectral/para).  I don't fully
-                -- understand why. (SDM 24/5/11)
-
-                -- Unboxed variables, inline primops and unsafe foreign calls
-                -- are all "inline" things:
-
-          is_inline_scrut (Var v) =
-            isUnliftedType (idType v)
-              -- isUnliftedType is OK here: scrutinees have a fixed RuntimeRep (search for FRRCase)
-          is_inline_scrut scrut
-              | (Var f, _) <- collectArgs scrut
-                = case idDetails f of
-                    FCallId fc    -> not (isSafeForeignCall fc)
-                    PrimOpId op _ -> not (primOpOutOfLine op)
-                    _other        -> False
-              | otherwise
-                = False
-    ------------
-
-interesting_id :: VarSet -> CoreExpr -> Maybe Id
-interesting_id svars (Var v)
-  | v `elemVarSet` svars  = Just v
-interesting_id svars (Tick _ e) = interesting_id svars e
-interesting_id svars (Cast e _) = interesting_id svars e
-interesting_id _     _          = Nothing
+          = False
+
+add_lv :: ETVars -> Var -> ETVars
+add_lv (avs,lvs) b = (avs, lvs `extendVarSet` b)
+
+add_lvs :: ETVars -> [Var] -> ETVars
+add_lvs (avs,lvs) bs = (avs, lvs `extendVarSetList` bs)
+
+recordCaseOf :: ETVars -> CoreExpr -> Maybe Id
+recordCaseOf (_,lvs) (Var v)
+     | v `elemVarSet` lvs  = Nothing
+     | otherwise           = Just v
+recordCaseOf vs (Tick _ e) = recordCaseOf vs e
+recordCaseOf vs (Cast e _) = recordCaseOf vs e
+recordCaseOf _     _       = Nothing
 
 arg_is_free :: CoreExpr -> Bool
 -- "free" means we don't charge for this
@@ -620,15 +703,15 @@ litSize _other = 0    -- Must match size of nullary constructors
                       --            (eg via case binding)
 
 ----------------------------
-callTree :: UnfoldingOpts -> VarSet -> Id -> [CoreExpr] -> ExprTree
-callTree opts svars fun val_args
+callTree :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> ExprTree
+callTree opts vs fun val_args
   = case idDetails fun of
       FCallId _        -> exprTreeN (vanillaCallSize n_val_args)
       JoinId {}        -> exprTreeN (jumpSize        n_val_args)
       PrimOpId op _    -> exprTreeN (primOpSize op   n_val_args)
       DataConWorkId dc -> conSize dc n_val_args
-      ClassOpId {}     -> classOpSize opts svars val_args
-      _                -> funSize opts svars fun n_val_args
+      ClassOpId {}     -> classOpSize opts vs fun val_args
+      _                -> funSize opts vs fun n_val_args
   where
     n_val_args = length val_args
 
@@ -646,14 +729,15 @@ jumpSize n_val_args = 2 * (1 + n_val_args)
   -- spectral/puzzle. TODO Perhaps adjusting the default threshold would be a
   -- better solution?
 
-classOpSize :: UnfoldingOpts -> VarSet -> [CoreExpr] -> ExprTree
+classOpSize :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> ExprTree
 -- See Note [Conlike is interesting]
-classOpSize _ _ []
+classOpSize _ _ _ []
   = etZero
-classOpSize opts svars val_args
+classOpSize opts vs fn val_args
   | arg1 : _ <- val_args
-  , Just dict <- interesting_id svars arg1
-  = vanillaCallSize (length val_args) `etAddN`
+  , Just dict <- recordCaseOf vs arg1
+  = warnPprTrace (not (isId dict)) "classOpSize" (ppr fn <+> ppr val_args) $
+    vanillaCallSize (length val_args) `etAddN`
     etZero { et_cases = unitBag (ScrutOf dict (unfoldingDictDiscount opts)) }
            -- If the class op is scrutinising a lambda bound dictionary then
            -- give it a discount, to encourage the inlining of this function
@@ -661,10 +745,10 @@ classOpSize opts svars val_args
   | otherwise
   = exprTreeN (vanillaCallSize (length val_args))
 
-funSize :: UnfoldingOpts -> VarSet -> Id -> Int -> ExprTree
+funSize :: UnfoldingOpts -> ETVars -> Id -> Int -> ExprTree
 -- Size for function calls that are not constructors or primops
 -- Note [Function applications]
-funSize opts svars fun n_val_args
+funSize opts (avs,_) fun n_val_args
   | fun `hasKey` buildIdKey   = etZero  -- Wwant to inline applications of build/augment
   | fun `hasKey` augmentIdKey = etZero  -- so we give size zero to the whole call
   | otherwise = SizeIs { et_size  = size
@@ -677,7 +761,7 @@ funSize opts svars fun n_val_args
     -- Discount if this is an interesting variable, and is applied
     -- Discount is enough to make the application free (but not negative!)
     --  See Note [Function and non-function discounts]
-    cases | n_val_args > 0, fun `elemVarSet` svars
+    cases | n_val_args > 0, fun `elemVarSet` avs
           = unitBag (ScrutOf fun size)
           | otherwise
           = emptyBag
@@ -920,6 +1004,10 @@ etZero = SizeIs { et_size = 0, et_cases = emptyBag, et_ret = 0 }
 
 data Size = STooBig | SSize {-# UNPACK #-} !Int
 
+instance Outputable Size where
+  ppr STooBig = text "STooBig"
+  ppr (SSize n) = int n
+
 sizeN :: Int -> Size
 sizeN n = SSize n
 
@@ -931,6 +1019,10 @@ addSizeN :: Int -> Size -> Size
 addSizeN n1 (SSize n2) = SSize (n1+n2)
 addSizeN _  STooBig    = STooBig
 
+adjustSize :: (Int -> Int) -> Size -> Size
+adjustSize f (SSize n) = SSize (f n)
+adjustSize _ STooBig   = STooBig
+
 leqSize :: Size -> Int -> Bool
 leqSize STooBig   _ = False
 leqSize (SSize n) m = n <= m
@@ -944,6 +1036,27 @@ data InlineContext
      }
 
 -------------------------
+exprTreeWillInline :: Int -> ExprTree -> Bool
+-- (cheapExprTreeSize limit et) takes an upper bound `n` on the
+-- size of et; i.e. without discounts etc.
+-- Return True if (s <- limit), False otherwise
+-- Bales out early in the False case
+exprTreeWillInline limit et
+  = go et (\n -> n <= limit) 0
+  where
+    go :: ExprTree -> (Int -> Bool) -> Int -> Bool
+    go _      _ n | n > limit = False
+    go TooBig _ _             = False
+    go (SizeIs { et_size = size, et_cases = cases }) k n
+      = foldr go_ct k cases (n+size)
+
+    go_ct :: CaseTree -> (Int -> Bool) -> Int -> Bool
+    go_ct (ScrutOf {})      k n = k n
+    go_ct (CaseOf _ _ alts) k n = foldr go_alt k alts n
+
+    go_alt :: AltTree -> (Int -> Bool) -> Int -> Bool
+    go_alt (AltTree _ _ et) k n = go et k (n+10)
+
 exprTreeSize :: InlineContext -> ExprTree -> Size
 exprTreeSize _    TooBig = STooBig
 exprTreeSize !ic (SizeIs { et_size  = size
@@ -963,16 +1076,18 @@ caseTreeSize ic (ScrutOf bndr disc)
       ArgIsLam    -> sizeN (-disc)  -- Apply discount
       ArgIsCon {} -> sizeN (-disc)  -- Apply discount
 
-caseTreeSize ic (CaseOf var alts)
-  = case lookupBndr ic var of
+caseTreeSize ic (CaseOf scrut_var case_bndr alts)
+  = case lookupBndr ic scrut_var of
       ArgNoInfo     -> keptCaseSize ic alts
       ArgIsLam      -> keptCaseSize ic alts
       ArgIsNot cons -> keptCaseSize ic (trim_alts cons alts)
-      ArgIsCon con args
+      arg_summ@(ArgIsCon con args)
          | Just (AltTree _ bs rhs) <- find_alt con alts
-         , let ic' = ic { ic_bound = ic_bound ic `extendVarEnvList`
-                                     (bs `zip` args) }
-                     -- In DEFAULT case, bs is empty, so extend is a no-op
+         , let new_summaries :: [(Var,ArgSummary)]
+               new_summaries = (case_bndr,arg_summ) : bs `zip` args
+                  -- Don't forget to add a summary for the case binder!
+               ic' = ic { ic_bound = ic_bound ic `extendVarEnvList` new_summaries }
+                     -- In DEFAULT case, bs is empty, so extending is a no-op
          -> exprTreeSize ic' rhs
          | otherwise  -- Happens for empty alternatives
          -> keptCaseSize ic alts
@@ -1002,17 +1117,11 @@ keptCaseSize ic alts
     -- If there are no alternatives (case e of {}), we get just the size of the scrutinee
   where
     size_alt :: AltTree -> Size
-    size_alt (AltTree _ _ rhs) = sizeN 10 `addSize` exprTreeSize ic rhs
-        -- Add 10 for each alternative
-        -- Don't charge for args, so that wrappers look cheap
-        -- (See comments about wrappers with Case)
-        --
-        -- IMPORTANT: *do* charge 1 for the alternative, else we
-        -- find that giant case nests are treated as practically free
-        -- A good example is Foreign.C.Error.errnoToIOError
-
-lookupBndr :: InlineContext -> Id -> ArgSummary
+    size_alt (AltTree _ _ rhs) = exprTreeSize ic rhs
+        -- Cost for the alternative is already in `rhs`
+
+lookupBndr :: HasDebugCallStack => InlineContext -> Id -> ArgSummary
 lookupBndr (IC { ic_bound = bound_env, ic_free = lookup_free }) var
-  | Just info <- lookupVarEnv bound_env var = info
+  | Just info <- assertPpr (isId var) (ppr var) $
+                 lookupVarEnv bound_env var = info
   | otherwise                               = lookup_free var
-


=====================================
compiler/GHC/Core/Unfold/Make.hs
=====================================
@@ -376,8 +376,8 @@ certainlyWillInline opts fn_info rhs'
              UnfNever   -> Nothing
              UnfWhen {} -> Just (fn_unf { uf_src = src', uf_tmpl = tmpl' })
                              -- INLINE functions have UnfWhen
-             UnfIfGoodArgs { ug_size = size, ug_args = args }
-                        -> do_cunf size args src' tmpl'
+             UnfIfGoodArgs { ug_args = args, ug_tree = tree }
+                        -> do_cunf args tree src' tmpl'
         where
           src' | isCompulsorySource src = src  -- Do not change InlineCompulsory!
                | otherwise              = StableSystemSrc
@@ -396,19 +396,20 @@ certainlyWillInline opts fn_info rhs'
     noinline = isNoInlinePragma (inlinePragInfo fn_info)
     fn_unf   = unfoldingInfo fn_info -- NB: loop-breakers never inline
 
-        -- The UnfIfGoodArgs case seems important.  If we w/w small functions
-        -- binary sizes go up by 10%!  (This is with SplitObjs.)
-        -- I'm not totally sure why.
-        -- INLINABLE functions come via this path
-        --    See Note [certainlyWillInline: INLINABLE]
-    do_cunf size args src' tmpl'
+    -- The UnfIfGoodArgs case seems important.  If we w/w small functions
+    -- binary sizes go up by 10%!  (This is with SplitObjs.)
+    -- I'm not totally sure why.
+    -- INLINABLE functions come via this path
+    --    See Note [certainlyWillInline: INLINABLE]
+    do_cunf args tree src' tmpl'
       | arityInfo fn_info > 0  -- See Note [certainlyWillInline: be careful of thunks]
       , not (isDeadEndSig (dmdSigInfo fn_info))
               -- Do not unconditionally inline a bottoming functions even if
               -- it seems smallish. We've carefully lifted it out to top level,
               -- so we don't want to re-inline it.
       , let unf_arity = length args
-      , size - (10 * (unf_arity + 1)) <= unfoldingUseThreshold opts
+            limit = unfoldingUseThreshold opts + (10 * (unf_arity + 1))
+      , exprTreeWillInline limit tree
       = Just (fn_unf { uf_src      = src'
                      , uf_tmpl     = tmpl'
                      , uf_guidance = UnfWhen { ug_arity     = unf_arity


=====================================
compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
=====================================
@@ -19,15 +19,17 @@ import GHC.Driver.Config.Core.Rules ( initRuleOpts )
 import GHC.Driver.Config.Core.Opt.Arity ( initArityOpts )
 import GHC.Driver.DynFlags ( DynFlags(..), GeneralFlag(..), gopt )
 
-import GHC.Runtime.Context ( InteractiveContext(..) )
+import GHC.Runtime.Context ( InteractiveContext(..), icInteractiveModule )
 
 import GHC.Types.Basic ( CompilerPhase(..) )
 import GHC.Types.Var ( Var )
 
+import GHC.Unit.Types( Module )
+
 initSimplifyExprOpts :: DynFlags -> InteractiveContext -> SimplifyExprOpts
 initSimplifyExprOpts dflags ic = SimplifyExprOpts
   { se_fam_inst = snd $ ic_instances ic
-  , se_mode = (initSimplMode dflags InitialPhase "GHCi")
+  , se_mode = (initSimplMode dflags mod InitialPhase "GHCi")
     { sm_inline = False
       -- Do not do any inlining, in case we expose some
       -- unboxed tuple stuff that confuses the bytecode
@@ -38,6 +40,8 @@ initSimplifyExprOpts dflags ic = SimplifyExprOpts
     , te_tick_factor = simplTickFactor dflags
     }
   }
+  where
+    mod = icInteractiveModule ic
 
 initSimplifyOpts :: DynFlags -> [Var] -> Int -> SimplMode -> RuleBase -> SimplifyOpts
 initSimplifyOpts dflags extra_vars iterations mode hpt_rule_base = let
@@ -56,9 +60,10 @@ initSimplifyOpts dflags extra_vars iterations mode hpt_rule_base = let
     }
   in opts
 
-initSimplMode :: DynFlags -> CompilerPhase -> String -> SimplMode
-initSimplMode dflags phase name = SimplMode
-  { sm_names = [name]
+initSimplMode :: DynFlags -> Module -> CompilerPhase -> String -> SimplMode
+initSimplMode dflags mod phase name = SimplMode
+  { sm_module = mod
+  , sm_names = [name]
   , sm_phase = phase
   , sm_rules = gopt Opt_EnableRewriteRules dflags
   , sm_eta_expand = gopt Opt_DoLambdaEtaExpansion dflags
@@ -76,8 +81,8 @@ initSimplMode dflags phase name = SimplMode
   , sm_co_opt_opts = initOptCoercionOpts dflags
   }
 
-initGentleSimplMode :: DynFlags -> SimplMode
-initGentleSimplMode dflags = (initSimplMode dflags InitialPhase "Gentle")
+initGentleSimplMode :: DynFlags -> Module -> SimplMode
+initGentleSimplMode dflags mod = (initSimplMode dflags mod InitialPhase "Gentle")
   { -- Don't do case-of-case transformations.
     -- This makes full laziness work better
     sm_case_case = False


=====================================
compiler/GHC/Types/Id.hs
=====================================
@@ -752,7 +752,7 @@ idTagSig_maybe = tagSig . idInfo
 -- loop breaker. See 'unfoldingInfo'.
 --
 -- If you really want the unfolding of a strong loopbreaker, call 'realIdUnfolding'.
-idUnfolding :: IdUnfoldingFun
+idUnfolding :: HasDebugCallStack => IdUnfoldingFun
 idUnfolding id = unfoldingInfo (idInfo id)
 
 noUnfoldingFun :: IdUnfoldingFun



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/97fac8936b424f47b3687f542c8485cf2d39d7b5

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/97fac8936b424f47b3687f542c8485cf2d39d7b5
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231022/67705a80/attachment-0001.html>


More information about the ghc-commits mailing list