[Git][ghc/ghc][wip/spj-unf-size] More -- now it compiles
Simon Peyton Jones (@simonpj)
gitlab at gitlab.haskell.org
Sun Oct 22 21:30:09 UTC 2023
Simon Peyton Jones pushed to branch wip/spj-unf-size at Glasgow Haskell Compiler / GHC
Commits:
97fac893 by Simon Peyton Jones at 2023-10-22T22:29:25+01:00
More -- now it compiles
- - - - -
15 changed files:
- compiler/GHC/Core.hs
- compiler/GHC/Core/Opt/LiberateCase.hs
- compiler/GHC/Core/Opt/OccurAnal.hs
- compiler/GHC/Core/Opt/Pipeline.hs
- compiler/GHC/Core/Opt/Simplify/Env.hs
- compiler/GHC/Core/Opt/Simplify/Inline.hs
- compiler/GHC/Core/Opt/Simplify/Iteration.hs
- compiler/GHC/Core/Opt/Simplify/Utils.hs
- compiler/GHC/Core/Opt/SpecConstr.hs
- compiler/GHC/Core/Ppr.hs
- compiler/GHC/Core/Seq.hs
- compiler/GHC/Core/Unfold.hs
- compiler/GHC/Core/Unfold/Make.hs
- compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
- compiler/GHC/Types/Id.hs
Changes:
=====================================
compiler/GHC/Core.hs
=====================================
@@ -1390,6 +1390,8 @@ data UnfoldingGuidance
| UnfIfGoodArgs { -- Arose from a normal Id
ug_args :: [Var], -- Arguments
ug_tree :: ExprTree -- Abstraction of the body
+ -- Invariant: free vars of ug_tree are the ug_args, plus variables
+ -- in scope at the binding site of the function definition
}
| UnfNever -- The RHS is big, so don't inline it
@@ -1397,13 +1399,14 @@ data UnfoldingGuidance
data ExprTree
= TooBig
| SizeIs { et_size :: {-# UNPACK #-} !Int
- , et_cases :: Bag CaseTree
, et_ret :: {-# UNPACK #-} !Int
-- ^ Discount when result is scrutinised
+ , et_cases :: Bag CaseTree
}
data CaseTree
= CaseOf Id -- Abstracts a case expression on this Id
+ Id -- Case binder
[AltTree] -- Always non-empty, but not worth making NonEmpty;
-- nothing relies on non-empty-ness
| ScrutOf Id Int -- If this Id is bound to a value, apply this discount
=====================================
compiler/GHC/Core/Opt/LiberateCase.hs
=====================================
@@ -14,7 +14,6 @@ import GHC.Prelude
import GHC.Core
import GHC.Core.Unfold
-import GHC.Core.Opt.Simplify.Inline
import GHC.Builtin.Types ( unitDataConId )
import GHC.Types.Id
import GHC.Types.Var.Env
=====================================
compiler/GHC/Core/Opt/OccurAnal.hs
=====================================
@@ -1919,20 +1919,17 @@ nodeScore !env new_bndr lb_deps
-- is_lb: see Note [Loop breakers, node scoring, and stability]
is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
- old_unf = realIdUnfolding old_bndr
+ old_unf = realIdUnfolding old_bndr
can_unfold = canUnfold old_unf
rhs = case old_unf of
CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
| isStableSource src
-> unf_rhs
_ -> bind_rhs
- -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
- rhs_size = case old_unf of
- CoreUnfolding { uf_guidance = guidance }
- | UnfIfGoodArgs { ug_size = size } <- guidance
- -> size
- _ -> cheapExprSize rhs
+ -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
+ rhs_size = cheapExprSize rhs
+ -- ToDo: could exploit pre-computed unfolding size?
-- Checking for a constructor application
-- Cheap and cheerful; the simplifier moves casts out of the way
=====================================
compiler/GHC/Core/Opt/Pipeline.hs
=====================================
@@ -76,7 +76,7 @@ core2core :: HscEnv -> ModGuts -> IO ModGuts
core2core hsc_env guts@(ModGuts { mg_module = mod
, mg_loc = loc
, mg_rdr_env = rdr_env })
- = do { let builtin_passes = getCoreToDo dflags hpt_rule_base extra_vars
+ = do { let builtin_passes = getCoreToDo dflags mod hpt_rule_base extra_vars
uniq_tag = 's'
; (guts2, stats) <- runCoreM hsc_env hpt_rule_base uniq_tag mod
@@ -116,9 +116,9 @@ core2core hsc_env guts@(ModGuts { mg_module = mod
************************************************************************
-}
-getCoreToDo :: DynFlags -> RuleBase -> [Var] -> [CoreToDo]
+getCoreToDo :: DynFlags -> Module -> RuleBase -> [Var] -> [CoreToDo]
-- This function builds the pipeline of optimisations
-getCoreToDo dflags hpt_rule_base extra_vars
+getCoreToDo dflags mod hpt_rule_base extra_vars
= flatten_todos core_todo
where
phases = simplPhases dflags
@@ -156,7 +156,7 @@ getCoreToDo dflags hpt_rule_base extra_vars
= CoreDoPasses
$ [ maybe_strictness_before phase
, CoreDoSimplify $ initSimplifyOpts dflags extra_vars iter
- (initSimplMode dflags phase name) hpt_rule_base
+ (initSimplMode dflags mod phase name) hpt_rule_base
, maybe_rule_check phase ]
-- Run GHC's internal simplification phase, after all rules have run.
@@ -167,7 +167,7 @@ getCoreToDo dflags hpt_rule_base extra_vars
-- See Note [Inline in InitialPhase]
-- See Note [RULEs enabled in InitialPhase]
simpl_gently = CoreDoSimplify $ initSimplifyOpts dflags extra_vars max_iter
- (initGentleSimplMode dflags) hpt_rule_base
+ (initGentleSimplMode dflags mod) hpt_rule_base
dmd_cpr_ww = if ww_on then [CoreDoDemand True,CoreDoCpr,CoreDoWorkerWrapper]
else [CoreDoDemand False] -- NB: No CPR! See Note [Don't change boxity without worker/wrapper]
=====================================
compiler/GHC/Core/Opt/Simplify/Env.hs
=====================================
@@ -75,6 +75,8 @@ import GHC.Types.Id as Id
import GHC.Types.Basic
import GHC.Types.Unique.FM ( pprUniqFM )
+import GHC.Unit.Types( Module )
+
import GHC.Data.OrdList
import GHC.Data.Graph.UnVar
@@ -242,7 +244,8 @@ seUnfoldingOpts env = sm_uf_opts (seMode env)
-- See Note [The environments of the Simplify pass]
data SimplMode = SimplMode -- See comments in GHC.Core.Opt.Simplify.Monad
- { sm_phase :: !CompilerPhase
+ { sm_module :: !Module
+ , sm_phase :: !CompilerPhase
, sm_names :: ![String] -- ^ Name(s) of the phase
, sm_rules :: !Bool -- ^ Whether RULES are enabled
, sm_inline :: !Bool -- ^ Whether inlining is enabled
@@ -910,12 +913,12 @@ So we want to look up the inner X.g_34 in the substitution, where we'll
find that it has been substituted by b. (Or conceivably cloned.)
-}
-substId :: SimplEnv -> InId -> SimplSR
+substId :: HasDebugCallStack => SimplEnv -> InId -> SimplSR
-- Returns DoneEx only on a non-Var expression
-substId (SimplEnv { seInScope = in_scope, seIdSubst = ids }) v
+substId (SimplEnv { seMode = mode, seInScope = in_scope, seIdSubst = ids }) v
= case lookupVarEnv ids v of -- Note [Global Ids in the substitution]
- Nothing -> DoneId (refineFromInScope in_scope v)
- Just (DoneId v) -> DoneId (refineFromInScope in_scope v)
+ Nothing -> DoneId (refineFromInScope mode in_scope v)
+ Just (DoneId v) -> DoneId (refineFromInScope mode in_scope v)
Just res -> res -- DoneEx non-var, or ContEx
-- Get the most up-to-date thing from the in-scope set
@@ -924,22 +927,24 @@ substId (SimplEnv { seInScope = in_scope, seIdSubst = ids }) v
--
-- See also Note [In-scope set as a substitution] in GHC.Core.Opt.Simplify.
-refineFromInScope :: InScopeSet -> Var -> Var
-refineFromInScope in_scope v
+refineFromInScope :: HasDebugCallStack => SimplMode -> InScopeSet -> Var -> Var
+refineFromInScope mode in_scope v
| isLocalId v = case lookupInScope in_scope v of
Just v' -> v'
- Nothing -> pprPanic "refineFromInScope" (ppr in_scope $$ ppr v)
+ Nothing -> -- pprPanic "refineFromInScope" (ppr in_scope $$ ppr v)
+ pprTrace "refineFromInScope"
+ (ppr (sm_module mode) <+> ppr v) v
-- c.f #19074 for a subtle place where this went wrong
| otherwise = v
lookupRecBndr :: SimplEnv -> InId -> OutId
-- Look up an Id which has been put into the envt by simplRecBndrs,
-- but where we have not yet done its RHS
-lookupRecBndr (SimplEnv { seInScope = in_scope, seIdSubst = ids }) v
+lookupRecBndr (SimplEnv { seMode = mode, seInScope = in_scope, seIdSubst = ids }) v
= case lookupVarEnv ids v of
Just (DoneId v) -> v
Just _ -> pprPanic "lookupRecBndr" (ppr v)
- Nothing -> refineFromInScope in_scope v
+ Nothing -> refineFromInScope mode in_scope v
{-
************************************************************************
=====================================
compiler/GHC/Core/Opt/Simplify/Inline.hs
=====================================
@@ -9,12 +9,10 @@ This module contains inlining logic used by the simplifier.
{-# LANGUAGE BangPatterns #-}
module GHC.Core.Opt.Simplify.Inline (
- -- * Cheap and cheerful inlining checks.
- couldBeSmallEnoughToInline,
- smallEnoughToInline,
-
-- * The smart inlining decisions are made by callSiteInline
callSiteInline, CallCtxt(..),
+
+ exprSummary
) where
import GHC.Prelude
@@ -25,47 +23,20 @@ import GHC.Core.Opt.Simplify.Env
import GHC.Core.Opt.Simplify.Utils
import GHC.Core
import GHC.Core.Unfold
+
import GHC.Types.Id
+import GHC.Types.Literal ( isLitRubbish )
import GHC.Types.Basic ( Arity, RecFlag(..) )
+import GHC.Types.Name
+import GHC.Types.Var.Env
+
import GHC.Utils.Logger
import GHC.Utils.Misc
import GHC.Utils.Outputable
-import GHC.Types.Name
+import GHC.Utils.Panic
import Data.List (isPrefixOf)
-{-
-************************************************************************
-* *
-\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
-* *
-************************************************************************
-
-We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
-we ``couldn't possibly use'' on the other side. Can be overridden w/
-flaggery. Just the same as smallEnoughToInline, except that it has no
-actual arguments.
--}
-
-couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool
-couldBeSmallEnoughToInline opts threshold rhs
- = case sizeExpr opts threshold [] body of
- TooBig -> False
- _ -> True
- where
- (_, body) = collectBinders rhs
-
-----------------
-smallEnoughToInline :: UnfoldingOpts -> Unfolding -> Bool
-smallEnoughToInline opts (CoreUnfolding {uf_guidance = guidance})
- = case guidance of
- UnfIfGoodArgs {ug_tree = et}
- -> exprTreeSize emptyVarEnv False et `ltSize` unfoldingUseThreshold opts
- UnfWhen {} -> True
- UnfNever -> False
-smallEnoughToInline _ _
- = False
-
{-
************************************************************************
* *
@@ -90,44 +61,36 @@ them inlining is to give them a NOINLINE pragma, which we do in
StrictAnal.addStrictnessInfoToTopId
-}
-callSiteInline :: Logger
- -> UnfoldingOpts
- -> Int -- Case depth
- -> Id -- The Id
- -> Bool -- True <=> unfolding is active
- -> Bool -- True if there are no arguments at all (incl type args)
- -> [ArgSummary] -- One for each value arg; True if it is interesting
- -> CallCtxt -- True <=> continuation is interesting
+callSiteInline :: Logger -> SimplEnv
+ -> Id -> SimplCont
-> Maybe CoreExpr -- Unfolding, if any
-callSiteInline logger opts !case_depth id active_unfolding lone_variable arg_infos cont_info
- = case idUnfolding id of
+callSiteInline logger env fn cont
+ = case idUnfolding fn of
-- idUnfolding checks for loop-breakers, returning NoUnfolding
-- Things with an INLINE pragma may have an unfolding *and*
-- be a loop breaker (maybe the knot is not yet untied)
CoreUnfolding { uf_tmpl = unf_template
, uf_cache = unf_cache
, uf_guidance = guidance }
- | active_unf -> tryUnfolding logger opts case_depth id lone_variable
- arg_infos cont_info unf_template
- unf_cache guidance
- | otherwise -> traceInline logger opts id "Inactive unfolding:" (ppr id) Nothing
+ | active_unf -> tryUnfolding logger env fn cont unf_template unf_cache guidance
+ | otherwise -> traceInline logger env fn "Inactive unfolding:" (ppr fn) Nothing
NoUnfolding -> Nothing
BootUnfolding -> Nothing
OtherCon {} -> Nothing
DFunUnfolding {} -> Nothing -- Never unfold a DFun
where
- active_unf = activeUnfolding (seMode env) var
-
+ active_unf = activeUnfolding (seMode env) fn
-- | Report the inlining of an identifier's RHS to the user, if requested.
-traceInline :: Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
-traceInline logger opts inline_id str doc result
+traceInline :: Logger -> SimplEnv -> Id -> String -> SDoc -> a -> a
+traceInline logger env inline_id str doc result
-- We take care to ensure that doc is used in only one branch, ensuring that
-- the simplifier can push its allocation into the branch. See Note [INLINE
-- conditional tracing utilities].
| enable = logTraceMsg logger str doc result
| otherwise = result
where
+ opts = seUnfoldingOpts env
enable
| logHasDumpFlag logger Opt_D_dump_verbose_inlinings
= True
@@ -233,39 +196,32 @@ needed on a per-module basis.
-}
-tryUnfolding :: Logger -> UnfoldingOpts -> Int
- -> Id -> Bool -> [ArgSummary] -> CallCtxt
+tryUnfolding :: Logger -> SimplEnv -> Id -> SimplCont
-> CoreExpr -> UnfoldingCache -> UnfoldingGuidance
-> Maybe CoreExpr
-tryUnfolding logger opts !case_depth id lone_variable arg_infos
- cont_info unf_template unf_cache guidance
+tryUnfolding logger env fn cont unf_template unf_cache guidance
= case guidance of
- UnfNever -> traceInline logger opts id str (text "UnfNever") Nothing
+ UnfNever -> traceInline logger env fn str (text "UnfNever") Nothing
UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok }
| enough_args && (boring_ok || some_benefit || unfoldingVeryAggressive opts)
-- See Note [INLINE for small functions] (3)
- -> traceInline logger opts id str (mk_doc some_benefit empty True) (Just unf_template)
+ -> traceInline logger env fn str (mk_doc some_benefit empty True) (Just unf_template)
| otherwise
- -> traceInline logger opts id str (mk_doc some_benefit empty False) Nothing
+ -> traceInline logger env fn str (mk_doc some_benefit empty False) Nothing
where
some_benefit = calc_some_benefit uf_arity
enough_args = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0)
UnfIfGoodArgs { ug_args = arg_bndrs, ug_tree = expr_tree }
| unfoldingVeryAggressive opts
- -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
+ -> traceInline logger env fn str (mk_doc some_benefit extra_doc True) (Just unf_template)
| is_wf && some_benefit && small_enough
- -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
+ -> traceInline logger env fn str (mk_doc some_benefit extra_doc True) (Just unf_template)
| otherwise
- -> traceInline logger opts id str (mk_doc some_benefit extra_doc False) Nothing
+ -> traceInline logger env fn str (mk_doc some_benefit extra_doc False) Nothing
where
- some_benefit = calc_some_benefit (length arg_discounts)
- -- See Note [Avoid inlining into deeply nested cases]
- depth_treshold = unfoldingCaseThreshold opts
- depth_scaling = unfoldingCaseScaling opts
- depth_penalty | case_depth <= depth_treshold = 0
- | otherwise = (size * (case_depth - depth_treshold)) `div` depth_scaling
+ some_benefit = calc_some_benefit (length arg_bndrs)
want_result
| LT <- arg_bndrs `compareLength` arg_infos
@@ -274,16 +230,48 @@ tryUnfolding logger opts !case_depth id lone_variable arg_infos
BoringCtxt -> False
_ -> True
- context = IC { ic_bound = mkVarEnv (arg_bnds `zip` arg_infos)
- , ic_free = xx
+ zapped_env = zapSubstEnv env
+ context = IC { ic_bound = mkVarEnv (arg_bndrs `zip` arg_infos)
+ , ic_free = getFreeSummary zapped_env
, ic_want_res = want_result }
- size = depth_penalty `addSizeN` exprTreeSize context expr_tree
- small_enough = adjusted_size `leqSize` unfoldingUseThreshold opts
+ size :: Size
+ size = exprTreeSize context expr_tree
+
+ getFreeSummary :: SimplEnv -> Id -> ArgSummary
+ -- Get the ArgSummary of a free variable
+ getFreeSummary env x
+ = case lookupInScope (seInScope env) x of
+ Just x' | warnPprTrace (not (isId x')) "GFS" (vcat
+ [ ppr fn <+> equals <+> ppr unf_template
+ , text "expr_tree:" <+> ppr expr_tree
+ , ppr x <+> ppr x'
+ ]) True
+ , Just expr <- maybeUnfoldingTemplate (idUnfolding x')
+ -> exprSummary env expr
+ _ -> ArgNoInfo
+
+
+ -- Adjust by the depth scaling
+ -- See Note [Avoid inlining into deeply nested cases]
+ depth_threshold = unfoldingCaseThreshold opts
+ depth_scaling = unfoldingCaseScaling opts
+
+ add_depth_penalty size = size + (size * (case_depth - depth_threshold))
+ `div` depth_scaling
+ final_size | case_depth <= depth_threshold = size
+ | otherwise = adjustSize add_depth_penalty size
- extra_doc = vcat [ text "case depth =" <+> int case_depth
- , text "depth based penalty =" <+> int depth_penalty
- , text "discounted size =" <+> int adjusted_size ]
+ small_enough = final_size `leqSize` unfoldingUseThreshold opts
+
+ extra_doc = vcat [ text "size =" <+> ppr size
+ , text "case depth =" <+> int case_depth
+ , text "final_size =" <+> ppr final_size ]
where
+ (lone_variable, arg_infos, call_cont) = contArgs cont
+ cont_info = interestingCallContext env call_cont
+ case_depth = seCaseDepth env
+ opts = seUnfoldingOpts env
+
-- Unpack the UnfoldingCache lazily because it may not be needed, and all
-- its fields are strict; so evaluating unf_cache at all forces all the
-- isWorkFree etc computations to take place. That risks wasting effort for
@@ -302,7 +290,7 @@ tryUnfolding logger opts !case_depth id lone_variable arg_infos
, text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
ctx = log_default_dump_context (logFlags logger)
- str = "Considering inlining: " ++ showSDocOneLine ctx (ppr id)
+ str = "Considering inlining: " ++ showSDocOneLine ctx (ppr fn)
n_val_args = length arg_infos
-- some_benefit is used when the RHS is small enough
@@ -320,7 +308,7 @@ tryUnfolding logger opts !case_depth id lone_variable arg_infos
where
saturated = n_val_args >= uf_arity
over_saturated = n_val_args > uf_arity
- interesting_args = any nonTriv arg_infos
+ interesting_args = any hasArgInfo arg_infos
-- NB: (any nonTriv arg_infos) looks at the
-- over-saturated args too which is "wrong";
-- but if over-saturated we inline anyway.
@@ -338,7 +326,6 @@ tryUnfolding logger opts !case_depth id lone_variable arg_infos
-> uf_arity > 0 -- See Note [RHS of lets]
_other -> False -- See Note [Nested functions]
-
{- Note [RHS of lets]
~~~~~~~~~~~~~~~~~~~~~
When the call is the argument of a function with a RULE, or the RHS of a let,
@@ -527,6 +514,7 @@ which Roman did.
-}
+{-
computeDiscount :: [Int] -> Int -> [ArgSummary] -> CallCtxt
-> Int
computeDiscount arg_discounts res_discount arg_infos cont_info
@@ -570,3 +558,111 @@ computeDiscount arg_discounts res_discount arg_infos cont_info
-- Otherwise we, rather arbitrarily, threshold it. Yuk.
-- But we want to avoid inlining large functions that return
-- constructors into contexts that are simply "interesting"
+-}
+
+{- Note [Interesting arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+An argument is interesting if it deserves a discount for unfoldings
+with a discount in that argument position. The idea is to avoid
+unfolding a function that is applied only to variables that have no
+unfolding (i.e. they are probably lambda bound): f x y z There is
+little point in inlining f here.
+
+Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
+we must look through lets, eg (let x = e in C a b), because the let will
+float, exposing the value, if we inline. That makes it different to
+exprIsHNF.
+
+Before 2009 we said it was interesting if the argument had *any* structure
+at all; i.e. (hasSomeUnfolding v). But does too much inlining; see #3016.
+
+But we don't regard (f x y) as interesting, unless f is unsaturated.
+If it's saturated and f hasn't inlined, then it's probably not going
+to now!
+
+Note [Conlike is interesting]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ f d = ...((*) d x y)...
+ ... f (df d')...
+where df is con-like. Then we'd really like to inline 'f' so that the
+rule for (*) (df d) can fire. To do this
+ a) we give a discount for being an argument of a class-op (eg (*) d)
+ b) we say that a con-like argument (eg (df d)) is interesting
+-}
+
+-------------------
+contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
+-- Summarises value args, discards type args and coercions
+-- The returned continuation of the call is only used to
+-- answer questions like "are you interesting?"
+contArgs cont
+ | lone cont = (True, [], cont)
+ | otherwise = go [] cont
+ where
+ lone (ApplyToTy {}) = False -- See Note [Lone variables] in GHC.Core.Unfold
+ lone (ApplyToVal {}) = False -- NB: even a type application or cast
+ lone (CastIt {}) = False -- stops it being "lone"
+ lone _ = True
+
+ go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
+ = go (exprSummary se arg : args) k
+ go args (ApplyToTy { sc_cont = k }) = go args k
+ go args (CastIt _ k) = go args k
+ go args k = (False, reverse args, k)
+
+------------------------------
+exprSummary :: SimplEnv -> CoreExpr -> ArgSummary
+-- Very simple version of exprIsConApp_maybe
+-- But /do/ take the SimplEnv into account. We must:
+-- (a) Apply the substitution. E.g
+-- (\x. ...(f x)...) (a,b)
+--- We may have x:->(a,b) in the substitution, and we want to see that
+-- (a,b) when we are deciding whether or not to inline f
+-- (b) Refine using the in-scope set. E.g
+-- \x. ....case x of { (a,b) -> ...f x... }....
+-- We want to see that x is (a,b) at the call site of f
+exprSummary env e = go env e []
+ where
+ go :: SimplEnv -> CoreExpr -> [CoreExpr] -> ArgSummary
+ go env (Cast e _) as = go env e as
+ go env (Tick _ e) as = go env e as
+ go env (App f a) as = go env f (a:as)
+ go env (Let b e) as = go env' e as
+ where
+ env' = env `addNewInScopeIds` bindersOf b
+
+ go env (Var v) as
+ = -- Simplify.Env.substId Looks up in substitution
+ -- /and/ refines from the InScopeset
+ case substId env v of
+ DoneId v' -> go_var env v' as
+ DoneEx e _ -> go (zapSubstEnv env) e as
+ ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) e as
+
+ go _ (Lit l) as
+ | isLitRubbish l = ArgNoInfo -- Leads to unproductive inlining in WWRec, #20035
+ | otherwise = assertPpr (null as) (ppr as) $
+ ArgIsCon (LitAlt l) []
+
+ go env (Lam b e) as
+ | null as = if isRuntimeVar b
+ then ArgIsLam
+ else go env' e []
+ where
+ env' = modifyInScope env b -- Tricky corner here
+
+ go _ _ _ = ArgNoInfo
+
+ go_var env f args
+ | Just con <- isDataConWorkId_maybe f
+ = ArgIsCon (DataAlt con) (map (exprSummary env) args)
+
+ | Just rhs <- expandUnfolding_maybe (idUnfolding f)
+ = go (zapSubstEnv env) rhs args
+
+ | idArity f > valArgCount args
+ = ArgIsLam
+
+ | otherwise
+ = ArgNoInfo
=====================================
compiler/GHC/Core/Opt/Simplify/Iteration.hs
=====================================
@@ -2302,8 +2302,7 @@ rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args }) cont
-----------------------------------
tryInlining :: SimplEnv -> Logger -> OutId -> SimplCont -> SimplM (Maybe OutExpr)
tryInlining env logger var cont
- | Just expr <- callSiteInline logger uf_opts case_depth var active_unf
- lone_variable arg_infos interesting_cont
+ | Just expr <- callSiteInline logger env var cont
= do { dump_inline expr cont
; return (Just expr) }
@@ -2311,12 +2310,6 @@ tryInlining env logger var cont
= return Nothing
where
- uf_opts = seUnfoldingOpts env
- case_depth = seCaseDepth env
- (lone_variable, arg_infos, call_cont) = contArgs cont
- interesting_cont = interestingCallContext env call_cont
- active_unf = activeUnfolding (seMode env) var
-
log_inlining doc
= liftIO $ logDumpFile logger (mkDumpStyle alwaysQualify)
Opt_D_dump_inlinings
=====================================
compiler/GHC/Core/Opt/Simplify/Utils.hs
=====================================
@@ -24,7 +24,7 @@ module GHC.Core.Opt.Simplify.Utils (
SimplCont(..), DupFlag(..), FromWhat(..), StaticEnv,
isSimplified, contIsStop,
contIsDupable, contResultType, contHoleType, contHoleScaling,
- contIsTrivial, contArgs, contIsRhs,
+ contIsTrivial, contIsRhs,
countArgs,
mkBoringStop, mkRhsStop, mkLazyArgStop,
interestingCallContext,
@@ -46,9 +46,7 @@ import GHC.Prelude hiding (head, init, last, tail)
import qualified GHC.Prelude as Partial (head)
import GHC.Core
-import GHC.Types.Literal ( isLitRubbish )
import GHC.Core.Opt.Simplify.Env
--- import GHC.Core.Opt.Simplify.Inline
import GHC.Core.Opt.Stats ( Tick(..) )
import qualified GHC.Core.Subst
import GHC.Core.Ppr
@@ -558,29 +556,6 @@ countValArgs (CastIt _ cont) = countValArgs cont
countValArgs _ = 0
-------------------
-contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
--- Summarises value args, discards type args and coercions
--- The returned continuation of the call is only used to
--- answer questions like "are you interesting?"
-contArgs cont
- | lone cont = (True, [], cont)
- | otherwise = go [] cont
- where
- lone (ApplyToTy {}) = False -- See Note [Lone variables] in GHC.Core.Unfold
- lone (ApplyToVal {}) = False -- NB: even a type application or cast
- lone (CastIt {}) = False -- stops it being "lone"
- lone _ = True
-
- go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
- = go (is_interesting arg se : args) k
- go args (ApplyToTy { sc_cont = k }) = go args k
- go args (CastIt _ k) = go args k
- go args k = (False, reverse args, k)
-
- is_interesting arg se = exprSummary se arg
- -- Do *not* use short-cutting substitution here
- -- because we want to get as much IdInfo as possible
-
-- | Describes how the 'SimplCont' will evaluate the hole as a 'SubDemand'.
-- This can be more insightful than the limited syntactic context that
-- 'SimplCont' provides, because the 'Stop' constructor might carry a useful
@@ -593,10 +568,10 @@ contArgs cont
-- about what to do then and no call sites so far seem to care.
contEvalContext :: SimplCont -> SubDemand
contEvalContext k = case k of
- (Stop _ _ sd) -> sd
- (TickIt _ k) -> contEvalContext k
- (CastIt _ k) -> contEvalContext k
- ApplyToTy{sc_cont=k} -> contEvalContext k
+ Stop _ _ sd -> sd
+ TickIt _ k -> contEvalContext k
+ CastIt _ k -> contEvalContext k
+ ApplyToTy{sc_cont=k} -> contEvalContext k
-- ApplyToVal{sc_cont=k} -> mkCalledOnceDmd $ contEvalContext k
-- Not 100% sure that's correct, . Here's an example:
-- f (e x) and f :: <SC(S,C(1,L))>
@@ -638,9 +613,11 @@ mkArgInfo env rule_base fun cont
vanilla_discounts, arg_discounts :: [Int]
vanilla_discounts = repeat 0
arg_discounts = case idUnfolding fun of
- CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
- -> discounts ++ vanilla_discounts
+ CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = _discounts}}
+ -> {- discounts ++ -} vanilla_discounts
_ -> vanilla_discounts
+ -- ToDo: with the New Plan it's harder to know which arguments
+ -- attract a discount. For now, let's just drop this and see.
vanilla_dmds, arg_dmds :: [Demand]
vanilla_dmds = repeat topDmd
@@ -928,118 +905,6 @@ contHasRules cont
go (StrictBind {}) = False -- ??
go (Stop _ _ _) = False
-{- Note [Interesting arguments]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-An argument is interesting if it deserves a discount for unfoldings
-with a discount in that argument position. The idea is to avoid
-unfolding a function that is applied only to variables that have no
-unfolding (i.e. they are probably lambda bound): f x y z There is
-little point in inlining f here.
-
-Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
-we must look through lets, eg (let x = e in C a b), because the let will
-float, exposing the value, if we inline. That makes it different to
-exprIsHNF.
-
-Before 2009 we said it was interesting if the argument had *any* structure
-at all; i.e. (hasSomeUnfolding v). But does too much inlining; see #3016.
-
-But we don't regard (f x y) as interesting, unless f is unsaturated.
-If it's saturated and f hasn't inlined, then it's probably not going
-to now!
-
-Note [Conlike is interesting]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider
- f d = ...((*) d x y)...
- ... f (df d')...
-where df is con-like. Then we'd really like to inline 'f' so that the
-rule for (*) (df d) can fire. To do this
- a) we give a discount for being an argument of a class-op (eg (*) d)
- b) we say that a con-like argument (eg (df d)) is interesting
--}
-
-interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
--- See Note [Interesting arguments]
-interestingArg env e = go env 0 e
- where
- -- n is # value args to which the expression is applied
- go env n (Var v)
- = case substId env v of
- DoneId v' -> go_var n v'
- DoneEx e _ -> go (zapSubstEnv env) n e
- ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e
-
- go _ _ (Lit l)
- | isLitRubbish l = TrivArg -- Leads to unproductive inlining in WWRec, #20035
- | otherwise = ValueArg
- go _ _ (Type _) = TrivArg
- go _ _ (Coercion _) = TrivArg
- go env n (App fn (Type _)) = go env n fn
- go env n (App fn _) = go env (n+1) fn
- go env n (Tick _ a) = go env n a
- go env n (Cast e _) = go env n e
- go env n (Lam v e)
- | isTyVar v = go env n e
- | n>0 = NonTrivArg -- (\x.b) e is NonTriv
- | otherwise = ValueArg
- go _ _ (Case {}) = NonTrivArg
- go env n (Let b e) = case go env' n e of
- ValueArg -> ValueArg
- _ -> NonTrivArg
- where
- env' = env `addNewInScopeIds` bindersOf b
-
- go_var n v
- | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that
- -- data constructors here
- | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
- | n > 0 = NonTrivArg -- Saturated or unknown call
- | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
- -- See Note [Conlike is interesting]
- | otherwise = TrivArg -- n==0, no useful unfolding
- where
- conlike_unfolding = isConLikeUnfolding (idUnfolding v)
-
-------------------------------
-idSummary :: SimplEnv -> Id -> ArgSummary
-idSummary env bndr
- = case idUnfolding bndr of
- OtherCon cs -> ScrutIsNot cs
- DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args }
- | null bndrs
- -> ScrutIsCon (DataAlt con) (map exprSummary args)
- | otherwise
- -> ScrutNoInfo
- CoreUnfolding { uf_tmpl = e }
- -> exprSummary e
- NoUnfolding -> ScrutNoInfo
- BootUnfolding -> ScrutNoInfo
-
-exprSummary :: SimplEnv -> CoreExpr -> ArgSummary
--- Very simple version of exprIsConApp_maybe
-exprSummary env e = go e []
- where
- go (Cast e _) as = go e as
- go (Tick _ e) as = go e as
- go (Let _ e) as = go e as
- go (App f a) as = go f (a:as)
- go (Lit l) as = assertPpr (null as) (ppr as) $
- ScrutIsCon (LitAlt l) []
- go (Var v) as = go_var v as
- go (Lam b e) as
- | null as = if isRuntimeVar b
- then ScrutIsLam
- else go e []
- go _ _ = ScrutNoInfo
-
- go_var v as
- | Just con <- isDataConWorkId_maybe v
- = ScrutIsCon (DataAlt con) (map (exprSummary env) as)
- | Just rhs <- expandUnfolding_maybe (idUnfolding v)
- = go rhs as
- | otherwise
- = ScrutNoInfo
{-
************************************************************************
@@ -1590,7 +1455,7 @@ postInlineUnconditionally env bind_cxt bndr occ_info rhs
-> n_br < 100 -- See Note [Suppress exponential blowup]
- && smallEnoughToInline uf_opts unfolding -- Small enough to dup
+ && smallEnoughToInline env unfolding -- Small enough to dup
-- ToDo: consider discount on smallEnoughToInline if int_cxt is true
--
-- NB: Do NOT inline arbitrarily big things, even if occ_n_br=1
@@ -1636,11 +1501,23 @@ postInlineUnconditionally env bind_cxt bndr occ_info rhs
where
unfolding = idUnfolding bndr
- uf_opts = seUnfoldingOpts env
phase = sePhase env
active = isActive phase (idInlineActivation bndr)
-- See Note [pre/postInlineUnconditionally in gentle mode]
+smallEnoughToInline :: SimplEnv -> Unfolding -> Bool
+smallEnoughToInline env unfolding
+ | CoreUnfolding {uf_guidance = guidance} <- unfolding
+ = case guidance of
+ UnfIfGoodArgs {ug_tree = et} -> exprTreeWillInline limit et
+ UnfWhen {} -> True
+ UnfNever -> False
+ | otherwise
+ = False
+ where
+ uf_opts = seUnfoldingOpts env
+ limit = unfoldingUseThreshold uf_opts
+
{- Note [Inline small things to avoid creating a thunk]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The point of examining occ_info here is that for *non-values* that
=====================================
compiler/GHC/Core/Opt/SpecConstr.hs
=====================================
@@ -32,7 +32,6 @@ import GHC.Core
import GHC.Core.Subst
import GHC.Core.Utils
import GHC.Core.Unfold
-import GHC.Core.Opt.Simplify.Inline
import GHC.Core.FVs ( exprsFreeVarsList, exprFreeVars )
import GHC.Core.Opt.Monad
import GHC.Core.Opt.WorkWrap.Utils
=====================================
compiler/GHC/Core/Ppr.hs
=====================================
@@ -634,11 +634,11 @@ instance Outputable ExprTree where
instance Outputable CaseTree where
ppr (ScrutOf x n) = ppr x <> colon <> int n
- ppr (CaseOf x alts) = text "case" <+> ppr x
- <+> brackets (sep (map ppr alts))
+ ppr (CaseOf x b alts) = sep [ text "case" <+> ppr x <+> ppr b
+ , nest 2 $ braces $ sep $ map ppr alts ]
instance Outputable AltTree where
- ppr (AltTree con bs rhs) = ppr con <+> ppr bs <+> text "->" <+> ppr rhs
+ ppr (AltTree con bs rhs) = sep [ppr con <+> ppr bs <+> text "->", nest 2 (ppr rhs)]
instance Outputable Unfolding where
ppr NoUnfolding = text "No unfolding"
=====================================
compiler/GHC/Core/Seq.hs
=====================================
@@ -132,7 +132,7 @@ seqET (SizeIs { et_size = size, et_cases = cases, et_ret = ret })
seqCT :: CaseTree -> ()
seqCT (ScrutOf x i) = x `seq` i `seq` ()
-seqCT (CaseOf x alts) = x `seq` seqList seqAT alts
+seqCT (CaseOf x y alts) = x `seq` y `seq` seqList seqAT alts
seqAT :: AltTree -> ()
seqAT (AltTree con bs e) = con `seq` seqBndrs bs `seq` seqET e
=====================================
compiler/GHC/Core/Unfold.hs
=====================================
@@ -22,8 +22,10 @@ module GHC.Core.Unfold (
Unfolding, UnfoldingGuidance, -- Abstract types
ExprTree, exprTree, exprTreeSize,
- ArgSummary(..), CallCtxt(..),
- Size, leqSize, addSizeN,
+ exprTreeWillInline, couldBeSmallEnoughToInline,
+ ArgSummary(..), CallCtxt(..), hasArgInfo,
+ Size, leqSize, addSizeN, adjustSize,
+ InlineContext(..),
UnfoldingOpts (..), defaultUnfoldingOpts,
updateCreationThreshold, updateUseThreshold,
@@ -56,6 +58,7 @@ import GHC.Builtin.PrimOps
import GHC.Utils.Misc
import GHC.Utils.Outputable
+import GHC.Utils.Panic
import GHC.Data.Bag
@@ -164,10 +167,14 @@ updateReportPrefix n opts = opts { unfoldingReportPrefix = n }
********************************************************************* -}
data ArgSummary = ArgNoInfo
- | ArgIsCon AltCon [ArgSummary]
+ | ArgIsCon AltCon [ArgSummary] -- Includes type args
| ArgIsNot [AltCon]
| ArgIsLam
+hasArgInfo :: ArgSummary -> Bool
+hasArgInfo ArgNoInfo = False
+hasArgInfo _ = True
+
instance Outputable ArgSummary where
ppr ArgNoInfo = text "ArgNoInfo"
ppr ArgIsLam = text "ArgIsLam"
@@ -265,10 +272,11 @@ calcUnfoldingGuidance opts is_top_bottoming (Tick t expr)
| not (tickishIsCode t) -- non-code ticks don't matter for unfolding
= calcUnfoldingGuidance opts is_top_bottoming expr
calcUnfoldingGuidance opts is_top_bottoming expr
- = case exprTree opts bOMB_OUT_SIZE (mkVarSet val_bndrs) body of
+ = case exprTree opts val_bndrs body of
TooBig -> UnfNever
- et@(SizeIs { et_size = size })
- | uncondInline expr n_val_bndrs size
+ et@(SizeIs { et_size = size, et_cases = cases })
+ | not (any is_case cases)
+ , uncondInline expr n_val_bndrs size
-> UnfWhen { ug_unsat_ok = unSaturatedOk
, ug_boring_ok = boringCxtOk
, ug_arity = n_val_bndrs } -- Note [INLINE for small functions]
@@ -281,11 +289,28 @@ calcUnfoldingGuidance opts is_top_bottoming expr
where
(bndrs, body) = collectBinders expr
- bOMB_OUT_SIZE = unfoldingCreationThreshold opts
- -- Bomb out if size gets bigger than this
val_bndrs = filter isId bndrs
n_val_bndrs = length val_bndrs
+ is_case (CaseOf {}) = True
+ is_case (ScrutOf {}) = False
+
+{- We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
+ we ``couldn't possibly use'' on the other side. Can be overridden w/
+ flaggery. Just the same as smallEnoughToInline, except that it has no
+ actual arguments.
+-}
+
+couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool
+couldBeSmallEnoughToInline opts threshold rhs
+ = exprTreeWillInline threshold $
+ exprTree opts [] body
+ where
+ (_, body) = collectBinders rhs
+
+----------------
+
+
{- Note [Inline unsafeCoerce]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We really want to inline unsafeCoerce, even when applied to boring
@@ -439,56 +464,99 @@ uncondInline rhs arity size
* *
********************************************************************* -}
-exprTree :: UnfoldingOpts
- -> Int -- Bomb out if it gets bigger than this
- -> VarSet -- Record scrutiny of these variables
- -> CoreExpr
- -> ExprTree
+{- Note [Constructing an ExprTree]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We maintain:
+* avs: argument variables, or variables bound by a case on an
+ argument variable.
+
+ We record a CaseOf or ScrutOf for the `avs`
+
+* lvs: variables bound by lambda and lets in the body; and by
+ case expressions that scrutinise one of the `lvs`, or
+ a non-variable.
+
+ We never record a CaseOf or ScrutOf for one of the `lvs`.
+
+* We record a CaseOf, but not ScrutOf, for other variables; that is,
+ variables free in the entire function definition. For example:
+ let f x = case y of
+ A -> True
+ B -> <big>
+ in
+ case y of
+ A -> ....f 3....f 4....
+ B -> blah
+ At the calls site of `f` we know that the free var `y` is equal to A, so
+ f should definitely inline.
+
+ But consider instead this example
+ let f x = y x 3 <big>
+ in ...(f 3)...
+ There nothing we will learn about the free `y` that will make the inining of
+ `f` more attractive. Hence we don't record ScrutOf for y.
+
+ This is IMPORTANT, because even a call like (reverse xs) would otherwise record
+ a ScrutOf for `reverse` which is very silly.
+-}
+
+type ETVars = (VarSet,VarSet) -- (avs, lvs)
+ -- See Note [Constructing an ExprTree]
+exprTree :: UnfoldingOpts -> [Var] -> CoreExpr -> ExprTree
-- Note [Computing the size of an expression]
--- Forcing bOMB_OUT_SIZE early prevents repeated
--- unboxing of the Int argument.
-exprTree opts !bOMB_OUT_SIZE svars expr
- = size_up expr
+exprTree opts args expr
+ = go (mkVarSet args, emptyVarSet) expr
where
+ !bOMB_OUT_SIZE = unfoldingCreationThreshold opts
+ -- Bomb out if size gets bigger than this
+ -- Forcing bOMB_OUT_SIZE early prevents repeated
+ -- unboxing of the Int argument.
+
et_add = etAdd bOMB_OUT_SIZE
et_add_alt = etAddAlt bOMB_OUT_SIZE
- size_up :: CoreExpr -> ExprTree
- size_up (Cast e _) = size_up e
- size_up (Tick _ e) = size_up e
- size_up (Type _) = exprTreeN 0
- size_up (Coercion _) = exprTreeN 0
- size_up (Lit lit) = exprTreeN (litSize lit)
+ go :: ETVars -> CoreExpr -> ExprTree
+ -- (avs,lvs): see Note [Constructing an ExprTree]
+ go vs (Cast e _) = go vs e
+ go vs (Tick _ e) = go vs e
+ go _ (Type _) = exprTreeN 0
+ go _ (Coercion _) = exprTreeN 0
+ go _ (Lit lit) = exprTreeN (litSize lit)
- size_up (Lam b e)
- | isId b, not (id_is_free b) = size_up e `et_add` lamSize opts
- | otherwise = size_up e
+ go vs (Lam b e)
+ | isId b, not (id_is_free b) = go vs' e `et_add` lamSize opts
+ | otherwise = go vs' e
+ where
+ vs' = vs `add_lv` b
- size_up (Let (NonRec binder rhs) body)
- = size_up_bind (binder, rhs) `et_add` size_up body
+ go vs (Let (NonRec binder rhs) body)
+ = go_bind vs (binder, rhs) `et_add`
+ go (vs `add_lv` binder) body
- size_up (Let (Rec pairs) body)
- = foldr (et_add . size_up_bind) (size_up body) pairs
+ go vs (Let (Rec pairs) body)
+ = foldr (et_add . go_bind vs') (go vs' body) pairs
+ where
+ vs' = vs `add_lvs` map fst pairs
- size_up e@(App {}) = size_up_app e []
+ go vs e@(App {}) = go_app vs e []
- size_up (Var f) | id_is_free f = exprTreeN 0
+ go vs (Var f) | id_is_free f = exprTreeN 0
-- Use calLSize to ensure we get constructor
-- discounts even on nullary constructors
- | otherwise = callTree opts svars f []
+ | otherwise = callTree opts vs f []
- size_up (Case e _ _ alts) = size_up_case e alts
+ go vs (Case e b _ alts) = go_case vs e b alts
-----------------------------
- size_up_bind (bndr, rhs)
+ go_bind vs (bndr, rhs)
| JoinPoint join_arity <- idJoinPointHood bndr
- , (_bndrs, body) <- collectNBinders join_arity rhs
+ , (bndrs, body) <- collectNBinders join_arity rhs
-- Skip arguments to join point
- = size_up body
+ = go (vs `add_lvs` bndrs) body
| otherwise
- = size_up_alloc bndr `etAddN` size_up rhs
+ = size_up_alloc bndr `etAddN` go vs rhs
-- Cost to allocate binding with given binder
size_up_alloc bndr
@@ -501,94 +569,109 @@ exprTree opts !bOMB_OUT_SIZE svars expr
-----------------------------
-- size_up_app is used when there's ONE OR MORE value args
- size_up_app :: CoreExpr -> [CoreExpr] -> ExprTree
+ go_app :: ETVars -> CoreExpr -> [CoreExpr] -> ExprTree
-- args are the non-void value args
- size_up_app (App fun arg) args
- | arg_is_free arg = size_up_app fun args
- | otherwise = size_up arg `et_add`
- size_up_app fun (arg:args)
- size_up_app (Var fun) args = callTree opts svars fun args
- size_up_app (Tick _ expr) args = size_up_app expr args
- size_up_app (Cast expr _) args = size_up_app expr args
- size_up_app other args = vanillaCallSize (length args) `etAddN`
- size_up other
+ go_app vs (App fun arg) args
+ | arg_is_free arg = go_app vs fun args
+ | otherwise = go vs arg `et_add`
+ go_app vs fun (arg:args)
+ go_app vs (Var fun) args = callTree opts vs fun args
+ go_app vs (Tick _ expr) args = go_app vs expr args
+ go_app vs (Cast expr _) args = go_app vs expr args
+ go_app vs other args = vanillaCallSize (length args) `etAddN`
+ go vs other
-- if the lhs is not an App or a Var, or an invisible thing like a
-- Tick or Cast, then we should charge for a complete call plus the
-- size of the lhs itself.
-----------------------------
- size_up_case scrut [] = size_up scrut
+ -- Empty case
+ go_case vs scrut _ [] = go vs scrut
-- case e of {} never returns, so take size of scrutinee
- size_up_case scrut alts -- Now alts is non-empty
- | Just v <- interesting_id svars scrut -- We are scrutinising an argument variable
- = size_up scrut `et_add`
- etZero { et_cases = unitBag (CaseOf v (map alt_alt_tree alts)) }
+ -- Record a CaseOf
+ go_case vs@(avs,lvs) scrut b alts -- Now alts is non-empty
+ | Just v <- recordCaseOf vs scrut
+ = -- pprTrace "recordCaseOf" (ppr v $$ ppr lvs $$ ppr scrut $$ ppr alts) $
+ go vs scrut `et_add`
+ etZero { et_cases = unitBag (CaseOf v b (map (alt_alt_tree v) alts)) }
+ where
+ alt_alt_tree :: Id -> Alt Var -> AltTree
+ alt_alt_tree v (Alt con bs rhs)
+ = AltTree con bs (10 `etAddN` go (add_alt_bndrs v bs) rhs)
+
+ add_alt_bndrs v bs
+ | v `elemVarSet` avs = (avs `extendVarSetList` (b:bs), lvs)
+ -- Don't forget to add the case binder, b
+ | otherwise = vs
+
+ -- Don't record a CaseOf
+ go_case vs scrut b alts -- alts is non-empty
+ = caseSize scrut alts `etAddN` -- A bit odd that this is only in one branch
+ go vs scrut `et_add`
+ foldr1 et_add_alt (map alt_expr_tree alts)
+ where
+ alt_expr_tree :: Alt Var -> ExprTree
+ alt_expr_tree (Alt _con bs rhs)
+ = 10 `etAddN` go (vs `add_lvs` (b:bs)) rhs
+ -- Don't charge for bndrs, so that wrappers look cheap
+ -- (See comments about wrappers with Case)
+ -- Don't forget to add the case binder, b, to lvs.
+ --
+ -- IMPORTANT: *do* charge 10 for the alternative, else we
+ -- find that giant case nests are treated as practically free
+ -- A good example is Foreign.C.Error.errnoToIOError
+
+caseSize :: CoreExpr -> [CoreAlt] -> Int
+caseSize scrut alts
+ | is_inline_scrut scrut, lengthAtMost alts 1 = -10
+ | otherwise = 0
+ -- Normally we don't charge for the case itself, but
+ -- we charge one per alternative (see size_up_alt,
+ -- below) to account for the cost of the info table
+ -- and comparisons.
+ --
+ -- However, in certain cases (see is_inline_scrut
+ -- below), no code is generated for the case unless
+ -- there are multiple alts. In these cases we
+ -- subtract one, making the first alt free.
+ -- e.g. case x# +# y# of _ -> ... should cost 1
+ -- case touch# x# of _ -> ... should cost 0
+ -- (see #4978)
+ --
+ -- I would like to not have the "lengthAtMost alts 1"
+ -- condition above, but without that some programs got worse
+ -- (spectral/hartel/event and spectral/para). I don't fully
+ -- understand why. (SDM 24/5/11)
+ -- Unboxed variables, inline primops and unsafe foreign calls
+ -- are all "inline" things:
+ where
+ is_inline_scrut (Var v) =
+ isUnliftedType (idType v)
+ -- isUnliftedType is OK here: scrutinees have a fixed RuntimeRep (search for FRRCase)
+ is_inline_scrut scrut
+ | (Var f, _) <- collectArgs scrut
+ = case idDetails f of
+ FCallId fc -> not (isSafeForeignCall fc)
+ PrimOpId op _ -> not (primOpOutOfLine op)
+ _other -> False
| otherwise
- = case_size `etAddN` -- A bit odd that this is only in one branch
- size_up scrut `et_add`
- foldr1 et_add_alt (map alt_expr_tree alts)
-
- where
- alt_alt_tree :: Alt Var -> AltTree
- alt_alt_tree (Alt con bs rhs)
- = AltTree con bs (exprTree opts bOMB_OUT_SIZE svars' rhs)
- where
- svars' = svars `extendVarSetList` bs
-
- alt_expr_tree :: Alt Var -> ExprTree
- alt_expr_tree (Alt _con _bndrs rhs) = 10 `etAddN` size_up rhs
- -- Don't charge for bndrs, so that wrappers look cheap
- -- (See comments about wrappers with Case)
- --
- -- IMPORTANT: *do* charge 10 for the alternative, else we
- -- find that giant case nests are treated as practically free
- -- A good example is Foreign.C.Error.errnoToIOError
-
- case_size
- | is_inline_scrut scrut, lengthAtMost alts 1 = -10
- | otherwise = 0
- -- Normally we don't charge for the case itself, but
- -- we charge one per alternative (see size_up_alt,
- -- below) to account for the cost of the info table
- -- and comparisons.
- --
- -- However, in certain cases (see is_inline_scrut
- -- below), no code is generated for the case unless
- -- there are multiple alts. In these cases we
- -- subtract one, making the first alt free.
- -- e.g. case x# +# y# of _ -> ... should cost 1
- -- case touch# x# of _ -> ... should cost 0
- -- (see #4978)
- --
- -- I would like to not have the "lengthAtMost alts 1"
- -- condition above, but without that some programs got worse
- -- (spectral/hartel/event and spectral/para). I don't fully
- -- understand why. (SDM 24/5/11)
-
- -- Unboxed variables, inline primops and unsafe foreign calls
- -- are all "inline" things:
-
- is_inline_scrut (Var v) =
- isUnliftedType (idType v)
- -- isUnliftedType is OK here: scrutinees have a fixed RuntimeRep (search for FRRCase)
- is_inline_scrut scrut
- | (Var f, _) <- collectArgs scrut
- = case idDetails f of
- FCallId fc -> not (isSafeForeignCall fc)
- PrimOpId op _ -> not (primOpOutOfLine op)
- _other -> False
- | otherwise
- = False
- ------------
-
-interesting_id :: VarSet -> CoreExpr -> Maybe Id
-interesting_id svars (Var v)
- | v `elemVarSet` svars = Just v
-interesting_id svars (Tick _ e) = interesting_id svars e
-interesting_id svars (Cast e _) = interesting_id svars e
-interesting_id _ _ = Nothing
+ = False
+
+add_lv :: ETVars -> Var -> ETVars
+add_lv (avs,lvs) b = (avs, lvs `extendVarSet` b)
+
+add_lvs :: ETVars -> [Var] -> ETVars
+add_lvs (avs,lvs) bs = (avs, lvs `extendVarSetList` bs)
+
+recordCaseOf :: ETVars -> CoreExpr -> Maybe Id
+recordCaseOf (_,lvs) (Var v)
+ | v `elemVarSet` lvs = Nothing
+ | otherwise = Just v
+recordCaseOf vs (Tick _ e) = recordCaseOf vs e
+recordCaseOf vs (Cast e _) = recordCaseOf vs e
+recordCaseOf _ _ = Nothing
arg_is_free :: CoreExpr -> Bool
-- "free" means we don't charge for this
@@ -620,15 +703,15 @@ litSize _other = 0 -- Must match size of nullary constructors
-- (eg via case binding)
----------------------------
-callTree :: UnfoldingOpts -> VarSet -> Id -> [CoreExpr] -> ExprTree
-callTree opts svars fun val_args
+callTree :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> ExprTree
+callTree opts vs fun val_args
= case idDetails fun of
FCallId _ -> exprTreeN (vanillaCallSize n_val_args)
JoinId {} -> exprTreeN (jumpSize n_val_args)
PrimOpId op _ -> exprTreeN (primOpSize op n_val_args)
DataConWorkId dc -> conSize dc n_val_args
- ClassOpId {} -> classOpSize opts svars val_args
- _ -> funSize opts svars fun n_val_args
+ ClassOpId {} -> classOpSize opts vs fun val_args
+ _ -> funSize opts vs fun n_val_args
where
n_val_args = length val_args
@@ -646,14 +729,15 @@ jumpSize n_val_args = 2 * (1 + n_val_args)
-- spectral/puzzle. TODO Perhaps adjusting the default threshold would be a
-- better solution?
-classOpSize :: UnfoldingOpts -> VarSet -> [CoreExpr] -> ExprTree
+classOpSize :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> ExprTree
-- See Note [Conlike is interesting]
-classOpSize _ _ []
+classOpSize _ _ _ []
= etZero
-classOpSize opts svars val_args
+classOpSize opts vs fn val_args
| arg1 : _ <- val_args
- , Just dict <- interesting_id svars arg1
- = vanillaCallSize (length val_args) `etAddN`
+ , Just dict <- recordCaseOf vs arg1
+ = warnPprTrace (not (isId dict)) "classOpSize" (ppr fn <+> ppr val_args) $
+ vanillaCallSize (length val_args) `etAddN`
etZero { et_cases = unitBag (ScrutOf dict (unfoldingDictDiscount opts)) }
-- If the class op is scrutinising a lambda bound dictionary then
-- give it a discount, to encourage the inlining of this function
@@ -661,10 +745,10 @@ classOpSize opts svars val_args
| otherwise
= exprTreeN (vanillaCallSize (length val_args))
-funSize :: UnfoldingOpts -> VarSet -> Id -> Int -> ExprTree
+funSize :: UnfoldingOpts -> ETVars -> Id -> Int -> ExprTree
-- Size for function calls that are not constructors or primops
-- Note [Function applications]
-funSize opts svars fun n_val_args
+funSize opts (avs,_) fun n_val_args
| fun `hasKey` buildIdKey = etZero -- Wwant to inline applications of build/augment
| fun `hasKey` augmentIdKey = etZero -- so we give size zero to the whole call
| otherwise = SizeIs { et_size = size
@@ -677,7 +761,7 @@ funSize opts svars fun n_val_args
-- Discount if this is an interesting variable, and is applied
-- Discount is enough to make the application free (but not negative!)
-- See Note [Function and non-function discounts]
- cases | n_val_args > 0, fun `elemVarSet` svars
+ cases | n_val_args > 0, fun `elemVarSet` avs
= unitBag (ScrutOf fun size)
| otherwise
= emptyBag
@@ -920,6 +1004,10 @@ etZero = SizeIs { et_size = 0, et_cases = emptyBag, et_ret = 0 }
data Size = STooBig | SSize {-# UNPACK #-} !Int
+instance Outputable Size where
+ ppr STooBig = text "STooBig"
+ ppr (SSize n) = int n
+
sizeN :: Int -> Size
sizeN n = SSize n
@@ -931,6 +1019,10 @@ addSizeN :: Int -> Size -> Size
addSizeN n1 (SSize n2) = SSize (n1+n2)
addSizeN _ STooBig = STooBig
+adjustSize :: (Int -> Int) -> Size -> Size
+adjustSize f (SSize n) = SSize (f n)
+adjustSize _ STooBig = STooBig
+
leqSize :: Size -> Int -> Bool
leqSize STooBig _ = False
leqSize (SSize n) m = n <= m
@@ -944,6 +1036,27 @@ data InlineContext
}
-------------------------
+exprTreeWillInline :: Int -> ExprTree -> Bool
+-- (cheapExprTreeSize limit et) takes an upper bound `n` on the
+-- size of et; i.e. without discounts etc.
+-- Return True if (s <- limit), False otherwise
+-- Bales out early in the False case
+exprTreeWillInline limit et
+ = go et (\n -> n <= limit) 0
+ where
+ go :: ExprTree -> (Int -> Bool) -> Int -> Bool
+ go _ _ n | n > limit = False
+ go TooBig _ _ = False
+ go (SizeIs { et_size = size, et_cases = cases }) k n
+ = foldr go_ct k cases (n+size)
+
+ go_ct :: CaseTree -> (Int -> Bool) -> Int -> Bool
+ go_ct (ScrutOf {}) k n = k n
+ go_ct (CaseOf _ _ alts) k n = foldr go_alt k alts n
+
+ go_alt :: AltTree -> (Int -> Bool) -> Int -> Bool
+ go_alt (AltTree _ _ et) k n = go et k (n+10)
+
exprTreeSize :: InlineContext -> ExprTree -> Size
exprTreeSize _ TooBig = STooBig
exprTreeSize !ic (SizeIs { et_size = size
@@ -963,16 +1076,18 @@ caseTreeSize ic (ScrutOf bndr disc)
ArgIsLam -> sizeN (-disc) -- Apply discount
ArgIsCon {} -> sizeN (-disc) -- Apply discount
-caseTreeSize ic (CaseOf var alts)
- = case lookupBndr ic var of
+caseTreeSize ic (CaseOf scrut_var case_bndr alts)
+ = case lookupBndr ic scrut_var of
ArgNoInfo -> keptCaseSize ic alts
ArgIsLam -> keptCaseSize ic alts
ArgIsNot cons -> keptCaseSize ic (trim_alts cons alts)
- ArgIsCon con args
+ arg_summ@(ArgIsCon con args)
| Just (AltTree _ bs rhs) <- find_alt con alts
- , let ic' = ic { ic_bound = ic_bound ic `extendVarEnvList`
- (bs `zip` args) }
- -- In DEFAULT case, bs is empty, so extend is a no-op
+ , let new_summaries :: [(Var,ArgSummary)]
+ new_summaries = (case_bndr,arg_summ) : bs `zip` args
+ -- Don't forget to add a summary for the case binder!
+ ic' = ic { ic_bound = ic_bound ic `extendVarEnvList` new_summaries }
+ -- In DEFAULT case, bs is empty, so extending is a no-op
-> exprTreeSize ic' rhs
| otherwise -- Happens for empty alternatives
-> keptCaseSize ic alts
@@ -1002,17 +1117,11 @@ keptCaseSize ic alts
-- If there are no alternatives (case e of {}), we get just the size of the scrutinee
where
size_alt :: AltTree -> Size
- size_alt (AltTree _ _ rhs) = sizeN 10 `addSize` exprTreeSize ic rhs
- -- Add 10 for each alternative
- -- Don't charge for args, so that wrappers look cheap
- -- (See comments about wrappers with Case)
- --
- -- IMPORTANT: *do* charge 1 for the alternative, else we
- -- find that giant case nests are treated as practically free
- -- A good example is Foreign.C.Error.errnoToIOError
-
-lookupBndr :: InlineContext -> Id -> ArgSummary
+ size_alt (AltTree _ _ rhs) = exprTreeSize ic rhs
+ -- Cost for the alternative is already in `rhs`
+
+lookupBndr :: HasDebugCallStack => InlineContext -> Id -> ArgSummary
lookupBndr (IC { ic_bound = bound_env, ic_free = lookup_free }) var
- | Just info <- lookupVarEnv bound_env var = info
+ | Just info <- assertPpr (isId var) (ppr var) $
+ lookupVarEnv bound_env var = info
| otherwise = lookup_free var
-
=====================================
compiler/GHC/Core/Unfold/Make.hs
=====================================
@@ -376,8 +376,8 @@ certainlyWillInline opts fn_info rhs'
UnfNever -> Nothing
UnfWhen {} -> Just (fn_unf { uf_src = src', uf_tmpl = tmpl' })
-- INLINE functions have UnfWhen
- UnfIfGoodArgs { ug_size = size, ug_args = args }
- -> do_cunf size args src' tmpl'
+ UnfIfGoodArgs { ug_args = args, ug_tree = tree }
+ -> do_cunf args tree src' tmpl'
where
src' | isCompulsorySource src = src -- Do not change InlineCompulsory!
| otherwise = StableSystemSrc
@@ -396,19 +396,20 @@ certainlyWillInline opts fn_info rhs'
noinline = isNoInlinePragma (inlinePragInfo fn_info)
fn_unf = unfoldingInfo fn_info -- NB: loop-breakers never inline
- -- The UnfIfGoodArgs case seems important. If we w/w small functions
- -- binary sizes go up by 10%! (This is with SplitObjs.)
- -- I'm not totally sure why.
- -- INLINABLE functions come via this path
- -- See Note [certainlyWillInline: INLINABLE]
- do_cunf size args src' tmpl'
+ -- The UnfIfGoodArgs case seems important. If we w/w small functions
+ -- binary sizes go up by 10%! (This is with SplitObjs.)
+ -- I'm not totally sure why.
+ -- INLINABLE functions come via this path
+ -- See Note [certainlyWillInline: INLINABLE]
+ do_cunf args tree src' tmpl'
| arityInfo fn_info > 0 -- See Note [certainlyWillInline: be careful of thunks]
, not (isDeadEndSig (dmdSigInfo fn_info))
-- Do not unconditionally inline a bottoming functions even if
-- it seems smallish. We've carefully lifted it out to top level,
-- so we don't want to re-inline it.
, let unf_arity = length args
- , size - (10 * (unf_arity + 1)) <= unfoldingUseThreshold opts
+ limit = unfoldingUseThreshold opts + (10 * (unf_arity + 1))
+ , exprTreeWillInline limit tree
= Just (fn_unf { uf_src = src'
, uf_tmpl = tmpl'
, uf_guidance = UnfWhen { ug_arity = unf_arity
=====================================
compiler/GHC/Driver/Config/Core/Opt/Simplify.hs
=====================================
@@ -19,15 +19,17 @@ import GHC.Driver.Config.Core.Rules ( initRuleOpts )
import GHC.Driver.Config.Core.Opt.Arity ( initArityOpts )
import GHC.Driver.DynFlags ( DynFlags(..), GeneralFlag(..), gopt )
-import GHC.Runtime.Context ( InteractiveContext(..) )
+import GHC.Runtime.Context ( InteractiveContext(..), icInteractiveModule )
import GHC.Types.Basic ( CompilerPhase(..) )
import GHC.Types.Var ( Var )
+import GHC.Unit.Types( Module )
+
initSimplifyExprOpts :: DynFlags -> InteractiveContext -> SimplifyExprOpts
initSimplifyExprOpts dflags ic = SimplifyExprOpts
{ se_fam_inst = snd $ ic_instances ic
- , se_mode = (initSimplMode dflags InitialPhase "GHCi")
+ , se_mode = (initSimplMode dflags mod InitialPhase "GHCi")
{ sm_inline = False
-- Do not do any inlining, in case we expose some
-- unboxed tuple stuff that confuses the bytecode
@@ -38,6 +40,8 @@ initSimplifyExprOpts dflags ic = SimplifyExprOpts
, te_tick_factor = simplTickFactor dflags
}
}
+ where
+ mod = icInteractiveModule ic
initSimplifyOpts :: DynFlags -> [Var] -> Int -> SimplMode -> RuleBase -> SimplifyOpts
initSimplifyOpts dflags extra_vars iterations mode hpt_rule_base = let
@@ -56,9 +60,10 @@ initSimplifyOpts dflags extra_vars iterations mode hpt_rule_base = let
}
in opts
-initSimplMode :: DynFlags -> CompilerPhase -> String -> SimplMode
-initSimplMode dflags phase name = SimplMode
- { sm_names = [name]
+initSimplMode :: DynFlags -> Module -> CompilerPhase -> String -> SimplMode
+initSimplMode dflags mod phase name = SimplMode
+ { sm_module = mod
+ , sm_names = [name]
, sm_phase = phase
, sm_rules = gopt Opt_EnableRewriteRules dflags
, sm_eta_expand = gopt Opt_DoLambdaEtaExpansion dflags
@@ -76,8 +81,8 @@ initSimplMode dflags phase name = SimplMode
, sm_co_opt_opts = initOptCoercionOpts dflags
}
-initGentleSimplMode :: DynFlags -> SimplMode
-initGentleSimplMode dflags = (initSimplMode dflags InitialPhase "Gentle")
+initGentleSimplMode :: DynFlags -> Module -> SimplMode
+initGentleSimplMode dflags mod = (initSimplMode dflags mod InitialPhase "Gentle")
{ -- Don't do case-of-case transformations.
-- This makes full laziness work better
sm_case_case = False
=====================================
compiler/GHC/Types/Id.hs
=====================================
@@ -752,7 +752,7 @@ idTagSig_maybe = tagSig . idInfo
-- loop breaker. See 'unfoldingInfo'.
--
-- If you really want the unfolding of a strong loopbreaker, call 'realIdUnfolding'.
-idUnfolding :: IdUnfoldingFun
+idUnfolding :: HasDebugCallStack => IdUnfoldingFun
idUnfolding id = unfoldingInfo (idInfo id)
noUnfoldingFun :: IdUnfoldingFun
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/97fac8936b424f47b3687f542c8485cf2d39d7b5
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/97fac8936b424f47b3687f542c8485cf2d39d7b5
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231022/67705a80/attachment-0001.html>
More information about the ghc-commits
mailing list