[Git][ghc/ghc][wip/andreask/deep_discounts] 2 commits: Prototype for deep inlining discounts.
Andreas Klebinger (@AndreasK)
gitlab at gitlab.haskell.org
Sun Oct 2 14:24:00 UTC 2022
Andreas Klebinger pushed to branch wip/andreask/deep_discounts at Glasgow Haskell Compiler / GHC
Commits:
9198be09 by Andreas Klebinger at 2022-10-02T15:13:53+02:00
Prototype for deep inlining discounts.
This is very much not finished but the basic idea is the allow for
reasonably accurate estimates how much smaller a inlined function will
be if applied to a certain argument.
See #21938 for a proper description of the idea.
Optimize UnVarSet slightly
min discount of 10 if the arg is a value
Add a depth discount to nested argInfo/argGuidance
Move some functions around to avoid a module loop
- - - - -
4cbe93cf by Andreas Klebinger at 2022-10-02T16:09:37+02:00
Remove one flag
- - - - -
17 changed files:
- compiler/GHC/Core.hs
- compiler/GHC/Core/Opt/Simplify/Env.hs
- + compiler/GHC/Core/Opt/Simplify/Inline.hs
- compiler/GHC/Core/Opt/Simplify/Iteration.hs
- compiler/GHC/Core/Opt/Simplify/Utils.hs
- compiler/GHC/Core/Ppr.hs
- compiler/GHC/Core/Seq.hs
- compiler/GHC/Core/Unfold.hs
- compiler/GHC/Data/Graph/UnVar.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/Types/Unique/FM.hs
- compiler/GHC/Types/Var/Env.hs
- compiler/GHC/Utils/Outputable.hs
- compiler/ghc.cabal.in
- docs/users_guide/hints.rst
- docs/users_guide/using-optimisation.rst
Changes:
=====================================
compiler/GHC/Core.hs
=====================================
@@ -55,6 +55,7 @@ module GHC.Core (
-- * Unfolding data types
Unfolding(..), UnfoldingGuidance(..), UnfoldingSource(..),
+ ArgDiscount(..), ConMap, ConDiscount(..),
-- ** Constructing 'Unfolding's
noUnfolding, bootUnfolding, evaldUnfolding, mkOtherCon,
@@ -62,12 +63,13 @@ module GHC.Core (
-- ** Predicates and deconstruction on 'Unfolding'
unfoldingTemplate, expandUnfolding_maybe,
- maybeUnfoldingTemplate, otherCons,
+ maybeUnfoldingTemplate, maybeUnfoldingGuidance, otherCons,
isValueUnfolding, isEvaldUnfolding, isCheapUnfolding,
isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding,
isStableUnfolding, isInlineUnfolding, isBootUnfolding,
hasCoreUnfolding, hasSomeUnfolding,
- canUnfold, neverUnfoldGuidance, isStableSource,
+ canUnfold, neverUnfoldGuidance, argGuidance, isStableSource,
+ discountDepth,
-- * Annotated expression data types
AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt(..),
@@ -120,6 +122,7 @@ import GHC.Utils.Panic.Plain
import Data.Data hiding (TyCon)
import Data.Int
import Data.Word
+import GHC.Types.Unique.FM
infixl 4 `mkApps`, `mkTyApps`, `mkVarApps`, `App`, `mkCoApps`
-- Left associative, so that we can say (f `mkTyApps` xs `mkVarApps` ys)
@@ -1367,6 +1370,44 @@ data UnfoldingSource
-- Inline absolutely always, however boring the context.
+-- Argument use is modeled as a tree
+type ConMap a = UniqFM DataCon a
+
+data ConDiscount = ConDiscount { cd_con :: !DataCon
+ , cd_discount :: !Int
+ , cd_arg_discounts :: [ArgDiscount]
+ }
+ deriving (Eq)
+
+instance Outputable ConDiscount where
+ ppr (ConDiscount c d as) = ppr c <> text ":" <> ppr d <> (ppr as)
+
+data ArgDiscount
+ -- Argument is used, either sequed with a single alternative
+ -- used as argument to a interesting function or in other ways
+ -- which could make this argument worthwhile to inline.
+ = SomeArgUse { ad_seq_discount :: !Int }
+ -- Argument is used to discriminate between case alternatives.
+ -- We give specific constructors a discount based on the alternative
+ -- they will select, and provide a generic discount if we know the arg
+ -- is a value but not what value exactly.
+ -- Only of the the two discounts might be applied for the same argument.
+ | DiscSeq { ad_seq_discount :: !Int -- ^ Discount if no specific constructor discount matches
+ , ad_con_discount :: !(ConMap ConDiscount) -- ^ Discounts for specific constructors
+ }
+ -- A discount for the use of a function.
+ | FunDisc { ad_seq_discount :: !Int, ad_fun :: !Name}
+ | NoSeqUse
+ deriving Eq
+
+instance Outputable ArgDiscount where
+ ppr (SomeArgUse n)= text "seq:" <> ppr n
+ ppr (NoSeqUse)= text "lazy use"
+ ppr (FunDisc d f ) = text "fun-"<>ppr f<>text ":"<> ppr d
+ ppr (DiscSeq d_seq m)
+ | isNullUFM m = text "disc:"<> ppr d_seq
+ | otherwise = sep (punctuate comma ((text "some_con:"<> ppr d_seq) : map ppr (nonDetEltsUFM m)))
+ -- (text "some_con:"<> ppr d_seq) <> text "||" <> braces (pprUFM m ppr)
-- | 'UnfoldingGuidance' says when unfolding should take place
data UnfoldingGuidance
@@ -1384,13 +1425,13 @@ data UnfoldingGuidance
| UnfIfGoodArgs { -- Arose from a normal Id; the info here is the
-- result of a simple analysis of the RHS
- ug_args :: [Int], -- Discount if the argument is evaluated.
+ ug_args :: [ArgDiscount], -- Discount if the argument is evaluated.
-- (i.e., a simplification will definitely
-- be possible). One elt of the list per *value* arg.
- ug_size :: Int, -- The "size" of the unfolding.
+ ug_size :: !Int, -- The "size" of the unfolding.
- ug_res :: Int -- Scrutinee discount: the discount to subtract if the thing is in
+ ug_res :: !Int -- Scrutinee discount: the discount to subtract if the thing is in
} -- a context (case (thing args) of ...),
-- (where there are the right number of arguments.)
@@ -1494,6 +1535,10 @@ maybeUnfoldingTemplate (DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args
maybeUnfoldingTemplate _
= Nothing
+maybeUnfoldingGuidance :: Unfolding -> Maybe UnfoldingGuidance
+maybeUnfoldingGuidance CoreUnfolding { uf_guidance = guidance } = Just guidance
+maybeUnfoldingGuidance _ = Nothing
+
-- | The constructors that the unfolding could never be:
-- returns @[]@ if no information is available
otherCons :: Unfolding -> [AltCon]
@@ -1582,6 +1627,30 @@ neverUnfoldGuidance :: UnfoldingGuidance -> Bool
neverUnfoldGuidance UnfNever = True
neverUnfoldGuidance _ = False
+-- Returns a list of available argument discounts if any.
+argGuidance :: UnfoldingGuidance -> [ArgDiscount]
+argGuidance UnfIfGoodArgs { ug_args = arg_guides } = arg_guides
+argGuidance _ = []
+
+discountDepth :: ArgDiscount -> Int
+discountDepth dc = case dc of
+ NoSeqUse -> 0
+ FunDisc{} -> 1
+ SomeArgUse{} -> 1
+ DiscSeq { ad_con_discount = con_dc} ->
+ let max_con_depth =
+ nonDetStrictFoldUFM
+ (\(e :: ConDiscount) max_depth ->
+ max max_depth
+ (maximum (1:(map discountDepth (cd_arg_discounts e)))))
+ 0
+ con_dc
+ in max 1 max_con_depth
+
+
+
+
+
hasCoreUnfolding :: Unfolding -> Bool
-- An unfolding "has Core" if it contains a Core expression, which
-- may mention free variables. See Note [Fragile unfoldings]
@@ -1953,6 +2022,10 @@ bindersOf (Rec pairs) = [binder | (binder, _) <- pairs]
bindersOfBinds :: [Bind b] -> [b]
bindersOfBinds binds = foldr ((++) . bindersOf) [] binds
+{-# INLINE foldBindersOf #-}
+foldBindersOf :: (a -> b -> a) -> Bind b -> a -> a
+foldBindersOf f b r = foldl' f r (bindersOf b)
+
rhssOfBind :: Bind b -> [Expr b]
rhssOfBind (NonRec _ rhs) = [rhs]
rhssOfBind (Rec pairs) = [rhs | (_,rhs) <- pairs]
=====================================
compiler/GHC/Core/Opt/Simplify/Env.hs
=====================================
@@ -22,6 +22,7 @@ module GHC.Core.Opt.Simplify.Env (
zapSubstEnv, setSubstEnv, bumpCaseDepth,
getInScope, setInScopeFromE, setInScopeFromF,
setInScopeSet, modifyInScope, addNewInScopeIds,
+ addNewInScopeId, addNewInScopeBndr,
getSimplRules, enterRecGroupRHSs,
-- * Substitution results
@@ -561,6 +562,20 @@ setInScopeFromE rhs_env here_env = rhs_env { seInScope = seInScope here_env }
setInScopeFromF :: SimplEnv -> SimplFloats -> SimplEnv
setInScopeFromF env floats = env { seInScope = sfInScope floats }
+addNewInScopeId :: SimplEnv -> CoreBndr -> SimplEnv
+addNewInScopeId env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst }) v
+-- See Note [Bangs in the Simplifier]
+ = let !in_scope1 = in_scope `extendInScopeSet` v
+ !id_subst1 = id_subst `delVarEnv` v
+ in
+ env { seInScope = in_scope1,
+ seIdSubst = id_subst1 }
+ -- Why delete? Consider
+ -- let x = a*b in (x, \x -> x+3)
+ -- We add [x |-> a*b] to the substitution, but we must
+ -- _delete_ it from the substitution when going inside
+ -- the (\x -> ...)!
+
addNewInScopeIds :: SimplEnv -> [CoreBndr] -> SimplEnv
-- The new Ids are guaranteed to be freshly allocated
addNewInScopeIds env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst }) vs
@@ -576,6 +591,21 @@ addNewInScopeIds env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst }) v
-- _delete_ it from the substitution when going inside
-- the (\x -> ...)!
+addNewInScopeBndr :: SimplEnv -> CoreBind -> SimplEnv
+addNewInScopeBndr env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst }) b
+-- See Note [Bangs in the Simplifier]
+ = let vs = bindersOf b
+ !in_scope1 = in_scope `extendInScopeSetList` vs
+ !id_subst1 = id_subst `delVarEnvList` vs
+ in
+ env { seInScope = in_scope1,
+ seIdSubst = id_subst1 }
+ -- Why delete? Consider
+ -- let x = a*b in (x, \x -> x+3)
+ -- We add [x |-> a*b] to the substitution, but we must
+ -- _delete_ it from the substitution when going inside
+ -- the (\x -> ...)!
+
modifyInScope :: SimplEnv -> CoreBndr -> SimplEnv
-- The variable should already be in scope, but
-- replace the existing version with this new one
=====================================
compiler/GHC/Core/Opt/Simplify/Inline.hs
=====================================
@@ -0,0 +1,690 @@
+-- {-# OPTIONS_GHC -ddump-simpl -ddump-to-file -ddump-stg-final -dsuppress-coercions -dsuppress-coercion-types #-}
+
+module GHC.Core.Opt.Simplify.Inline
+where
+
+import GHC.Prelude
+
+import GHC.Core
+import GHC.Core.Unfold
+import GHC.Core.TyCon (isClassTyCon)
+
+import GHC.Types.Name
+import GHC.Types.Id
+import GHC.Types.Basic
+
+import GHC.Utils.Misc
+import GHC.Utils.Outputable
+
+import Data.List ( isPrefixOf )
+import GHC.Utils.Logger
+import GHC.Driver.Flags
+import GHC.Types.Unique.FM
+import GHC.Core.Opt.Simplify.Env
+import GHC.Types.Literal
+import GHC.Core.DataCon
+import GHC.Types.Var
+import GHC.Core.Opt.Simplify.Utils
+import GHC.Utils.Panic.Plain (assert)
+import GHC.Utils.Trace
+import GHC.Types.Tickish
+
+callSiteInline :: Logger
+ -> UnfoldingOpts
+ -> Int -- Case depth
+ -> Id -- The Id
+ -> Bool -- True <=> unfolding is active
+ -> Bool -- True if there are no arguments at all (incl type args)
+ -> [ArgSummary] -- One for each value arg; True if it is interesting
+ -> CallCtxt -- True <=> continuation is interesting
+ -> Maybe CoreExpr -- Unfolding, if any
+callSiteInline logger !opts !case_depth id active_unfolding lone_variable arg_infos cont_info
+ = case idUnfolding id of
+ -- idUnfolding checks for loop-breakers, returning NoUnfolding
+ -- Things with an INLINE pragma may have an unfolding *and*
+ -- be a loop breaker (maybe the knot is not yet untied)
+ CoreUnfolding { uf_tmpl = unf_template
+ , uf_is_work_free = is_wf
+ , uf_guidance = guidance, uf_expandable = is_exp }
+ | active_unfolding -> tryUnfolding logger opts case_depth id lone_variable
+ arg_infos cont_info unf_template
+ is_wf is_exp guidance
+ | otherwise -> traceInline logger opts id "Inactive unfolding:" (ppr id) Nothing
+ NoUnfolding -> Nothing
+ BootUnfolding -> Nothing
+ OtherCon {} -> Nothing
+ DFunUnfolding {} -> Nothing -- Never unfold a DFun
+
+-- | Report the inlining of an identifier's RHS to the user, if requested.
+traceInline :: Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
+traceInline logger !opts inline_id str doc result
+ -- We take care to ensure that doc is used in only one branch, ensuring that
+ -- the simplifier can push its allocation into the branch. See Note [INLINE
+ -- conditional tracing utilities].
+ | enable = logTraceMsg logger str doc result
+ | otherwise = result
+ where
+ enable
+ | logHasDumpFlag logger Opt_D_dump_verbose_inlinings
+ = True
+ | Just prefix <- unfoldingReportPrefix opts
+ = prefix `isPrefixOf` occNameString (getOccName inline_id)
+ | otherwise
+ = False
+{-# INLINE traceInline #-} -- see Note [INLINE conditional tracing utilities]
+
+{- Note [Avoid inlining into deeply nested cases]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Consider a function f like this:
+
+ f arg1 arg2 =
+ case ...
+ ... -> g arg1
+ ... -> g arg2
+
+This function is small. So should be safe to inline.
+However sometimes this doesn't quite work out like that.
+Consider this code:
+
+f1 arg1 arg2 ... = ...
+ case _foo of
+ alt1 -> ... f2 arg1 ...
+ alt2 -> ... f2 arg2 ...
+
+f2 arg1 arg2 ... = ...
+ case _foo of
+ alt1 -> ... f3 arg1 ...
+ alt2 -> ... f3 arg2 ...
+
+f3 arg1 arg2 ... = ...
+
+... repeats up to n times. And then f1 is
+applied to some arguments:
+
+foo = ... f1 <interestingArgs> ...
+
+Initially f2..fn are not interesting to inline so we don't.
+However we see that f1 is applied to interesting args.
+So it's an obvious choice to inline those:
+
+foo =
+ ...
+ case _foo of
+ alt1 -> ... f2 <interestingArg> ...
+ alt2 -> ... f2 <interestingArg> ...
+
+As a result we go and inline f2 both mentions of f2 in turn are now applied to interesting
+arguments and f2 is small:
+
+foo =
+ ...
+ case _foo of
+ alt1 -> ... case _foo of
+ alt1 -> ... f3 <interestingArg> ...
+ alt2 -> ... f3 <interestingArg> ...
+
+ alt2 -> ... case _foo of
+ alt1 -> ... f3 <interestingArg> ...
+ alt2 -> ... f3 <interestingArg> ...
+
+The same thing happens for each binding up to f_n, duplicating the amount of inlining
+done in each step. Until at some point we are either done or run out of simplifier
+ticks/RAM. This pattern happened #18730.
+
+To combat this we introduce one more heuristic when weighing inlining decision.
+We keep track of a "case-depth". Which increases each time we look inside a case
+expression with more than one alternative.
+
+We then apply a penalty to inlinings based on the case-depth at which they would
+be inlined. Bounding the number of inlinings in such a scenario.
+
+The heuristic can be tuned in two ways:
+
+* We can ignore the first n levels of case nestings for inlining decisions using
+ -funfolding-case-threshold.
+* The penalty grows linear with the depth. It's computed as size*(depth-threshold)/scaling.
+ Scaling can be set with -funfolding-case-scaling.
+
+Some guidance on setting these defaults:
+
+* A low treshold (<= 2) is needed to prevent exponential cases from spiraling out of
+ control. We picked 2 for no particular reason.
+* Scaling the penalty by any more than 30 means the reproducer from
+ T18730 won't compile even with reasonably small values of n. Instead
+ it will run out of runs/ticks. This means to positively affect the reproducer
+ a scaling <= 30 is required.
+* A scaling of >= 15 still causes a few very large regressions on some nofib benchmarks.
+ (+80% for gc/fulsom, +90% for real/ben-raytrace, +20% for spectral/fibheaps)
+* A scaling of >= 25 showed no regressions on nofib. However it showed a number of
+ (small) regression for compiler perf benchmarks.
+
+The end result is that we are settling for a scaling of 30, with a threshold of 2.
+This gives us minimal compiler perf regressions. No nofib runtime regressions and
+will still avoid this pattern sometimes. This is a "safe" default, where we err on
+the side of compiler blowup instead of risking runtime regressions.
+
+For cases where the default falls short the flag can be changed to allow more/less inlining as
+needed on a per-module basis.
+
+-}
+
+tryUnfolding :: Logger -> UnfoldingOpts -> Int -> Id -> Bool -> [ArgSummary] -> CallCtxt
+ -> CoreExpr -> Bool -> Bool -> UnfoldingGuidance
+ -> Maybe CoreExpr
+tryUnfolding logger opts !case_depth id lone_variable
+ arg_infos cont_info unf_template
+ is_wf is_exp guidance
+ = case guidance of
+ UnfNever -> traceInline logger opts id str (text "UnfNever") Nothing
+
+ UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok }
+ | enough_args && (boring_ok || some_benefit || unfoldingVeryAggressive opts)
+ -- See Note [INLINE for small functions] (3)
+ -> traceInline logger opts id str (mk_doc some_benefit empty True) (Just unf_template)
+ | otherwise
+ -> traceInline logger opts id str (mk_doc some_benefit empty False) Nothing
+ where
+ some_benefit = calc_some_benefit uf_arity
+ enough_args = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0)
+
+ UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
+ | unfoldingVeryAggressive opts
+ -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
+ | is_wf && some_benefit && small_enough
+ -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
+ | otherwise
+ -> traceInline logger opts id str (mk_doc some_benefit extra_doc False) Nothing
+ where
+ !some_benefit = calc_some_benefit (length arg_discounts)
+ extra_doc = vcat [ text "case depth =" <+> int case_depth
+ , text "depth based penalty =" <+> int depth_penalty
+ , text "discounted size =" <+> int adjusted_size ]
+ -- See Note [Avoid inlining into deeply nested cases]
+ depth_treshold = unfoldingCaseThreshold opts
+ depth_scaling = unfoldingCaseScaling opts
+ depth_penalty | case_depth <= depth_treshold = 0
+ | otherwise = (size * (case_depth - depth_treshold)) `div` depth_scaling
+ !adjusted_size = size + depth_penalty - discount
+ !small_enough = adjusted_size <= unfoldingUseThreshold opts
+ discount = computeDiscount arg_discounts res_discount arg_infos cont_info
+
+ where
+ mk_doc some_benefit extra_doc yes_or_no
+ = vcat [ text "arg infos" <+> ppr arg_infos
+ , text "interesting continuation" <+> ppr cont_info
+ , text "some_benefit" <+> ppr some_benefit
+ , text "is exp:" <+> ppr is_exp
+ , text "is work-free:" <+> ppr is_wf
+ , text "guidance" <+> ppr guidance
+ , extra_doc
+ , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
+
+ ctx = log_default_dump_context (logFlags logger)
+ str = "Considering inlining: " ++ showSDocOneLine ctx (ppr id)
+ n_val_args = length arg_infos
+
+ -- some_benefit is used when the RHS is small enough
+ -- and the call has enough (or too many) value
+ -- arguments (ie n_val_args >= arity). But there must
+ -- be *something* interesting about some argument, or the
+ -- result context, to make it worth inlining
+ calc_some_benefit :: Arity -> Bool -- The Arity is the number of args
+ -- expected by the unfolding
+ calc_some_benefit uf_arity
+ | not saturated = interesting_args -- Under-saturated
+ -- Note [Unsaturated applications]
+ | otherwise = interesting_args -- Saturated or over-saturated
+ || interesting_call
+ where
+ saturated = n_val_args >= uf_arity
+ over_saturated = n_val_args > uf_arity
+ interesting_args = any nonTrivArg arg_infos
+ -- NB: (any nonTriv arg_infos) looks at the
+ -- over-saturated args too which is "wrong";
+ -- but if over-saturated we inline anyway.
+
+ interesting_call
+ | over_saturated
+ = True
+ | otherwise
+ = case cont_info of
+ CaseCtxt -> not (lone_variable && is_exp) -- Note [Lone variables]
+ ValAppCtxt -> True -- Note [Cast then apply]
+ RuleArgCtxt -> uf_arity > 0 -- See Note [RHS of lets]
+ DiscArgCtxt -> uf_arity > 0 -- Note [Inlining in ArgCtxt]
+ RhsCtxt NonRecursive
+ -> uf_arity > 0 -- See Note [RHS of lets]
+ _other -> False -- See Note [Nested functions]
+
+
+{- Note [RHS of lets]
+~~~~~~~~~~~~~~~~~~~~~
+When the call is the argument of a function with a RULE, or the RHS of a let,
+we are a little bit keener to inline (in tryUnfolding). For example
+ f y = (y,y,y)
+ g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
+We'd inline 'f' if the call was in a case context, and it kind-of-is,
+only we can't see it. Also
+ x = f v
+could be expensive whereas
+ x = case v of (a,b) -> a
+is patently cheap and may allow more eta expansion.
+
+So, in `interesting_call` in `tryUnfolding`, we treat the RHS of a
+/non-recursive/ let as not-totally-boring. A /recursive/ let isn't
+going be inlined so there is much less point. Hence the (only reason
+for the) RecFlag in RhsCtxt
+
+Note [Unsaturated applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When a call is not saturated, we *still* inline if one of the
+arguments has interesting structure. That's sometimes very important.
+A good example is the Ord instance for Bool in Base:
+
+ Rec {
+ $fOrdBool =GHC.Classes.D:Ord
+ @ Bool
+ ...
+ $cmin_ajX
+
+ $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
+ $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
+ }
+
+But the defn of GHC.Classes.$dmmin is:
+
+ $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
+ {- Arity: 3, HasNoCafRefs, Strictness: SLL,
+ Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
+ case @ a GHC.Classes.<= @ a $dOrd x y of wild {
+ GHC.Types.False -> y GHC.Types.True -> x }) -}
+
+We *really* want to inline $dmmin, even though it has arity 3, in
+order to unravel the recursion.
+
+
+Note [Things to watch]
+~~~~~~~~~~~~~~~~~~~~~~
+* { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
+ Assume x is exported, so not inlined unconditionally.
+ Then we want x to inline unconditionally; no reason for it
+ not to, and doing so avoids an indirection.
+
+* { x = I# 3; ....f x.... }
+ Make sure that x does not inline unconditionally!
+ Lest we get extra allocation.
+
+Note [Inlining an InlineRule]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+An InlineRules is used for
+ (a) programmer INLINE pragmas
+ (b) inlinings from worker/wrapper
+
+For (a) the RHS may be large, and our contract is that we *only* inline
+when the function is applied to all the arguments on the LHS of the
+source-code defn. (The uf_arity in the rule.)
+
+However for worker/wrapper it may be worth inlining even if the
+arity is not satisfied (as we do in the CoreUnfolding case) so we don't
+require saturation.
+
+Note [Nested functions]
+~~~~~~~~~~~~~~~~~~~~~~~
+At one time we treated a call of a non-top-level function as
+"interesting" (regardless of how boring the context) in the hope
+that inlining it would eliminate the binding, and its allocation.
+Specifically, in the default case of interesting_call we had
+ _other -> not is_top && uf_arity > 0
+
+But actually postInlineUnconditionally does some of this and overall
+it makes virtually no difference to nofib. So I simplified away this
+special case
+
+Note [Cast then apply]
+~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ myIndex = __inline_me ( (/\a. <blah>) |> co )
+ co :: (forall a. a -> a) ~ (forall a. T a)
+ ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
+
+We need to inline myIndex to unravel this; but the actual call (myIndex a) has
+no value arguments. The ValAppCtxt gives it enough incentive to inline.
+
+Note [Inlining in ArgCtxt]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+The condition (arity > 0) here is very important, because otherwise
+we end up inlining top-level stuff into useless places; eg
+ x = I# 3#
+ f = \y. g x
+This can make a very big difference: it adds 16% to nofib 'integer' allocs,
+and 20% to 'power'.
+
+At one stage I replaced this condition by 'True' (leading to the above
+slow-down). The motivation was test eyeball/inline1.hs; but that seems
+to work ok now.
+
+NOTE: arguably, we should inline in ArgCtxt only if the result of the
+call is at least CONLIKE. At least for the cases where we use ArgCtxt
+for the RHS of a 'let', we only profit from the inlining if we get a
+CONLIKE thing (modulo lets).
+
+Note [Lone variables] See also Note [Interaction of exprIsWorkFree and lone variables]
+~~~~~~~~~~~~~~~~~~~~~ which appears below
+The "lone-variable" case is important. I spent ages messing about
+with unsatisfactory variants, but this is nice. The idea is that if a
+variable appears all alone
+
+ as an arg of lazy fn, or rhs BoringCtxt
+ as scrutinee of a case CaseCtxt
+ as arg of a fn ArgCtxt
+AND
+ it is bound to a cheap expression
+
+then we should not inline it (unless there is some other reason,
+e.g. it is the sole occurrence). That is what is happening at
+the use of 'lone_variable' in 'interesting_call'.
+
+Why? At least in the case-scrutinee situation, turning
+ let x = (a,b) in case x of y -> ...
+into
+ let x = (a,b) in case (a,b) of y -> ...
+and thence to
+ let x = (a,b) in let y = (a,b) in ...
+is bad if the binding for x will remain.
+
+Another example: I discovered that strings
+were getting inlined straight back into applications of 'error'
+because the latter is strict.
+ s = "foo"
+ f = \x -> ...(error s)...
+
+Fundamentally such contexts should not encourage inlining because, provided
+the RHS is "expandable" (see Note [exprIsExpandable] in GHC.Core.Utils) the
+context can ``see'' the unfolding of the variable (e.g. case or a
+RULE) so there's no gain.
+
+However, watch out:
+
+ * Consider this:
+ foo = _inline_ (\n. [n])
+ bar = _inline_ (foo 20)
+ baz = \n. case bar of { (m:_) -> m + n }
+ Here we really want to inline 'bar' so that we can inline 'foo'
+ and the whole thing unravels as it should obviously do. This is
+ important: in the NDP project, 'bar' generates a closure data
+ structure rather than a list.
+
+ So the non-inlining of lone_variables should only apply if the
+ unfolding is regarded as cheap; because that is when exprIsConApp_maybe
+ looks through the unfolding. Hence the "&& is_wf" in the
+ InlineRule branch.
+
+ * Even a type application or coercion isn't a lone variable.
+ Consider
+ case $fMonadST @ RealWorld of { :DMonad a b c -> c }
+ We had better inline that sucker! The case won't see through it.
+
+ For now, I'm treating treating a variable applied to types
+ in a *lazy* context "lone". The motivating example was
+ f = /\a. \x. BIG
+ g = /\a. \y. h (f a)
+ There's no advantage in inlining f here, and perhaps
+ a significant disadvantage. Hence some_val_args in the Stop case
+
+Note [Interaction of exprIsWorkFree and lone variables]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The lone-variable test says "don't inline if a case expression
+scrutinises a lone variable whose unfolding is cheap". It's very
+important that, under these circumstances, exprIsConApp_maybe
+can spot a constructor application. So, for example, we don't
+consider
+ let x = e in (x,x)
+to be cheap, and that's good because exprIsConApp_maybe doesn't
+think that expression is a constructor application.
+
+In the 'not (lone_variable && is_wf)' test, I used to test is_value
+rather than is_wf, which was utterly wrong, because the above
+expression responds True to exprIsHNF, which is what sets is_value.
+
+This kind of thing can occur if you have
+
+ {-# INLINE foo #-}
+ foo = let x = e in (x,x)
+
+which Roman did.
+
+Note [Minimum value discount]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We always give *some* benefit to value arguments.
+A discount of 10 per arg because we replace the arguments
+and another of 10 if it's some non-trivial value.
+However when computing unfolding guidance we might have come to
+the conclusion that certain argument values deservere little or no
+discount. But we want to chance of inlining to only ever increase as
+more is known about the argument to keep things more predictable. So
+we always give at least 10 discount if the argument is a value. No matter
+what the actual value is.
+-}
+
+
+
+computeDiscount :: [ArgDiscount] -> Int -> [ArgSummary] -> CallCtxt
+ -> Int
+computeDiscount arg_discounts !res_discount arg_infos cont_info
+
+ = 10 -- Discount of 10 because the result replaces the call
+ -- so we count 10 for the function itself
+
+ + 10 * applied_arg_length
+ -- Discount of 10 for each arg supplied,
+ -- because the result replaces the call
+
+ + total_arg_discount + res_discount'
+ where
+ (applied_arg_length,total_arg_discount) = zipWithSumLength arg_discounts arg_infos
+ -- actual_arg_discounts = zipWith mk_arg_discount (arg_discounts) arg_infos
+ -- total_arg_discount = sum actual_arg_discounts
+
+ -- See Note [Minimum value discount]
+ mk_arg_discount :: ArgDiscount -> ArgSummary -> Int
+ mk_arg_discount _ TrivArg = 0
+ mk_arg_discount _ NonTrivArg = 10
+ mk_arg_discount NoSeqUse _ = 10
+ mk_arg_discount discount ValueArg = max 10 (ad_seq_discount discount)
+ mk_arg_discount (DiscSeq seq_discount con_discounts) (ConArg con args)
+
+ -- There is a discount specific to this constructor, use that.
+ | Just (ConDiscount _ branch_dc arg_discounts) <- lookupUFM con_discounts con
+ = max 10 $ max seq_discount (branch_dc + (sum $ zipWith mk_arg_discount arg_discounts args))
+
+ -- Otherwise give it the generic seq discount
+ | otherwise = max 10 seq_discount
+ mk_arg_discount (SomeArgUse d) ConArg{} = max 10 d
+ mk_arg_discount (FunDisc d _) (ConArg{})
+ -- How can this arise? With dictionary constructors for example.
+ -- We see C:Show foo bar and give it a FunDisc for being applied
+ -- like a function.
+ -- But when constructing ArgSummaries we treat it as constructor
+ -- since well it is one. This is harmless, but a bit odd for sure.
+ -- We just treat it like any other boring ValueArg here.
+ = -- pprTrace "Function discount for con arg" (ppr arg_infos)
+ max 10 d
+
+ -- zipWithSumLength xs ys = (length $ zip xs ys, sum $ zipWith _ xs ys)
+ zipWithSumLength :: [ArgDiscount] -> [ArgSummary] -> (Int, Int)
+ zipWithSumLength dcs args = go 0 0 dcs args
+ where
+ go !length !discount (dc:dcs) (arg:args) =
+ let arg_discount = mk_arg_discount dc arg
+ in go (1+length) (discount + arg_discount) dcs args
+ go l d [] _ = (l,d)
+ go l d _ [] = (l,d)
+
+ res_discount'
+ | LT <- arg_discounts `compareLength` arg_infos
+ = res_discount -- Over-saturated
+ | otherwise
+ = case cont_info of
+ BoringCtxt -> 0
+ CaseCtxt -> res_discount -- Presumably a constructor
+ ValAppCtxt -> res_discount -- Presumably a function
+ _ -> 40 `min` res_discount
+ -- ToDo: this 40 `min` res_discount doesn't seem right
+ -- for DiscArgCtxt it shouldn't matter because the function will
+ -- get the arg discount for any non-triv arg
+ -- for RuleArgCtxt we do want to be keener to inline; but not only
+ -- constructor results
+ -- for RhsCtxt I suppose that exposing a data con is good in general
+ -- And 40 seems very arbitrary
+ --
+ -- res_discount can be very large when a function returns
+ -- constructors; but we only want to invoke that large discount
+ -- when there's a case continuation.
+ -- Otherwise we, rather arbitrarily, threshold it. Yuk.
+ -- But we want to avoid inlining large functions that return
+ -- constructors into contexts that are simply "interesting"
+
+{- Note [Interesting arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+An argument is interesting if it deserves a discount for unfoldings
+with a discount in that argument position. The idea is to avoid
+unfolding a function that is applied only to variables that have no
+unfolding (i.e. they are probably lambda bound): f x y z There is
+little point in inlining f here.
+
+Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
+we must look through lets, eg (let x = e in C a b), because the let will
+float, exposing the value, if we inline. That makes it different to
+exprIsHNF.
+
+Before 2009 we said it was interesting if the argument had *any* structure
+at all; i.e. (hasSomeUnfolding v). But does too much inlining; see #3016.
+
+But we don't regard (f x y) as interesting, unless f is unsaturated.
+If it's saturated and f hasn't inlined, then it's probably not going
+to now!
+
+Note [Conlike is interesting]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ f d = ...((*) d x y)...
+ ... f (df d')...
+where df is con-like. Then we'd really like to inline 'f' so that the
+rule for (*) (df d) can fire. To do this
+ a) we give a discount for being an argument of a class-op (eg (*) d)
+ b) we say that a con-like argument (eg (df d)) is interesting
+-}
+
+interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
+-- See Note [Interesting arguments]
+interestingArg env e =
+ go env depth_limit 0 e
+ where
+ depth_limit = unfoldingMaxDiscountDepth . sm_uf_opts . seMode $ env
+
+ -- n is # value args to which the expression is applied
+ go :: SimplEnv -> Int -> Int -> CoreExpr -> ArgSummary
+ go !_env max_depth _n !_
+ | max_depth <= 0 = TrivArg
+ go env depth n (Var v)
+ = case substId env v of
+ DoneId v' -> go_var depth n v'
+ DoneEx e _ -> go (zapSubstEnv env) depth n e
+ ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) depth n e
+
+ go _ _depth _ (Lit l)
+ | isLitRubbish l = TrivArg -- Leads to unproductive inlining in WWRec, #20035
+ | otherwise = ValueArg
+ go _ _depth _ (Type _) = TrivArg
+ go _ _depth _ (Coercion _) = TrivArg
+ go env depth n (App fn (Type _)) = go env depth n fn
+ go env depth n e@(App _fn _arg)
+ | (fn,arg_summaries,_ticks) <- mapArgsTicksVal (go env (depth-1) 0) e
+ = let fn_summary = go env depth (n + length arg_summaries) fn
+ in case fn_summary of
+ ConArg con fn_args
+ | isClassTyCon (dataConTyCon con) -> ValueArg
+ | otherwise ->
+ assert (null fn_args) $
+ ConArg con arg_summaries
+ _ -> fn_summary
+
+ go env depth n (Tick _ a) = go env depth n a
+ go env depth n (Cast e _) = go env depth n e
+ go env depth n (Lam v e)
+ | isTyVar v = go env depth n e
+ | n>0 = NonTrivArg -- (\x.b) e is NonTriv
+ | otherwise = ValueArg
+ go _ _depth _ (Case {}) = NonTrivArg
+ go env depth n (Let b e) = case go env' depth n e of
+ ValueArg -> ValueArg
+ c at ConArg{} -> c
+ _ -> NonTrivArg
+ where
+ env' = env `addNewInScopeBndr` b
+
+ go_var depth n v
+ | Just rhs <- maybeUnfoldingTemplate (idUnfolding v)
+ , (f, arg_summaries, _ticks) <- mapArgsTicksVal (go env (depth-1) 0) rhs
+ , Var f' <- varView f
+ , Just con <- isDataConId_maybe f'
+ , not (isClassTyCon $ dataConTyCon con)
+ =
+ -- pprTrace "ConArg1" (ppr $ ConArg con $ map (go env 0) args) $
+ ConArg con arg_summaries
+
+ | Just con <- isDataConId_maybe v
+ = ConArg con []
+ | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that
+ -- data constructors here
+ | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
+ | n > 0 = NonTrivArg -- Saturated or unknown call
+ | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
+ -- See Note [Conlike is interesting]
+ | otherwise = TrivArg -- n==0, no useful unfolding
+ where
+ conlike_unfolding = isConLikeUnfolding (idUnfolding v)
+
+ varView (Cast e _) = e
+ varView (Tick _ e) = e
+ varView e = e
+
+-- | Like @collectArgs@, but maps over the arguments at the same time.
+-- and also looks through casts.
+mapArgsTicksVal :: (Expr b -> c) -> Expr b
+ -> (Expr b, [c], [CoreTickish])
+mapArgsTicksVal fm expr
+ = go expr [] []
+ where
+ go (App f a) as ts
+ | isValArg a = go f (fm a:as) ts
+ | otherwise = go f as ts
+ go (Tick t e) as ts = go e as (t:ts)
+ go (Cast e _) as ts = go e as ts
+ go e as ts = (e, as, reverse ts)
+
+
+contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
+-- Summarises value args, discards type args and coercions
+-- The returned continuation of the call is only used to
+-- answer questions like "are you interesting?"
+contArgs cont
+ | lone cont = (True, [], cont)
+ | otherwise = go [] cont
+ where
+ lone (ApplyToTy {}) = False -- See Note [Lone variables] in GHC.Core.Unfold
+ lone (ApplyToVal {}) = False -- NB: even a type application or cast
+ lone (CastIt {}) = False -- stops it being "lone"
+ lone _ = True
+
+ go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
+ = go (is_interesting arg se : args) k
+ go args (ApplyToTy { sc_cont = k }) = go args k
+ go args (CastIt _ k) = go args k
+ go args k = (False, reverse args, k)
+
+ -- is_interesting arg _se = arg
+ is_interesting arg se = interestingArg se arg
+ -- Do *not* use short-cutting substitution here
+ -- because we want to get as much IdInfo as possible
=====================================
compiler/GHC/Core/Opt/Simplify/Iteration.hs
=====================================
@@ -74,6 +74,7 @@ import GHC.Utils.Logger
import GHC.Utils.Misc
import Control.Monad
+import GHC.Core.Opt.Simplify.Inline
{-
The guts of the simplifier is in this module, but the driver loop for
@@ -3568,7 +3569,7 @@ mkDupableContWithDmds env _
do { let rhs_ty = contResultType cont
(m,arg_ty,_) = splitFunTy fun_ty
; arg_bndr <- newId (fsLit "arg") m arg_ty
- ; let env' = env `addNewInScopeIds` [arg_bndr]
+ ; let env' = env `addNewInScopeId` arg_bndr
; (floats, join_rhs) <- rebuildCall env' (addValArgTo fun (Var arg_bndr) fun_ty) cont
; mkDupableStrictBind env' arg_bndr (wrapFloats floats join_rhs) rhs_ty }
where
@@ -3670,7 +3671,7 @@ mkDupableStrictBind env arg_bndr join_rhs res_ty
; let arg_info = ArgInfo { ai_fun = join_bndr
, ai_rules = Nothing, ai_args = []
, ai_encl = False, ai_dmds = repeat topDmd
- , ai_discs = repeat 0 }
+ , ai_discs = repeat NoSeqUse }
; return ( addJoinFloats (emptyFloats env) $
unitJoinFloat $
NonRec join_bndr $
=====================================
compiler/GHC/Core/Opt/Simplify/Utils.hs
=====================================
@@ -24,7 +24,7 @@ module GHC.Core.Opt.Simplify.Utils (
SimplCont(..), DupFlag(..), StaticEnv,
isSimplified, contIsStop,
contIsDupable, contResultType, contHoleType, contHoleScaling,
- contIsTrivial, contArgs, contIsRhs,
+ contIsTrivial, contIsRhs,
countArgs,
mkBoringStop, mkRhsStop, mkLazyArgStop,
interestingCallContext,
@@ -44,7 +44,6 @@ module GHC.Core.Opt.Simplify.Utils (
import GHC.Prelude
import GHC.Core
-import GHC.Types.Literal ( isLitRubbish )
import GHC.Core.Opt.Simplify.Env
import GHC.Core.Opt.Stats ( Tick(..) )
import qualified GHC.Core.Subst
@@ -320,8 +319,9 @@ data ArgInfo
-- that the function diverges after being given
-- that number of args
- ai_discs :: [Int] -- Discounts for remaining value arguments (beyong ai_args)
- -- non-zero => be keener to inline
+ ai_discs :: [ArgDiscount]
+ -- Discounts for remaining value arguments (beyong ai_args)
+ -- non-NoSeqUse => be keener to inline
-- Always infinite
}
@@ -525,29 +525,6 @@ countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
countArgs (CastIt _ cont) = countArgs cont
countArgs _ = 0
-contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
--- Summarises value args, discards type args and coercions
--- The returned continuation of the call is only used to
--- answer questions like "are you interesting?"
-contArgs cont
- | lone cont = (True, [], cont)
- | otherwise = go [] cont
- where
- lone (ApplyToTy {}) = False -- See Note [Lone variables] in GHC.Core.Unfold
- lone (ApplyToVal {}) = False -- NB: even a type application or cast
- lone (CastIt {}) = False -- stops it being "lone"
- lone _ = True
-
- go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
- = go (is_interesting arg se : args) k
- go args (ApplyToTy { sc_cont = k }) = go args k
- go args (CastIt _ k) = go args k
- go args k = (False, reverse args, k)
-
- is_interesting arg se = interestingArg se arg
- -- Do *not* use short-cutting substitution here
- -- because we want to get as much IdInfo as possible
-
-- | Describes how the 'SimplCont' will evaluate the hole as a 'SubDemand'.
-- This can be more insightful than the limited syntactic context that
-- 'SimplCont' provides, because the 'Stop' constructor might carry a useful
@@ -597,14 +574,14 @@ mkArgInfo env fun rules n_val_args call_cont
= ArgInfo { ai_fun = fun
, ai_args = []
, ai_rules = fun_rules
- , ai_encl = interestingArgContext rules call_cont
+ , ai_encl = computeArgShapeContext rules call_cont
, ai_dmds = add_type_strictness (idType fun) arg_dmds
, ai_discs = arg_discounts }
where
fun_rules = mkFunRules rules
- vanilla_discounts, arg_discounts :: [Int]
- vanilla_discounts = repeat 0
+ vanilla_discounts, arg_discounts :: [ArgDiscount]
+ vanilla_discounts = repeat NoSeqUse
arg_discounts = case idUnfolding fun of
CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
-> discounts ++ vanilla_discounts
@@ -758,14 +735,14 @@ lazyArgContext :: ArgInfo -> CallCtxt
-- Use this for lazy arguments
lazyArgContext (ArgInfo { ai_encl = encl_rules, ai_discs = discs })
| encl_rules = RuleArgCtxt
- | disc:_ <- discs, disc > 0 = DiscArgCtxt -- Be keener here
+ | disc:_ <- discs, disc /= NoSeqUse = DiscArgCtxt -- Be keener here
| otherwise = BoringCtxt -- Nothing interesting
strictArgContext :: ArgInfo -> CallCtxt
strictArgContext (ArgInfo { ai_encl = encl_rules, ai_discs = discs })
-- Use this for strict arguments
| encl_rules = RuleArgCtxt
- | disc:_ <- discs, disc > 0 = DiscArgCtxt -- Be keener here
+ | disc:_ <- discs, disc /= NoSeqUse = DiscArgCtxt -- Be keener here
| otherwise = RhsCtxt NonRecursive
-- Why RhsCtxt? if we see f (g x), and f is strict, we
-- want to be a bit more eager to inline g, because it may
@@ -814,7 +791,7 @@ interestingCallContext env cont
-- a build it's *great* to inline it here. So we must ensure that
-- the context for (f x) is not totally uninteresting.
-interestingArgContext :: [CoreRule] -> SimplCont -> Bool
+computeArgShapeContext :: [CoreRule] -> SimplCont -> Bool
-- If the argument has form (f x y), where x,y are boring,
-- and f is marked INLINE, then we don't want to inline f.
-- But if the context of the argument is
@@ -823,7 +800,7 @@ interestingArgContext :: [CoreRule] -> SimplCont -> Bool
-- exposes a rule that might fire. Similarly, if the context is
-- h (g (f x x))
-- where h has rules, then we do want to inline f; hence the
--- call_cont argument to interestingArgContext
+-- call_cont argument to computeArgShapeContext
--
-- The ai-rules flag makes this happen; if it's
-- set, the inliner gets just enough keener to inline f
@@ -833,9 +810,9 @@ interestingArgContext :: [CoreRule] -> SimplCont -> Bool
-- regardless of how boring its context is; but that seems overkill
-- For example, it'd mean that wrapper functions were always inlined
--
--- The call_cont passed to interestingArgContext is the context of
+-- The call_cont passed to computeArgShapeContext is the context of
-- the call itself, e.g. g <hole> in the example above
-interestingArgContext rules call_cont
+computeArgShapeContext rules call_cont
= notNull rules || enclosing_fn_has_rules
where
enclosing_fn_has_rules = go call_cont
@@ -850,78 +827,7 @@ interestingArgContext rules call_cont
go (Stop _ _ _) = False
go (TickIt _ c) = go c
-{- Note [Interesting arguments]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-An argument is interesting if it deserves a discount for unfoldings
-with a discount in that argument position. The idea is to avoid
-unfolding a function that is applied only to variables that have no
-unfolding (i.e. they are probably lambda bound): f x y z There is
-little point in inlining f here.
-
-Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
-we must look through lets, eg (let x = e in C a b), because the let will
-float, exposing the value, if we inline. That makes it different to
-exprIsHNF.
-
-Before 2009 we said it was interesting if the argument had *any* structure
-at all; i.e. (hasSomeUnfolding v). But does too much inlining; see #3016.
-But we don't regard (f x y) as interesting, unless f is unsaturated.
-If it's saturated and f hasn't inlined, then it's probably not going
-to now!
-
-Note [Conlike is interesting]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider
- f d = ...((*) d x y)...
- ... f (df d')...
-where df is con-like. Then we'd really like to inline 'f' so that the
-rule for (*) (df d) can fire. To do this
- a) we give a discount for being an argument of a class-op (eg (*) d)
- b) we say that a con-like argument (eg (df d)) is interesting
--}
-
-interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
--- See Note [Interesting arguments]
-interestingArg env e = go env 0 e
- where
- -- n is # value args to which the expression is applied
- go env n (Var v)
- = case substId env v of
- DoneId v' -> go_var n v'
- DoneEx e _ -> go (zapSubstEnv env) n e
- ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e
-
- go _ _ (Lit l)
- | isLitRubbish l = TrivArg -- Leads to unproductive inlining in WWRec, #20035
- | otherwise = ValueArg
- go _ _ (Type _) = TrivArg
- go _ _ (Coercion _) = TrivArg
- go env n (App fn (Type _)) = go env n fn
- go env n (App fn _) = go env (n+1) fn
- go env n (Tick _ a) = go env n a
- go env n (Cast e _) = go env n e
- go env n (Lam v e)
- | isTyVar v = go env n e
- | n>0 = NonTrivArg -- (\x.b) e is NonTriv
- | otherwise = ValueArg
- go _ _ (Case {}) = NonTrivArg
- go env n (Let b e) = case go env' n e of
- ValueArg -> ValueArg
- _ -> NonTrivArg
- where
- env' = env `addNewInScopeIds` bindersOf b
-
- go_var n v
- | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that
- -- data constructors here
- | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
- | n > 0 = NonTrivArg -- Saturated or unknown call
- | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
- -- See Note [Conlike is interesting]
- | otherwise = TrivArg -- n==0, no useful unfolding
- where
- conlike_unfolding = isConLikeUnfolding (idUnfolding v)
{-
************************************************************************
=====================================
compiler/GHC/Core/Ppr.hs
=====================================
@@ -614,7 +614,8 @@ instance Outputable UnfoldingGuidance where
text "boring_ok=" <> ppr boring_ok)
ppr (UnfIfGoodArgs { ug_args = cs, ug_size = size, ug_res = discount })
= hsep [ text "IF_ARGS",
- brackets (hsep (map int cs)),
+ ppUnlessOption sdocSuppressUnfoldingGuidance
+ (brackets (hsep (map ppr cs))),
int size,
int discount ]
=====================================
compiler/GHC/Core/Seq.hs
=====================================
@@ -23,6 +23,8 @@ import GHC.Types.Var( varType, tyVarKind )
import GHC.Core.Type( seqType, isTyVar )
import GHC.Core.Coercion( seqCo )
import GHC.Types.Id( idInfo )
+import GHC.Utils.Misc (seqList)
+import GHC.Types.Unique.FM (seqEltsUFM)
-- | Evaluate all the fields of the 'IdInfo' that are generally demanded by the
-- compiler
@@ -112,5 +114,15 @@ seqUnfolding (CoreUnfolding { uf_tmpl = e, uf_is_top = top,
seqUnfolding _ = ()
seqGuidance :: UnfoldingGuidance -> ()
-seqGuidance (UnfIfGoodArgs ns n b) = n `seq` sum ns `seq` b `seq` ()
+seqGuidance (UnfIfGoodArgs ns n b) = n `seq` (seqList (map seqArgDiscount ns) ()) `seq` b `seq` ()
seqGuidance _ = ()
+
+-- seqTopDiscount :: (a, ArgDiscount) -> ()
+-- seqTopDiscount (!_,dc) = seqArgDiscount dc
+
+seqArgDiscount :: ArgDiscount -> ()
+seqArgDiscount (DiscSeq !_ sub_args) = seqEltsUFM seqConDiscount sub_args
+seqArgDiscount !_ = ()
+
+seqConDiscount :: ConDiscount -> ()
+seqConDiscount (ConDiscount !_ !_ sub_args) = seqList (map seqArgDiscount sub_args) ()
\ No newline at end of file
=====================================
compiler/GHC/Core/Unfold.hs
=====================================
@@ -26,20 +26,19 @@ module GHC.Core.Unfold (
updateFunAppDiscount, updateDictDiscount,
updateVeryAggressive, updateCaseScaling,
updateCaseThreshold, updateReportPrefix,
+ updateMaxDiscountDepth, updateMaxGuideDepth,
- ArgSummary(..),
+ ArgSummary(..), nonTrivArg,
couldBeSmallEnoughToInline, inlineBoringOk,
smallEnoughToInline,
- callSiteInline, CallCtxt(..),
+ CallCtxt(..),
calcUnfoldingGuidance
) where
import GHC.Prelude
-import GHC.Driver.Flags
-
import GHC.Core
import GHC.Core.Utils
import GHC.Types.Id
@@ -51,16 +50,19 @@ import GHC.Types.RepType ( isZeroBitTy )
import GHC.Types.Basic ( Arity, RecFlag(..) )
import GHC.Core.Type
import GHC.Builtin.Names
-import GHC.Data.Bag
-import GHC.Utils.Logger
import GHC.Utils.Misc
import GHC.Utils.Outputable
import GHC.Types.ForeignCall
-import GHC.Types.Name
import GHC.Types.Tickish
import qualified Data.ByteString as BS
-import Data.List (isPrefixOf)
+import GHC.Types.Unique.FM
+import Data.Maybe
+import GHC.Types.Var.Env
+import GHC.Utils.Panic.Plain (assert)
+import GHC.Data.Graph.UnVar
+import GHC.Utils.Trace (pprTraceDebug)
+
-- | Unfolding options
@@ -88,6 +90,11 @@ data UnfoldingOpts = UnfoldingOpts
, unfoldingReportPrefix :: !(Maybe String)
-- ^ Only report inlining decisions for names with this prefix
+
+ , unfoldingMaxDiscountDepth :: !Int
+ -- ^ When considering unfolding a definition look this deep
+ -- into the applied arguments and into nested argument use.
+
}
defaultUnfoldingOpts :: UnfoldingOpts
@@ -122,6 +129,8 @@ defaultUnfoldingOpts = UnfoldingOpts
-- Don't filter inlining decision reports
, unfoldingReportPrefix = Nothing
+
+ , unfoldingMaxDiscountDepth = 20
}
-- Helpers for "GHC.Driver.Session"
@@ -151,6 +160,10 @@ updateCaseScaling n opts = opts { unfoldingCaseScaling = n }
updateReportPrefix :: Maybe String -> UnfoldingOpts -> UnfoldingOpts
updateReportPrefix n opts = opts { unfoldingReportPrefix = n }
+updateMaxDiscountDepth :: Int -> UnfoldingOpts -> UnfoldingOpts
+updateMaxDiscountDepth n opts = opts { unfoldingMaxDiscountDepth = n }
+
+
{-
Note [Occurrence analysis of unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -258,17 +271,11 @@ calcUnfoldingGuidance opts is_top_bottoming expr
val_bndrs = filter isId bndrs
n_val_bndrs = length val_bndrs
- mk_discount :: Bag (Id,Int) -> Id -> Int
- mk_discount cbs bndr = foldl' combine 0 cbs
- where
- combine acc (bndr', disc)
- | bndr == bndr' = acc `plus_disc` disc
- | otherwise = acc
-
- plus_disc :: Int -> Int -> Int
- plus_disc | isFunTy (idType bndr) = max
- | otherwise = (+)
- -- See Note [Function and non-function discounts]
+ mk_discount :: VarEnv ArgDiscount -> Id -> (ArgDiscount)
+ mk_discount cbs bndr =
+ let !dc = lookupWithDefaultVarEnv cbs NoSeqUse bndr
+ -- !depth = discountDepth dc
+ in (dc)
{- Note [Inline unsafeCoerce]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -402,6 +409,39 @@ the constructor would lead to a separate allocation instead of just
sharing the wrapper closure.
The solution: don’t ignore coercion arguments after all.
+
+Note [Nested discounts]
+~~~~~~~~~~~~~~~~~~~~~~~
+If we have code like:
+
+f :: Either Bool Int -> Int
+f x = case x of
+ Left l -> case l of
+ True -> 1
+ False -> sum [1..200] :: Int
+ Right r -> case r of
+ r -> r + succ r
+
+What should the discount be? I argue it should be a discount *tree* like this:
+ [some_con:62,
+ Left:43
+ [some_con:30, False:10[], True:49[]],
+ Right:70
+ [some_con:20, GHC.Types.I#:11[disc:30]]
+ ]
+
+How can we compute this? While we traverse the expression if we see a case on a interesting
+binder (e.g. x):
+* We look at all the alternatives(Left,Right), treating the constructor bound vars(l,r) as
+ additional interesting binders for their rhss.
+* We compute a default discount `some_con` which assumes we will statically choose the largest
+ alternative if we inline.
+* We compute specific discounts for each constructor alternative by attributing any
+ discount on alternative binders(l,r) to the constructor this alternative is matching on.
+* The discounts for the whole case are then represented by the combination of the flat default discount
+ as well as a list of constructor specific discounts, which carry inside them the discounts for their
+ arguments.
+
-}
uncondInline :: CoreExpr -> Arity -> Int -> Bool
@@ -412,6 +452,7 @@ uncondInline rhs arity size
| arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1)
| otherwise = exprIsTrivial rhs -- See Note [INLINE for small functions] (4)
+-- {-# NOINLINE sizeExpr #-}
sizeExpr :: UnfoldingOpts
-> Int -- Bomb out if it gets bigger than this
-> [Id] -- Arguments; we're interested in which of these
@@ -423,81 +464,107 @@ sizeExpr :: UnfoldingOpts
-- Forcing bOMB_OUT_SIZE early prevents repeated
-- unboxing of the Int argument.
-sizeExpr opts !bOMB_OUT_SIZE top_args expr
- = size_up expr
+sizeExpr opts !bOMB_OUT_SIZE top_args' expr
+ = size_up depth_limit (mkUnVarSet top_args') expr
where
- size_up (Cast e _) = size_up e
- size_up (Tick _ e) = size_up e
- size_up (Type _) = sizeZero -- Types cost nothing
- size_up (Coercion _) = sizeZero
- size_up (Lit lit) = sizeN (litSize lit)
- size_up (Var f) | isZeroBitId f = sizeZero
- -- Make sure we get constructor discounts even
- -- on nullary constructors
- | otherwise = size_up_call f [] 0
-
- size_up (App fun arg)
- | isTyCoArg arg = size_up fun
- | otherwise = size_up arg `addSizeNSD`
- size_up_app fun [arg] (if isZeroBitExpr arg then 1 else 0)
-
- size_up (Lam b e)
- | isId b && not (isZeroBitId b) = lamScrutDiscount opts (size_up e `addSizeN` 10)
- | otherwise = size_up e
-
- size_up (Let (NonRec binder rhs) body)
- = size_up_rhs (binder, rhs) `addSizeNSD`
- size_up body `addSizeN`
+ depth_limit = unfoldingMaxDiscountDepth opts
+ size_up :: Int -> UnVarSet -> Expr Var -> ExprSize
+ size_up !depth !arg_comps (Cast e _) = size_up depth arg_comps e
+ size_up !depth arg_comps (Tick _ e) = size_up depth arg_comps e
+ size_up !_depth _arg_comps (Type _) = sizeZero -- Types cost nothing
+ size_up !_depth _arg_comps (Coercion _) = sizeZero
+ size_up !_depth _arg_comps (Lit lit) = sizeN (litSize lit)
+ size_up !_depth arg_comps (Var f) | isZeroBitId f = sizeZero
+ -- Make sure we get constructor discounts even
+ -- on nullary constructors
+ | otherwise = size_up_call arg_comps f [] 0
+
+ size_up !depth arg_comps (App fun arg)
+ | isTyCoArg arg = size_up depth arg_comps fun
+ | otherwise = size_up depth arg_comps arg `addSizeNSD`
+ size_up_app depth arg_comps fun [arg] (if isZeroBitExpr arg then 1 else 0)
+
+ size_up !depth arg_comps (Lam b e)
+ | isId b && not (isZeroBitId b) = lamScrutDiscount opts (size_up depth (delUnVarSet arg_comps b) e `addSizeN` 10)
+ | otherwise = size_up depth (delUnVarSet arg_comps b) e
+
+ size_up !depth arg_comps (Let (NonRec binder rhs) body)
+ = let arg_comps' = delUnVarSet arg_comps binder
+ in
+ size_up_rhs depth arg_comps' (binder, rhs) `addSizeNSD`
+ size_up depth arg_comps' body `addSizeN`
size_up_alloc binder
- size_up (Let (Rec pairs) body)
- = foldr (addSizeNSD . size_up_rhs)
- (size_up body `addSizeN` sum (map (size_up_alloc . fst) pairs))
+ size_up !depth arg_comps (Let (Rec pairs) body)
+ = let lhs_bnds = map fst pairs
+ arg_comps' = delUnVarSetList arg_comps lhs_bnds
+ in
+ foldr (addSizeNSD . (size_up_rhs depth arg_comps'))
+ (size_up depth arg_comps' body `addSizeN` sum (map (size_up_alloc . fst) pairs))
pairs
- size_up (Case e _ _ alts)
+ size_up !depth arg_comps (Case e _ _ alts)
| null alts
- = size_up e -- case e of {} never returns, so take size of scrutinee
+ = size_up depth arg_comps e -- case e of {} never returns, so take size of scrutinee
- size_up (Case e _ _ alts)
+ size_up !depth arg_comps (Case e _ _ alts)
-- Now alts is non-empty
- | Just v <- is_top_arg e -- We are scrutinising an argument variable
+ -- We are scrutinising an argument variable or a subcomponent thereof.
+ | Just v <- is_top_arg e
= let
- alt_sizes = map size_up_alt alts
-
- -- alts_size tries to compute a good discount for
- -- the case when we are scrutinising an argument variable
+ -- Compute size of alternatives
+ alt_sizes = map (size_up_alt depth (Just v) arg_comps) alts
+
+ -- Apply a discount for a given constructor that brings the size down to just
+ -- the size of the alternative.
+ alt_size_discount tot_size (Alt (DataAlt con) alt_bndrs _rhs) (SizeIs alt_size _ _) =
+ let trim_discount = max 10 $ tot_size - alt_size
+ in Just (unitUFM con (ConDiscount con trim_discount (map (const NoSeqUse) alt_bndrs)))
+ alt_size_discount _tot_size _ _alt_size = Nothing
+
+ -- Add up discounts from the alternatives
+ added_alt_sizes = (foldr1 addAltSize alt_sizes)
+ -- Compute size of the largest rhs
+ largest_alt_size = (foldr (maxSize bOMB_OUT_SIZE) 0 alt_sizes)
+
+ -- alts_size tries to compute a good discount for
+ -- the case when we are scrutinising an argument variable or subcomponent thereof
alts_size (SizeIs tot tot_disc tot_scrut)
- -- Size of all alternatives
- (SizeIs max _ _)
- -- Size of biggest alternative
- = SizeIs tot (unitBag (v, 20 + tot - max)
- `unionBags` tot_disc) tot_scrut
- -- If the variable is known, we produce a
- -- discount that will take us back to 'max',
- -- the size of the largest alternative The
- -- 1+ is a little discount for reduced
- -- allocation in the caller
+ largest_alt_size
+
+ = let default_alt_discount = 20 + tot - largest_alt_size
+ alt_discounts = unitUFM v $ DiscSeq default_alt_discount $ plusUFMList $ catMaybes $ zipWith (alt_size_discount tot) alts alt_sizes
+ in
+ SizeIs tot
+ (tot_disc
+ `plusDiscountEnv` (alt_discounts))
+ tot_scrut
+ -- If the variable is known but we don't have a
+ -- specific constructor discount for it, we produce a
+ -- discount that will take us back to 'largest_alt_size',
+ -- the size of the largest alternative.
--
-- Notice though, that we return tot_disc,
-- the total discount from all branches. I
-- think that's right.
- alts_size tot_size _ = tot_size
+ alts_size tot_size _max_alt_size = tot_size -- Too Big
in
- alts_size (foldr1 addAltSize alt_sizes) -- alts is non-empty
- (foldr1 maxSize alt_sizes)
+ -- Why foldr1? We might get TooBig already after the first few alternatives
+ -- in which case we don't have to look at the remaining ones.
+ alts_size added_alt_sizes -- alts is non-empty
+ largest_alt_size
-- Good to inline if an arg is scrutinised, because
-- that may eliminate allocation in the caller
-- And it eliminates the case itself
where
- is_top_arg (Var v) | v `elem` top_args = Just v
+ is_top_arg (Var v) | v `elemUnVarSet` arg_comps = Just v
is_top_arg (Cast e _) = is_top_arg e
is_top_arg _ = Nothing
- size_up (Case e _ _ alts) = size_up e `addSizeNSD`
- foldr (addAltSize . size_up_alt) case_size alts
+ size_up !depth arg_comps (Case e _ _ alts) = size_up depth arg_comps e `addSizeNSD`
+ foldr (addAltSize . (size_up_alt depth Nothing arg_comps) ) case_size alts
where
case_size
| is_inline_scrut e, lengthAtMost alts 1 = sizeN (-10)
@@ -534,42 +601,54 @@ sizeExpr opts !bOMB_OUT_SIZE top_args expr
| otherwise
= False
- size_up_rhs (bndr, rhs)
+ size_up_rhs !depth !arg_comps (bndr, rhs)
| Just join_arity <- isJoinId_maybe bndr
-- Skip arguments to join point
- , (_bndrs, body) <- collectNBinders join_arity rhs
- = size_up body
+ , (bndrs, body) <- collectNBinders join_arity rhs
+ = size_up depth (delUnVarSetList arg_comps bndrs) body
| otherwise
- = size_up rhs
+ = size_up depth arg_comps rhs
------------
-- size_up_app is used when there's ONE OR MORE value args
- size_up_app (App fun arg) args voids
- | isTyCoArg arg = size_up_app fun args voids
- | isZeroBitExpr arg = size_up_app fun (arg:args) (voids + 1)
- | otherwise = size_up arg `addSizeNSD`
- size_up_app fun (arg:args) voids
- size_up_app (Var fun) args voids = size_up_call fun args voids
- size_up_app (Tick _ expr) args voids = size_up_app expr args voids
- size_up_app (Cast expr _) args voids = size_up_app expr args voids
- size_up_app other args voids = size_up other `addSizeN`
+ size_up_app depth !arg_comps (App fun arg) args voids
+ | isTyCoArg arg = size_up_app depth arg_comps fun args voids
+ | isZeroBitExpr arg = size_up_app depth arg_comps fun (arg:args) (voids + 1)
+ | otherwise = size_up depth arg_comps arg `addSizeNSD`
+ size_up_app depth arg_comps fun (arg:args) voids
+ size_up_app _depth arg_comps (Var fun) args voids = size_up_call arg_comps fun args voids
+ size_up_app depth arg_comps (Tick _ expr) args voids = size_up_app depth arg_comps expr args voids
+ size_up_app depth arg_comps (Cast expr _) args voids = size_up_app depth arg_comps expr args voids
+ size_up_app depth arg_comps other args voids = size_up depth arg_comps other `addSizeN`
callSize (length args) voids
-- if the lhs is not an App or a Var, or an invisible thing like a
-- Tick or Cast, then we should charge for a complete call plus the
-- size of the lhs itself.
------------
- size_up_call :: Id -> [CoreExpr] -> Int -> ExprSize
- size_up_call fun val_args voids
+ size_up_call :: UnVarSet -> Id -> [CoreExpr] -> Int -> ExprSize
+ size_up_call !arg_comps fun val_args voids
= case idDetails fun of
FCallId _ -> sizeN (callSize (length val_args) voids)
DataConWorkId dc -> conSize dc (length val_args)
PrimOpId op _ -> primOpSize op (length val_args)
- ClassOpId _ -> classOpSize opts top_args val_args
- _ -> funSize opts top_args fun (length val_args) voids
+ ClassOpId _ -> classOpSize opts arg_comps val_args
+ _ -> funSize opts arg_comps fun (length val_args) voids
------------
- size_up_alt (Alt _con _bndrs rhs) = size_up rhs `addSizeN` 10
+ -- Take into acount the binders of scrutinized argument binders
+ -- But not too deeply! Hence we check if we exhausted depth.
+ -- If so we simply ingore the case binders.
+ size_up_alt depth m_top_arg !arg_comps (Alt alt_con bndrs rhs)
+ | Just top_arg <- m_top_arg
+ , depth > 0
+ , DataAlt con <- alt_con
+ =
+ let alt_size = size_up depth (extendUnVarSetList bndrs arg_comps) rhs `addSizeN` 10
+ -- let alt_size = size_up (arg_comps) rhs `addSizeN` 10
+
+ in asExprSize top_arg alt_size con bndrs
+ size_up_alt depth _ arg_comps (Alt _con bndrs rhs) = size_up depth (delUnVarSetList arg_comps bndrs) rhs `addSizeN` 10
-- Don't charge for args, so that wrappers look cheap
-- (See comments about wrappers with Case)
--
@@ -590,6 +669,7 @@ sizeExpr opts !bOMB_OUT_SIZE top_args expr
------------
-- These addSize things have to be here because
-- I don't want to give them bOMB_OUT_SIZE as an argument
+ addSizeN :: ExprSize -> Int -> ExprSize
addSizeN TooBig _ = TooBig
addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n + m) xs d
@@ -598,7 +678,7 @@ sizeExpr opts !bOMB_OUT_SIZE top_args expr
addAltSize _ TooBig = TooBig
addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
= mkSizeIs bOMB_OUT_SIZE (n1 + n2)
- (xs `unionBags` ys)
+ (xs `plusDiscountEnv` ys)
(d1 + d2) -- Note [addAltSize result discounts]
-- This variant ignores the result discount from its LEFT argument
@@ -607,7 +687,7 @@ sizeExpr opts !bOMB_OUT_SIZE top_args expr
addSizeNSD _ TooBig = TooBig
addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
= mkSizeIs bOMB_OUT_SIZE (n1 + n2)
- (xs `unionBags` ys)
+ (xs `plusDiscountEnv` ys)
d2 -- Ignore d1
-- don't count expressions such as State# RealWorld
@@ -631,21 +711,23 @@ litSize _other = 0 -- Must match size of nullary constructors
-- Key point: if x |-> 4, then x must inline unconditionally
-- (eg via case binding)
-classOpSize :: UnfoldingOpts -> [Id] -> [CoreExpr] -> ExprSize
+classOpSize :: UnfoldingOpts -> UnVarSet -> [CoreExpr] -> ExprSize
-- See Note [Conlike is interesting]
classOpSize _ _ []
= sizeZero
-classOpSize opts top_args (arg1 : other_args)
- = SizeIs size arg_discount 0
+classOpSize opts !top_args (arg1 : other_args)
+ = SizeIs size (arg_discount arg1) 0
where
size = 20 + (10 * length other_args)
-- If the class op is scrutinising a lambda bound dictionary then
-- give it a discount, to encourage the inlining of this function
-- The actual discount is rather arbitrarily chosen
- arg_discount = case arg1 of
- Var dict | dict `elem` top_args
- -> unitBag (dict, unfoldingDictDiscount opts)
- _other -> emptyBag
+ arg_discount dict_arg = case dict_arg of
+ Var dict | dict `elemUnVarSet` top_args
+ -> unitUFM dict (classOpArgDiscount $ unfoldingDictDiscount opts)
+ Tick _ e -> arg_discount e
+ Cast e _ -> arg_discount e
+ _other -> mempty
-- | The size of a function call
callSize
@@ -668,10 +750,10 @@ jumpSize n_val_args voids = 2 * (1 + n_val_args - voids)
-- spectral/puzzle. TODO Perhaps adjusting the default threshold would be a
-- better solution?
-funSize :: UnfoldingOpts -> [Id] -> Id -> Int -> Int -> ExprSize
+funSize :: UnfoldingOpts -> UnVarSet -> Id -> Int -> Int -> ExprSize
-- Size for functions that are not constructors or primops
-- Note [Function applications]
-funSize opts top_args fun n_val_args voids
+funSize opts !top_args fun n_val_args voids
| fun `hasKey` buildIdKey = buildSize
| fun `hasKey` augmentIdKey = augmentSize
| otherwise = SizeIs size arg_discount res_discount
@@ -685,9 +767,10 @@ funSize opts top_args fun n_val_args voids
-- DISCOUNTS
-- See Note [Function and non-function discounts]
- arg_discount | some_val_args && fun `elem` top_args
- = unitBag (fun, unfoldingFunAppDiscount opts)
- | otherwise = emptyBag
+ arg_discount | some_val_args && fun `elemUnVarSet` top_args
+ = -- pprTrace "mkFunSize" (ppr fun) $
+ unitUFM fun (FunDisc (unfoldingFunAppDiscount opts) (idName fun))
+ | otherwise = mempty
-- If the function is an argument and is applied
-- to some values, give it an arg-discount
@@ -698,13 +781,13 @@ funSize opts top_args fun n_val_args voids
conSize :: DataCon -> Int -> ExprSize
conSize dc n_val_args
- | n_val_args == 0 = SizeIs 0 emptyBag 10 -- Like variables
+ | n_val_args == 0 = SizeIs 0 mempty 10 -- Like variables
-- See Note [Unboxed tuple size and result discount]
- | isUnboxedTupleDataCon dc = SizeIs 0 emptyBag 10
+ | isUnboxedTupleDataCon dc = SizeIs 0 mempty 10
-- See Note [Constructor size and result discount]
- | otherwise = SizeIs 10 emptyBag 10
+ | otherwise = SizeIs 10 mempty 10
{- Note [Constructor size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -808,7 +891,7 @@ primOpSize op n_val_args
buildSize :: ExprSize
-buildSize = SizeIs 0 emptyBag 40
+buildSize = SizeIs 0 mempty 40
-- We really want to inline applications of build
-- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
-- Indeed, we should add a result_discount because build is
@@ -817,7 +900,7 @@ buildSize = SizeIs 0 emptyBag 40
-- The "4" is rather arbitrary.
augmentSize :: ExprSize
-augmentSize = SizeIs 0 emptyBag 40
+augmentSize = SizeIs 0 mempty 40
-- Ditto (augment t (\cn -> e) ys) should cost only the cost of
-- e plus ys. The -2 accounts for the \cn
@@ -899,37 +982,86 @@ Code for manipulating sizes
data ExprSize
= TooBig
| SizeIs { _es_size_is :: {-# UNPACK #-} !Int -- ^ Size found
- , _es_args :: !(Bag (Id,Int))
+ , _es_args :: !(VarEnv ArgDiscount)
-- ^ Arguments cased herein, and discount for each such
, _es_discount :: {-# UNPACK #-} !Int
-- ^ Size to subtract if result is scrutinised by a case
-- expression
}
+plusDiscountEnv :: VarEnv ArgDiscount -> VarEnv ArgDiscount -> VarEnv ArgDiscount
+plusDiscountEnv el er = plusUFM_C combineArgDiscount el er
+
+-- TODO: Might be worth giving this a larger discount if the type class is known.
+-- So that `f @T $d x = opDoStuff @T $d x ` applied to `f @Bool $dC_$Bool` is likely
+-- to inline turning the unknown into a known call.
+classOpArgDiscount :: Int -> ArgDiscount
+classOpArgDiscount n = SomeArgUse n
+
+-- After computing the discounts for an alternatives rhs we transfer discounts from the
+-- alt binders to the constructor specific discount of the scrutinee for the given constructor.
+asExprSize :: Id -> ExprSize -> DataCon -> [Id] -> ExprSize
+asExprSize _ TooBig _ _ = TooBig
+asExprSize scrut (SizeIs n arg_discs s_d) con alt_bndrs =
+ let (alt_discount_bags, top_discounts) = partitionWithKeyUFM (\k _v -> k `elem` map getUnique alt_bndrs) arg_discs
+ alt_discount_map = alt_discount_bags
+ alt_bndr_uses = map (\bndr -> lookupWithDefaultVarEnv alt_discount_map NoSeqUse bndr ) alt_bndrs :: [ArgDiscount]
+
+ in SizeIs n (unitUFM scrut (mkConUse con alt_bndr_uses) `plusUFM` top_discounts) s_d
+
+
+mkConUse :: DataCon -> [ArgDiscount] -> ArgDiscount
+mkConUse con uses =
+ DiscSeq
+ 0
+ -- We apply a penalty of 1 per case alternative, so here we apply a discount of 1 per *eliminated*
+ -- case alternative. And then one more because we get rid of a conditional branch which is always good.
+ (unitUFM con (ConDiscount con (length uses) uses))
+
+combineArgDiscount :: ArgDiscount -> ArgDiscount -> ArgDiscount
+combineArgDiscount NoSeqUse u2 = u2
+combineArgDiscount u1 NoSeqUse = u1
+combineArgDiscount (SomeArgUse d1) (SomeArgUse d2) = SomeArgUse $! d1 + d2
+combineArgDiscount (SomeArgUse d1) (DiscSeq d2 m2) = DiscSeq (d1 + d2) m2
+combineArgDiscount (DiscSeq d1 m1) (SomeArgUse d2) = DiscSeq (d1 + d2) m1
+combineArgDiscount (DiscSeq d1 m1) (DiscSeq d2 m2) = DiscSeq (d1 + d2) (plusUFM_C combineMapEntry m1 m2)
+-- See Note [Function and non-function discounts] why we need this
+combineArgDiscount f1@(FunDisc d1 _f1) f2@(FunDisc d2 _f2) = if d1 > d2 then f1 else f2
+-- This can happen either through shadowing or with things like unsafeCoerce. A good idea to warn for debug builds but we don't want to panic here.
+combineArgDiscount f1@(FunDisc _d _n) u2 = pprTraceDebug "Variable seemingly discounted as both function and constructor" (ppr f1 $$ ppr u2) f1
+combineArgDiscount u1 f2@(FunDisc _d _n) = pprTraceDebug "Variable seemingly discounted as both function and constructor" (ppr u1 $$ ppr f2) f2
+
+combineMapEntry :: ConDiscount -> ConDiscount -> ConDiscount
+combineMapEntry (ConDiscount c1 dc1 u1) (ConDiscount c2 dc2 u2) =
+ assert(c1 == c2) $ ConDiscount c1 (dc1+dc2) (combinArgDiscountLists u1 u2)
+
+combinArgDiscountLists :: [ArgDiscount] -> [ArgDiscount] -> [ArgDiscount]
+combinArgDiscountLists uses1 uses2 = zipWith combineArgDiscount uses1 uses2
+
instance Outputable ExprSize where
ppr TooBig = text "TooBig"
- ppr (SizeIs a _ c) = brackets (int a <+> int c)
+ -- ppr (SizeIs a _ c) = brackets (int a <+> int c)
+ ppr (SizeIs a d c) = brackets (int a <+> int c <+> ppr d)
-- subtract the discount before deciding whether to bale out. eg. we
-- want to inline a large constructor application into a selector:
-- tup = (a_1, ..., a_99)
-- x = case tup of ...
--
-mkSizeIs :: Int -> Int -> Bag (Id, Int) -> Int -> ExprSize
+mkSizeIs :: Int -> Int -> VarEnv ArgDiscount -> Int -> ExprSize
mkSizeIs max n xs d | (n - d) > max = TooBig
| otherwise = SizeIs n xs d
-maxSize :: ExprSize -> ExprSize -> ExprSize
-maxSize TooBig _ = TooBig
-maxSize _ TooBig = TooBig
-maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 > n2 = s1
- | otherwise = s2
+maxSize :: Int -> ExprSize -> Int -> Int
+maxSize !max TooBig _n1 = max
+maxSize _max (SizeIs n1 _ _) n2 | n1 >= n2 = n1
+ | otherwise = n2
sizeZero :: ExprSize
sizeN :: Int -> ExprSize
-sizeZero = SizeIs 0 emptyBag 0
-sizeN n = SizeIs n emptyBag 0
+sizeZero = SizeIs 0 mempty 0
+sizeN n = SizeIs n mempty 0
{-
************************************************************************
@@ -990,15 +1122,21 @@ data ArgSummary = TrivArg -- Nothing interesting
| NonTrivArg -- Arg has structure
| ValueArg -- Arg is a con-app or PAP
-- ..or con-like. Note [Conlike is interesting]
+ | ConArg !DataCon
+ [ArgSummary] -- It's important that the sub-summaries are
+ -- computed lazily. This way they only get forced
+ -- if there is a interesting discount which matches
+ -- on them. Which improves performance quite a lot.
instance Outputable ArgSummary where
ppr TrivArg = text "TrivArg"
ppr NonTrivArg = text "NonTrivArg"
ppr ValueArg = text "ValueArg"
+ ppr (ConArg con _args) = text "ConArg:" <> ppr con -- <+> ppr args
-nonTriv :: ArgSummary -> Bool
-nonTriv TrivArg = False
-nonTriv _ = True
+nonTrivArg :: ArgSummary -> Bool
+nonTrivArg TrivArg = False
+nonTrivArg _ = True
data CallCtxt
= BoringCtxt
@@ -1021,474 +1159,3 @@ instance Outputable CallCtxt where
ppr DiscArgCtxt = text "DiscArgCtxt"
ppr RuleArgCtxt = text "RuleArgCtxt"
-callSiteInline :: Logger
- -> UnfoldingOpts
- -> Int -- Case depth
- -> Id -- The Id
- -> Bool -- True <=> unfolding is active
- -> Bool -- True if there are no arguments at all (incl type args)
- -> [ArgSummary] -- One for each value arg; True if it is interesting
- -> CallCtxt -- True <=> continuation is interesting
- -> Maybe CoreExpr -- Unfolding, if any
-callSiteInline logger opts !case_depth id active_unfolding lone_variable arg_infos cont_info
- = case idUnfolding id of
- -- idUnfolding checks for loop-breakers, returning NoUnfolding
- -- Things with an INLINE pragma may have an unfolding *and*
- -- be a loop breaker (maybe the knot is not yet untied)
- CoreUnfolding { uf_tmpl = unf_template
- , uf_is_work_free = is_wf
- , uf_guidance = guidance, uf_expandable = is_exp }
- | active_unfolding -> tryUnfolding logger opts case_depth id lone_variable
- arg_infos cont_info unf_template
- is_wf is_exp guidance
- | otherwise -> traceInline logger opts id "Inactive unfolding:" (ppr id) Nothing
- NoUnfolding -> Nothing
- BootUnfolding -> Nothing
- OtherCon {} -> Nothing
- DFunUnfolding {} -> Nothing -- Never unfold a DFun
-
--- | Report the inlining of an identifier's RHS to the user, if requested.
-traceInline :: Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a
-traceInline logger opts inline_id str doc result
- -- We take care to ensure that doc is used in only one branch, ensuring that
- -- the simplifier can push its allocation into the branch. See Note [INLINE
- -- conditional tracing utilities].
- | enable = logTraceMsg logger str doc result
- | otherwise = result
- where
- enable
- | logHasDumpFlag logger Opt_D_dump_verbose_inlinings
- = True
- | Just prefix <- unfoldingReportPrefix opts
- = prefix `isPrefixOf` occNameString (getOccName inline_id)
- | otherwise
- = False
-{-# INLINE traceInline #-} -- see Note [INLINE conditional tracing utilities]
-
-{- Note [Avoid inlining into deeply nested cases]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Consider a function f like this:
-
- f arg1 arg2 =
- case ...
- ... -> g arg1
- ... -> g arg2
-
-This function is small. So should be safe to inline.
-However sometimes this doesn't quite work out like that.
-Consider this code:
-
-f1 arg1 arg2 ... = ...
- case _foo of
- alt1 -> ... f2 arg1 ...
- alt2 -> ... f2 arg2 ...
-
-f2 arg1 arg2 ... = ...
- case _foo of
- alt1 -> ... f3 arg1 ...
- alt2 -> ... f3 arg2 ...
-
-f3 arg1 arg2 ... = ...
-
-... repeats up to n times. And then f1 is
-applied to some arguments:
-
-foo = ... f1 <interestingArgs> ...
-
-Initially f2..fn are not interesting to inline so we don't.
-However we see that f1 is applied to interesting args.
-So it's an obvious choice to inline those:
-
-foo =
- ...
- case _foo of
- alt1 -> ... f2 <interestingArg> ...
- alt2 -> ... f2 <interestingArg> ...
-
-As a result we go and inline f2 both mentions of f2 in turn are now applied to interesting
-arguments and f2 is small:
-
-foo =
- ...
- case _foo of
- alt1 -> ... case _foo of
- alt1 -> ... f3 <interestingArg> ...
- alt2 -> ... f3 <interestingArg> ...
-
- alt2 -> ... case _foo of
- alt1 -> ... f3 <interestingArg> ...
- alt2 -> ... f3 <interestingArg> ...
-
-The same thing happens for each binding up to f_n, duplicating the amount of inlining
-done in each step. Until at some point we are either done or run out of simplifier
-ticks/RAM. This pattern happened #18730.
-
-To combat this we introduce one more heuristic when weighing inlining decision.
-We keep track of a "case-depth". Which increases each time we look inside a case
-expression with more than one alternative.
-
-We then apply a penalty to inlinings based on the case-depth at which they would
-be inlined. Bounding the number of inlinings in such a scenario.
-
-The heuristic can be tuned in two ways:
-
-* We can ignore the first n levels of case nestings for inlining decisions using
- -funfolding-case-threshold.
-* The penalty grows linear with the depth. It's computed as size*(depth-threshold)/scaling.
- Scaling can be set with -funfolding-case-scaling.
-
-Some guidance on setting these defaults:
-
-* A low treshold (<= 2) is needed to prevent exponential cases from spiraling out of
- control. We picked 2 for no particular reason.
-* Scaling the penalty by any more than 30 means the reproducer from
- T18730 won't compile even with reasonably small values of n. Instead
- it will run out of runs/ticks. This means to positively affect the reproducer
- a scaling <= 30 is required.
-* A scaling of >= 15 still causes a few very large regressions on some nofib benchmarks.
- (+80% for gc/fulsom, +90% for real/ben-raytrace, +20% for spectral/fibheaps)
-* A scaling of >= 25 showed no regressions on nofib. However it showed a number of
- (small) regression for compiler perf benchmarks.
-
-The end result is that we are settling for a scaling of 30, with a threshold of 2.
-This gives us minimal compiler perf regressions. No nofib runtime regressions and
-will still avoid this pattern sometimes. This is a "safe" default, where we err on
-the side of compiler blowup instead of risking runtime regressions.
-
-For cases where the default falls short the flag can be changed to allow more/less inlining as
-needed on a per-module basis.
-
--}
-
-tryUnfolding :: Logger -> UnfoldingOpts -> Int -> Id -> Bool -> [ArgSummary] -> CallCtxt
- -> CoreExpr -> Bool -> Bool -> UnfoldingGuidance
- -> Maybe CoreExpr
-tryUnfolding logger opts !case_depth id lone_variable
- arg_infos cont_info unf_template
- is_wf is_exp guidance
- = case guidance of
- UnfNever -> traceInline logger opts id str (text "UnfNever") Nothing
-
- UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok }
- | enough_args && (boring_ok || some_benefit || unfoldingVeryAggressive opts)
- -- See Note [INLINE for small functions] (3)
- -> traceInline logger opts id str (mk_doc some_benefit empty True) (Just unf_template)
- | otherwise
- -> traceInline logger opts id str (mk_doc some_benefit empty False) Nothing
- where
- some_benefit = calc_some_benefit uf_arity
- enough_args = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0)
-
- UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
- | unfoldingVeryAggressive opts
- -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
- | is_wf && some_benefit && small_enough
- -> traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template)
- | otherwise
- -> traceInline logger opts id str (mk_doc some_benefit extra_doc False) Nothing
- where
- some_benefit = calc_some_benefit (length arg_discounts)
- extra_doc = vcat [ text "case depth =" <+> int case_depth
- , text "depth based penalty =" <+> int depth_penalty
- , text "discounted size =" <+> int adjusted_size ]
- -- See Note [Avoid inlining into deeply nested cases]
- depth_treshold = unfoldingCaseThreshold opts
- depth_scaling = unfoldingCaseScaling opts
- depth_penalty | case_depth <= depth_treshold = 0
- | otherwise = (size * (case_depth - depth_treshold)) `div` depth_scaling
- adjusted_size = size + depth_penalty - discount
- small_enough = adjusted_size <= unfoldingUseThreshold opts
- discount = computeDiscount arg_discounts res_discount arg_infos cont_info
-
- where
- mk_doc some_benefit extra_doc yes_or_no
- = vcat [ text "arg infos" <+> ppr arg_infos
- , text "interesting continuation" <+> ppr cont_info
- , text "some_benefit" <+> ppr some_benefit
- , text "is exp:" <+> ppr is_exp
- , text "is work-free:" <+> ppr is_wf
- , text "guidance" <+> ppr guidance
- , extra_doc
- , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
-
- ctx = log_default_dump_context (logFlags logger)
- str = "Considering inlining: " ++ showSDocOneLine ctx (ppr id)
- n_val_args = length arg_infos
-
- -- some_benefit is used when the RHS is small enough
- -- and the call has enough (or too many) value
- -- arguments (ie n_val_args >= arity). But there must
- -- be *something* interesting about some argument, or the
- -- result context, to make it worth inlining
- calc_some_benefit :: Arity -> Bool -- The Arity is the number of args
- -- expected by the unfolding
- calc_some_benefit uf_arity
- | not saturated = interesting_args -- Under-saturated
- -- Note [Unsaturated applications]
- | otherwise = interesting_args -- Saturated or over-saturated
- || interesting_call
- where
- saturated = n_val_args >= uf_arity
- over_saturated = n_val_args > uf_arity
- interesting_args = any nonTriv arg_infos
- -- NB: (any nonTriv arg_infos) looks at the
- -- over-saturated args too which is "wrong";
- -- but if over-saturated we inline anyway.
-
- interesting_call
- | over_saturated
- = True
- | otherwise
- = case cont_info of
- CaseCtxt -> not (lone_variable && is_exp) -- Note [Lone variables]
- ValAppCtxt -> True -- Note [Cast then apply]
- RuleArgCtxt -> uf_arity > 0 -- See Note [RHS of lets]
- DiscArgCtxt -> uf_arity > 0 -- Note [Inlining in ArgCtxt]
- RhsCtxt NonRecursive
- -> uf_arity > 0 -- See Note [RHS of lets]
- _other -> False -- See Note [Nested functions]
-
-
-{- Note [RHS of lets]
-~~~~~~~~~~~~~~~~~~~~~
-When the call is the argument of a function with a RULE, or the RHS of a let,
-we are a little bit keener to inline (in tryUnfolding). For example
- f y = (y,y,y)
- g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
-We'd inline 'f' if the call was in a case context, and it kind-of-is,
-only we can't see it. Also
- x = f v
-could be expensive whereas
- x = case v of (a,b) -> a
-is patently cheap and may allow more eta expansion.
-
-So, in `interesting_call` in `tryUnfolding`, we treat the RHS of a
-/non-recursive/ let as not-totally-boring. A /recursive/ let isn't
-going be inlined so there is much less point. Hence the (only reason
-for the) RecFlag in RhsCtxt
-
-Note [Unsaturated applications]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-When a call is not saturated, we *still* inline if one of the
-arguments has interesting structure. That's sometimes very important.
-A good example is the Ord instance for Bool in Base:
-
- Rec {
- $fOrdBool =GHC.Classes.D:Ord
- @ Bool
- ...
- $cmin_ajX
-
- $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
- $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
- }
-
-But the defn of GHC.Classes.$dmmin is:
-
- $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
- {- Arity: 3, HasNoCafRefs, Strictness: SLL,
- Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
- case @ a GHC.Classes.<= @ a $dOrd x y of wild {
- GHC.Types.False -> y GHC.Types.True -> x }) -}
-
-We *really* want to inline $dmmin, even though it has arity 3, in
-order to unravel the recursion.
-
-
-Note [Things to watch]
-~~~~~~~~~~~~~~~~~~~~~~
-* { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
- Assume x is exported, so not inlined unconditionally.
- Then we want x to inline unconditionally; no reason for it
- not to, and doing so avoids an indirection.
-
-* { x = I# 3; ....f x.... }
- Make sure that x does not inline unconditionally!
- Lest we get extra allocation.
-
-Note [Inlining an InlineRule]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-An InlineRules is used for
- (a) programmer INLINE pragmas
- (b) inlinings from worker/wrapper
-
-For (a) the RHS may be large, and our contract is that we *only* inline
-when the function is applied to all the arguments on the LHS of the
-source-code defn. (The uf_arity in the rule.)
-
-However for worker/wrapper it may be worth inlining even if the
-arity is not satisfied (as we do in the CoreUnfolding case) so we don't
-require saturation.
-
-Note [Nested functions]
-~~~~~~~~~~~~~~~~~~~~~~~
-At one time we treated a call of a non-top-level function as
-"interesting" (regardless of how boring the context) in the hope
-that inlining it would eliminate the binding, and its allocation.
-Specifically, in the default case of interesting_call we had
- _other -> not is_top && uf_arity > 0
-
-But actually postInlineUnconditionally does some of this and overall
-it makes virtually no difference to nofib. So I simplified away this
-special case
-
-Note [Cast then apply]
-~~~~~~~~~~~~~~~~~~~~~~
-Consider
- myIndex = __inline_me ( (/\a. <blah>) |> co )
- co :: (forall a. a -> a) ~ (forall a. T a)
- ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
-
-We need to inline myIndex to unravel this; but the actual call (myIndex a) has
-no value arguments. The ValAppCtxt gives it enough incentive to inline.
-
-Note [Inlining in ArgCtxt]
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-The condition (arity > 0) here is very important, because otherwise
-we end up inlining top-level stuff into useless places; eg
- x = I# 3#
- f = \y. g x
-This can make a very big difference: it adds 16% to nofib 'integer' allocs,
-and 20% to 'power'.
-
-At one stage I replaced this condition by 'True' (leading to the above
-slow-down). The motivation was test eyeball/inline1.hs; but that seems
-to work ok now.
-
-NOTE: arguably, we should inline in ArgCtxt only if the result of the
-call is at least CONLIKE. At least for the cases where we use ArgCtxt
-for the RHS of a 'let', we only profit from the inlining if we get a
-CONLIKE thing (modulo lets).
-
-Note [Lone variables] See also Note [Interaction of exprIsWorkFree and lone variables]
-~~~~~~~~~~~~~~~~~~~~~ which appears below
-The "lone-variable" case is important. I spent ages messing about
-with unsatisfactory variants, but this is nice. The idea is that if a
-variable appears all alone
-
- as an arg of lazy fn, or rhs BoringCtxt
- as scrutinee of a case CaseCtxt
- as arg of a fn ArgCtxt
-AND
- it is bound to a cheap expression
-
-then we should not inline it (unless there is some other reason,
-e.g. it is the sole occurrence). That is what is happening at
-the use of 'lone_variable' in 'interesting_call'.
-
-Why? At least in the case-scrutinee situation, turning
- let x = (a,b) in case x of y -> ...
-into
- let x = (a,b) in case (a,b) of y -> ...
-and thence to
- let x = (a,b) in let y = (a,b) in ...
-is bad if the binding for x will remain.
-
-Another example: I discovered that strings
-were getting inlined straight back into applications of 'error'
-because the latter is strict.
- s = "foo"
- f = \x -> ...(error s)...
-
-Fundamentally such contexts should not encourage inlining because, provided
-the RHS is "expandable" (see Note [exprIsExpandable] in GHC.Core.Utils) the
-context can ``see'' the unfolding of the variable (e.g. case or a
-RULE) so there's no gain.
-
-However, watch out:
-
- * Consider this:
- foo = _inline_ (\n. [n])
- bar = _inline_ (foo 20)
- baz = \n. case bar of { (m:_) -> m + n }
- Here we really want to inline 'bar' so that we can inline 'foo'
- and the whole thing unravels as it should obviously do. This is
- important: in the NDP project, 'bar' generates a closure data
- structure rather than a list.
-
- So the non-inlining of lone_variables should only apply if the
- unfolding is regarded as cheap; because that is when exprIsConApp_maybe
- looks through the unfolding. Hence the "&& is_wf" in the
- InlineRule branch.
-
- * Even a type application or coercion isn't a lone variable.
- Consider
- case $fMonadST @ RealWorld of { :DMonad a b c -> c }
- We had better inline that sucker! The case won't see through it.
-
- For now, I'm treating treating a variable applied to types
- in a *lazy* context "lone". The motivating example was
- f = /\a. \x. BIG
- g = /\a. \y. h (f a)
- There's no advantage in inlining f here, and perhaps
- a significant disadvantage. Hence some_val_args in the Stop case
-
-Note [Interaction of exprIsWorkFree and lone variables]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The lone-variable test says "don't inline if a case expression
-scrutinises a lone variable whose unfolding is cheap". It's very
-important that, under these circumstances, exprIsConApp_maybe
-can spot a constructor application. So, for example, we don't
-consider
- let x = e in (x,x)
-to be cheap, and that's good because exprIsConApp_maybe doesn't
-think that expression is a constructor application.
-
-In the 'not (lone_variable && is_wf)' test, I used to test is_value
-rather than is_wf, which was utterly wrong, because the above
-expression responds True to exprIsHNF, which is what sets is_value.
-
-This kind of thing can occur if you have
-
- {-# INLINE foo #-}
- foo = let x = e in (x,x)
-
-which Roman did.
-
-
--}
-
-computeDiscount :: [Int] -> Int -> [ArgSummary] -> CallCtxt
- -> Int
-computeDiscount arg_discounts res_discount arg_infos cont_info
-
- = 10 -- Discount of 10 because the result replaces the call
- -- so we count 10 for the function itself
-
- + 10 * length actual_arg_discounts
- -- Discount of 10 for each arg supplied,
- -- because the result replaces the call
-
- + total_arg_discount + res_discount'
- where
- actual_arg_discounts = zipWith mk_arg_discount arg_discounts arg_infos
- total_arg_discount = sum actual_arg_discounts
-
- mk_arg_discount _ TrivArg = 0
- mk_arg_discount _ NonTrivArg = 10
- mk_arg_discount discount ValueArg = discount
-
- res_discount'
- | LT <- arg_discounts `compareLength` arg_infos
- = res_discount -- Over-saturated
- | otherwise
- = case cont_info of
- BoringCtxt -> 0
- CaseCtxt -> res_discount -- Presumably a constructor
- ValAppCtxt -> res_discount -- Presumably a function
- _ -> 40 `min` res_discount
- -- ToDo: this 40 `min` res_discount doesn't seem right
- -- for DiscArgCtxt it shouldn't matter because the function will
- -- get the arg discount for any non-triv arg
- -- for RuleArgCtxt we do want to be keener to inline; but not only
- -- constructor results
- -- for RhsCtxt I suppose that exposing a data con is good in general
- -- And 40 seems very arbitrary
- --
- -- res_discount can be very large when a function returns
- -- constructors; but we only want to invoke that large discount
- -- when there's a case continuation.
- -- Otherwise we, rather arbitrarily, threshold it. Yuk.
- -- But we want to avoid inlining large functions that return
- -- constructors into contexts that are simply "interesting"
=====================================
compiler/GHC/Data/Graph/UnVar.hs
=====================================
@@ -14,6 +14,8 @@ It does not normalize the graphs. This means that g `unionUnVarGraph` g is
equal to g, but twice as expensive and large.
-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
module GHC.Data.Graph.UnVar
( UnVarSet
, emptyUnVarSet, mkUnVarSet, varEnvDom, unionUnVarSet, unionUnVarSets
@@ -45,8 +47,9 @@ import qualified Data.IntSet as S
-- Set of uniques, i.e. for adjancet nodes
newtype UnVarSet = UnVarSet (S.IntSet)
- deriving Eq
+ deriving (Eq,Semigroup,Monoid)
+{-# INLINE k #-}
k :: Var -> Int
k v = getKey (getUnique v)
@@ -64,7 +67,8 @@ delUnVarSet :: UnVarSet -> Var -> UnVarSet
delUnVarSet (UnVarSet s) v = UnVarSet $ k v `S.delete` s
delUnVarSetList :: UnVarSet -> [Var] -> UnVarSet
-delUnVarSetList s vs = s `minusUnVarSet` mkUnVarSet vs
+delUnVarSetList (UnVarSet s) vs =
+ UnVarSet $ foldl' (\s v -> S.delete (k v) s) s vs
minusUnVarSet :: UnVarSet -> UnVarSet -> UnVarSet
minusUnVarSet (UnVarSet s) (UnVarSet s') = UnVarSet $ s `S.difference` s'
@@ -73,7 +77,7 @@ sizeUnVarSet :: UnVarSet -> Int
sizeUnVarSet (UnVarSet s) = S.size s
mkUnVarSet :: [Var] -> UnVarSet
-mkUnVarSet vs = UnVarSet $ S.fromList $ map k vs
+mkUnVarSet vs = UnVarSet $ foldl' (\s v -> S.insert (k v) s) S.empty vs
varEnvDom :: VarEnv a -> UnVarSet
varEnvDom ae = UnVarSet $ ufmToSet_Directly ae
@@ -82,7 +86,8 @@ extendUnVarSet :: Var -> UnVarSet -> UnVarSet
extendUnVarSet v (UnVarSet s) = UnVarSet $ S.insert (k v) s
extendUnVarSetList :: [Var] -> UnVarSet -> UnVarSet
-extendUnVarSetList vs s = s `unionUnVarSet` mkUnVarSet vs
+extendUnVarSetList vs (UnVarSet s) =
+ UnVarSet $ foldl' (\s v -> S.insert (k v) s) s vs
unionUnVarSet :: UnVarSet -> UnVarSet -> UnVarSet
unionUnVarSet (UnVarSet set1) (UnVarSet set2) = UnVarSet (set1 `S.union` set2)
=====================================
compiler/GHC/Driver/Flags.hs
=====================================
@@ -415,6 +415,7 @@ data GeneralFlag
-- Suppress separate type signatures in core, but leave types on
-- lambda bound vars
| Opt_SuppressUnfoldings
+ | Opt_SuppressUnfoldingGuidance
-- Suppress the details of even stable unfoldings
| Opt_SuppressTypeSignatures
-- Suppress unique ids on variables.
=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -2825,6 +2825,8 @@ dynamic_flags_deps = [
(intSuffix (\n d -> d { unfoldingOpts = updateCaseThreshold n (unfoldingOpts d)}))
, make_ord_flag defFlag "funfolding-case-scaling"
(intSuffix (\n d -> d { unfoldingOpts = updateCaseScaling n (unfoldingOpts d)}))
+ , make_ord_flag defFlag "funfolding-discount-depth"
+ (intSuffix (\n d -> d { unfoldingOpts = updateMaxDiscountDepth n (unfoldingOpts d)}))
, make_dep_flag defFlag "funfolding-keeness-factor"
(floatSuffix (\_ d -> d))
@@ -3355,7 +3357,8 @@ dFlagsDeps = [
flagSpec "suppress-type-signatures" Opt_SuppressTypeSignatures,
flagSpec "suppress-uniques" Opt_SuppressUniques,
flagSpec "suppress-var-kinds" Opt_SuppressVarKinds,
- flagSpec "suppress-core-sizes" Opt_SuppressCoreSizes
+ flagSpec "suppress-core-sizes" Opt_SuppressCoreSizes,
+ flagSpec "suppress-guidance" Opt_SuppressUnfoldingGuidance
]
-- | These @-f\<blah\>@ flags can all be reversed with @-fno-\<blah\>@
@@ -5017,6 +5020,7 @@ initSDocContext dflags style = SDC
, sdocSuppressCoercions = gopt Opt_SuppressCoercions dflags
, sdocSuppressCoercionTypes = gopt Opt_SuppressCoercionTypes dflags
, sdocSuppressUnfoldings = gopt Opt_SuppressUnfoldings dflags
+ , sdocSuppressUnfoldingGuidance = gopt Opt_SuppressUnfoldingGuidance dflags
, sdocSuppressVarKinds = gopt Opt_SuppressVarKinds dflags
, sdocSuppressUniques = gopt Opt_SuppressUniques dflags
, sdocSuppressModulePrefixes = gopt Opt_SuppressModulePrefixes dflags
=====================================
compiler/GHC/Types/Unique/FM.hs
=====================================
@@ -79,12 +79,12 @@ module GHC.Types.Unique.FM (
ufmToSet_Directly,
nonDetUFMToList, ufmToIntMap, unsafeIntMapToUFM,
unsafeCastUFMKey,
- pprUniqFM, pprUFM, pprUFMWithKeys, pluralUFM
+ pprUniqFM, pprUFM, pprUFMWithKeys, pluralUFM,partitionWithKeyUFM
) where
import GHC.Prelude
-import GHC.Types.Unique ( Uniquable(..), Unique, getKey )
+import GHC.Types.Unique ( Uniquable(..), Unique, getKey, mkUniqueGrimily )
import GHC.Utils.Outputable
import GHC.Utils.Panic.Plain
import qualified Data.IntMap as M
@@ -361,6 +361,11 @@ partitionUFM p (UFM m) =
case M.partition p m of
(left, right) -> (UFM left, UFM right)
+partitionWithKeyUFM :: (Unique -> elt -> Bool) -> UniqFM key elt -> (UniqFM key elt, UniqFM key elt)
+partitionWithKeyUFM p (UFM m) =
+ case M.partitionWithKey (\k -> p (mkUniqueGrimily k)) m of
+ (left, right) -> (UFM left, UFM right)
+
sizeUFM :: UniqFM key elt -> Int
sizeUFM (UFM m) = M.size m
=====================================
compiler/GHC/Types/Var/Env.hs
=====================================
@@ -540,6 +540,7 @@ extendVarEnvList = addListToUFM
plusVarEnv_C = plusUFM_C
plusVarEnv_CD = plusUFM_CD
plusMaybeVarEnv_C = plusMaybeUFM_C
+{-# INLINE delVarEnvList #-}
delVarEnvList = delListFromUFM
delVarEnv = delFromUFM
minusVarEnv = minusUFM
=====================================
compiler/GHC/Utils/Outputable.hs
=====================================
@@ -382,7 +382,8 @@ data SDocContext = SDC
, sdocSuppressIdInfo :: !Bool
, sdocSuppressCoercions :: !Bool
, sdocSuppressCoercionTypes :: !Bool
- , sdocSuppressUnfoldings :: !Bool
+ , sdocSuppressUnfoldings :: !Bool -- Maybe it should be a cutoff for the depth instead?
+ , sdocSuppressUnfoldingGuidance :: !Bool
, sdocSuppressVarKinds :: !Bool
, sdocSuppressUniques :: !Bool
, sdocSuppressModulePrefixes :: !Bool
@@ -443,6 +444,7 @@ defaultSDocContext = SDC
, sdocSuppressCoercions = False
, sdocSuppressCoercionTypes = False
, sdocSuppressUnfoldings = False
+ , sdocSuppressUnfoldingGuidance = False
, sdocSuppressVarKinds = False
, sdocSuppressUniques = False
, sdocSuppressModulePrefixes = False
=====================================
compiler/ghc.cabal.in
=====================================
@@ -323,6 +323,7 @@ Library
GHC.Core.Opt.Simplify
GHC.Core.Opt.Simplify.Env
GHC.Core.Opt.Simplify.Iteration
+ GHC.Core.Opt.Simplify.Inline
GHC.Core.Opt.Simplify.Monad
GHC.Core.Opt.Simplify.Utils
GHC.Core.Opt.SpecConstr
=====================================
docs/users_guide/hints.rst
=====================================
@@ -404,6 +404,8 @@ decision about inlining a specific binding.
* :ghc-flag:`-funfolding-case-scaling=⟨n⟩`
* :ghc-flag:`-funfolding-dict-discount=⟨n⟩`
* :ghc-flag:`-funfolding-fun-discount=⟨n⟩`
+* :ghc-flag:`-funfolding-max-guide-depth=⟨n⟩`
+* :ghc-flag:`-funfolding-discount-depth=⟨n⟩`
Should the simplifier run out of ticks because of a inlining loop
users are encouraged to try decreasing :ghc-flag:`-funfolding-case-threshold=⟨n⟩`
=====================================
docs/users_guide/using-optimisation.rst
=====================================
@@ -1651,6 +1651,37 @@ by saying ``-fno-wombat``.
while still allowing GHC to compile modules containing such inlining loops.
+.. ghc-flag:: -funfolding-discount-depth=⟨n⟩
+ :shortdesc: *default: 20.* Don't look deeper than `n` levels into function argument use.
+ :type: dynamic
+ :category:
+
+ :default: 20
+
+ .. index::
+ single: inlining, controlling
+ single: unfolding, controlling
+
+ If we have a function application `f (Succ (Succ Zero))` with the function `f`::
+
+ f x =
+ case x of
+ Zero -> 0
+ Succ y -> case y of
+ Zero -> 1
+ Succ z -> case z of
+ Zero -> 2
+ _ -> error "Large"
+
+ Then GHC can consider the nested structure of the argument as well as how
+ deeply the function looks into the argument to make inlining decisions.
+
+ This allows us to properly estimate the result code size from applying arguments
+ with complex structure to functions taking these arguments appart.
+
+ However inspecting deeply nested arguments can be costly in terms of compile
+ time overhead. So we restrict these considerations to a certain depth.
+
.. ghc-flag:: -fworker-wrapper
:shortdesc: Enable the worker/wrapper transformation.
:type: dynamic
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/bac008ae520815bc7cabd42240ca78565d0a52e0...4cbe93cf04b788857a93676c780b40e48cdbb0dc
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/bac008ae520815bc7cabd42240ca78565d0a52e0...4cbe93cf04b788857a93676c780b40e48cdbb0dc
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221002/5a695bf6/attachment-0001.html>
More information about the ghc-commits
mailing list