[Git][ghc/ghc][wip/spj-unf-size] Take care with void args
Simon Peyton Jones (@simonpj)
gitlab at gitlab.haskell.org
Tue Oct 24 09:32:43 UTC 2023
Simon Peyton Jones pushed to branch wip/spj-unf-size at Glasgow Haskell Compiler / GHC
Commits:
abf63bf8 by Simon Peyton Jones at 2023-10-24T10:32:26+01:00
Take care with void args
- - - - -
2 changed files:
- compiler/GHC/Core/Opt/Simplify/Inline.hs
- compiler/GHC/Core/Unfold.hs
Changes:
=====================================
compiler/GHC/Core/Opt/Simplify/Inline.hs
=====================================
@@ -31,7 +31,6 @@ import GHC.Types.Name
import GHC.Types.Var.Env
import GHC.Utils.Logger
-import GHC.Utils.Misc
import GHC.Utils.Outputable
import GHC.Utils.Panic
@@ -221,14 +220,18 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
| otherwise
-> traceInline logger env fn str (mk_doc some_benefit extra_doc False) Nothing
where
- some_benefit = calc_some_benefit (length arg_bndrs)
+ n_bndrs = length arg_bndrs
+ some_benefit = calc_some_benefit n_bndrs
+ small_enough = adjusted_size `leqSize` unfoldingUseThreshold opts
+ adjusted_size = adjustSize adjust_size rhs_size
+ -------- Compute the size of the ExprTree in this context -----------
+ rhs_size = exprTreeSize context expr_tree
want_result
- | LT <- arg_bndrs `compareLength` arg_infos
- = True -- Over-saturated
- | otherwise = case cont_info of
- BoringCtxt -> False
- _ -> True
+ | n_bndrs < n_val_args = True -- Over-saturated
+ | otherwise = case cont_info of
+ BoringCtxt -> False
+ _ -> True
bound_env = mkVarEnv (arg_bndrs `zip` (arg_infos ++ repeat ArgNoInfo))
-- Crucial to include /all/ arg_bndrs, lest we treat
@@ -236,11 +239,8 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
context = IC { ic_bound = bound_env
, ic_free = getFreeSummary
, ic_want_res = want_result }
- size :: Size
- size = exprTreeSize context expr_tree
in_scope = seInScope env
-
getFreeSummary :: Id -> ArgSummary
-- Get the ArgSummary of a free variable
getFreeSummary x
@@ -254,28 +254,37 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
-> exprSummary env expr
_ -> ArgNoInfo
+ -------- adjust_size ----------------
+ adjust_size size = size - call_size + depth_penalty size
+
+ -- Subtract size of the call, because the result replaces the call
+ -- We count 10 for the function itself, 10 for each arg supplied,
+ call_size = 10 + 10*n_val_args
-- Adjust by the depth scaling
-- See Note [Avoid inlining into deeply nested cases]
depth_threshold = unfoldingCaseThreshold opts
depth_scaling = unfoldingCaseScaling opts
- add_depth_penalty size = size + (size * (case_depth - depth_threshold))
- `div` depth_scaling
- final_size | case_depth <= depth_threshold = size
- | otherwise = adjustSize add_depth_penalty size
-
- small_enough = final_size `leqSize` unfoldingUseThreshold opts
+ depth_penalty size
+ | case_depth <= depth_threshold = 0
+ | otherwise = (size * (case_depth - depth_threshold)) `div` depth_scaling
- extra_doc = vcat [ text "size =" <+> ppr size
+ extra_doc = vcat [ text "size =" <+> ppr rhs_size
, text "case depth =" <+> int case_depth
- , text "final_size =" <+> ppr final_size ]
+ , text "adjusted_size =" <+> ppr adjusted_size ]
+
where
(arg_infos, call_cont) = contArgs cont
- lone_variable = loneVariable cont
- cont_info = interestingCallContext env call_cont
- case_depth = seCaseDepth env
- opts = seUnfoldingOpts env
+ n_val_args = length arg_infos
+ lone_variable = loneVariable cont
+ cont_info = interestingCallContext env call_cont
+ case_depth = seCaseDepth env
+ opts = seUnfoldingOpts env
+ interesting_args = any hasArgInfo arg_infos
+ -- NB: (any hasArgInfo arg_infos) looks at the
+ -- over-saturated args too which is "wrong";
+ -- but if over-saturated we inline anyway.
-- Unpack the UnfoldingCache lazily because it may not be needed, and all
-- its fields are strict; so evaluating unf_cache at all forces all the
@@ -284,20 +293,6 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
-- See Note [UnfoldingCache] in GHC.Core
UnfoldingCache{ uf_is_work_free = is_wf, uf_expandable = is_exp } = unf_cache
- mk_doc some_benefit extra_doc yes_or_no
- = vcat [ text "arg infos" <+> ppr arg_infos
- , text "interesting continuation" <+> ppr cont_info
- , text "some_benefit" <+> ppr some_benefit
- , text "is exp:" <+> ppr is_exp
- , text "is work-free:" <+> ppr is_wf
- , text "guidance" <+> ppr guidance
- , extra_doc
- , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
-
- ctx = log_default_dump_context (logFlags logger)
- str = "Considering inlining: " ++ showSDocOneLine ctx (ppr fn)
- n_val_args = length arg_infos
-
-- some_benefit is used when the RHS is small enough
-- and the call has enough (or too many) value
-- arguments (ie n_val_args >= arity). But there must
@@ -313,10 +308,6 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
where
saturated = n_val_args >= uf_arity
over_saturated = n_val_args > uf_arity
- interesting_args = any hasArgInfo arg_infos
- -- NB: (any nonTriv arg_infos) looks at the
- -- over-saturated args too which is "wrong";
- -- but if over-saturated we inline anyway.
interesting_call
| over_saturated
@@ -331,6 +322,19 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
-> uf_arity > 0 -- See Note [RHS of lets]
_other -> False -- See Note [Nested functions]
+ mk_doc some_benefit extra_doc yes_or_no
+ = vcat [ text "arg infos" <+> ppr arg_infos
+ , text "interesting continuation" <+> ppr cont_info
+ , text "some_benefit" <+> ppr some_benefit
+ , text "is exp:" <+> ppr is_exp
+ , text "is work-free:" <+> ppr is_wf
+ , text "guidance" <+> ppr guidance
+ , extra_doc
+ , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
+
+ ctx = log_default_dump_context (logFlags logger)
+ str = "Considering inlining: " ++ showSDocOneLine ctx (ppr fn)
+
{- Note [RHS of lets]
~~~~~~~~~~~~~~~~~~~~~
When the call is the argument of a function with a RULE, or the RHS of a let,
=====================================
compiler/GHC/Core/Unfold.hs
=====================================
@@ -518,8 +518,8 @@ exprTree opts args expr
go _ (Lit lit) = exprTreeN (litSize lit)
go vs (Lam b e)
- | isId b, not (id_is_free b) = go vs' e `et_add` lamSize opts
- | otherwise = go vs' e
+ | isId b, not (isZeroBitId b) = go vs' e `et_add` lamSize opts
+ | otherwise = go vs' e
where
vs' = vs `add_lv` b
@@ -532,12 +532,10 @@ exprTree opts args expr
where
vs' = vs `add_lvs` map fst pairs
- go vs e@(App {}) = go_app vs e []
-
- go vs (Var f) | id_is_free f = exprTreeN 0
- -- Use calLSize to ensure we get constructor
+ go vs e@(App {}) = go_app vs e [] 0
+ go vs (Var f) = callTree opts vs f [] 0
+ -- Use callTree to ensure we get constructor
-- discounts even on nullary constructors
- | otherwise = callTree opts vs f []
go vs (Case e b _ alts) = go_case vs e b alts
@@ -561,17 +559,18 @@ exprTree opts args expr
-----------------------------
-- size_up_app is used when there's ONE OR MORE value args
- go_app :: ETVars -> CoreExpr -> [CoreExpr] -> ExprTree
+ go_app :: ETVars -> CoreExpr -> [CoreExpr] -> Int -> ExprTree
-- args are the value args
- go_app vs (App fun arg) args
- | isTypeArg arg = go_app vs fun args
- | otherwise = go vs arg `et_add`
- go_app vs fun (arg:args)
- go_app vs (Var fun) args = callTree opts vs fun args
- go_app vs (Tick _ expr) args = go_app vs expr args
- go_app vs (Cast expr _) args = go_app vs expr args
- go_app vs other args = vanillaCallSize (length args) `etAddN`
- go vs other
+ go_app vs (App fun arg) args voids
+ | isTypeArg arg = go_app vs fun args voids
+ | isZeroBitArg arg = go_app vs fun (arg:args) (voids+1)
+ | otherwise = go vs arg `et_add`
+ go_app vs fun (arg:args) voids
+ go_app vs (Var fun) args voids = callTree opts vs fun args voids
+ go_app vs (Tick _ expr) args voids = go_app vs expr args voids
+ go_app vs (Cast expr _) args voids = go_app vs expr args voids
+ go_app vs other args voids = vanillaCallSize (length args) voids `etAddN`
+ go vs other
-- if the lhs is not an App or a Var, or an invisible thing like a
-- Tick or Cast, then we should charge for a complete call plus the
-- size of the lhs itself.
@@ -667,23 +666,18 @@ recordCaseOf vs (Tick _ e) = recordCaseOf vs e
recordCaseOf vs (Cast e _) = recordCaseOf vs e
recordCaseOf _ _ = Nothing
-{-
-arg_is_free :: CoreExpr -> Bool
--- "free" means we don't charge for this
--- occurrence in a function application
-arg_is_free (Var id) = id_is_free id
-arg_is_free (Tick _ e) = arg_is_free e
-arg_is_free (Cast e _) = arg_is_free e
-arg_is_free (Type {}) = True
-arg_is_free (Coercion {}) = True
-arg_is_free _ = False
--}
+isZeroBitArg :: CoreExpr -> Bool
+-- We could take ticks and casts into account, but it makes little
+-- difference, and avoiding a recursive function here is good.
+isZeroBitArg (Var id) = isZeroBitId id
+isZeroBitArg _ = False
-id_is_free :: Id -> Bool
-id_is_free id = not (isJoinId id) && isZeroBitTy (idType id)
- -- Don't count expressions such as State# RealWorld
- -- exclude join points, because they can be rep-polymorphic
- -- and typePrimRep will crash
+isZeroBitId :: Id -> Bool
+-- Don't count expressions such as State# RealWorld
+isZeroBitId id = assertPpr (not (isJoinId id)) (ppr id) $
+ -- Exclude join points, because they can be rep-polymorphic
+ -- and typePrimRep will crash
+ isZeroBitTy (idType id)
-- | Finds a nominal size of a string literal.
@@ -699,52 +693,52 @@ litSize _other = 0 -- Must match size of nullary constructors
-- (eg via case binding)
----------------------------
-callTree :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> ExprTree
-callTree opts vs fun val_args
+callTree :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> Int -> ExprTree
+callTree opts vs fun val_args voids
= case idDetails fun of
- FCallId _ -> exprTreeN (vanillaCallSize n_val_args)
- JoinId {} -> exprTreeN (jumpSize n_val_args)
+ FCallId _ -> exprTreeN (vanillaCallSize n_val_args voids)
+ JoinId {} -> exprTreeN (jumpSize n_val_args voids)
PrimOpId op _ -> exprTreeN (primOpSize op n_val_args)
DataConWorkId dc -> conSize dc n_val_args
- ClassOpId {} -> classOpSize opts vs fun val_args
- _ -> funSize opts vs fun n_val_args
+ ClassOpId {} -> classOpSize opts vs fun val_args voids
+ _ -> funSize opts vs fun n_val_args voids
where
n_val_args = length val_args
-- | The size of a function call
-vanillaCallSize :: Int -> Int
-vanillaCallSize n_val_args = 10 * (1 + n_val_args)
+vanillaCallSize :: Int -> Int -> Int
+vanillaCallSize n_val_args voids = 10 * (1 + n_val_args - voids)
-- The 1+ is for the function itself
-- Add 1 for each non-trivial value arg
-- | The size of a jump to a join point
-jumpSize :: Int -> Int
-jumpSize n_val_args = 2 * (1 + n_val_args)
+jumpSize :: Int -> Int -> Int
+jumpSize n_val_args voids = 2 * (1 + n_val_args - voids)
-- A jump is 20% the size of a function call. Making jumps free reopens
-- bug #6048, but making them any more expensive loses a 21% improvement in
-- spectral/puzzle. TODO Perhaps adjusting the default threshold would be a
-- better solution?
-classOpSize :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> ExprTree
+classOpSize :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> Int -> ExprTree
-- See Note [Conlike is interesting]
-classOpSize _ _ _ []
+classOpSize _ _ _ [] _
= etZero
-classOpSize opts vs fn val_args
+classOpSize opts vs fn val_args voids
| arg1 : _ <- val_args
, Just dict <- recordCaseOf vs arg1
= warnPprTrace (not (isId dict)) "classOpSize" (ppr fn <+> ppr val_args) $
- vanillaCallSize (length val_args) `etAddN`
+ vanillaCallSize (length val_args) voids `etAddN`
etOneCase (ScrutOf dict (unfoldingDictDiscount opts))
-- If the class op is scrutinising a lambda bound dictionary then
-- give it a discount, to encourage the inlining of this function
-- The actual discount is rather arbitrarily chosen
| otherwise
- = exprTreeN (vanillaCallSize (length val_args))
+ = exprTreeN (vanillaCallSize (length val_args) voids)
-funSize :: UnfoldingOpts -> ETVars -> Id -> Int -> ExprTree
+funSize :: UnfoldingOpts -> ETVars -> Id -> Int -> Int -> ExprTree
-- Size for function calls that are not constructors or primops
-- Note [Function applications]
-funSize opts (avs,_) fun n_val_args
+funSize opts (avs,_) fun n_val_args voids
| fun `hasKey` buildIdKey = etZero -- Wwant to inline applications of build/augment
| fun `hasKey` augmentIdKey = etZero -- so we give size zero to the whole call
| otherwise = SizeIs { et_size = size
@@ -752,13 +746,14 @@ funSize opts (avs,_) fun n_val_args
, et_ret = res_discount }
where
size | n_val_args == 0 = 0
- | otherwise = vanillaCallSize n_val_args
+ | otherwise = vanillaCallSize n_val_args voids
-- Discount if this is an interesting variable, and is applied
- -- Discount is enough to make the application free (but not negative!)
+ -- If the function is an argument and is applied to some values,
+ -- give it a discount -- maybe we can apply that lambda.
-- See Note [Function and non-function discounts]
cases | n_val_args > 0, fun `elemVarSet` avs
- = unitBag (ScrutOf fun size)
+ = unitBag (ScrutOf fun (unfoldingFunAppDiscount opts))
| otherwise
= emptyBag
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/abf63bf8b0934dd400e8c9e202e877b956807dc6
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/abf63bf8b0934dd400e8c9e202e877b956807dc6
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231024/3d1ab905/attachment-0001.html>
More information about the ghc-commits
mailing list