[Git][ghc/ghc][wip/spj-unf-size] Take care with void args

Simon Peyton Jones (@simonpj) gitlab at gitlab.haskell.org
Tue Oct 24 09:32:43 UTC 2023



Simon Peyton Jones pushed to branch wip/spj-unf-size at Glasgow Haskell Compiler / GHC


Commits:
abf63bf8 by Simon Peyton Jones at 2023-10-24T10:32:26+01:00
Take care with void args

- - - - -


2 changed files:

- compiler/GHC/Core/Opt/Simplify/Inline.hs
- compiler/GHC/Core/Unfold.hs


Changes:

=====================================
compiler/GHC/Core/Opt/Simplify/Inline.hs
=====================================
@@ -31,7 +31,6 @@ import GHC.Types.Name
 import GHC.Types.Var.Env
 
 import GHC.Utils.Logger
-import GHC.Utils.Misc
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 
@@ -221,14 +220,18 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
         | otherwise
         -> traceInline logger env fn str (mk_doc some_benefit extra_doc False) Nothing
         where
-          some_benefit = calc_some_benefit (length arg_bndrs)
+          n_bndrs = length arg_bndrs
+          some_benefit  = calc_some_benefit n_bndrs
+          small_enough  = adjusted_size `leqSize` unfoldingUseThreshold opts
+          adjusted_size = adjustSize adjust_size rhs_size
 
+          --------  Compute the size of the ExprTree in this context -----------
+          rhs_size = exprTreeSize context expr_tree
           want_result
-             | LT <- arg_bndrs `compareLength` arg_infos
-                         = True  -- Over-saturated
-             | otherwise = case cont_info of
-                              BoringCtxt -> False
-                              _          -> True
+             | n_bndrs < n_val_args = True  -- Over-saturated
+             | otherwise            = case cont_info of
+                                        BoringCtxt -> False
+                                        _          -> True
 
           bound_env = mkVarEnv (arg_bndrs `zip` (arg_infos ++ repeat ArgNoInfo))
                       -- Crucial to include /all/ arg_bndrs, lest we treat
@@ -236,11 +239,8 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
           context = IC { ic_bound    = bound_env
                        , ic_free     = getFreeSummary
                        , ic_want_res = want_result }
-          size :: Size
-          size = exprTreeSize context expr_tree
 
           in_scope = seInScope env
-
           getFreeSummary :: Id -> ArgSummary
           -- Get the ArgSummary of a free variable
           getFreeSummary x
@@ -254,28 +254,37 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
                         -> exprSummary env expr
                 _ -> ArgNoInfo
 
+          -------- adjust_size ----------------
+          adjust_size size = size - call_size + depth_penalty size
+
+          -- Subtract size of the call, because the result replaces the call
+          -- We count 10 for the function itself, 10 for each arg supplied,
+          call_size = 10 + 10*n_val_args
 
           -- Adjust by the depth scaling
           -- See Note [Avoid inlining into deeply nested cases]
           depth_threshold = unfoldingCaseThreshold opts
           depth_scaling   = unfoldingCaseScaling opts
 
-          add_depth_penalty size = size + (size * (case_depth - depth_threshold))
-                                                  `div` depth_scaling
-          final_size | case_depth <= depth_threshold = size
-                     | otherwise = adjustSize add_depth_penalty size
-
-          small_enough = final_size `leqSize` unfoldingUseThreshold opts
+          depth_penalty size
+            | case_depth <= depth_threshold = 0
+            | otherwise = (size * (case_depth - depth_threshold)) `div` depth_scaling
 
-          extra_doc = vcat [ text "size =" <+> ppr size
+          extra_doc = vcat [ text "size =" <+> ppr rhs_size
                            , text "case depth =" <+> int case_depth
-                           , text "final_size =" <+> ppr final_size ]
+                           , text "adjusted_size =" <+> ppr adjusted_size ]
+
   where
     (arg_infos, call_cont) = contArgs cont
-    lone_variable = loneVariable cont
-    cont_info     = interestingCallContext env call_cont
-    case_depth    = seCaseDepth env
-    opts          = seUnfoldingOpts env
+    n_val_args       = length arg_infos
+    lone_variable    = loneVariable cont
+    cont_info        = interestingCallContext env call_cont
+    case_depth       = seCaseDepth env
+    opts             = seUnfoldingOpts env
+    interesting_args = any hasArgInfo arg_infos
+                -- NB: (any hasArgInfo arg_infos) looks at the
+                -- over-saturated args too which is "wrong";
+                -- but if over-saturated we inline anyway.
 
     -- Unpack the UnfoldingCache lazily because it may not be needed, and all
     -- its fields are strict; so evaluating unf_cache at all forces all the
@@ -284,20 +293,6 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
     -- See Note [UnfoldingCache] in GHC.Core
     UnfoldingCache{ uf_is_work_free = is_wf, uf_expandable = is_exp } = unf_cache
 
-    mk_doc some_benefit extra_doc yes_or_no
-      = vcat [ text "arg infos" <+> ppr arg_infos
-             , text "interesting continuation" <+> ppr cont_info
-             , text "some_benefit" <+> ppr some_benefit
-             , text "is exp:" <+> ppr is_exp
-             , text "is work-free:" <+> ppr is_wf
-             , text "guidance" <+> ppr guidance
-             , extra_doc
-             , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
-
-    ctx = log_default_dump_context (logFlags logger)
-    str = "Considering inlining: " ++ showSDocOneLine ctx (ppr fn)
-    n_val_args = length arg_infos
-
            -- some_benefit is used when the RHS is small enough
            -- and the call has enough (or too many) value
            -- arguments (ie n_val_args >= arity). But there must
@@ -313,10 +308,6 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
       where
         saturated      = n_val_args >= uf_arity
         over_saturated = n_val_args > uf_arity
-        interesting_args = any hasArgInfo arg_infos
-                -- NB: (any nonTriv arg_infos) looks at the
-                -- over-saturated args too which is "wrong";
-                -- but if over-saturated we inline anyway.
 
         interesting_call
           | over_saturated
@@ -331,6 +322,19 @@ tryUnfolding logger env fn cont unf_template unf_cache guidance
                           -> uf_arity > 0  -- See Note [RHS of lets]
               _other      -> False         -- See Note [Nested functions]
 
+    mk_doc some_benefit extra_doc yes_or_no
+      = vcat [ text "arg infos" <+> ppr arg_infos
+             , text "interesting continuation" <+> ppr cont_info
+             , text "some_benefit" <+> ppr some_benefit
+             , text "is exp:" <+> ppr is_exp
+             , text "is work-free:" <+> ppr is_wf
+             , text "guidance" <+> ppr guidance
+             , extra_doc
+             , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
+
+    ctx = log_default_dump_context (logFlags logger)
+    str = "Considering inlining: " ++ showSDocOneLine ctx (ppr fn)
+
 {- Note [RHS of lets]
 ~~~~~~~~~~~~~~~~~~~~~
 When the call is the argument of a function with a RULE, or the RHS of a let,


=====================================
compiler/GHC/Core/Unfold.hs
=====================================
@@ -518,8 +518,8 @@ exprTree opts args expr
     go _  (Lit lit)    = exprTreeN (litSize lit)
 
     go vs (Lam b e)
-      | isId b, not (id_is_free b) = go vs' e `et_add` lamSize opts
-      | otherwise                  = go vs' e
+      | isId b, not (isZeroBitId b) = go vs' e `et_add` lamSize opts
+      | otherwise                   = go vs' e
       where
         vs' = vs `add_lv` b
 
@@ -532,12 +532,10 @@ exprTree opts args expr
       where
         vs' = vs `add_lvs` map fst pairs
 
-    go vs e@(App {}) = go_app vs e []
-
-    go vs (Var f) | id_is_free f = exprTreeN 0
-                    -- Use calLSize to ensure we get constructor
+    go vs e@(App {}) = go_app vs e [] 0
+    go vs (Var f)    = callTree opts vs f [] 0
+                    -- Use callTree to ensure we get constructor
                     -- discounts even on nullary constructors
-                    | otherwise  = callTree opts vs f []
 
     go vs (Case e b _ alts) = go_case vs e b alts
 
@@ -561,17 +559,18 @@ exprTree opts args expr
 
     -----------------------------
     -- size_up_app is used when there's ONE OR MORE value args
-    go_app :: ETVars -> CoreExpr -> [CoreExpr] -> ExprTree
+    go_app :: ETVars -> CoreExpr -> [CoreExpr] -> Int -> ExprTree
                    -- args are the value args
-    go_app vs (App fun arg) args
-               | isTypeArg arg   = go_app vs fun args
-               | otherwise       = go vs arg `et_add`
-                                   go_app vs fun (arg:args)
-    go_app vs (Var fun)     args = callTree opts vs fun args
-    go_app vs (Tick _ expr) args = go_app vs expr args
-    go_app vs (Cast expr _) args = go_app vs expr args
-    go_app vs other         args = vanillaCallSize (length args) `etAddN`
-                                   go vs other
+    go_app vs (App fun arg) args voids
+                  | isTypeArg arg      = go_app vs fun args voids
+                  | isZeroBitArg arg   = go_app vs fun (arg:args) (voids+1)
+                  | otherwise          = go vs arg `et_add`
+                                         go_app vs fun (arg:args) voids
+    go_app vs (Var fun)     args voids = callTree opts vs fun args voids
+    go_app vs (Tick _ expr) args voids = go_app vs expr args voids
+    go_app vs (Cast expr _) args voids = go_app vs expr args voids
+    go_app vs other         args voids = vanillaCallSize (length args) voids `etAddN`
+                                         go vs other
        -- if the lhs is not an App or a Var, or an invisible thing like a
        -- Tick or Cast, then we should charge for a complete call plus the
        -- size of the lhs itself.
@@ -667,23 +666,18 @@ recordCaseOf vs (Tick _ e) = recordCaseOf vs e
 recordCaseOf vs (Cast e _) = recordCaseOf vs e
 recordCaseOf _     _       = Nothing
 
-{-
-arg_is_free :: CoreExpr -> Bool
--- "free" means we don't charge for this
--- occurrence in a function application
-arg_is_free (Var id)      = id_is_free id
-arg_is_free (Tick _ e)    = arg_is_free e
-arg_is_free (Cast e _)    = arg_is_free e
-arg_is_free (Type {})     = True
-arg_is_free (Coercion {}) = True
-arg_is_free _             = False
--}
+isZeroBitArg :: CoreExpr -> Bool
+-- We could take ticks and casts into account, but it makes little
+-- difference, and avoiding a recursive function here is good.
+isZeroBitArg (Var id) = isZeroBitId id
+isZeroBitArg _        = False
 
-id_is_free :: Id -> Bool
-id_is_free id = not (isJoinId id) && isZeroBitTy (idType id)
-   -- Don't count expressions such as State# RealWorld
-   -- exclude join points, because they can be rep-polymorphic
-   -- and typePrimRep will crash
+isZeroBitId :: Id -> Bool
+-- Don't count expressions such as State# RealWorld
+isZeroBitId id = assertPpr (not (isJoinId id)) (ppr id) $
+                   -- Exclude join points, because they can be rep-polymorphic
+                   -- and typePrimRep will crash
+                 isZeroBitTy (idType id)
 
 
 -- | Finds a nominal size of a string literal.
@@ -699,52 +693,52 @@ litSize _other = 0    -- Must match size of nullary constructors
                       --            (eg via case binding)
 
 ----------------------------
-callTree :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> ExprTree
-callTree opts vs fun val_args
+callTree :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> Int -> ExprTree
+callTree opts vs fun val_args voids
   = case idDetails fun of
-      FCallId _        -> exprTreeN (vanillaCallSize n_val_args)
-      JoinId {}        -> exprTreeN (jumpSize        n_val_args)
+      FCallId _        -> exprTreeN (vanillaCallSize n_val_args voids)
+      JoinId {}        -> exprTreeN (jumpSize        n_val_args voids)
       PrimOpId op _    -> exprTreeN (primOpSize op   n_val_args)
       DataConWorkId dc -> conSize dc n_val_args
-      ClassOpId {}     -> classOpSize opts vs fun val_args
-      _                -> funSize opts vs fun n_val_args
+      ClassOpId {}     -> classOpSize opts vs fun val_args voids
+      _                -> funSize opts vs fun n_val_args voids
   where
     n_val_args = length val_args
 
 -- | The size of a function call
-vanillaCallSize :: Int -> Int
-vanillaCallSize n_val_args = 10 * (1 + n_val_args)
+vanillaCallSize :: Int -> Int -> Int
+vanillaCallSize n_val_args voids = 10 * (1 + n_val_args - voids)
         -- The 1+ is for the function itself
         -- Add 1 for each non-trivial value arg
 
 -- | The size of a jump to a join point
-jumpSize :: Int -> Int
-jumpSize n_val_args = 2 * (1 + n_val_args)
+jumpSize :: Int -> Int -> Int
+jumpSize n_val_args voids = 2 * (1 + n_val_args - voids)
   -- A jump is 20% the size of a function call. Making jumps free reopens
   -- bug #6048, but making them any more expensive loses a 21% improvement in
   -- spectral/puzzle. TODO Perhaps adjusting the default threshold would be a
   -- better solution?
 
-classOpSize :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> ExprTree
+classOpSize :: UnfoldingOpts -> ETVars -> Id -> [CoreExpr] -> Int -> ExprTree
 -- See Note [Conlike is interesting]
-classOpSize _ _ _ []
+classOpSize _ _ _ [] _
   = etZero
-classOpSize opts vs fn val_args
+classOpSize opts vs fn val_args voids
   | arg1 : _ <- val_args
   , Just dict <- recordCaseOf vs arg1
   = warnPprTrace (not (isId dict)) "classOpSize" (ppr fn <+> ppr val_args) $
-    vanillaCallSize (length val_args) `etAddN`
+    vanillaCallSize (length val_args) voids `etAddN`
     etOneCase (ScrutOf dict (unfoldingDictDiscount opts))
            -- If the class op is scrutinising a lambda bound dictionary then
            -- give it a discount, to encourage the inlining of this function
            -- The actual discount is rather arbitrarily chosen
   | otherwise
-  = exprTreeN (vanillaCallSize (length val_args))
+  = exprTreeN (vanillaCallSize (length val_args) voids)
 
-funSize :: UnfoldingOpts -> ETVars -> Id -> Int -> ExprTree
+funSize :: UnfoldingOpts -> ETVars -> Id -> Int -> Int -> ExprTree
 -- Size for function calls that are not constructors or primops
 -- Note [Function applications]
-funSize opts (avs,_) fun n_val_args
+funSize opts (avs,_) fun n_val_args voids
   | fun `hasKey` buildIdKey   = etZero  -- Wwant to inline applications of build/augment
   | fun `hasKey` augmentIdKey = etZero  -- so we give size zero to the whole call
   | otherwise = SizeIs { et_size  = size
@@ -752,13 +746,14 @@ funSize opts (avs,_) fun n_val_args
                        , et_ret   = res_discount }
   where
     size | n_val_args == 0 = 0
-         | otherwise       = vanillaCallSize n_val_args
+         | otherwise       = vanillaCallSize n_val_args voids
 
     -- Discount if this is an interesting variable, and is applied
-    -- Discount is enough to make the application free (but not negative!)
+    -- If the function is an argument and is applied to some values,
+    -- give it a discount -- maybe we can apply that lambda.
     --  See Note [Function and non-function discounts]
     cases | n_val_args > 0, fun `elemVarSet` avs
-          = unitBag (ScrutOf fun size)
+          = unitBag (ScrutOf fun (unfoldingFunAppDiscount opts))
           | otherwise
           = emptyBag
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/abf63bf8b0934dd400e8c9e202e877b956807dc6

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/abf63bf8b0934dd400e8c9e202e877b956807dc6
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231024/3d1ab905/attachment-0001.html>


More information about the ghc-commits mailing list