[Git][ghc/ghc][master] Add a fast path for data constructor workers

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Fri Nov 11 18:18:27 UTC 2022



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
d0c691b6 by Simon Peyton Jones at 2022-11-11T13:18:07-05:00
Add a fast path for data constructor workers

See Note [Fast path for data constructors] in
GHC.Core.Opt.Simplify.Iteration

This bypasses lots of expensive logic, in the special case of
applications of data constructors.  It is a surprisingly worthwhile
improvement, as you can see in the figures below.

Metrics: compile_time/bytes allocated
------------------------------------------------
          CoOpt_Read(normal)   -2.0%
    CoOpt_Singletons(normal)   -2.0%
    ManyConstructors(normal)   -1.3%
              T10421(normal)   -1.9% GOOD
             T10421a(normal)   -1.5%
              T10858(normal)   -1.6%
              T11545(normal)   -1.7%
              T12234(optasm)   -1.3%
              T12425(optasm)   -1.9% GOOD
              T13035(normal)   -1.0% GOOD
              T13056(optasm)   -1.8%
              T13253(normal)   -3.3% GOOD
              T15164(normal)   -1.7%
              T15304(normal)   -3.4%
              T15630(normal)   -2.8%
              T16577(normal)   -4.3% GOOD
              T17096(normal)   -1.1%
              T17516(normal)   -3.1%
              T18282(normal)   -1.9%
              T18304(normal)   -1.2%
             T18698a(normal)   -1.2% GOOD
             T18698b(normal)   -1.5% GOOD
              T18923(normal)   -1.3%
               T1969(normal)   -1.3% GOOD
              T19695(normal)   -4.4% GOOD
             T21839c(normal)   -2.7% GOOD
             T21839r(normal)   -2.7% GOOD
               T4801(normal)   -3.8% GOOD
               T5642(normal)   -3.1% GOOD
               T6048(optasm)   -2.5% GOOD
               T9020(optasm)   -2.7% GOOD
               T9630(normal)   -2.1% GOOD
               T9961(normal)  -11.7% GOOD
               WWRec(normal)   -1.0%

                   geo. mean   -1.1%
                   minimum    -11.7%
                   maximum     +0.1%

Metric Decrease:
    T10421
    T12425
    T13035
    T13253
    T16577
    T18698a
    T18698b
    T1969
    T19695
    T21839c
    T21839r
    T4801
    T5642
    T6048
    T9020
    T9630
    T9961

- - - - -


3 changed files:

- compiler/GHC/Core/Opt/Simplify/Iteration.hs
- compiler/GHC/Core/Opt/Simplify/Utils.hs
- compiler/GHC/Types/Id/Make.hs


Changes:

=====================================
compiler/GHC/Core/Opt/Simplify/Iteration.hs
=====================================
@@ -1497,9 +1497,10 @@ rebuild env expr cont
       ApplyToTy  { sc_arg_ty = ty, sc_cont = cont}
         -> rebuild env (App expr (Type ty)) cont
 
-      ApplyToVal { sc_arg = arg, sc_env = se, sc_dup = dup_flag, sc_cont = cont}
+      ApplyToVal { sc_arg = arg, sc_env = se, sc_dup = dup_flag
+                 , sc_cont = cont, sc_hole_ty = fun_ty }
         -- See Note [Avoid redundant simplification]
-        -> do { (_, _, arg') <- simplArg env dup_flag se arg
+        -> do { (_, _, arg') <- simplArg env dup_flag fun_ty se arg
               ; rebuild env (App expr arg') cont }
 
 completeBindX :: SimplEnv
@@ -1598,7 +1599,8 @@ simplCast env body co0 cont0
         --         co1 :: t1 ~ s1
         --         co2 :: s2 ~ t2
         addCoerce co cont@(ApplyToVal { sc_arg = arg, sc_env = arg_se
-                                      , sc_dup = dup, sc_cont = tail })
+                                      , sc_dup = dup, sc_cont = tail
+                                      , sc_hole_ty = fun_ty })
           | Just (m_co1, m_co2) <- pushCoValArg co
           , fixed_rep m_co1
           = {-#SCC "addCoerce-pushCoValArg" #-}
@@ -1610,7 +1612,7 @@ simplCast env body co0 cont0
                       -- See Note [Avoiding exponential behaviour]
 
                    MCo co1 ->
-            do { (dup', arg_se', arg') <- simplArg env dup arg_se arg
+            do { (dup', arg_se', arg') <- simplArg env dup fun_ty arg_se arg
                     -- When we build the ApplyTo we can't mix the OutCoercion
                     -- 'co' with the InExpr 'arg', so we simplify
                     -- to make it all consistent.  It's a bit messy.
@@ -1636,14 +1638,16 @@ simplCast env body co0 cont0
           -- See Note [Representation polymorphism invariants] in GHC.Core
           -- test: typecheck/should_run/EtaExpandLevPoly
 
-simplArg :: SimplEnv -> DupFlag -> StaticEnv -> CoreExpr
+simplArg :: SimplEnv -> DupFlag
+         -> OutType                 -- Type of the function applied to this arg
+         -> StaticEnv -> CoreExpr   -- Expression with its static envt
          -> SimplM (DupFlag, StaticEnv, OutExpr)
-simplArg env dup_flag arg_env arg
+simplArg env dup_flag fun_ty arg_env arg
   | isSimplified dup_flag
   = return (dup_flag, arg_env, arg)
   | otherwise
   = do { let arg_env' = arg_env `setInScopeFromE` env
-       ; arg' <- simplExpr arg_env'  arg
+       ; arg' <- simplExprC arg_env' arg (mkBoringStop (funArgTy fun_ty))
        ; return (Simplified, zapSubstEnv arg_env', arg') }
          -- Return a StaticEnv that includes the in-scope set from 'env',
          -- because arg' may well mention those variables (#20639)
@@ -2029,6 +2033,21 @@ zap the SubstEnv.  This is VITAL.  Consider
 
 We'll clone the inner \x, adding x->x' in the id_subst Then when we
 inline y, we must *not* replace x by x' in the inlined copy!!
+
+Note [Fast path for data constructors]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For applications of a data constructor worker, the full glory of
+rebuildCall is a waste of effort;
+* They never inline, obviously
+* They have no rewrite rules
+* They are not strict (see Note [Data-con worker strictness]
+  in GHC.Core.DataCon)
+So it's fine to zoom straight to `rebuild` which just rebuilds the
+call in a very straightforward way.
+
+Some programs have a /lot/ of data constructors in the source program
+(compiler/perf/T9961 is an example), so this fast path can be very
+valuable.
 -}
 
 simplVar :: SimplEnv -> InVar -> SimplM OutExpr
@@ -2046,6 +2065,9 @@ simplVar env var
 
 simplIdF :: SimplEnv -> InId -> SimplCont -> SimplM (SimplFloats, OutExpr)
 simplIdF env var cont
+  | isDataConWorkId var         -- See Note [Fast path for data constructors]
+  = rebuild env (Var var) cont
+  | otherwise
   = case substId env var of
       ContEx tvs cvs ids e -> simplExprF env' e cont
         -- Don't trimJoinCont; haven't already simplified e,
@@ -2315,6 +2337,8 @@ field of the ArgInfo record is the state of a little state-machine:
   If we inline `f` before simplifying `BIG` well use preInlineUnconditionally,
   and we'll simplify BIG once, at x's occurrence, rather than twice.
 
+* GHC.Core.Opt.Simplify.Utils. mkRewriteCall: if there are no rules, and no
+  unfolding, we can skip both TryRules and TryInlining, which saves work.
 
 Note [Avoid redundant simplification]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -3645,7 +3669,7 @@ mkDupableContWithDmds env dmds
     do  { let (dmd:cont_dmds) = dmds   -- Never fails
         ; (floats1, cont') <- mkDupableContWithDmds env cont_dmds cont
         ; let env' = env `setInScopeFromF` floats1
-        ; (_, se', arg') <- simplArg env' dup se arg
+        ; (_, se', arg') <- simplArg env' dup hole_ty se arg
         ; (let_floats2, arg'') <- makeTrivial env NotTopLevel dmd (fsLit "karg") arg'
         ; let all_floats = floats1 `addLetFloats` let_floats2
         ; return ( all_floats


=====================================
compiler/GHC/Core/Opt/Simplify/Utils.hs
=====================================
@@ -425,12 +425,22 @@ decArgCount :: RewriteCall -> RewriteCall
 decArgCount (TryRules n rules) = TryRules (n-1) rules
 decArgCount rew                = rew
 
-mkTryRules :: [CoreRule] -> RewriteCall
+mkRewriteCall :: Id -> RuleEnv -> RewriteCall
 -- See Note [Rewrite rules and inlining] in GHC.Core.Opt.Simplify.Iteration
-mkTryRules [] = TryInlining
-mkTryRules rs = TryRules n_required rs
+-- We try to skip any unnecessary stages:
+--    No rules     => skip TryRules
+--    No unfolding => skip TryInlining
+-- This skipping is "just" for efficiency.  But rebuildCall is
+-- quite a heavy hammer, so skipping stages is a good plan.
+-- And it's extremely simple to do.
+mkRewriteCall fun rule_env
+  | not (null rules) = TryRules n_required rules
+  | canUnfold unf    = TryInlining
+  | otherwise        = TryNothing
   where
-    n_required = maximum (map ruleArity rs)
+    n_required = maximum (map ruleArity rules)
+    rules = getRules rule_env fun
+    unf   = idUnfolding fun
 
 {-
 ************************************************************************
@@ -604,21 +614,23 @@ mkArgInfo :: SimplEnv -> RuleEnv -> Id -> SimplCont -> ArgInfo
 mkArgInfo env rule_base fun cont
   | n_val_args < idArity fun            -- Note [Unsaturated functions]
   = ArgInfo { ai_fun = fun, ai_args = []
-            , ai_rewrite = fun_rules
+            , ai_rewrite = fun_rewrite
             , ai_encl = False
             , ai_dmds = vanilla_dmds
             , ai_discs = vanilla_discounts }
   | otherwise
   = ArgInfo { ai_fun   = fun
             , ai_args  = []
-            , ai_rewrite = fun_rules
-            , ai_encl  = notNull rules || contHasRules cont
+            , ai_rewrite = fun_rewrite
+            , ai_encl  = fun_has_rules || contHasRules cont
             , ai_dmds  = add_type_strictness (idType fun) arg_dmds
             , ai_discs = arg_discounts }
   where
-    rules      = getRules rule_base fun
-    fun_rules  = mkTryRules rules
-    n_val_args = countValArgs cont
+    n_val_args    = countValArgs cont
+    fun_rewrite   = mkRewriteCall fun rule_base
+    fun_has_rules = case fun_rewrite of
+                      TryRules {} -> True
+                      _           -> False
 
     vanilla_discounts, arg_discounts :: [Int]
     vanilla_discounts = repeat 0


=====================================
compiler/GHC/Types/Id/Make.hs
=====================================
@@ -585,6 +585,7 @@ mkDataConWorkId wkr_name data_con
                    `setInlinePragInfo`     wkr_inline_prag
                    `setUnfoldingInfo`      evaldUnfolding  -- Record that it's evaluated,
                                                            -- even if arity = 0
+          -- No strictness: see Note [Data-con worker strictness] in GHC.Core.DataCon
 
     wkr_inline_prag = defaultInlinePragma { inl_rule = ConLike }
     wkr_arity = dataConRepArity data_con



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d0c691b6110b11a43d5ea2685d17bc001d2298da

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d0c691b6110b11a43d5ea2685d17bc001d2298da
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221111/f132a9da/attachment-0001.html>


More information about the ghc-commits mailing list