[commit: ghc] master: extend '-fmax-worker-args' limit to specialiser (Trac #11565) (f93c363)

git at git.haskell.org git at git.haskell.org
Fri Sep 2 20:45:28 UTC 2016


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/f93c363fab1ac8ce6f0b474f5967b0b097995827/ghc

>---------------------------------------------------------------

commit f93c363fab1ac8ce6f0b474f5967b0b097995827
Author: Sergei Trofimovich <slyfox at gentoo.org>
Date:   Fri Sep 2 18:47:56 2016 +0100

    extend '-fmax-worker-args' limit to specialiser (Trac #11565)
    
    It's a complementary change to
        a48de37dcca98e7d477040b0ed298bcd1b3ab303
        restore -fmax-worker-args handling (Trac #11565)
    
    I don't have a small example but I've noticed another
    discrepancy when was profiling GHC for performance
    
        cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
    
    was specialised by 'spec_one' down to a function with arity 159.
    As a result 'perf record' pointed at it as at slowest
    function in whole ghc library.
    
    I've extended -fmax-worker-args effect to 'spec_one'
    as it does the same worker/wrapper split to push
    arguments to the heap.
    
    The change decreases heap usage on a synth.bash benchmark
    (Trac #9221) from 67G down to 64G (-4%). Benchmark runtime
    decreased from 14.5 s down to 14.s (-7%).
    
    Signed-off-by: Sergei Trofimovich <siarheit at google.com>
    
    Reviewers: ezyang, simonpj, austin, goldfire, bgamari
    
    Subscribers: thomie
    
    Differential Revision: https://phabricator.haskell.org/D2507
    
    GHC Trac Issues: #11565


>---------------------------------------------------------------

f93c363fab1ac8ce6f0b474f5967b0b097995827
 compiler/specialise/SpecConstr.hs | 12 ++++++++----
 compiler/stranal/WwLib.hs         | 14 +++++++++-----
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs
index 8cc393c..1cf3d44 100644
--- a/compiler/specialise/SpecConstr.hs
+++ b/compiler/specialise/SpecConstr.hs
@@ -29,7 +29,7 @@ import CoreFVs          ( exprsFreeVarsList )
 import CoreMonad
 import Literal          ( litIsLifted )
 import HscTypes         ( ModGuts(..) )
-import WwLib            ( mkWorkerArgs )
+import WwLib            ( isWorkerSmallEnough, mkWorkerArgs )
 import DataCon
 import Coercion         hiding( substCo )
 import Rules
@@ -1533,10 +1533,14 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs
 
   | Just all_calls <- lookupVarEnv bind_calls fn
   = -- pprTrace "specialise entry {" (ppr fn <+> ppr (length all_calls)) $
-    do  { (boring_call, pats) <- callsToPats env specs arg_occs all_calls
-
+    do  { (boring_call, all_pats) <- callsToPats env specs arg_occs all_calls
                 -- Bale out if too many specialisations
-        ; let n_pats      = length pats
+        ; let pats = filter (is_small_enough . fst) all_pats
+              is_small_enough vars = isWorkerSmallEnough (sc_dflags env) vars
+                  -- We are about to construct w/w pair in 'spec_one'.
+                  -- Omit specialisation leading to high arity workers.
+                  -- See Note [Limit w/w arity]
+              n_pats      = length pats
               spec_count' = n_pats + spec_count
         ; case sc_count env of
             Just max | not (sc_force env) && spec_count' > max
diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs
index 1a09605..64de0e0 100644
--- a/compiler/stranal/WwLib.hs
+++ b/compiler/stranal/WwLib.hs
@@ -8,6 +8,7 @@
 
 module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs
              , deepSplitProductType_maybe, findTypeShape
+             , isWorkerSmallEnough
  ) where
 
 #include "HsVersions.h"
@@ -143,7 +144,8 @@ mkWwBodies dflags fam_envs fun_ty demands res_info
               wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var
               worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args
 
-        ; if is_small_enough work_args && (useful1 && not only_one_void_argument || useful2)
+        ; if isWorkerSmallEnough dflags work_args
+             && (useful1 && not only_one_void_argument || useful2)
           then return (Just (worker_args_dmds, wrapper_body, worker_body))
           else return Nothing
         }
@@ -163,10 +165,12 @@ mkWwBodies dflags fam_envs fun_ty demands res_info
       = True
       | otherwise
       = False
-    is_small_enough args = count isId args <= maxWorkerArgs dflags
-          -- See Note [Limit w/w arity]
-          -- We count only Free variables (isId) to skip Type, Kind
-          -- variables which have no runtime representation.
+
+-- See Note [Limit w/w arity]
+isWorkerSmallEnough :: DynFlags -> [Var] -> Bool
+isWorkerSmallEnough dflags vars = count isId vars <= maxWorkerArgs dflags
+    -- We count only Free variables (isId) to skip Type, Kind
+    -- variables which have no runtime representation.
 
 {-
 Note [Always do CPR w/w]



More information about the ghc-commits mailing list