[commit: ghc] master: Don't float out (classop dict e1 e2) (949ad67)

Thu Aug 28 11:12:02 UTC 2014

Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/949ad67e2f475864a405d214c3e02f2918931eb8/ghc

>---------------------------------------------------------------

commit 949ad67e2f475864a405d214c3e02f2918931eb8
Author: Simon Peyton Jones <simonpj at microsoft.com>
Date:   Tue May 13 10:00:45 2014 +0100

    Don't float out (classop dict e1 e2)
    
    A class op applied to a dictionary doesn't do much work, so it's not
    a great idea to float it out (except possibly to the top level.
    
    See Note [Floating over-saturated applications] in SetLevels
    
    I also renamed "floatOutPartialApplications" to "floatOutOverSatApps";
    the former is deeply confusing, since there is no partial application
    involved -- quite the reverse, it is *over* saturated.


>---------------------------------------------------------------

949ad67e2f475864a405d214c3e02f2918931eb8
 compiler/simplCore/CoreMonad.lhs |  8 +++++---
 compiler/simplCore/SetLevels.lhs | 29 ++++++++++++++++++++---------
 compiler/simplCore/SimplCore.lhs | 10 +++++-----
 3 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/compiler/simplCore/CoreMonad.lhs b/compiler/simplCore/CoreMonad.lhs
index c060360..faec02e 100644
--- a/compiler/simplCore/CoreMonad.lhs
+++ b/compiler/simplCore/CoreMonad.lhs
@@ -418,8 +418,10 @@ data FloatOutSwitches = FloatOutSwitches {
 
   floatOutConstants :: Bool,       -- ^ True <=> float constants to top level,
                                    --            even if they do not escape a lambda
-  floatOutPartialApplications :: Bool -- ^ True <=> float out partial applications
-                                            --            based on arity information.
+  floatOutOverSatApps :: Bool      -- ^ True <=> float out over-saturated applications
+                                   --            based on arity information.
+                                   -- See Note [Floating over-saturated applications]
+                                   -- in SetLevels
   }
 instance Outputable FloatOutSwitches where
     ppr = pprFloatOutSwitches
@@ -430,7 +432,7 @@ pprFloatOutSwitches sw
      sep $ punctuate comma $ 
      [ ptext (sLit "Lam =")    <+> ppr (floatOutLambdas sw)
      , ptext (sLit "Consts =") <+> ppr (floatOutConstants sw)
-     , ptext (sLit "PAPs =")   <+> ppr (floatOutPartialApplications sw) ])
+     , ptext (sLit "OverSatApps =")   <+> ppr (floatOutOverSatApps sw) ])
 
 -- The core-to-core pass ordering is derived from the DynFlags:
 runWhen :: Bool -> CoreToDo -> CoreToDo
diff --git a/compiler/simplCore/SetLevels.lhs b/compiler/simplCore/SetLevels.lhs
index 52bcecf..c69687b 100644
--- a/compiler/simplCore/SetLevels.lhs
+++ b/compiler/simplCore/SetLevels.lhs
@@ -245,6 +245,20 @@ lvlTopBind env (Rec pairs)
 %*                                                                      *
 %************************************************************************
 
+Note [Floating over-saturated applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we see (f x y), and (f x) is a redex (ie f's arity is 1),
+we call (f x) an "over-saturated application"
+
+Should we float out an over-sat app, if can escape a value lambda?
+It is sometimes very beneficial (-7% runtime -4% alloc over nofib -O2).
+But we don't want to do it for class selectors, because the work saved
+is minimal, and the extra local thunks allocated cost money.
+
+Arguably we could float even class-op applications if they were going to
+top level -- but then they must be applied to a constant dictionary and
+will almost certainly be optimised away anyway.
+
 \begin{code}
 lvlExpr :: LevelEnv             -- Context
         -> CoreExprWithFVs      -- Input expression
@@ -285,13 +299,10 @@ lvlExpr env expr@(_, AnnApp _ _) = do
       (fun, args) = collectAnnArgs expr
     --
     case fun of
-         -- float out partial applications.  This is very beneficial
-         -- in some cases (-7% runtime -4% alloc over nofib -O2).
-         -- In order to float a PAP, there must be a function at the
-         -- head of the application, and the application must be
-         -- over-saturated with respect to the function's arity.
-      (_, AnnVar f) | floatPAPs env &&
-                      arity > 0 && arity < n_val_args ->
+      (_, AnnVar f) | floatOverSat env   -- See Note [Floating over-saturated applications]
+                    , arity > 0
+                    , arity < n_val_args
+                    , Nothing <- isClassOpId_maybe f ->
         do
          let (lapp, rargs) = left (n_val_args - arity) expr []
          rargs' <- mapM (lvlMFE False env) rargs
@@ -940,8 +951,8 @@ floatLams le = floatOutLambdas (le_switches le)
 floatConsts :: LevelEnv -> Bool
 floatConsts le = floatOutConstants (le_switches le)
 
-floatPAPs :: LevelEnv -> Bool
-floatPAPs le = floatOutPartialApplications (le_switches le)
+floatOverSat :: LevelEnv -> Bool
+floatOverSat le = floatOutOverSatApps (le_switches le)
 
 setCtxtLvl :: LevelEnv -> Level -> LevelEnv
 setCtxtLvl env lvl = env { le_ctxt_lvl = lvl }
diff --git a/compiler/simplCore/SimplCore.lhs b/compiler/simplCore/SimplCore.lhs
index 59b39a9..1a7fd67 100644
--- a/compiler/simplCore/SimplCore.lhs
+++ b/compiler/simplCore/SimplCore.lhs
@@ -227,7 +227,7 @@ getCoreToDo dflags
            CoreDoFloatOutwards FloatOutSwitches {
                                  floatOutLambdas   = Just 0,
                                  floatOutConstants = True,
-                                 floatOutPartialApplications = False },
+                                 floatOutOverSatApps = False },
                 -- Was: gentleFloatOutSwitches
                 --
                 -- I have no idea why, but not floating constants to
@@ -239,7 +239,7 @@ getCoreToDo dflags
                 --          made 0.0% difference to any other nofib
                 --          benchmark
                 --
-                -- Not doing floatOutPartialApplications yet, we'll do
+                -- Not doing floatOutOverSatApps yet, we'll do
                 -- that later on when we've had a chance to get more
                 -- accurate arity information.  In fact it makes no
                 -- difference at all to performance if we do it here,
@@ -271,9 +271,9 @@ getCoreToDo dflags
 
         runWhen full_laziness $
            CoreDoFloatOutwards FloatOutSwitches {
-                                 floatOutLambdas   = floatLamArgs dflags,
-                                 floatOutConstants = True,
-                                 floatOutPartialApplications = True },
+                                 floatOutLambdas     = floatLamArgs dflags,
+                                 floatOutConstants   = True,
+                                 floatOutOverSatApps = True },
                 -- nofib/spectral/hartel/wang doubles in speed if you
                 -- do full laziness late in the day.  It only happens
                 -- after fusion and other stuff, so the early pass doesn't