[commit: ghc] master: Inline partially-applied wrappers (2be364a)

git at git.haskell.org git at git.haskell.org
Tue Jan 10 22:10:02 UTC 2017


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/2be364ac8cf2f5cd3b50503e8b26f51eb46101e5/ghc

>---------------------------------------------------------------

commit 2be364ac8cf2f5cd3b50503e8b26f51eb46101e5
Author: David Feuer <david.feuer at gmail.com>
Date:   Tue Jan 10 16:33:20 2017 -0500

    Inline partially-applied wrappers
    
    Suppose we have
    
    ```
    data Node a = Node2 !Int a a | Node3 !Int a a a
    instance Traversable Node where
      traverse f (Node2 s x y) = Node2 s <$> f x <*> f y
      ...
    
    ```
    
    Since `Node2` is partially applied, we wouldn't inline its
    wrapper.  The result was that we'd box up the `Int#` to put
    the box in the closure passed to `fmap`. We now allow the wrapper
    to inline when partially applied, so GHC stores the `Int#`
    directly in the closure.
    
    Reviewers: rwbarton, mpickering, simonpj, austin, bgamari
    
    Reviewed By: simonpj, bgamari
    
    Subscribers: mpickering, thomie
    
    Differential Revision: https://phabricator.haskell.org/D2891
    
    GHC Trac Issues: #12990


>---------------------------------------------------------------

2be364ac8cf2f5cd3b50503e8b26f51eb46101e5
 compiler/basicTypes/MkId.hs                        | 46 +++++++++++++++++-----
 .../tests/deSugar/should_compile/T2431.stderr      |  2 +-
 testsuite/tests/perf/should_run/T12990.hs          | 28 +++++++++++++
 testsuite/tests/perf/should_run/all.T              | 10 +++++
 .../tests/simplCore/should_compile/T7360.stderr    |  2 +-
 5 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/compiler/basicTypes/MkId.hs b/compiler/basicTypes/MkId.hs
index dc8b4d0..7c8ffed 100644
--- a/compiler/basicTypes/MkId.hs
+++ b/compiler/basicTypes/MkId.hs
@@ -466,6 +466,32 @@ newtype DataConBoxer = DCB ([Type] -> [Var] -> UniqSM ([Var], [CoreBind]))
                        -- Bind these src-level vars, returning the
                        -- rep-level vars to bind in the pattern
 
+{-
+Note [Inline partially-applied constructor wrappers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We allow the wrapper to inline when partially applied to avoid
+boxing values unnecessarily. For example, consider
+
+   data Foo a = Foo !Int a
+
+   instance Traversable Foo where
+     traverse f (Foo i a) = Foo i <$> f a
+
+This desugars to
+
+   traverse f foo = case foo of
+        Foo i# a -> let i = I# i#
+                    in map ($WFoo i) (f a)
+
+If the wrapper `$WFoo` is not inlined, we get a fruitless reboxing of `i`.
+But if we inline the wrapper, we get
+
+   map (\a. case i of I# i# a -> Foo i# a) (f a)
+
+and now case-of-known-constructor eliminates the redundant allocation.
+-}
+
 mkDataConRep :: DynFlags
              -> FamInstEnvs
              -> Name
@@ -498,16 +524,16 @@ mkDataConRep dflags fam_envs wrap_name mb_bangs data_con
              wrap_arg_dmds = map mk_dmd arg_ibangs
              mk_dmd str | isBanged str = evalDmd
                         | otherwise           = topDmd
-                 -- The Cpr info can be important inside INLINE rhss, where the
-                 -- wrapper constructor isn't inlined.
-                 -- And the argument strictness can be important too; we
-                 -- may not inline a constructor when it is partially applied.
-                 -- For example:
-                 --      data W = C !Int !Int !Int
-                 --      ...(let w = C x in ...(w p q)...)...
-                 -- we want to see that w is strict in its two arguments
-
-             wrap_unf = mkInlineUnfolding (Just wrap_arity) wrap_rhs
+
+             -- The wrapper will usually be inlined (see wrap_unf), so its
+             -- strictness and CPR info is usually irrelevant. But this is
+             -- not always the case; GHC may choose not to inline it. In
+             -- particular, the wrapper constructor is not inlined inside
+             -- an INLINE rhs or when it is not applied to any arguments.
+             -- See Note [Inline partially-applied constructor wrappers]
+             -- Passing Nothing here allows the wrapper to inline when
+             -- unsaturated.
+             wrap_unf = mkInlineUnfolding Nothing wrap_rhs
              wrap_tvs = (univ_tvs `minusList` map eqSpecTyVar eq_spec) ++ ex_tvs
              wrap_rhs = mkLams wrap_tvs $
                         mkLams wrap_args $
diff --git a/testsuite/tests/deSugar/should_compile/T2431.stderr b/testsuite/tests/deSugar/should_compile/T2431.stderr
index ff1047d..797c6c7 100644
--- a/testsuite/tests/deSugar/should_compile/T2431.stderr
+++ b/testsuite/tests/deSugar/should_compile/T2431.stderr
@@ -9,7 +9,7 @@ T2431.$WRefl [InlPrag=INLINE] :: forall a. a :~: a
  Str=m,
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
-         Guidance=ALWAYS_IF(arity=0,unsat_ok=False,boring_ok=False)
+         Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=False)
          Tmpl= \ (@ a) ->
                  T2431.Refl @ a @ a @~ (<a>_N :: (a :: *) GHC.Prim.~# (a :: *))}]
 T2431.$WRefl =
diff --git a/testsuite/tests/perf/should_run/T12990.hs b/testsuite/tests/perf/should_run/T12990.hs
new file mode 100644
index 0000000..f7655ac
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T12990.hs
@@ -0,0 +1,28 @@
+-- We used to inline constructor wrapper functions only when fully applied.
+-- This led to unnecessary boxing when partially applying to unpacked fields.
+
+module Main where
+import Control.DeepSeq
+import Data.Functor.Identity
+import Control.Exception (evaluate)
+
+data AList = Cons !Int !Int !Int !Int !Int !Int !Int !Int !Int AList | Nil
+
+-- We need to write this instance manually because the Generic-derived
+-- instance allocates a ton of intermediate junk, obscuring the interesting
+-- differences.
+instance NFData AList where
+  rnf Nil = ()
+  rnf (Cons _1 _2 _3 _4 _5 _6 _7 _8 _9 xs) = rnf xs
+
+-- If GHC is allowed to specialize it to Identity, the partial application of
+-- Cons will become a fully saturated one, defeating the test. So we NOINLINE
+-- it.
+buildalist :: Applicative f => Int -> f AList
+buildalist n
+  | n <= 0 = pure Nil
+  | otherwise = Cons n (n+1) (n+2) (n+3) (n+4) (n+5) (n+6) (n+7) (n+8) <$>
+                  buildalist (n - 1)
+{-# NOINLINE buildalist #-}
+
+main = evaluate . rnf . runIdentity $ buildalist 100000
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index 89ae3ec..333970c 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -460,3 +460,13 @@ test('T13001',
       only_ways(['normal'])],
      compile_and_run,
      ['-O2'])
+
+test('T12990',
+    [stats_num_field('bytes allocated',
+                     [ (wordsize(64), 21640904, 5) ]),
+                     # 2017-01-03     34440936  w/o inlining unsaturated
+                     #                          constructor wrappers
+                     # 2017-01-03     21640904 inline wrappers
+     only_ways(['normal'])],
+    compile_and_run,
+    ['-O2'])
diff --git a/testsuite/tests/simplCore/should_compile/T7360.stderr b/testsuite/tests/simplCore/should_compile/T7360.stderr
index 4598b3e..2b0984c 100644
--- a/testsuite/tests/simplCore/should_compile/T7360.stderr
+++ b/testsuite/tests/simplCore/should_compile/T7360.stderr
@@ -10,7 +10,7 @@ T7360.$WFoo3 [InlPrag=INLINE] :: Int -> Foo
  Str=<S,U>m3,
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
-         Guidance=ALWAYS_IF(arity=1,unsat_ok=False,boring_ok=False)
+         Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
          Tmpl= \ (dt [Occ=Once!] :: Int) ->
                  case dt of { GHC.Types.I# dt [Occ=Once] -> T7360.Foo3 dt }}]
 T7360.$WFoo3 =



More information about the ghc-commits mailing list