[GHC] #15488: GHC takes up huge amount of memory when compiling accelerate 1.2.0
GHC
ghc-devs at haskell.org
Wed Sep 26 12:52:22 UTC 2018
#15488: GHC takes up huge amount of memory when compiling accelerate 1.2.0
-------------------------------------+-------------------------------------
Reporter: noah | Owner: tdammers
Type: bug | Status: new
Priority: high | Milestone: 8.6.1
Component: Compiler | Version: 8.4.3
Resolution: | Keywords:
| accelerate,memory,compile
Operating System: Linux | Architecture: x86_64
| (amd64)
Type of failure: Compile-time | Test Case: accelerate
performance bug | 1.2.0
Blocked By: | Blocking:
Related Tickets: | Differential Rev(s):
Wiki Page: |
-------------------------------------+-------------------------------------
Comment (by tdammers):
Here's what the updated example looks like after desugaring:
{{{
encodeIntegralConst
encodeIntegralConst
= \ @ t_aaZ7 ds_dbct x_aaUu ->
case ds_dbct of {
TypeInt co_aaZJ _ ->
<>
$fSemigroupBuilder
(intHost (I# 200#))
(intHost (x_aaUu `cast` <Co:2>));
TypeInt8 co_aaZR _ ->
<>
$fSemigroupBuilder
(intHost (I# 208#))
(int8 (x_aaUu `cast` <Co:2>));
TypeInt16 co_ab00 _ ->
<>
$fSemigroupBuilder
(intHost (I# 216#))
(int16Host (x_aaUu `cast` <Co:2>));
-- ...
}
-- RHS size: {terms: 45, types: 57, coercions: 14, joins: 0/0}
encodeFloatingConst
encodeFloatingConst
= \ @ t_aaY8 ds_db5K ds_db5L ->
case ds_db5K of {
TypeHalf co_aaYn _ ->
<>
$fSemigroupBuilder
(intHost (I# 500#))
(word16Host
(((ds_db5L `cast` <Co:2>) `cast` <Co:1>) `cast` <Co:1>));
TypeFloat co_aaYC _ ->
<>
$fSemigroupBuilder
(intHost (I# 510#))
(floatHost (ds_db5L `cast` <Co:2>));
-- ...
}
-- RHS size: {terms: 10, types: 12, coercions: 0, joins: 0/0}
encodeNumConst
encodeNumConst
= \ @ t_ab24 ds_dbjh ->
case ds_dbjh of {
IntegralNumType t_aaUs -> encodeIntegralConst t_aaUs;
FloatingNumType t_aaUt -> encodeFloatingConst t_aaUt
}
-- RHS size: {terms: 5, types: 0, coercions: 0, joins: 0/0}
$trModule
$trModule = Module (TrNameS "main"#) (TrNameS "Repro"#)
-- RHS size: {terms: 11, types: 4, coercions: 0, joins: 0/0}
fromBool
fromBool
= \ ds_dbjn ->
case ds_dbjn of {
False -> fromInteger $fNumWord8 0;
True -> fromInteger $fNumWord8 1
}
-- RHS size: {terms: 46, types: 57, coercions: 13, joins: 0/0}
encodeNonNumConst
encodeNonNumConst
= \ @ t_ab2w ds_dbjr x_aaUn ->
case ds_dbjr of {
TypeBool co_ab2I _ ->
<>
$fSemigroupBuilder
(intHost (I# 0#))
(word8 (fromBool (x_aaUn `cast` <Co:2>)));
TypeChar co_ab2N _ ->
<>
$fSemigroupBuilder
(intHost (I# 100#))
(charUtf8 (x_aaUn `cast` <Co:2>));
-- ...
}
-- RHS size: {terms: 10, types: 12, coercions: 0, joins: 0/0}
encodeSingleConst
encodeSingleConst
= \ @ t_ab3l ds_dblJ ->
case ds_dblJ of {
NumSingleType t_aaUe -> encodeNumConst t_aaUe;
NonNumSingleType t_aaUf -> encodeNonNumConst t_aaUf
}
-- RHS size: {terms: 47, types: 52, coercions: 4, joins: 0/0}
encodeVectorConst
encodeVectorConst
= \ @ t_ab3r ds_dblP ds_dblQ ->
case ds_dblP of {
__DEFAULT ->
patError "Repro.hs:(29,1)-(30,133)|function encodeVectorConst"#;
Vector2Type @ a_ab3G co_ab3H t_aaUg ->
case ds_dblQ `cast` <Co:2> of { V2 a_aaUh b_aaUi ->
<>
$fSemigroupBuilder
(intHost (I# 2#))
(<>
$fSemigroupBuilder
(encodeSingleConst t_aaUg a_aaUh)
(encodeSingleConst t_aaUg b_aaUi))
};
Vector3Type @ a_ab3X co_ab3Y t_aaUj ->
case ds_dblQ `cast` <Co:2> of { V3 a_aaUk b_aaUl c_aaUm ->
<>
$fSemigroupBuilder
(intHost (I# 3#))
(<>
$fSemigroupBuilder
(encodeSingleConst t_aaUj a_aaUk)
(<>
$fSemigroupBuilder
(encodeSingleConst t_aaUj b_aaUl)
(encodeSingleConst t_aaUj c_aaUm)))
}
}
}}}
After inlining, the pattern that we get is something like:
{{{
case a of
A1 b1 ->
case b1 of
B1 c -> case c of
...
A2 b2 ->
case b2 of
B2 d -> case d of
...
}}}
But this not the case-of-case pattern at all! It's just a very large
nested `case`, resulting from excessive inlining, and the structure of the
whole thing is such that every path through the tree of `case`s is unique
at every step, so statically analyzing the `case` branches does not lead
to any simplification.
In fact, we can achieve the same kind of blowup with the following example
code (no dependencies whatsoever):
{{{#!haskell
module SimpleBlowup
where
data Foo
= Foo1 Bar
| Foo2 Bar
| Foo3 Bar
| Foo4 Bar
| Foo5 Bar
| Foo6 Bar
| Foo7 Bar
| Foo8 Bar
| Foo9 Bar
| Foo10 Bar
| Foo11 Bar
| Foo12 Bar
| Foo13 Bar
| Foo14 Bar
| Foo15 Bar
| Foo16 Bar
| Foo17 Bar
| Foo18 Bar
| Foo19 Bar
| Foo20 Bar
data Bar = Bar1 Baz | Bar2 Baz | Bar3 Baz | Bar4 Baz
data Baz
= Baz1 Int
| Baz2 Int
| Baz3 Int
| Baz4 Int
| Baz5 Int
| Baz6 Int
| Baz7 Int
| Baz8 Int
| Baz9 Int
| Baz10 Int
| Baz11 Int
| Baz12 Int
| Baz13 Int
| Baz14 Int
| Baz15 Int
| Baz16 Int
| Baz17 Int
| Baz18 Int
| Baz19 Int
| Baz20 Int
{-#INLINE encodeFoo #-}
encodeFoo :: Foo -> Int
encodeFoo (Foo1 bar) = encodeBar bar + 1
encodeFoo (Foo2 bar) = encodeBar bar + 2
encodeFoo (Foo3 bar) = encodeBar bar + 3
encodeFoo (Foo4 bar) = encodeBar bar + 4
encodeFoo (Foo5 bar) = encodeBar bar + 5
encodeFoo (Foo6 bar) = encodeBar bar + 6
encodeFoo (Foo7 bar) = encodeBar bar + 7
encodeFoo (Foo8 bar) = encodeBar bar + 8
encodeFoo (Foo9 bar) = encodeBar bar + 9
encodeFoo (Foo10 bar) = encodeBar bar + 10
encodeFoo (Foo11 bar) = encodeBar bar + 11
encodeFoo (Foo12 bar) = encodeBar bar + 12
encodeFoo (Foo13 bar) = encodeBar bar + 13
encodeFoo (Foo14 bar) = encodeBar bar + 14
encodeFoo (Foo15 bar) = encodeBar bar + 15
encodeFoo (Foo16 bar) = encodeBar bar + 16
encodeFoo (Foo17 bar) = encodeBar bar + 17
encodeFoo (Foo18 bar) = encodeBar bar + 18
encodeFoo (Foo19 bar) = encodeBar bar + 19
encodeFoo (Foo20 bar) = encodeBar bar + 20
{-#INLINE encodeBar #-}
encodeBar :: Bar -> Int
encodeBar (Bar1 baz) = encodeBaz baz + 1
encodeBar (Bar2 baz) = encodeBaz baz + 2
encodeBar (Bar3 baz) = encodeBaz baz + 3
encodeBar (Bar4 baz) = encodeBaz baz + 4
{-#INLINE encodeBaz #-}
encodeBaz :: Baz -> Int
encodeBaz (Baz1 i) = encodeInt i + 1
encodeBaz (Baz2 i) = encodeInt i + 2
encodeBaz (Baz3 i) = encodeInt i + 3
encodeBaz (Baz4 i) = encodeInt i + 4
encodeBaz (Baz5 i) = encodeInt i + 5
encodeBaz (Baz6 i) = encodeInt i + 6
encodeBaz (Baz7 i) = encodeInt i + 7
encodeBaz (Baz8 i) = encodeInt i + 8
encodeBaz (Baz9 i) = encodeInt i + 9
encodeBaz (Baz10 i) = encodeInt i + 10
encodeBaz (Baz11 i) = encodeInt i + 11
encodeBaz (Baz12 i) = encodeInt i + 12
encodeBaz (Baz13 i) = encodeInt i + 13
encodeBaz (Baz14 i) = encodeInt i + 14
encodeBaz (Baz15 i) = encodeInt i + 15
encodeBaz (Baz16 i) = encodeInt i + 16
encodeBaz (Baz17 i) = encodeInt i + 17
encodeBaz (Baz18 i) = encodeInt i + 18
encodeBaz (Baz19 i) = encodeInt i + 19
encodeBaz (Baz20 i) = encodeInt i + 20
{-#INLINE encodeInt #-}
encodeInt :: Int -> Int
encodeInt i = (i * 47 + 31) `mod` 17
}}}
This blows up Core size to about 45,000 terms. Changing all the `INLINE`s
to `NOINLINE` however, we only get 1,200 terms.
So AFAICT, this isn't case-of-case being overly eager, it's just GHC
obediently honoring those `INLINE` pragmas, which legit produces a lot of
Core, and due to the structure of the things being inlined, the usual
crossing-off of obvious non-matches isn't possible.
This is probably not something you encounter a lot in the wild, because in
order to trigger this in a noticable way, the following conditions must be
met:
- Several layers of function applications forming a nested pattern-match
- ...each of them marked `INLINE`
- ...and essentially consisting of a pattern-matching construct with many
(tens or more) branches
I presume that this would typically involve nested data types with many
constructors on multiple levels.
--
Ticket URL: <http://ghc.haskell.org/trac/ghc/ticket/15488#comment:17>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler
More information about the ghc-tickets
mailing list