[commit: ghc] wip/tdammers/T15019: Fix performance regressions from #14737 (b2ff1ed)
git at git.haskell.org
git at git.haskell.org
Thu Apr 26 11:38:02 UTC 2018
Repository : ssh://git@git.haskell.org/ghc
On branch : wip/tdammers/T15019
Link : http://ghc.haskell.org/trac/ghc/changeset/b2ff1ed477fe57c5aa69fbe6a93d7e88b52d536a/ghc
>---------------------------------------------------------------
commit b2ff1ed477fe57c5aa69fbe6a93d7e88b52d536a
Author: Tobias Dammers <tdammers at gmail.com>
Date: Thu Apr 26 13:34:07 2018 +0200
Fix performance regressions from #14737
See #15019. When removing an unnecessary type equality check in #14737,
several regression tests failed. The cause was that some coercions that
are actually Refl coercions weren't passed in as such, which made the
equality check needlessly complex (Refl coercions can be discarded in
this particular check immediately, without inspecting the types at all).
We fix that, and get additional performance improvements for free.
>---------------------------------------------------------------
b2ff1ed477fe57c5aa69fbe6a93d7e88b52d536a
compiler/coreSyn/CoreOpt.hs | 13 +++++++++++--
compiler/simplCore/Simplify.hs | 10 ++++++++--
compiler/types/Coercion.hs | 9 +++++++++
testsuite/tests/perf/compiler/all.T | 12 ++++++++----
4 files changed, 36 insertions(+), 8 deletions(-)
diff --git a/compiler/coreSyn/CoreOpt.hs b/compiler/coreSyn/CoreOpt.hs
index f1ff68d..99b7a38 100644
--- a/compiler/coreSyn/CoreOpt.hs
+++ b/compiler/coreSyn/CoreOpt.hs
@@ -958,10 +958,14 @@ pushCoTyArg :: CoercionR -> Type -> Maybe (Type, Maybe CoercionR)
-- If the returned coercion is Nothing, then it would have been reflexive;
-- it's faster not to compute it, though.
pushCoTyArg co ty
+ | isReflCo co
+ = Just (ty, Nothing)
+
-- The following is inefficient - don't do `eqType` here, the coercion
-- optimizer will take care of it. See Trac #14737.
-- -- | tyL `eqType` tyR
- -- -- = Just (ty, Nothing)
+ -- -- = pprTrace "eqType fired: " (ppr (co, tyL, tyR)) $
+ -- -- Just (ty, Nothing)
| isForAllTy tyL
= ASSERT2( isForAllTy tyR, ppr co $$ ppr ty )
@@ -993,10 +997,15 @@ pushCoValArg :: CoercionR -> Maybe (Coercion, Maybe Coercion)
-- If the second returned Coercion is actually Nothing, then no cast is necessary;
-- the returned coercion would have been reflexive.
pushCoValArg co
+ -- The "easy" case; this doesn't interest us.
+ | isReflCo co
+ = Just (mkRepReflCo arg, Nothing)
+
-- The following is inefficient - don't do `eqType` here, the coercion
-- optimizer will take care of it. See Trac #14737.
-- -- | tyL `eqType` tyR
- -- -- = Just (mkRepReflCo arg, Nothing)
+ -- -- = pprTrace "eqType fired: " (ppr (co, tyL, tyR)) $
+ -- -- Just (mkRepReflCo arg, Nothing)
| isFunTy tyL
, (co1, co2) <- decomposeFunCo Representational co
diff --git a/compiler/simplCore/Simplify.hs b/compiler/simplCore/Simplify.hs
index d92f6d7..6f5d65a 100644
--- a/compiler/simplCore/Simplify.hs
+++ b/compiler/simplCore/Simplify.hs
@@ -1224,9 +1224,15 @@ simplCast env body co0 cont0
addCoerce :: OutCoercion -> SimplCont -> SimplM SimplCont
+ addCoerce co cont
+ | isReflCo co = return cont
+
addCoerce co1 (CastIt co2 cont)
- = {-#SCC "addCoerce-simple-recursion" #-}
- addCoerce (mkTransCo co1 co2) cont
+ | isReflexiveCo co' = return cont
+ | otherwise = {-#SCC "addCoerce-simple-recursion" #-}
+ addCoerce co' cont
+ where
+ co' = mkTransCo co1 co2
addCoerce co cont@(ApplyToTy { sc_arg_ty = arg_ty, sc_cont = tail })
| Just (arg_ty', m_co') <- pushCoTyArg co arg_ty
diff --git a/compiler/types/Coercion.hs b/compiler/types/Coercion.hs
index ff41529..af10e91 100644
--- a/compiler/types/Coercion.hs
+++ b/compiler/types/Coercion.hs
@@ -61,6 +61,7 @@ module Coercion (
isReflCo, isReflCo_maybe, isReflexiveCo, isReflexiveCo_maybe,
isReflCoVar_maybe,
+ isEqTypeCo,
-- ** Coercion variables
mkCoVar, isCoVar, coVarName, setCoVarName, setCoVarUnique,
@@ -434,6 +435,14 @@ isReflCo :: Coercion -> Bool
isReflCo (Refl {}) = True
isReflCo _ = False
+-- | Test if this coercion amounts to a trivial type equality. This covers more
+-- cases than 'isReflCo', but is still a lot faster than the full kind and role
+-- extraction that 'isReflexiveCo' or 'eqType' perform.
+isEqTypeCo :: Coercion -> Bool
+isEqTypeCo (SymCo co) = isEqTypeCo co
+isEqTypeCo (NthCo _ _ _) = True -- isEqTypeCo co
+isEqTypeCo co = isReflCo co
+
-- | Returns the type coerced if this coercion is reflexive. Guaranteed
-- to work very quickly. Sometimes a coercion can be reflexive, but not
-- obviously so. c.f. 'isReflexiveCo_maybe'
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 2001cda..11bb920 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -595,7 +595,7 @@ test('T5321FD',
# (due to better optCoercion, 5e7406d9, #9233)
# 2016-04-06: 250757460 (x86/Linux)
- (wordsize(64), 415136648, 10)])
+ (wordsize(64), 367569448, 10)])
# prev: 418306336
# 29/08/2012: 492905640
# (increase due to new codegen)
@@ -617,6 +617,8 @@ test('T5321FD',
# 2016-07-16: 477840432
# Optimize handling of built-in OccNames
# 2017-05-14: 415136648 (amd64/Linux) Two-pass CmmLayoutStack
+ # 2018-04-26: 367569448
+ # improve simplCast (#15019, #14737)
],
compile,[''])
@@ -747,7 +749,7 @@ test('T9020',
# 2014-07-31: 343005716 (Windows) (general round of updates)
# 2017-03-24: 249904136 (x86/Linux, 64-bit machine)
- (wordsize(64), 562206104, 10)])
+ (wordsize(64), 429226072, 10)])
# prev: 795469104
# 2014-07-17: 728263536 (general round of updates)
# 2014-09-10: 785871680 post-AMP-cleanup
@@ -1040,7 +1042,7 @@ test('T12227',
test('T12425',
[ only_ways(['optasm']),
compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 141952368, 5),
+ [(wordsize(64), 130647536, 5),
# initial: 125831400
# 2017-01-18: 133380960 Allow top-level string literals in Core
# 2017-02-17: 153611448 Type-indexed Typeable
@@ -1049,6 +1051,7 @@ test('T12425',
# 2017-04-28: 127500136 Remove exponential behaviour in simplifier
# 2017-05-23: 134780272 Addition of llvm-targets in dynflags (D3352)
# 2018-04-15: 141952368 Collateral of #14737
+ # 2018-04-26: 130647536 Fixed #14737 (via #15019)
]),
],
compile,
@@ -1118,7 +1121,7 @@ test('T13056',
test('T12707',
[ compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 1237898376, 5),
+ [(wordsize(64), 1141557680, 5),
# initial: 1271577192
# 2017-01-22: 1348865648 Allow top-level strings in Core
# 2017-01-31: 1280336112 Join points (#12988)
@@ -1127,6 +1130,7 @@ test('T12707',
# 2017-03-02: 1231809592 Drift from recent simplifier improvements
# 2017-05-14: 1163821528 (amd64/Linux) Two-pass CmmLayoutStack
# 2018-04-09: 1237898376 Inexplicable, collateral of #14737
+ # 2018-04-26: 1141557680 Fixed #14737 regression (via #15019)
]),
],
compile,
More information about the ghc-commits
mailing list