[commit: ghc] master: Optimize flattener by trying to reduce a TF before reducing its args. (8e2d858)
git at git.haskell.org
git at git.haskell.org
Sat Dec 20 02:40:45 UTC 2014
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/8e2d858bb837a322f26face78df1b6ef3898e762/ghc
>---------------------------------------------------------------
commit 8e2d858bb837a322f26face78df1b6ef3898e762
Author: Richard Eisenberg <eir at cis.upenn.edu>
Date: Wed Dec 17 23:30:15 2014 -0500
Optimize flattener by trying to reduce a TF before reducing its args.
This has a demonstrated 2x speed boost on the T9872{a,b,c} tests.
(#9872)
>---------------------------------------------------------------
8e2d858bb837a322f26face78df1b6ef3898e762
compiler/typecheck/TcFlatten.hs | 47 ++++++++++++++++++++++++++-----------
testsuite/tests/perf/compiler/all.T | 14 +++++++----
2 files changed, 42 insertions(+), 19 deletions(-)
diff --git a/compiler/typecheck/TcFlatten.hs b/compiler/typecheck/TcFlatten.hs
index 818965d..2c72c93 100644
--- a/compiler/typecheck/TcFlatten.hs
+++ b/compiler/typecheck/TcFlatten.hs
@@ -900,7 +900,9 @@ flatten_exact_fam_app fmode tc tys
roles = tyConRolesX (feRole fmode) tc
flatten_exact_fam_app_fully fmode tc tys
- = do { (xis, cos) <- flatten_many_nom (setFEEqRel (setFEMode fmode FM_FlattenAll) NomEq) tys
+ -- See Note [Reduce type family applications eagerly]
+ = try_to_reduce tc tys False id $
+ do { (xis, cos) <- flatten_many_nom (setFEEqRel (setFEMode fmode FM_FlattenAll) NomEq) tys
; let ret_co = mkTcTyConAppCo (feRole fmode) tc cos
-- ret_co :: F xis ~ F tys
@@ -922,15 +924,7 @@ flatten_exact_fam_app_fully fmode tc tys
-- Try to reduce the family application right now
-- See Note [Reduce type family applications eagerly]
- _ -> do { mb_match <- matchFam tc xis
- ; case mb_match of {
- Just (norm_co, norm_ty)
- -> do { (xi, final_co) <- flatten_one fmode norm_ty
- ; let co = norm_co `mkTcTransCo` mkTcSymCo final_co
- ; extendFlatCache tc xis ( co, xi
- , fe_flavour fmode )
- ; return (xi, mkTcSymCo co `mkTcTransCo` ret_co) } ;
- Nothing ->
+ _ -> try_to_reduce tc xis True (`mkTcTransCo` ret_co) $
do { let fam_ty = mkTyConApp tc xis
; (ev, fsk) <- newFlattenSkolem (fe_flavour fmode)
(fe_loc fmode)
@@ -951,7 +945,28 @@ flatten_exact_fam_app_fully fmode tc tys
; return (fsk_ty, maybeTcSubCo (fe_eq_rel fmode)
(mkTcSymCo co)
`mkTcTransCo` ret_co) }
- } } }
+ }
+
+ where
+ try_to_reduce :: TyCon -- F, family tycon
+ -> [Type] -- args, not necessarily flattened
+ -> Bool -- add to the flat cache?
+ -> ( TcCoercion -- :: xi ~ F args
+ -> TcCoercion ) -- what to return from outer function
+ -> TcS (Xi, TcCoercion) -- continuation upon failure
+ -> TcS (Xi, TcCoercion)
+ try_to_reduce tc tys cache update_co k
+ = do { mb_match <- matchFam tc tys
+ ; case mb_match of
+ Just (norm_co, norm_ty)
+ -> do { traceTcS "Eager T.F. reduction success" $
+ vcat [ppr tc, ppr tys, ppr norm_ty, ppr cache]
+ ; (xi, final_co) <- flatten_one fmode norm_ty
+ ; let co = norm_co `mkTcTransCo` mkTcSymCo final_co
+ ; when cache $
+ extendFlatCache tc tys (co, xi, fe_flavour fmode)
+ ; return (xi, update_co $ mkTcSymCo co) }
+ Nothing -> k }
{- Note [Reduce type family applications eagerly]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -960,9 +975,13 @@ then, rather than flattening to a skolem etc, we may as well just reduce
it on the spot to (Cons x t). This saves a lot of intermediate steps.
Examples that are helped are tests T9872, and T5321Fun.
-So just before we create the new skolem, we attempt to reduce it by one
-step (using matchFam). If that works, then recursively flatten the rhs,
-which may in turn do lots more reductions.
+Performance testing indicates that it's best to try this *twice*, once
+before flattening arguments and once after flattening arguments.
+Adding the extra reduction attempt before flattening arguments cut
+the allocation amounts for the T9872{a,b,c} tests by half. Testing
+also indicated that the early reduction should not use the flat-cache,
+but that the later reduction should. It's possible that with more
+examples, we might learn that these knobs should be set differently.
Once we've got a flat rhs, we extend the flatten-cache to record the
result. Doing so can save lots of work when the same redex shows up
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 45fc504..517d284 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -559,9 +559,10 @@ test('T9675',
test('T9872a',
[ only_ways(['normal']),
compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 5848657456, 5)
+ [(wordsize(64), 2680733672, 5)
# 2014-12-10 5521332656 Initally created
# 2014-12-16 5848657456 Flattener parameterized over roles
+ # 2014-12-18 2680733672 Reduce type families even more eagerly
]),
],
compile_fail,
@@ -570,9 +571,10 @@ test('T9872a',
test('T9872b',
[ only_ways(['normal']),
compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 6892251912, 5)
+ [(wordsize(64), 3480212048, 5)
# 2014-12-10 6483306280 Initally created
# 2014-12-16 6892251912 Flattener parameterized over roles
+ # 2014-12-18 3480212048 Reduce type families even more eagerly
]),
],
compile_fail,
@@ -580,9 +582,10 @@ test('T9872b',
test('T9872c',
[ only_ways(['normal']),
compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 5842024784, 5)
+ [(wordsize(64), 2963554096, 5)
# 2014-12-10 5495850096 Initally created
# 2014-12-16 5842024784 Flattener parameterized over roles
+ # 2014-12-18 2963554096 Reduce type families even more eagerly
]),
],
compile_fail,
@@ -590,8 +593,9 @@ test('T9872c',
test('T9872d',
[ only_ways(['normal']),
compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 796071864, 5)
- # 2014-12-19 796071864 Initally created
+ [(wordsize(64), 739189056, 5)
+ # 2014-12-18 796071864 Initally created
+ # 2014-12-18 739189056 Reduce type families even more eagerly
]),
],
compile,
More information about the ghc-commits
mailing list