[commit: ghc] ghc-8.0: Read parentheses better (cca8cee)
git at git.haskell.org
git at git.haskell.org
Fri Nov 11 21:34:02 UTC 2016
Repository : ssh://git@git.haskell.org/ghc
On branch : ghc-8.0
Link : http://ghc.haskell.org/trac/ghc/changeset/cca8ceec97d6b8b031dec837825b97609dd7288f/ghc
>---------------------------------------------------------------
commit cca8ceec97d6b8b031dec837825b97609dd7288f
Author: David Feuer <david.feuer at gmail.com>
Date: Fri Nov 11 15:56:00 2016 -0500
Read parentheses better
Instead of pulling a token and looking for `'('` or `')'`,
just look for the character itself. This prevents us from
lexing every single item twice, once to see if it's a
left parenthesis and once to actually parse it.
Partially fixes #12665
Test Plan: Validate
Reviewers: austin, bgamari, hvr
Reviewed By: bgamari
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D2623
GHC Trac Issues: #12665
>---------------------------------------------------------------
cca8ceec97d6b8b031dec837825b97609dd7288f
libraries/base/GHC/Read.hs | 44 +++++++++++++++++++++++++++++++----
testsuite/tests/perf/compiler/all.T | 3 ++-
testsuite/tests/perf/should_run/all.T | 3 ++-
3 files changed, 44 insertions(+), 6 deletions(-)
diff --git a/libraries/base/GHC/Read.hs b/libraries/base/GHC/Read.hs
index 5681899..52bed0e 100644
--- a/libraries/base/GHC/Read.hs
+++ b/libraries/base/GHC/Read.hs
@@ -262,13 +262,49 @@ lexP = lift L.lex
expectP :: L.Lexeme -> ReadPrec ()
expectP lexeme = lift (L.expect lexeme)
+expectCharP :: Char -> ReadPrec a -> ReadPrec a
+expectCharP c a = do
+ q <- get
+ if q == c
+ then a
+ else pfail
+{-# INLINE expectCharP #-}
+
+-- A version of skipSpaces that takes the next
+-- parser as an argument. That is,
+--
+-- skipSpacesThenP m = lift skipSpaces >> m
+--
+-- Since skipSpaces is recursive, it appears that we get
+-- cleaner code by providing the continuation explicitly.
+-- In particular, we avoid passing an extra continuation
+-- of the form
+--
+-- \ () -> ...
+skipSpacesThenP :: ReadPrec a -> ReadPrec a
+skipSpacesThenP m =
+ do s <- look
+ skip s
+ where
+ skip (c:s) | isSpace c = get *> skip s
+ skip _ = m
+
paren :: ReadPrec a -> ReadPrec a
-- ^ @(paren p)@ parses \"(P0)\"
-- where @p@ parses \"P0\" in precedence context zero
-paren p = do expectP (L.Punc "(")
- x <- reset p
- expectP (L.Punc ")")
- return x
+paren p = skipSpacesThenP (paren' p)
+
+-- We try very hard to make paren' efficient, because parens is ubiquitous.
+-- Earlier code used `expectP` to look for the parentheses. The problem is that
+-- this lexes a (potentially long) token just to check if it's a parenthesis or
+-- not. So the first token of pretty much every value would be fully lexed
+-- twice. Now, we look for the '(' by hand instead. Since there's no reason not
+-- to, and it allows for faster failure, we do the same for ')'. This strategy
+-- works particularly well here because neither '(' nor ')' can begin any other
+-- lexeme.
+paren' :: ReadPrec a -> ReadPrec a
+paren' p = expectCharP '(' $ reset p >>= \x ->
+ skipSpacesThenP (expectCharP ')' (pure x))
parens :: ReadPrec a -> ReadPrec a
-- ^ @(parens p)@ parses \"P\", \"(P0)\", \"((P0))\", etc,
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 0bd70c6..89d9316 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -280,7 +280,7 @@ test('T3064',
# 2014-01-22: 23 (x86/Linux)
# 2014-12-22: 23 (x86/Linux) death to silent superclasses
# 2015-07-11 28 (x86/Linux, 64-bit machine) use +RTS -G1
- (wordsize(64), 54, 20)]),
+ (wordsize(64), 66, 20)]),
# (amd64/Linux): 18
# (amd64/Linux) 2012-02-07: 26
# (amd64/Linux) 2013-02-12: 23; increased range to 10%
@@ -294,6 +294,7 @@ test('T3064',
# (amd64/Linux) 2014-12-22: 27: death to silent superclasses
# (amd64/Linux) 2015-01-22: 32: Varies from 30 to 34, at least here.
# (amd64/Linux) 2015-06-03: 54: use +RTS -G1
+ # (amd64/Linux) 2016-11-03: 66: Parenthesis reading for Read
compiler_stats_num_field('bytes allocated',
[(wordsize(32), 153261024, 10),
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index e901f56..2234bbf 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -83,9 +83,10 @@ test('T876',
[stats_num_field('bytes allocated',
[(platform('x86_64-unknown-mingw32'), 71904, 5),
# 2015-04-03: 71904 (amd64/Windows, unknown cause)
- (wordsize(64), 63216 , 5),
+ (wordsize(64), 58056 , 5),
# 2013-02-14: 1263712 (x86_64/Linux)
# 2014-02-10: 63216 (x86_64/Linux), call arity analysis
+ # 2016-11-03: 58056 (x86_64/Linux), parenthesis reading for Read
(wordsize(32), 53156, 5) ]),
# some date: 663712 (Windows, 64-bit machine)
More information about the ghc-commits
mailing list