[commit: ghc] ghc-8.0: Read parentheses better (cca8cee)

git at git.haskell.org git at git.haskell.org
Fri Nov 11 21:34:02 UTC 2016


Repository : ssh://git@git.haskell.org/ghc

On branch  : ghc-8.0
Link       : http://ghc.haskell.org/trac/ghc/changeset/cca8ceec97d6b8b031dec837825b97609dd7288f/ghc

>---------------------------------------------------------------

commit cca8ceec97d6b8b031dec837825b97609dd7288f
Author: David Feuer <david.feuer at gmail.com>
Date:   Fri Nov 11 15:56:00 2016 -0500

    Read parentheses better
    
    Instead of pulling a token and looking for `'('` or `')'`,
    just look for the character itself. This prevents us from
    lexing every single item twice, once to see if it's a
    left parenthesis and once to actually parse it.
    
    Partially fixes #12665
    
    Test Plan: Validate
    
    Reviewers: austin, bgamari, hvr
    
    Reviewed By: bgamari
    
    Subscribers: thomie
    
    Differential Revision: https://phabricator.haskell.org/D2623
    
    GHC Trac Issues: #12665


>---------------------------------------------------------------

cca8ceec97d6b8b031dec837825b97609dd7288f
 libraries/base/GHC/Read.hs            | 44 +++++++++++++++++++++++++++++++----
 testsuite/tests/perf/compiler/all.T   |  3 ++-
 testsuite/tests/perf/should_run/all.T |  3 ++-
 3 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/libraries/base/GHC/Read.hs b/libraries/base/GHC/Read.hs
index 5681899..52bed0e 100644
--- a/libraries/base/GHC/Read.hs
+++ b/libraries/base/GHC/Read.hs
@@ -262,13 +262,49 @@ lexP = lift L.lex
 expectP :: L.Lexeme -> ReadPrec ()
 expectP lexeme = lift (L.expect lexeme)
 
+expectCharP :: Char -> ReadPrec a -> ReadPrec a
+expectCharP c a = do
+  q <- get
+  if q == c
+    then a
+    else pfail
+{-# INLINE expectCharP #-}
+
+-- A version of skipSpaces that takes the next
+-- parser as an argument. That is,
+--
+-- skipSpacesThenP m = lift skipSpaces >> m
+--
+-- Since skipSpaces is recursive, it appears that we get
+-- cleaner code by providing the continuation explicitly.
+-- In particular, we avoid passing an extra continuation
+-- of the form
+--
+-- \ () -> ...
+skipSpacesThenP :: ReadPrec a -> ReadPrec a
+skipSpacesThenP m =
+  do s <- look
+     skip s
+ where
+   skip (c:s) | isSpace c = get *> skip s
+   skip _ = m
+
 paren :: ReadPrec a -> ReadPrec a
 -- ^ @(paren p)@ parses \"(P0)\"
 --      where @p@ parses \"P0\" in precedence context zero
-paren p = do expectP (L.Punc "(")
-             x <- reset p
-             expectP (L.Punc ")")
-             return x
+paren p = skipSpacesThenP (paren' p)
+
+-- We try very hard to make paren' efficient, because parens is ubiquitous.
+-- Earlier code used `expectP` to look for the parentheses. The problem is that
+-- this lexes a (potentially long) token just to check if it's a parenthesis or
+-- not. So the first token of pretty much every value would be fully lexed
+-- twice. Now, we look for the '(' by hand instead. Since there's no reason not
+-- to, and it allows for faster failure, we do the same for ')'. This strategy
+-- works particularly well here because neither '(' nor ')' can begin any other
+-- lexeme.
+paren' :: ReadPrec a -> ReadPrec a
+paren' p = expectCharP '(' $ reset p >>= \x ->
+              skipSpacesThenP (expectCharP ')' (pure x))
 
 parens :: ReadPrec a -> ReadPrec a
 -- ^ @(parens p)@ parses \"P\", \"(P0)\", \"((P0))\", etc,
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 0bd70c6..89d9316 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -280,7 +280,7 @@ test('T3064',
             # 2014-01-22:     23 (x86/Linux)
             # 2014-12-22:     23 (x86/Linux) death to silent superclasses
             # 2015-07-11      28 (x86/Linux, 64-bit machine) use +RTS -G1
-           (wordsize(64), 54, 20)]),
+           (wordsize(64), 66, 20)]),
             # (amd64/Linux):            18
             # (amd64/Linux) 2012-02-07: 26
             # (amd64/Linux) 2013-02-12: 23; increased range to 10%
@@ -294,6 +294,7 @@ test('T3064',
             # (amd64/Linux) 2014-12-22: 27: death to silent superclasses
             # (amd64/Linux) 2015-01-22: 32: Varies from 30 to 34, at least here.
             # (amd64/Linux) 2015-06-03: 54: use +RTS -G1
+            # (amd64/Linux) 2016-11-03: 66: Parenthesis reading for Read
 
       compiler_stats_num_field('bytes allocated',
           [(wordsize(32), 153261024, 10),
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index e901f56..2234bbf 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -83,9 +83,10 @@ test('T876',
      [stats_num_field('bytes allocated',
           [(platform('x86_64-unknown-mingw32'), 71904, 5),
               # 2015-04-03: 71904 (amd64/Windows, unknown cause)
-           (wordsize(64), 63216 , 5),
+           (wordsize(64), 58056 , 5),
               # 2013-02-14: 1263712 (x86_64/Linux)
               # 2014-02-10:   63216 (x86_64/Linux), call arity analysis
+              # 2016-11-03:   58056 (x86_64/Linux), parenthesis reading for Read
 
            (wordsize(32), 53156, 5) ]),
               # some date:  663712  (Windows, 64-bit machine)



More information about the ghc-commits mailing list