[commit: ghc] master: Factor out readField (#14364) (dbd81f7)

git at git.haskell.org git at git.haskell.org
Thu Oct 26 00:03:14 UTC 2017


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/dbd81f7e86514498218572b9d978373b1699cc5b/ghc

>---------------------------------------------------------------

commit dbd81f7e86514498218572b9d978373b1699cc5b
Author: Tobias Dammers <tdammers at gmail.com>
Date:   Wed Oct 25 19:09:03 2017 -0400

    Factor out readField (#14364)
    
    Improves compiler performance of deriving Read instances, as suggested
    in the issue.
    
    Additionally, we introduce `readSymField`, a companion to `readField`
    that parses symbol-type fields (where the field name is a symbol, e.g.
    `(#)`, rather than an alphanumeric identifier. The decision between
    these two functions is made a compile time, because we already know
    which one we need based on the field name.
    
    Reviewers: austin, hvr, bgamari, RyanGlScott
    
    Reviewed By: bgamari
    
    Subscribers: RyanGlScott, rwbarton, thomie
    
    Differential Revision: https://phabricator.haskell.org/D4108


>---------------------------------------------------------------

dbd81f7e86514498218572b9d978373b1699cc5b
 compiler/prelude/PrelNames.hs       |  4 ++++
 compiler/typecheck/TcGenDeriv.hs    | 41 ++++++++++++++++++---------------
 libraries/base/GHC/Read.hs          | 46 +++++++++++++++++++++++++++++++++++++
 testsuite/tests/perf/compiler/all.T |  3 ++-
 4 files changed, 75 insertions(+), 19 deletions(-)

diff --git a/compiler/prelude/PrelNames.hs b/compiler/prelude/PrelNames.hs
index 760aea5..ae695d4 100644
--- a/compiler/prelude/PrelNames.hs
+++ b/compiler/prelude/PrelNames.hs
@@ -742,6 +742,10 @@ choose_RDR              = varQual_RDR gHC_READ (fsLit "choose")
 lexP_RDR                = varQual_RDR gHC_READ (fsLit "lexP")
 expectP_RDR             = varQual_RDR gHC_READ (fsLit "expectP")
 
+readField_RDR, readSymField_RDR :: RdrName
+readField_RDR           = varQual_RDR gHC_READ (fsLit "readField")
+readSymField_RDR        = varQual_RDR gHC_READ (fsLit "readSymField")
+
 punc_RDR, ident_RDR, symbol_RDR :: RdrName
 punc_RDR                = dataQual_RDR lEX (fsLit "Punc")
 ident_RDR               = dataQual_RDR lEX (fsLit "Ident")
diff --git a/compiler/typecheck/TcGenDeriv.hs b/compiler/typecheck/TcGenDeriv.hs
index 9e27ad5..70ceb30 100644
--- a/compiler/typecheck/TcGenDeriv.hs
+++ b/compiler/typecheck/TcGenDeriv.hs
@@ -900,9 +900,7 @@ instance Read T where
         -- Record construction binds even more tightly than application
         do expectP (Ident "T1")
            expectP (Punc '{')
-           expectP (Ident "f1")
-           expectP (Punc '=')
-           x          <- ReadP.reset Read.readPrec
+           x          <- Read.readField "f1" (ReadP.reset readPrec)
            expectP (Punc '}')
            return (T1 { f1 = x }))
       +++
@@ -1068,21 +1066,28 @@ gen_Read_binds get_fixity loc tycon
     read_arg a ty = ASSERT( not (isUnliftedType ty) )
                     noLoc (mkBindStmt (nlVarPat a) (nlHsVarApps step_RDR [readPrec_RDR]))
 
-    read_field lbl a = read_lbl lbl ++
-                       [read_punc "=",
-                        noLoc (mkBindStmt (nlVarPat a) (nlHsVarApps reset_RDR [readPrec_RDR]))]
-
-        -- When reading field labels we might encounter
-        --      a  = 3
-        --      _a = 3
-        -- or   (#) = 4
-        -- Note the parens!
-    read_lbl lbl | isSym lbl_str
-                 = [read_punc "(", symbol_pat lbl_str, read_punc ")"]
-                 | otherwise
-                 = ident_h_pat lbl_str
-                 where
-                   lbl_str = unpackFS lbl
+    -- When reading field labels we might encounter
+    --      a  = 3
+    --      _a = 3
+    -- or   (#) = 4
+    -- Note the parens!
+    read_field lbl a =
+        [noLoc
+          (mkBindStmt
+            (nlVarPat a)
+            (nlHsApps
+              read_field
+              [ nlHsLit (mkHsString lbl_str)
+              , nlHsVarApps reset_RDR [readPrec_RDR]
+              ]
+            )
+          )
+        ]
+        where
+          lbl_str = unpackFS lbl
+          read_field
+              | isSym lbl_str = readSymField_RDR
+              | otherwise = readField_RDR
 
 {-
 ************************************************************************
diff --git a/libraries/base/GHC/Read.hs b/libraries/base/GHC/Read.hs
index ad29cc5..2d8ee3d 100644
--- a/libraries/base/GHC/Read.hs
+++ b/libraries/base/GHC/Read.hs
@@ -36,6 +36,8 @@ module GHC.Read
   , choose
   , readListDefault, readListPrecDefault
   , readNumber
+  , readField
+  , readSymField
 
   -- Temporary
   , readParen
@@ -359,6 +361,50 @@ choose sps = foldr ((+++) . try_one) pfail sps
                                     L.Symbol s' | s==s' -> p
                                     _other              -> pfail }
 
+-- See Note [Why readField]
+
+-- | 'Read' parser for a record field, of the form @fieldName=value at . The
+-- @fieldName@ must be an alphanumeric identifier; for symbols (operator-style)
+-- field names, e.g. @(#)@, use 'readSymField'). The second argument is a
+-- parser for the field value.
+readField :: String -> ReadPrec a -> ReadPrec a
+readField fieldName readVal = do
+        expectP (L.Ident fieldName)
+        expectP (L.Punc "=")
+        readVal
+{-# NOINLINE readField #-}
+
+-- See Note [Why readField]
+
+-- | 'Read' parser for a symbol record field, of the form @(###)=value@ (where
+-- @###@ is the field name). The field name must be a symbol (operator-style),
+-- e.g. @(#)@. For regular (alphanumeric) field names, use 'readField'. The
+-- second argument is a parser for the field value.
+readSymField :: String -> ReadPrec a -> ReadPrec a
+readSymField fieldName readVal = do
+        expectP (L.Punc "(")
+        expectP (L.Symbol fieldName)
+        expectP (L.Punc ")")
+        expectP (L.Punc "=")
+        readVal
+{-# NOINLINE readSymField #-}
+
+
+-- Note [Why readField]
+--
+-- Previousy, the code for automatically deriving Read instance (in
+-- typecheck/TcGenDeriv.hs) would generate inline code for parsing fields;
+-- this, however, turned out to produce massive amounts of intermediate code,
+-- and produced a considerable performance hit in the code generator.
+-- Since Read instances are not generally supposed to be perfomance critical,
+-- the readField and readSymField functions have been factored out, and the
+-- code generator now just generates calls rather than manually inlining the
+-- parsers. For large record types (e.g. 500 fields), this produces a
+-- significant performance boost.
+--
+-- See also Trac #14364.
+
+
 --------------------------------------------------------------
 -- Simple instances of Read
 --------------------------------------------------------------
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 41b2af8..aa53d98 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -197,7 +197,7 @@ test('T3294',
            # 2013-11-13: 1478325844  (x86/Windows, 64bit machine)
            # 2014-01-12: 1565185140  (x86/Linux)
            # 2013-04-04: 1377050640  (x86/Windows, 64bit machine)
-           (wordsize(64), 2253557280, 5)]),
+           (wordsize(64), 1858491504, 5)]),
             # old:        1357587088 (amd64/Linux)
             # 29/08/2012: 2961778696 (amd64/Linux)
             # (^ increase due to new codegen, see #7198)
@@ -212,6 +212,7 @@ test('T3294',
             # 2016-07-11: 2739731144 (Windows) after fix for #12227 (ignoring)
             # 2017-02-17: 2758641264 (amd64/Linux) (Type indexed Typeable)
             # 2017-05-14: 2253557280 (amd64/Linux) Two-pass CmmLayoutStack
+            # 2017-10-24: 1858491504 (amd64/Linux) Improved linear regAlloc
       conf_3294,
 
       # Use `+RTS -G1` for more stable residency measurements. Note [residency].



More information about the ghc-commits mailing list