[commit: ghc] wip/T16212-fixed, wip/sgraf-no-exnstr: Include type info for only some exprs in HIE files (5ed48d2)

git at git.haskell.org git at git.haskell.org
Fri Feb 1 05:55:02 UTC 2019


Repository : ssh://git@git.haskell.org/ghc

On branches: wip/T16212-fixed,wip/sgraf-no-exnstr
Link       : http://ghc.haskell.org/trac/ghc/changeset/5ed48d25decc9dec29659482644b136cff91606e/ghc

>---------------------------------------------------------------

commit 5ed48d25decc9dec29659482644b136cff91606e
Author: Alec Theriault <alec.theriault at gmail.com>
Date:   Thu Jan 24 17:33:52 2019 -0800

    Include type info for only some exprs in HIE files
    
    This commit relinquishes some some type information in `.hie` files in
    exchange for better performance. See #16233 for more on this.
    
    Using `.hie` files to generate hyperlinked sources is a crucial milestone
    towards Hi Haddock (the initiative to move Haddock to work over `.hi`
    files and embed docstrings in those). Unfortunately, even after much
    optimization on the Haddock side, the `.hie` based solution is still
    considerably slower and more memory hungry than the existing implementation
    - and the @.hie@ code is to blame.
    
    This changes `.hie` file generation to track type information for only
    a limited subset of expressions (specifically, those that might eventually
    turn into hyperlinks in the Haddock's hyperlinker backend).


>---------------------------------------------------------------

5ed48d25decc9dec29659482644b136cff91606e
 compiler/hieFile/HieAst.hs                | 71 +++++++++++++++++++++++++++----
 docs/users_guide/separate_compilation.rst |  5 +++
 2 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/compiler/hieFile/HieAst.hs b/compiler/hieFile/HieAst.hs
index 35440f0..b6b5f0c 100644
--- a/compiler/hieFile/HieAst.hs
+++ b/compiler/hieFile/HieAst.hs
@@ -30,8 +30,9 @@ import MonadUtils                 ( concatMapM, liftIO )
 import Name                       ( Name, nameSrcSpan, setNameLoc )
 import NameEnv                    ( NameEnv, emptyNameEnv, extendNameEnv, lookupNameEnv )
 import SrcLoc
-import TcHsSyn                    ( hsPatType )
-import Type                       ( Type )
+import TcHsSyn                    ( hsLitType, hsPatType )
+import Type                       ( mkFunTys, Type )
+import TysWiredIn                 ( mkListTy, mkSumTy )
 import Var                        ( Id, Var, setVarName, varName, varType )
 
 import HieTypes
@@ -435,13 +436,67 @@ instance HasType (LPat GhcTc) where
 instance HasType (LHsExpr GhcRn) where
   getTypeNode (L spn e) = makeNode e spn
 
+-- | This instance tries to construct 'HieAST' nodes which include the type of
+-- the expression. It is not yet possible to do this efficiently for all
+-- expression forms, so we skip filling in the type for those inputs.
+--
+-- 'HsApp', for example, doesn't have any type information available directly on
+-- the node. Our next recourse would be to desugar it into a 'CoreExpr' then
+-- query the type of that. Yet both the desugaring call and the type query both
+-- involve recursive calls to the function and argument! This is particularly
+-- problematic when you realize that the HIE traversal will eventually visit
+-- those nodes too and ask for their types again.
+--
+-- Since the above is quite costly, we just skip cases where computing the
+-- expression's type is going to be expensive.
+--
+-- See #16233
 instance HasType (LHsExpr GhcTc) where
-  getTypeNode e@(L spn e') = lift $ do
-    hs_env <- Hsc $ \e w -> return (e,w)
-    (_,mbe) <- liftIO $ deSugarExpr hs_env e
-    case mbe of
-      Just te -> makeTypeNode e' spn (exprType te)
-      Nothing -> makeNode e' spn
+  getTypeNode e@(L spn e') = lift $
+    -- Some expression forms have their type immediately available
+    let tyOpt = case e' of
+          HsLit _ l -> Just (hsLitType l)
+          HsOverLit _ o -> Just (overLitType o)
+
+          HsLam     _ (MG { mg_ext = groupTy }) -> Just (matchGroupType groupTy)
+          HsLamCase _ (MG { mg_ext = groupTy }) -> Just (matchGroupType groupTy)
+          HsCase _  _ (MG { mg_ext = groupTy }) -> Just (mg_res_ty groupTy)
+
+          ExplicitList  ty _ _   -> Just (mkListTy ty)
+          ExplicitSum   ty _ _ _ -> Just (mkSumTy ty)
+          HsDo          ty _ _   -> Just ty
+          HsMultiIf     ty _     -> Just ty
+
+          _ -> Nothing
+
+    in
+    case tyOpt of
+      _ | skipDesugaring e' -> fallback
+        | otherwise -> do
+            hs_env <- Hsc $ \e w -> return (e,w)
+            (_,mbe) <- liftIO $ deSugarExpr hs_env e
+            maybe fallback (makeTypeNode e' spn . exprType) mbe
+    where
+      fallback = makeNode e' spn
+
+      matchGroupType :: MatchGroupTc -> Type
+      matchGroupType (MatchGroupTc args res) = mkFunTys args res
+
+      -- | Skip desugaring of these expressions for performance reasons.
+      --
+      -- See impact on Haddock output (esp. missing type annotations or links)
+      -- before marking more things here as 'False'. See impact on Haddock
+      -- performance before marking more things as 'True'.
+      skipDesugaring :: HsExpr a -> Bool
+      skipDesugaring e = case e of
+        HsVar{}        -> False
+        HsUnboundVar{} -> False
+        HsConLikeOut{} -> False
+        HsRecFld{}     -> False
+        HsOverLabel{}  -> False
+        HsIPVar{}      -> False
+        HsWrap{}       -> False
+        _              -> True
 
 instance ( ToHie (Context (Located (IdP a)))
          , ToHie (MatchGroup a (LHsExpr a))
diff --git a/docs/users_guide/separate_compilation.rst b/docs/users_guide/separate_compilation.rst
index 338c438..8c997f0 100644
--- a/docs/users_guide/separate_compilation.rst
+++ b/docs/users_guide/separate_compilation.rst
@@ -588,6 +588,11 @@ The GHC API exposes functions for reading and writing these files.
     that are being written out. These include testing things properties such as
     variables not occuring outside of their expected scopes.
 
+The format in which GHC currently stores its typechecked AST, makes it costly
+to collect the types for some expressions nodes. For the sake of performance,
+GHC currently chooses to skip over these, so not all expression nodes should be
+expected to have type information on them. See :ghc-ticket:`16233` for more.
+
 .. _recomp:
 
 The recompilation checker



More information about the ghc-commits mailing list