[commit: haddock] ghc-head, ghc-head1, ie_avails, master, wip/revert-ttg-2017-11-20, wip/ttg-2017-10-13, wip/ttg-2017-10-31, wip/ttg-2017-11-06, wip/ttg2-2017-11-10, wip/ttg3-2017-11-12, wip/ttg4-constraints-2017-11-13: Hyperlinker: Avoid linear lookup in enrichToken (#669) (740458a)

git at git.haskell.org git at git.haskell.org
Mon Nov 20 21:09:59 UTC 2017


Repository : ssh://git@git.haskell.org/haddock

On branches: ghc-head,ghc-head1,ie_avails,master,wip/revert-ttg-2017-11-20,wip/ttg-2017-10-13,wip/ttg-2017-10-31,wip/ttg-2017-11-06,wip/ttg2-2017-11-10,wip/ttg3-2017-11-12,wip/ttg4-constraints-2017-11-13
Link       : http://git.haskell.org/haddock.git/commitdiff/740458ac4d2acf197f2ef8dc94a66f9b160b9c3c

>---------------------------------------------------------------

commit 740458ac4d2acf197f2ef8dc94a66f9b160b9c3c
Author: Alexander Biehl <alexbiehl at gmail.com>
Date:   Sat Aug 19 20:35:27 2017 +0200

    Hyperlinker: Avoid linear lookup in enrichToken (#669)
    
    * Make Span strict in Position
    
    * Hyperlinker: Use a proper map to enrich tokens


>---------------------------------------------------------------

740458ac4d2acf197f2ef8dc94a66f9b160b9c3c
 .../src/Haddock/Backends/Hyperlinker/Ast.hs        | 80 ++++++++++++----------
 .../src/Haddock/Backends/Hyperlinker/Types.hs      | 14 ++--
 2 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
index 78beacf..9d27341 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE RankNTypes #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE RecordWildCards #-}
@@ -13,9 +14,13 @@ import Haddock.Backends.Hyperlinker.Types
 import qualified GHC
 
 import Control.Applicative
+import Control.Monad (guard)
 import Data.Data
+import qualified Data.Map.Strict as Map
 import Data.Maybe
 
+import Prelude hiding (span)
+
 everythingInRenamedSource :: (Alternative f, Data x)
   => (forall a. Data a => a -> f r) -> x -> f r
 everythingInRenamedSource f = Syb.everythingButType @GHC.Name (<|>) f
@@ -28,25 +33,45 @@ enrich src =
         , rtkDetails = enrichToken token detailsMap
         }
   where
-    detailsMap = concatMap ($ src)
-        [ variables
-        , types
-        , decls
-        , binds
-        , imports
-        ]
+    detailsMap =
+      mkDetailsMap (concatMap ($ src)
+                     [ variables
+                     , types
+                     , decls
+                     , binds
+                     , imports
+                     ])
+
+type LTokenDetails = [(GHC.SrcSpan, TokenDetails)]
 
 -- | A map containing association between source locations and "details" of
 -- this location.
 --
--- For the time being, it is just a list of pairs. However, looking up things
--- in such structure has linear complexity. We cannot use any hashmap-like
--- stuff because source locations are not ordered. In the future, this should
--- be replaced with interval tree data structure.
-type DetailsMap = [(GHC.SrcSpan, TokenDetails)]
+type DetailsMap = Map.Map Position (Span, TokenDetails)
+
+mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap
+mkDetailsMap xs =
+  Map.fromListWith select_details [ (start, (token_span, token_details))
+                                  | (ghc_span, token_details) <- xs
+                                  , Just !token_span <- [ghcSrcSpanToSpan ghc_span]
+                                  , let start = spStart token_span
+                                  ]
+  where
+    -- favour token details which appear earlier in the list
+    select_details _new old = old
 
 lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails
-lookupBySpan tspan = listToMaybe . map snd . filter (matches tspan . fst)
+lookupBySpan span details = do
+  (_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details
+  guard (tok_span `containsSpan` span )
+  return tok_details
+
+ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span
+ghcSrcSpanToSpan (GHC.RealSrcSpan span) =
+  Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span)
+             , spEnd   = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span)
+             })
+ghcSrcSpanToSpan _ = Nothing
 
 enrichToken :: Token -> DetailsMap -> Maybe TokenDetails
 enrichToken (Token typ _ spn) dm
@@ -54,7 +79,7 @@ enrichToken (Token typ _ spn) dm
 enrichToken _ _ = Nothing
 
 -- | Obtain details map for variables ("normally" used identifiers).
-variables :: GHC.RenamedSource -> DetailsMap
+variables :: GHC.RenamedSource -> LTokenDetails
 variables =
     everythingInRenamedSource (var `Syb.combine` rec)
   where
@@ -70,7 +95,7 @@ variables =
         _ -> empty
 
 -- | Obtain details map for types.
-types :: GHC.RenamedSource -> DetailsMap
+types :: GHC.RenamedSource -> LTokenDetails
 types = everythingInRenamedSource ty
   where
     ty term = case cast term of
@@ -84,7 +109,7 @@ types = everythingInRenamedSource ty
 -- ordinary assignment (in top-level declarations, let-expressions and where
 -- clauses).
 
-binds :: GHC.RenamedSource -> DetailsMap
+binds :: GHC.RenamedSource -> LTokenDetails
 binds = everythingInRenamedSource
       (fun `Syb.combine` pat `Syb.combine` tvar)
   where
@@ -112,7 +137,7 @@ binds = everythingInRenamedSource
         _ -> empty
 
 -- | Obtain details map for top-level declarations.
-decls :: GHC.RenamedSource -> DetailsMap
+decls :: GHC.RenamedSource -> LTokenDetails
 decls (group, _, _, _) = concatMap ($ group)
     [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds
     , everythingInRenamedSource fun . GHC.hs_valds
@@ -151,7 +176,7 @@ decls (group, _, _, _) = concatMap ($ group)
 --
 -- This map also includes type and variable details for items in export and
 -- import lists.
-imports :: GHC.RenamedSource -> DetailsMap
+imports :: GHC.RenamedSource -> LTokenDetails
 imports src@(_, imps, _, _) =
     everythingInRenamedSource ie src ++ mapMaybe (imp . GHC.unLoc) imps
   where
@@ -168,22 +193,3 @@ imports src@(_, imps, _, _) =
         let (GHC.L sspan name) = GHC.ideclName idecl
         in Just (sspan, RtkModule name)
     imp _ = Nothing
-
--- | Check whether token stream span matches GHC source span.
---
--- Currently, it is implemented as checking whether "our" span is contained
--- in GHC span. The reason for that is because GHC span are generally wider
--- and may spread across couple tokens. For example, @(>>=)@ consists of three
--- tokens: @(@, @>>=@, @)@, but GHC source span associated with @>>=@ variable
--- contains @(@ and @)@. Similarly, qualified identifiers like @Foo.Bar.quux@
--- are tokenized as @Foo@, @.@, @Bar@, @.@, @quux@ but GHC source span
--- associated with @quux@ contains all five elements.
-matches :: Span -> GHC.SrcSpan -> Bool
-matches tspan (GHC.RealSrcSpan aspan)
-    | saspan <= stspan && etspan <= easpan = True
-  where
-    stspan = (posRow . spStart $ tspan, posCol . spStart $ tspan)
-    etspan = (posRow . spEnd $ tspan, posCol . spEnd $ tspan)
-    saspan = (GHC.srcSpanStartLine aspan, GHC.srcSpanStartCol aspan)
-    easpan = (GHC.srcSpanEndLine aspan, GHC.srcSpanEndCol aspan)
-matches _ _ = False
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
index b27ec4d..d8ae89e 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
@@ -10,7 +10,7 @@ import qualified Data.Map as Map
 data Token = Token
     { tkType :: TokenType
     , tkValue :: String
-    , tkSpan :: Span
+    , tkSpan :: {-# UNPACK #-} !Span
     }
     deriving (Show)
 
@@ -18,14 +18,20 @@ data Position = Position
     { posRow :: !Int
     , posCol :: !Int
     }
-    deriving (Show)
+    deriving (Eq, Ord, Show)
 
 data Span = Span
-    { spStart :: Position
-    , spEnd :: Position
+    { spStart :: !Position
+    , spEnd   :: !Position
     }
     deriving (Show)
 
+-- | Tests whether the first span "contains" the other span, meaning
+-- that it covers at least as much source code. True where spans are equal.
+containsSpan :: Span -> Span -> Bool
+containsSpan s1 s2 =
+  spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2
+
 data TokenType
     = TkIdentifier
     | TkKeyword



More information about the ghc-commits mailing list