[commit: ghc] master: Caret diag.: Avoid decoding whole module if only specific line is needed (065be6e)

git at git.haskell.org git at git.haskell.org
Tue Apr 18 00:35:43 UTC 2017


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/065be6e9eb5114c5f0e3a20626ec93042ce47f13/ghc

>---------------------------------------------------------------

commit 065be6e9eb5114c5f0e3a20626ec93042ce47f13
Author: alexbiehl <alex.biehl at gmail.com>
Date:   Mon Apr 17 12:51:10 2017 -0400

    Caret diag.: Avoid decoding whole module if only specific line is needed
    
    Before we were decoding the whole file to get to the desired line. This
    patch introduces a fast function which searches a StringBuffer for the
    desired line so we only need to utf8 decode a little portion.
    
    This is especially interesting if we have big modules with lots of
    warnings.
    
    Reviewers: austin, bgamari, Rufflewind, trofi
    
    Reviewed By: Rufflewind, trofi
    
    Subscribers: rwbarton, thomie
    
    Differential Revision: https://phabricator.haskell.org/D3440


>---------------------------------------------------------------

065be6e9eb5114c5f0e3a20626ec93042ce47f13
 compiler/main/ErrUtils.hs      | 27 +++++++++++++--------------
 compiler/utils/StringBuffer.hs | 40 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/compiler/main/ErrUtils.hs b/compiler/main/ErrUtils.hs
index 180d18d..ded7085 100644
--- a/compiler/main/ErrUtils.hs
+++ b/compiler/main/ErrUtils.hs
@@ -64,7 +64,7 @@ import qualified PprColour as Col
 import SrcLoc
 import DynFlags
 import FastString (unpackFS)
-import StringBuffer (hGetStringBuffer, len, lexemeToString)
+import StringBuffer (atLine, hGetStringBuffer, len, lexemeToString)
 import Json
 
 import System.Directory
@@ -231,27 +231,26 @@ getSeverityColour _          = const mempty
 getCaretDiagnostic :: Severity -> SrcSpan -> IO MsgDoc
 getCaretDiagnostic _ (UnhelpfulSpan _) = pure empty
 getCaretDiagnostic severity (RealSrcSpan span) = do
-  caretDiagnostic <$> getSrcLine (srcSpanFile span) (row - 1)
+  caretDiagnostic <$> getSrcLine (srcSpanFile span) row
 
   where
-
-    getSrcLine fn i = do
-      (getLine i <$> readFile' (unpackFS fn))
-        `catchIOError` \ _ ->
+    getSrcLine fn i =
+      getLine i (unpackFS fn)
+        `catchIOError` \_ ->
           pure Nothing
 
-    getLine i contents =
-      case drop i (lines contents) of
-        srcLine : _ -> Just srcLine
-        [] -> Nothing
-
-    readFile' fn = do
+    getLine i fn = do
       -- StringBuffer has advantages over readFile:
       -- (a) no lazy IO, otherwise IO exceptions may occur in pure code
       -- (b) always UTF-8, rather than some system-dependent encoding
       --     (Haskell source code must be UTF-8 anyway)
-      buf <- hGetStringBuffer fn
-      pure (fix <$> lexemeToString buf (len buf))
+      content <- hGetStringBuffer fn
+      case atLine i content of
+        Just at_line -> pure $
+          case lines (fix <$> lexemeToString at_line (len at_line)) of
+            srcLine : _ -> Just srcLine
+            _           -> Nothing
+        _ -> pure Nothing
 
     -- allow user to visibly see that their code is incorrectly encoded
     -- (StringBuffer.nextChar uses \0 to represent undecodable characters)
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index fcc3445..d75e537 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -6,7 +6,7 @@
 Buffers for scanning string input stored in external arrays.
 -}
 
-{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-}
 {-# OPTIONS_GHC -O #-}
 -- We always optimise this, otherwise performance of a non-optimised
 -- compiler is severely affected
@@ -32,6 +32,7 @@ module StringBuffer
         stepOn,
         offsetBytes,
         byteDiff,
+        atLine,
 
         -- * Conversion
         lexemeToString,
@@ -240,6 +241,43 @@ byteDiff s1 s2 = cur s2 - cur s1
 atEnd :: StringBuffer -> Bool
 atEnd (StringBuffer _ l c) = l == c
 
+-- | Computes a 'StringBuffer' which points to the first character of the
+-- wanted line. Lines begin at 1.
+atLine :: Int -> StringBuffer -> Maybe StringBuffer
+atLine line sb@(StringBuffer buf len _) =
+  inlinePerformIO $
+    withForeignPtr buf $ \p -> do
+      p' <- skipToLine line len p
+      if p' == nullPtr
+        then return Nothing
+        else
+          let
+            delta = p' `minusPtr` p
+          in return $ Just (sb { cur = delta
+                               , len = len - delta
+                               })
+
+skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
+skipToLine !line !len !op0 = go 1 op0
+  where
+    !opend = op0 `plusPtr` len
+
+    go !i_line !op
+      | op >= opend    = pure nullPtr
+      | i_line == line = pure op
+      | otherwise      = do
+          w <- peek op :: IO Word8
+          case w of
+            10 -> go (i_line + 1) (plusPtr op 1)
+            13 -> do
+              -- this is safe because a 'StringBuffer' is
+              -- guaranteed to have 3 bytes sentinel values.
+              w' <- peek (plusPtr op 1) :: IO Word8
+              case w' of
+                10 -> go (i_line + 1) (plusPtr op 2)
+                _  -> go (i_line + 1) (plusPtr op 1)
+            _  -> go i_line (plusPtr op 1)
+
 -- -----------------------------------------------------------------------------
 -- Conversion
 



More information about the ghc-commits mailing list