[commit: ghc] ghc-8.2: Caret diag.: Avoid decoding whole module if only specific line is needed (ec5a49f)
git at git.haskell.org
git at git.haskell.org
Fri Apr 21 16:31:20 UTC 2017
Repository : ssh://git@git.haskell.org/ghc
On branch : ghc-8.2
Link : http://ghc.haskell.org/trac/ghc/changeset/ec5a49fe34180da0adcd7956ad60a9f8ba04c775/ghc
>---------------------------------------------------------------
commit ec5a49fe34180da0adcd7956ad60a9f8ba04c775
Author: alexbiehl <alex.biehl at gmail.com>
Date: Mon Apr 17 12:51:10 2017 -0400
Caret diag.: Avoid decoding whole module if only specific line is needed
Before we were decoding the whole file to get to the desired line. This
patch introduces a fast function which searches a StringBuffer for the
desired line so we only need to utf8 decode a little portion.
This is especially interesting if we have big modules with lots of
warnings.
Reviewers: austin, bgamari, Rufflewind, trofi
Reviewed By: Rufflewind, trofi
Subscribers: rwbarton, thomie
Differential Revision: https://phabricator.haskell.org/D3440
(cherry picked from commit 065be6e9eb5114c5f0e3a20626ec93042ce47f13)
>---------------------------------------------------------------
ec5a49fe34180da0adcd7956ad60a9f8ba04c775
compiler/main/ErrUtils.hs | 27 +++++++++++++--------------
compiler/utils/StringBuffer.hs | 40 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 52 insertions(+), 15 deletions(-)
diff --git a/compiler/main/ErrUtils.hs b/compiler/main/ErrUtils.hs
index 180d18d..ded7085 100644
--- a/compiler/main/ErrUtils.hs
+++ b/compiler/main/ErrUtils.hs
@@ -64,7 +64,7 @@ import qualified PprColour as Col
import SrcLoc
import DynFlags
import FastString (unpackFS)
-import StringBuffer (hGetStringBuffer, len, lexemeToString)
+import StringBuffer (atLine, hGetStringBuffer, len, lexemeToString)
import Json
import System.Directory
@@ -231,27 +231,26 @@ getSeverityColour _ = const mempty
getCaretDiagnostic :: Severity -> SrcSpan -> IO MsgDoc
getCaretDiagnostic _ (UnhelpfulSpan _) = pure empty
getCaretDiagnostic severity (RealSrcSpan span) = do
- caretDiagnostic <$> getSrcLine (srcSpanFile span) (row - 1)
+ caretDiagnostic <$> getSrcLine (srcSpanFile span) row
where
-
- getSrcLine fn i = do
- (getLine i <$> readFile' (unpackFS fn))
- `catchIOError` \ _ ->
+ getSrcLine fn i =
+ getLine i (unpackFS fn)
+ `catchIOError` \_ ->
pure Nothing
- getLine i contents =
- case drop i (lines contents) of
- srcLine : _ -> Just srcLine
- [] -> Nothing
-
- readFile' fn = do
+ getLine i fn = do
-- StringBuffer has advantages over readFile:
-- (a) no lazy IO, otherwise IO exceptions may occur in pure code
-- (b) always UTF-8, rather than some system-dependent encoding
-- (Haskell source code must be UTF-8 anyway)
- buf <- hGetStringBuffer fn
- pure (fix <$> lexemeToString buf (len buf))
+ content <- hGetStringBuffer fn
+ case atLine i content of
+ Just at_line -> pure $
+ case lines (fix <$> lexemeToString at_line (len at_line)) of
+ srcLine : _ -> Just srcLine
+ _ -> Nothing
+ _ -> pure Nothing
-- allow user to visibly see that their code is incorrectly encoded
-- (StringBuffer.nextChar uses \0 to represent undecodable characters)
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index bac752a..2c16428 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -6,7 +6,7 @@
Buffers for scanning string input stored in external arrays.
-}
-{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-}
{-# OPTIONS_GHC -O #-}
-- We always optimise this, otherwise performance of a non-optimised
-- compiler is severely affected
@@ -32,6 +32,7 @@ module StringBuffer
stepOn,
offsetBytes,
byteDiff,
+ atLine,
-- * Conversion
lexemeToString,
@@ -239,6 +240,43 @@ byteDiff s1 s2 = cur s2 - cur s1
atEnd :: StringBuffer -> Bool
atEnd (StringBuffer _ l c) = l == c
+-- | Computes a 'StringBuffer' which points to the first character of the
+-- wanted line. Lines begin at 1.
+atLine :: Int -> StringBuffer -> Maybe StringBuffer
+atLine line sb@(StringBuffer buf len _) =
+ inlinePerformIO $
+ withForeignPtr buf $ \p -> do
+ p' <- skipToLine line len p
+ if p' == nullPtr
+ then return Nothing
+ else
+ let
+ delta = p' `minusPtr` p
+ in return $ Just (sb { cur = delta
+ , len = len - delta
+ })
+
+skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
+skipToLine !line !len !op0 = go 1 op0
+ where
+ !opend = op0 `plusPtr` len
+
+ go !i_line !op
+ | op >= opend = pure nullPtr
+ | i_line == line = pure op
+ | otherwise = do
+ w <- peek op :: IO Word8
+ case w of
+ 10 -> go (i_line + 1) (plusPtr op 1)
+ 13 -> do
+ -- this is safe because a 'StringBuffer' is
+ -- guaranteed to have 3 bytes sentinel values.
+ w' <- peek (plusPtr op 1) :: IO Word8
+ case w' of
+ 10 -> go (i_line + 1) (plusPtr op 2)
+ _ -> go (i_line + 1) (plusPtr op 1)
+ _ -> go i_line (plusPtr op 1)
+
-- -----------------------------------------------------------------------------
-- Conversion
More information about the ghc-commits
mailing list