[commit: ghc] master: utils: Lazily decode UTF8 strings (1cc82d3)
git at git.haskell.org
git at git.haskell.org
Tue Apr 18 00:35:35 UTC 2017
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/1cc82d38759c7a5f527ccc6cb514b8ba576cc3d1/ghc
>---------------------------------------------------------------
commit 1cc82d38759c7a5f527ccc6cb514b8ba576cc3d1
Author: Ben Gamari <bgamari.foss at gmail.com>
Date: Mon Apr 17 12:26:56 2017 -0400
utils: Lazily decode UTF8 strings
Reviewers: austin, hvr
Subscribers: rwbarton, thomie
GHC Trac Issues: #13527
Differential Revision: https://phabricator.haskell.org/D3442
>---------------------------------------------------------------
1cc82d38759c7a5f527ccc6cb514b8ba576cc3d1
compiler/utils/Encoding.hs | 34 +++++++++++++++++++++++-----------
compiler/utils/FastString.hs | 4 +---
compiler/utils/StringBuffer.hs | 4 +---
ghc/GHCi/UI.hs | 3 +--
4 files changed, 26 insertions(+), 19 deletions(-)
diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs
index 712de6c..f2b0979 100644
--- a/compiler/utils/Encoding.hs
+++ b/compiler/utils/Encoding.hs
@@ -17,7 +17,8 @@ module Encoding (
utf8PrevChar,
utf8CharStart,
utf8DecodeChar,
- utf8DecodeString,
+ utf8DecodeByteString,
+ utf8DecodeStringLazy,
utf8EncodeChar,
utf8EncodeString,
utf8EncodedLength,
@@ -33,9 +34,15 @@ module Encoding (
) where
import Foreign
+import Foreign.ForeignPtr.Unsafe
import Data.Char
import qualified Data.Char as Char
import Numeric
+import GHC.IO
+
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Internal as BS
+
import GHC.Exts
-- -----------------------------------------------------------------------------
@@ -115,19 +122,24 @@ utf8CharStart p = go p
then go (p `plusPtr` (-1))
else return p
-utf8DecodeString :: Ptr Word8 -> Int -> IO [Char]
-utf8DecodeString ptr len
- = unpack ptr
+utf8DecodeByteString :: ByteString -> [Char]
+utf8DecodeByteString (BS.PS ptr offset len)
+ = utf8DecodeStringLazy ptr offset len
+
+utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
+utf8DecodeStringLazy fptr offset len
+ = unsafeDupablePerformIO $ unpack start
where
- !end = ptr `plusPtr` len
+ !start = unsafeForeignPtrToPtr fptr `plusPtr` offset
+ !end = start `plusPtr` len
unpack p
- | p >= end = return []
- | otherwise =
- case utf8DecodeChar# (unPtr p) of
- (# c#, nBytes# #) -> do
- chs <- unpack (p `plusPtr#` nBytes#)
- return (C# c# : chs)
+ | p >= end = touchForeignPtr fptr >> return []
+ | otherwise =
+ case utf8DecodeChar# (unPtr p) of
+ (# c#, nBytes# #) -> do
+ rest <- unsafeDupableInterleaveIO $ unpack (p `plusPtr#` nBytes#)
+ return (C# c# : rest)
countUTF8Chars :: Ptr Word8 -> Int -> IO Int
countUTF8Chars ptr len = go ptr 0
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index 8d1bbb5..8653485 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -485,9 +485,7 @@ nullFS f = BS.null (fs_bs f)
-- | Unpacks and decodes the FastString
unpackFS :: FastString -> String
-unpackFS (FastString _ _ bs _) =
- inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
- utf8DecodeString (castPtr ptr) len
+unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> [Word8]
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index ec5184a..fcc3445 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -251,9 +251,7 @@ lexemeToString :: StringBuffer
-> String
lexemeToString _ 0 = ""
lexemeToString (StringBuffer buf _ cur) bytes =
- inlinePerformIO $
- withForeignPtr buf $ \ptr ->
- utf8DecodeString (ptr `plusPtr` cur) bytes
+ utf8DecodeStringLazy buf cur bytes
lexemeToFastString :: StringBuffer
-> Int -- ^ @n@, the number of bytes
diff --git a/ghc/GHCi/UI.hs b/ghc/GHCi/UI.hs
index deee24a..aeab85b 100644
--- a/ghc/GHCi/UI.hs
+++ b/ghc/GHCi/UI.hs
@@ -3525,8 +3525,7 @@ listAround pan do_highlight = do
prefixed = zipWith ($) highlighted bs_line_nos
output = BS.intercalate (BS.pack "\n") prefixed
- utf8Decoded <- liftIO $ BS.useAsCStringLen output
- $ \(p,n) -> utf8DecodeString (castPtr p) n
+ let utf8Decoded = utf8DecodeByteString output
liftIO $ putStrLn utf8Decoded
where
file = GHC.srcSpanFile pan
More information about the ghc-commits
mailing list