[commit: ghc] master: utils: Lazily decode UTF8 strings (1cc82d3)

git at git.haskell.org git at git.haskell.org
Tue Apr 18 00:35:35 UTC 2017


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/1cc82d38759c7a5f527ccc6cb514b8ba576cc3d1/ghc

>---------------------------------------------------------------

commit 1cc82d38759c7a5f527ccc6cb514b8ba576cc3d1
Author: Ben Gamari <bgamari.foss at gmail.com>
Date:   Mon Apr 17 12:26:56 2017 -0400

    utils: Lazily decode UTF8 strings
    
    Reviewers: austin, hvr
    
    Subscribers: rwbarton, thomie
    
    GHC Trac Issues: #13527
    
    Differential Revision: https://phabricator.haskell.org/D3442


>---------------------------------------------------------------

1cc82d38759c7a5f527ccc6cb514b8ba576cc3d1
 compiler/utils/Encoding.hs     | 34 +++++++++++++++++++++++-----------
 compiler/utils/FastString.hs   |  4 +---
 compiler/utils/StringBuffer.hs |  4 +---
 ghc/GHCi/UI.hs                 |  3 +--
 4 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs
index 712de6c..f2b0979 100644
--- a/compiler/utils/Encoding.hs
+++ b/compiler/utils/Encoding.hs
@@ -17,7 +17,8 @@ module Encoding (
         utf8PrevChar,
         utf8CharStart,
         utf8DecodeChar,
-        utf8DecodeString,
+        utf8DecodeByteString,
+        utf8DecodeStringLazy,
         utf8EncodeChar,
         utf8EncodeString,
         utf8EncodedLength,
@@ -33,9 +34,15 @@ module Encoding (
   ) where
 
 import Foreign
+import Foreign.ForeignPtr.Unsafe
 import Data.Char
 import qualified Data.Char as Char
 import Numeric
+import GHC.IO
+
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Internal as BS
+
 import GHC.Exts
 
 -- -----------------------------------------------------------------------------
@@ -115,19 +122,24 @@ utf8CharStart p = go p
                         then go (p `plusPtr` (-1))
                         else return p
 
-utf8DecodeString :: Ptr Word8 -> Int -> IO [Char]
-utf8DecodeString ptr len
-  = unpack ptr
+utf8DecodeByteString :: ByteString -> [Char]
+utf8DecodeByteString (BS.PS ptr offset len)
+  = utf8DecodeStringLazy ptr offset len
+
+utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
+utf8DecodeStringLazy fptr offset len
+  = unsafeDupablePerformIO $ unpack start
   where
-    !end = ptr `plusPtr` len
+    !start = unsafeForeignPtrToPtr fptr `plusPtr` offset
+    !end = start `plusPtr` len
 
     unpack p
-        | p >= end = return []
-        | otherwise  =
-        case utf8DecodeChar# (unPtr p) of
-           (# c#, nBytes# #) -> do
-                chs <- unpack (p `plusPtr#` nBytes#)
-                return (C# c# : chs)
+        | p >= end  = touchForeignPtr fptr >> return []
+        | otherwise =
+            case utf8DecodeChar# (unPtr p) of
+                (# c#, nBytes# #) -> do
+                  rest <- unsafeDupableInterleaveIO $ unpack (p `plusPtr#` nBytes#)
+                  return (C# c# : rest)
 
 countUTF8Chars :: Ptr Word8 -> Int -> IO Int
 countUTF8Chars ptr len = go ptr 0
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index 8d1bbb5..8653485 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -485,9 +485,7 @@ nullFS f = BS.null (fs_bs f)
 
 -- | Unpacks and decodes the FastString
 unpackFS :: FastString -> String
-unpackFS (FastString _ _ bs _) =
-  inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
-        utf8DecodeString (castPtr ptr) len
+unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
 
 -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
 bytesFS :: FastString -> [Word8]
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index ec5184a..fcc3445 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -251,9 +251,7 @@ lexemeToString :: StringBuffer
                -> String
 lexemeToString _ 0 = ""
 lexemeToString (StringBuffer buf _ cur) bytes =
-  inlinePerformIO $
-    withForeignPtr buf $ \ptr ->
-      utf8DecodeString (ptr `plusPtr` cur) bytes
+  utf8DecodeStringLazy buf cur bytes
 
 lexemeToFastString :: StringBuffer
                    -> Int               -- ^ @n@, the number of bytes
diff --git a/ghc/GHCi/UI.hs b/ghc/GHCi/UI.hs
index deee24a..aeab85b 100644
--- a/ghc/GHCi/UI.hs
+++ b/ghc/GHCi/UI.hs
@@ -3525,8 +3525,7 @@ listAround pan do_highlight = do
           prefixed = zipWith ($) highlighted bs_line_nos
           output   = BS.intercalate (BS.pack "\n") prefixed
 
-      utf8Decoded <- liftIO $ BS.useAsCStringLen output
-                        $ \(p,n) -> utf8DecodeString (castPtr p) n
+      let utf8Decoded = utf8DecodeByteString output
       liftIO $ putStrLn utf8Decoded
   where
         file  = GHC.srcSpanFile pan



More information about the ghc-commits mailing list