[commit: ghc] wip/T12357-unpack: FastString: Lazily unpack strings (c003779)

git at git.haskell.org git at git.haskell.org
Tue Jul 5 08:00:14 UTC 2016


Repository : ssh://git@git.haskell.org/ghc

On branch  : wip/T12357-unpack
Link       : http://ghc.haskell.org/trac/ghc/changeset/c0037797a5e0e8a42b70cdd077d20b22b2e19d7e/ghc

>---------------------------------------------------------------

commit c0037797a5e0e8a42b70cdd077d20b22b2e19d7e
Author: Ben Gamari <ben at smart-cactus.org>
Date:   Mon Jul 4 19:58:06 2016 -0400

    FastString: Lazily unpack strings
    
    Previously we would eagerly build a [Char] of the full string contents,
    resulting in unnecessarily high allocations in cases where only a short
    bit at the beginning of the string is called for. See #12357 for
    motivation.


>---------------------------------------------------------------

c0037797a5e0e8a42b70cdd077d20b22b2e19d7e
 compiler/utils/Encoding.hs   | 17 +++++++++++++++++
 compiler/utils/FastString.hs |  4 +---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs
index ae727d2..d959671 100644
--- a/compiler/utils/Encoding.hs
+++ b/compiler/utils/Encoding.hs
@@ -18,6 +18,7 @@ module Encoding (
         utf8CharStart,
         utf8DecodeChar,
         utf8DecodeString,
+        utf8DecodeStringLazy,
         utf8EncodeChar,
         utf8EncodeString,
         utf8EncodedLength,
@@ -32,6 +33,9 @@ import Foreign
 import Data.Char
 import Numeric
 import ExtsCompat46
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Unsafe   as BS
+import System.IO.Unsafe ( unsafeInterleaveIO )
 
 -- -----------------------------------------------------------------------------
 -- UTF-8
@@ -110,6 +114,19 @@ utf8CharStart p = go p
                         then go (p `plusPtr` (-1))
                         else return p
 
+utf8DecodeStringLazy :: BS.ByteString -> IO [Char]
+utf8DecodeStringLazy bs
+  = unpack bs
+  where
+    unpack bs
+        | BS.null bs = return []
+        | otherwise  =
+          BS.unsafeUseAsCString bs $ \ptr ->
+            case utf8DecodeChar (castPtr ptr) of
+              (c, nBytes) -> do
+                chs <- unsafeInterleaveIO $ unpack (BS.drop nBytes bs)
+                return (c : chs)
+
 utf8DecodeString :: Ptr Word8 -> Int -> IO [Char]
 utf8DecodeString ptr len
   = unpack ptr
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index 40c3882..21cbfeb 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -482,9 +482,7 @@ nullFS f = BS.null (fs_bs f)
 
 -- | Unpacks and decodes the FastString
 unpackFS :: FastString -> String
-unpackFS (FastString _ _ bs _) =
-  inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
-        utf8DecodeString (castPtr ptr) len
+unpackFS (FastString _ _ bs _) = inlinePerformIO $ utf8DecodeStringLazy bs
 
 -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
 bytesFS :: FastString -> [Word8]



More information about the ghc-commits mailing list