[commit: ghc] wip/T12357-unpack: FastString: Lazily unpack strings (c003779)
git at git.haskell.org
git at git.haskell.org
Tue Jul 5 08:00:14 UTC 2016
Repository : ssh://git@git.haskell.org/ghc
On branch : wip/T12357-unpack
Link : http://ghc.haskell.org/trac/ghc/changeset/c0037797a5e0e8a42b70cdd077d20b22b2e19d7e/ghc
>---------------------------------------------------------------
commit c0037797a5e0e8a42b70cdd077d20b22b2e19d7e
Author: Ben Gamari <ben at smart-cactus.org>
Date: Mon Jul 4 19:58:06 2016 -0400
FastString: Lazily unpack strings
Previously we would eagerly build a [Char] of the full string contents,
resulting in unnecessarily high allocations in cases where only a short
bit at the beginning of the string is called for. See #12357 for
motivation.
>---------------------------------------------------------------
c0037797a5e0e8a42b70cdd077d20b22b2e19d7e
compiler/utils/Encoding.hs | 17 +++++++++++++++++
compiler/utils/FastString.hs | 4 +---
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs
index ae727d2..d959671 100644
--- a/compiler/utils/Encoding.hs
+++ b/compiler/utils/Encoding.hs
@@ -18,6 +18,7 @@ module Encoding (
utf8CharStart,
utf8DecodeChar,
utf8DecodeString,
+ utf8DecodeStringLazy,
utf8EncodeChar,
utf8EncodeString,
utf8EncodedLength,
@@ -32,6 +33,9 @@ import Foreign
import Data.Char
import Numeric
import ExtsCompat46
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Unsafe as BS
+import System.IO.Unsafe ( unsafeInterleaveIO )
-- -----------------------------------------------------------------------------
-- UTF-8
@@ -110,6 +114,19 @@ utf8CharStart p = go p
then go (p `plusPtr` (-1))
else return p
+utf8DecodeStringLazy :: BS.ByteString -> IO [Char]
+utf8DecodeStringLazy bs
+ = unpack bs
+ where
+ unpack bs
+ | BS.null bs = return []
+ | otherwise =
+ BS.unsafeUseAsCString bs $ \ptr ->
+ case utf8DecodeChar (castPtr ptr) of
+ (c, nBytes) -> do
+ chs <- unsafeInterleaveIO $ unpack (BS.drop nBytes bs)
+ return (c : chs)
+
utf8DecodeString :: Ptr Word8 -> Int -> IO [Char]
utf8DecodeString ptr len
= unpack ptr
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index 40c3882..21cbfeb 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -482,9 +482,7 @@ nullFS f = BS.null (fs_bs f)
-- | Unpacks and decodes the FastString
unpackFS :: FastString -> String
-unpackFS (FastString _ _ bs _) =
- inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
- utf8DecodeString (castPtr ptr) len
+unpackFS (FastString _ _ bs _) = inlinePerformIO $ utf8DecodeStringLazy bs
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> [Word8]
More information about the ghc-commits
mailing list