[commit: ghc] master: Improve StringBuffer and FastString docs (21dde81)
git at git.haskell.org
git at git.haskell.org
Sun Dec 18 01:02:15 UTC 2016
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/21dde8126d615a082648c916a3e20d9878f22517/ghc
>---------------------------------------------------------------
commit 21dde8126d615a082648c916a3e20d9878f22517
Author: Phil Ruffwind <rf at rufflewind.com>
Date: Sat Dec 17 18:09:06 2016 -0500
Improve StringBuffer and FastString docs
This area of code contains a lot of unsafe functionality, so it might be
worth documenting to reduce the risk of misuse.
Test Plan: inspection
Reviewers: austin, bgamari
Reviewed By: bgamari
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D2872
>---------------------------------------------------------------
21dde8126d615a082648c916a3e20d9878f22517
compiler/utils/FastString.hs | 13 +++++++++++++
compiler/utils/StringBuffer.hs | 42 ++++++++++++++++++++++++++++++++++++++----
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index 8f76584..8d1bbb5 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -23,6 +23,10 @@
-- * Outputing them is fast.
-- * Generated by 'sLit'.
-- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
+-- * Requires manual memory management.
+-- Improper use may lead to memory leaks or dangling pointers.
+-- * It assumes Latin-1 as the encoding, therefore it cannot represent
+-- arbitrary Unicode strings.
--
-- Use 'LitString' unless you want the facilities of 'FastString'.
module FastString
@@ -560,14 +564,19 @@ hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs
-- -----------------------------------------------------------------------------
-- LitStrings, here for convenience only.
+-- | A 'LitString' is a pointer to some null-terminated array of bytes.
type LitString = Ptr Word8
--Why do we recalculate length every time it's requested?
--If it's commonly needed, we should perhaps have
--data LitString = LitString {-#UNPACK#-}!Addr# {-#UNPACK#-}!Int#
+-- | Wrap an unboxed address into a 'LitString'.
mkLitString# :: Addr# -> LitString
mkLitString# a# = Ptr a#
+-- | Encode a 'String' into a newly allocated 'LitString' using Latin-1
+-- encoding. The original string must not contain non-Latin-1 characters
+-- (above codepoint @0xff@).
{-# INLINE mkLitString #-}
mkLitString :: String -> LitString
mkLitString s =
@@ -583,9 +592,13 @@ mkLitString s =
return p
)
+-- | Decode a 'LitString' back into a 'String' using Latin-1 encoding.
+-- This does not free the memory associated with 'LitString'.
unpackLitString :: LitString -> String
unpackLitString (Ptr p) = unpackCString# p
+-- | Compute the length of a 'LitString', which must necessarily be
+-- null-terminated.
lengthLS :: LitString -> Int
lengthLS = ptrStrLength
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 7da9f6c..bac752a 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -89,6 +89,8 @@ instance Show StringBuffer where
-- -----------------------------------------------------------------------------
-- Creation / Destruction
+-- | Read a file into a 'StringBuffer'. The resulting buffer is automatically
+-- managed by the garbage collector.
hGetStringBuffer :: FilePath -> IO StringBuffer
hGetStringBuffer fname = do
h <- openBinaryFile fname ReadMode
@@ -161,6 +163,8 @@ appendStringBuffers sb1 sb2
calcLen sb = len sb - cur sb
size = sb1_len + sb2_len
+-- | Encode a 'String' into a 'StringBuffer' as UTF-8. The resulting buffer
+-- is automatically managed by the garbage collector.
stringToStringBuffer :: String -> StringBuffer
stringToStringBuffer str =
unsafePerformIO $ do
@@ -175,10 +179,15 @@ stringToStringBuffer str =
-- -----------------------------------------------------------------------------
-- Grab a character
--- Getting our fingers dirty a little here, but this is performance-critical
+-- | Return the first UTF-8 character of a nonempty 'StringBuffer' and as well
+-- the remaining portion (analogous to 'Data.List.uncons'). __Warning:__ The
+-- behavior is undefined if the 'StringBuffer' is empty. The result shares
+-- the same buffer as the original. Similar to 'utf8DecodeChar', if the
+-- character cannot be decoded as UTF-8, '\0' is returned.
{-# INLINE nextChar #-}
nextChar :: StringBuffer -> (Char,StringBuffer)
nextChar (StringBuffer buf len (I# cur#)) =
+ -- Getting our fingers dirty a little here, but this is performance-critical
inlinePerformIO $ do
withForeignPtr buf $ \(Ptr a#) -> do
case utf8DecodeChar# (a# `plusAddr#` cur#) of
@@ -186,6 +195,10 @@ nextChar (StringBuffer buf len (I# cur#)) =
let cur' = I# (cur# +# nBytes#) in
return (C# c#, StringBuffer buf len cur')
+-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous
+-- to 'Data.List.head'). __Warning:__ The behavior is undefined if the
+-- 'StringBuffer' is empty. Similar to 'utf8DecodeChar', if the character
+-- cannot be decoded as UTF-8, '\0' is returned.
currentChar :: StringBuffer -> Char
currentChar = fst . nextChar
@@ -200,29 +213,50 @@ prevChar (StringBuffer buf _ cur) _ =
-- -----------------------------------------------------------------------------
-- Moving
+-- | Return a 'StringBuffer' with the first UTF-8 character removed (analogous
+-- to 'Data.List.tail'). __Warning:__ The behavior is undefined if the
+-- 'StringBuffer' is empty. The result shares the same buffer as the
+-- original.
stepOn :: StringBuffer -> StringBuffer
stepOn s = snd (nextChar s)
-offsetBytes :: Int -> StringBuffer -> StringBuffer
+-- | Return a 'StringBuffer' with the first @n@ bytes removed. __Warning:__
+-- If there aren't enough characters, the returned 'StringBuffer' will be
+-- invalid and any use of it may lead to undefined behavior. The result
+-- shares the same buffer as the original.
+offsetBytes :: Int -- ^ @n@, the number of bytes
+ -> StringBuffer
+ -> StringBuffer
offsetBytes i s = s { cur = cur s + i }
+-- | Compute the difference in offset between two 'StringBuffer's that share
+-- the same buffer. __Warning:__ The behavior is undefined if the
+-- 'StringBuffer's use separate buffers.
byteDiff :: StringBuffer -> StringBuffer -> Int
byteDiff s1 s2 = cur s2 - cur s1
+-- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null').
atEnd :: StringBuffer -> Bool
atEnd (StringBuffer _ l c) = l == c
-- -----------------------------------------------------------------------------
-- Conversion
-lexemeToString :: StringBuffer -> Int {-bytes-} -> String
+-- | Decode the first @n@ bytes of a 'StringBuffer' as UTF-8 into a 'String'.
+-- Similar to 'utf8DecodeChar', if the character cannot be decoded as UTF-8,
+-- they will be replaced with '\0'.
+lexemeToString :: StringBuffer
+ -> Int -- ^ @n@, the number of bytes
+ -> String
lexemeToString _ 0 = ""
lexemeToString (StringBuffer buf _ cur) bytes =
inlinePerformIO $
withForeignPtr buf $ \ptr ->
utf8DecodeString (ptr `plusPtr` cur) bytes
-lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
+lexemeToFastString :: StringBuffer
+ -> Int -- ^ @n@, the number of bytes
+ -> FastString
lexemeToFastString _ 0 = nilFS
lexemeToFastString (StringBuffer buf _ cur) len =
inlinePerformIO $
More information about the ghc-commits
mailing list