[Git][ghc/ghc][wip/no-fptr] 7 commits: GHC.Data.ByteArray: Initial commit
Ben Gamari
gitlab at gitlab.haskell.org
Wed Nov 25 02:06:39 UTC 2020
Ben Gamari pushed to branch wip/no-fptr at Glasgow Haskell Compiler / GHC
Commits:
ece52044 by Ben Gamari at 2020-11-24T21:06:18-05:00
GHC.Data.ByteArray: Initial commit
- - - - -
292bd5b4 by Ben Gamari at 2020-11-24T21:06:22-05:00
StringBuffer: Rid it of ForeignPtrs
Bumps haddock submodule.
- - - - -
8b53778c by Ben Gamari at 2020-11-24T21:06:22-05:00
GHC.Utils.Binary: Eliminate allocating withForeignPtr uses
- - - - -
a7f9997f by Ben Gamari at 2020-11-24T21:06:22-05:00
base: Eliminate allocating withForeignPtrs from GHC.Event.Array
- - - - -
389717fc by Ben Gamari at 2020-11-24T21:06:22-05:00
base: Use unsafeWithForeignPtr in GHC.IO.Buffer
- - - - -
240498b1 by Ben Gamari at 2020-11-24T21:06:22-05:00
GHC.Event.Array: Use unsafeWithForeignPtr
- - - - -
0f4327d8 by Ben Gamari at 2020-11-24T21:06:22-05:00
Bump bytestring submodule
Teach it to use unsafeWithForeignPtr where appropriate.
- - - - -
10 changed files:
- + compiler/GHC/Data/ByteArray.hs
- compiler/GHC/Data/StringBuffer.hs
- compiler/GHC/Utils/Binary.hs
- compiler/ghc.cabal.in
- libraries/base/GHC/Event/Array.hs
- libraries/base/GHC/IO/Buffer.hs
- libraries/bytestring
- libraries/ghc-boot/GHC/Utils/Encoding.hs
- testsuite/tests/parser/should_run/CountParserDeps.stdout
- utils/haddock
Changes:
=====================================
compiler/GHC/Data/ByteArray.hs
=====================================
@@ -0,0 +1,280 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE MagicHash #-}
+
+module GHC.Data.ByteArray
+ ( -- * Immutable byte arrays
+ ByteArray
+ , getByteArray
+ , unsafeByteArrayContents
+ , withByteArrayContents
+ , sizeofByteArray
+
+ -- * Mutable byte arrays
+ , MutableByteArray
+ , getMutableByteArray
+ , unsafeMutableByteArrayContents
+ , newMutableByteArray
+ , newPinnedMutableByteArray
+ , copyByteArray
+ , copyAddrToMutableByteArray
+ , unsafeFreezeByteArray
+
+ -- * Writing
+ , writeWordArray
+ , writeWord8Array
+ , writeWord16Array
+ , writeWord32Array
+ , writeWord64Array
+ , writeIntArray
+ , writeInt8Array
+ , writeInt16Array
+ , writeInt32Array
+ , writeInt64Array
+ , writeCharArray
+
+ -- * Reading
+ , readWordArray
+ , readWord8Array
+ , readWord16Array
+ , readWord32Array
+ , readWord64Array
+ , readIntArray
+ , readInt8Array
+ , readInt16Array
+ , readInt32Array
+ , readInt64Array
+ , readCharArray
+
+ -- * Immutable indexing
+ , indexWordArray
+ , indexWord8Array
+ , indexWord16Array
+ , indexWord32Array
+ , indexWord64Array
+ , indexIntArray
+ , indexInt8Array
+ , indexInt16Array
+ , indexInt32Array
+ , indexInt64Array
+ , indexCharArray
+ ) where
+
+import GHC.Base
+import GHC.Exts
+import GHC.Word
+import GHC.Int
+import Unsafe.Coerce
+
+data MutableByteArray = MutableByteArray { getMutableByteArray :: !(MutableByteArray# RealWorld) }
+
+data ByteArray = ByteArray { getByteArray :: !ByteArray# }
+
+unsafeByteArrayContents :: ByteArray -> Ptr a
+unsafeByteArrayContents (ByteArray ba) = Ptr (byteArrayContents# ba)
+
+unsafeMutableByteArrayContents :: MutableByteArray -> Ptr a
+unsafeMutableByteArrayContents = unsafeByteArrayContents . unsafeCoerce
+
+withByteArrayContents :: ByteArray -> (Ptr a -> IO b) -> IO b
+withByteArrayContents (ByteArray ba) f = do
+ r <- f $ Ptr (byteArrayContents# ba)
+ IO $ \s -> case touch# ba s of s' -> (# s', () #)
+ return r
+
+newMutableByteArray :: Int -> IO MutableByteArray
+newMutableByteArray (I# size) = IO $ \s ->
+ case newByteArray# size s of
+ (# s', mba #) -> (# s', MutableByteArray mba #)
+
+newPinnedMutableByteArray :: Int -> IO MutableByteArray
+newPinnedMutableByteArray (I# size) = IO $ \s ->
+ case newPinnedByteArray# size s of
+ (# s', mba #) -> (# s', MutableByteArray mba #)
+
+copyByteArray
+ :: ByteArray -- ^ source
+ -> Int -- ^ source offset
+ -> MutableByteArray -- ^ destination
+ -> Int -- ^ destination offset
+ -> Int -- ^ length to copy
+ -> IO ()
+copyByteArray (ByteArray src) (I# src_ofs) (MutableByteArray dst) (I# dst_ofs) (I# len) =
+ IO $ \s ->
+ case copyByteArray# src src_ofs dst dst_ofs len s of
+ s' -> (# s', () #)
+
+copyAddrToMutableByteArray :: Ptr a -> MutableByteArray -> Int -> Int -> IO ()
+copyAddrToMutableByteArray (Ptr src) (MutableByteArray dst) (I# dst_ofs) (I# len) = IO $ \s ->
+ case copyAddrToByteArray# src dst dst_ofs len s of
+ s' -> (# s', () #)
+
+unsafeFreezeByteArray
+ :: MutableByteArray
+ -> IO ByteArray
+unsafeFreezeByteArray (MutableByteArray mba) = IO $ \s ->
+ case unsafeFreezeByteArray# mba s of
+ (# s', ba #) -> (# s', ByteArray ba #)
+
+sizeofByteArray :: ByteArray -> Int
+sizeofByteArray (ByteArray arr) = I# (sizeofByteArray# arr)
+
+
+readWordArray :: MutableByteArray -> Int -> IO Word
+readWordArray (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readWordArray# arr ix s0 of
+ (# s1, r #) -> (# s1, W# r #)
+
+readWord8Array :: MutableByteArray -> Int -> IO Word8
+readWord8Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readWord8Array# arr ix s0 of
+ (# s1, r #) -> (# s1, W8# r #)
+
+readWord16Array :: MutableByteArray -> Int -> IO Word16
+readWord16Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readWord16Array# arr ix s0 of
+ (# s1, r #) -> (# s1, W16# r #)
+
+readWord32Array :: MutableByteArray -> Int -> IO Word32
+readWord32Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readWord32Array# arr ix s0 of
+ (# s1, r #) -> (# s1, W32# r #)
+
+readWord64Array :: MutableByteArray -> Int -> IO Word64
+readWord64Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readWord64Array# arr ix s0 of
+ (# s1, r #) -> (# s1, W64# r #)
+
+readIntArray :: MutableByteArray -> Int -> IO Int
+readIntArray (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readIntArray# arr ix s0 of
+ (# s1, r #) -> (# s1, I# r #)
+
+readInt8Array :: MutableByteArray -> Int -> IO Int8
+readInt8Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readInt8Array# arr ix s0 of
+ (# s1, r #) -> (# s1, I8# r #)
+
+readInt16Array :: MutableByteArray -> Int -> IO Int16
+readInt16Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readInt16Array# arr ix s0 of
+ (# s1, r #) -> (# s1, I16# r #)
+
+readInt32Array :: MutableByteArray -> Int -> IO Int32
+readInt32Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readInt32Array# arr ix s0 of
+ (# s1, r #) -> (# s1, I32# r #)
+
+readInt64Array :: MutableByteArray -> Int -> IO Int64
+readInt64Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readInt64Array# arr ix s0 of
+ (# s1, r #) -> (# s1, I64# r #)
+
+readCharArray :: MutableByteArray -> Int -> IO Char
+readCharArray (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+ case readCharArray# arr ix s0 of
+ (# s1, r #) -> (# s1, C# r #)
+
+
+
+writeWordArray :: MutableByteArray -> Int -> Word -> IO ()
+writeWordArray (MutableByteArray arr) (I# ix) (W# x) = IO $ \s0 ->
+ case writeWordArray# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+writeWord8Array :: MutableByteArray -> Int -> Word8 -> IO ()
+writeWord8Array (MutableByteArray arr) (I# ix) (W8# x) = IO $ \s0 ->
+ case writeWord8Array# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+writeWord16Array :: MutableByteArray -> Int -> Word16 -> IO ()
+writeWord16Array (MutableByteArray arr) (I# ix) (W16# x) = IO $ \s0 ->
+ case writeWord16Array# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+writeWord32Array :: MutableByteArray -> Int -> Word32 -> IO ()
+writeWord32Array (MutableByteArray arr) (I# ix) (W32# x) = IO $ \s0 ->
+ case writeWord32Array# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+writeWord64Array :: MutableByteArray -> Int -> Word64 -> IO ()
+writeWord64Array (MutableByteArray arr) (I# ix) (W64# x) = IO $ \s0 ->
+ case writeWord64Array# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+writeIntArray :: MutableByteArray -> Int -> Int -> IO ()
+writeIntArray (MutableByteArray arr) (I# ix) (I# x) = IO $ \s0 ->
+ case writeIntArray# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+writeInt8Array :: MutableByteArray -> Int -> Int8 -> IO ()
+writeInt8Array (MutableByteArray arr) (I# ix) (I8# x) = IO $ \s0 ->
+ case writeInt8Array# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+writeInt16Array :: MutableByteArray -> Int -> Int16 -> IO ()
+writeInt16Array (MutableByteArray arr) (I# ix) (I16# x) = IO $ \s0 ->
+ case writeInt16Array# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+writeInt32Array :: MutableByteArray -> Int -> Int32 -> IO ()
+writeInt32Array (MutableByteArray arr) (I# ix) (I32# x) = IO $ \s0 ->
+ case writeInt32Array# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+writeInt64Array :: MutableByteArray -> Int -> Int64 -> IO ()
+writeInt64Array (MutableByteArray arr) (I# ix) (I64# x) = IO $ \s0 ->
+ case writeInt64Array# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+writeCharArray :: MutableByteArray -> Int -> Char -> IO ()
+writeCharArray (MutableByteArray arr) (I# ix) (C# x) = IO $ \s0 ->
+ case writeCharArray# arr ix x s0 of
+ s1 -> (# s1, () #)
+
+
+
+indexWordArray :: ByteArray -> Int -> Word
+indexWordArray (ByteArray arr) (I# ix) =
+ W# (indexWordArray# arr ix)
+
+indexWord8Array :: ByteArray -> Int -> Word8
+indexWord8Array (ByteArray arr) (I# ix) =
+ W8# (indexWord8Array# arr ix)
+
+indexWord16Array :: ByteArray -> Int -> Word16
+indexWord16Array (ByteArray arr) (I# ix) =
+ W16# (indexWord16Array# arr ix)
+
+indexWord32Array :: ByteArray -> Int -> Word32
+indexWord32Array (ByteArray arr) (I# ix) =
+ W32# (indexWord32Array# arr ix)
+
+indexWord64Array :: ByteArray -> Int -> Word64
+indexWord64Array (ByteArray arr) (I# ix) =
+ W64# (indexWord64Array# arr ix)
+
+indexIntArray :: ByteArray -> Int -> Int
+indexIntArray (ByteArray arr) (I# ix) =
+ I# (indexIntArray# arr ix)
+
+indexInt8Array :: ByteArray -> Int -> Int8
+indexInt8Array (ByteArray arr) (I# ix) =
+ I8# (indexInt8Array# arr ix)
+
+indexInt16Array :: ByteArray -> Int -> Int16
+indexInt16Array (ByteArray arr) (I# ix) =
+ I16# (indexInt16Array# arr ix)
+
+indexInt32Array :: ByteArray -> Int -> Int32
+indexInt32Array (ByteArray arr) (I# ix) =
+ I32# (indexInt32Array# arr ix)
+
+indexInt64Array :: ByteArray -> Int -> Int64
+indexInt64Array (ByteArray arr) (I# ix) =
+ I64# (indexInt64Array# arr ix)
+
+indexCharArray :: ByteArray -> Int -> Char
+indexCharArray (ByteArray arr) (I# ix) =
+ C# (indexCharArray# arr ix)
+
=====================================
compiler/GHC/Data/StringBuffer.hs
=====================================
@@ -17,7 +17,7 @@ Buffers for scanning string input stored in external arrays.
module GHC.Data.StringBuffer
(
- StringBuffer(..),
+ StringBuffer, len, cur,
-- non-abstract for vs\/HaskellService
-- * Creation\/destruction
@@ -26,6 +26,7 @@ module GHC.Data.StringBuffer
hPutStringBuffer,
appendStringBuffers,
stringToStringBuffer,
+ byteStringToStringBuffer,
-- * Inspection
nextChar,
@@ -54,10 +55,13 @@ import GHC.Prelude
import GHC.Utils.Encoding
import GHC.Data.FastString
+import GHC.Data.ByteArray
import GHC.Utils.IO.Unsafe
import GHC.Utils.Panic.Plain
import GHC.Utils.Misc
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Unsafe as BS
import Data.Maybe
import Control.Exception
import System.IO
@@ -65,6 +69,7 @@ import System.IO.Unsafe ( unsafePerformIO )
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
import GHC.IO.Encoding.Failure ( CodingFailureMode(IgnoreCodingFailure) )
+import GHC.Word
import GHC.Exts
import Foreign
@@ -81,7 +86,7 @@ import Foreign
--
data StringBuffer
= StringBuffer {
- buf :: {-# UNPACK #-} !(ForeignPtr Word8),
+ buf :: {-# UNPACK #-} !ByteArray,
len :: {-# UNPACK #-} !Int, -- length
cur :: {-# UNPACK #-} !Int -- current pos
}
@@ -102,34 +107,35 @@ instance Show StringBuffer where
-- managed by the garbage collector.
hGetStringBuffer :: FilePath -> IO StringBuffer
hGetStringBuffer fname = do
- h <- openBinaryFile fname ReadMode
- size_i <- hFileSize h
- offset_i <- skipBOM h size_i 0 -- offset is 0 initially
- let size = fromIntegral $ size_i - offset_i
- buf <- mallocForeignPtrArray (size+3)
- withForeignPtr buf $ \ptr -> do
- r <- if size == 0 then return 0 else hGetBuf h ptr size
- hClose h
- if (r /= size)
- then ioError (userError "short read of file")
- else newUTF8StringBuffer buf ptr size
+ h <- openBinaryFile fname ReadMode
+ size_i <- hFileSize h
+ offset_i <- skipBOM h size_i 0 -- offset is 0 initially
+ let size = fromIntegral $ size_i - offset_i
+ buf <- newPinnedMutableByteArray (size+3)
+ r <- if size == 0
+ then return 0
+ else hGetBuf h (unsafeMutableByteArrayContents buf) size
+ hClose h
+ if r /= size
+ then ioError (userError "short read of file")
+ else newUTF8StringBuffer buf size
hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
-hGetStringBufferBlock handle wanted
- = do size_i <- hFileSize handle
- offset_i <- hTell handle >>= skipBOM handle size_i
- let size = min wanted (fromIntegral $ size_i-offset_i)
- buf <- mallocForeignPtrArray (size+3)
- withForeignPtr buf $ \ptr ->
- do r <- if size == 0 then return 0 else hGetBuf handle ptr size
- if r /= size
- then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
- else newUTF8StringBuffer buf ptr size
+hGetStringBufferBlock handle wanted = do
+ size_i <- hFileSize handle
+ offset_i <- hTell handle >>= skipBOM handle size_i
+ let size = min wanted (fromIntegral $ size_i-offset_i)
+ buf <- newPinnedMutableByteArray (size+3)
+ r <- if size == 0
+ then return 0
+ else hGetBuf handle (unsafeMutableByteArrayContents buf) size
+ if r /= size
+ then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
+ else newUTF8StringBuffer buf size
hPutStringBuffer :: Handle -> StringBuffer -> IO ()
-hPutStringBuffer hdl (StringBuffer buf len cur)
- = withForeignPtr (plusForeignPtr buf cur) $ \ptr ->
- hPutBuf hdl ptr len
+hPutStringBuffer hdl (StringBuffer buf len cur) = do
+ withByteArrayContents buf $ \ptr -> hPutBuf hdl (ptr `plusPtr` cur) len
-- | Skip the byte-order mark if there is one (see #1744 and #6016),
-- and return the new position of the handle in bytes.
@@ -156,39 +162,45 @@ skipBOM h size offset =
where
safeEncoding = mkUTF8 IgnoreCodingFailure
-newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
-newUTF8StringBuffer buf ptr size = do
- pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+-- | @newUTF8StringBuffer buf size@ creates a 'StringBuffer' from a
+-- 'MutableByteArray' of length @size+3@ containing UTF-8 encoded text. A three
+-- byte sentinel will be added to the end of the buffer.
+newUTF8StringBuffer :: MutableByteArray -> Int -> IO StringBuffer
+newUTF8StringBuffer buf size = do
-- sentinels for UTF-8 decoding
- return $ StringBuffer buf size 0
+ writeWord8Array buf (size+0) 0
+ writeWord8Array buf (size+1) 0
+ writeWord8Array buf (size+3) 0
+ buf' <- unsafeFreezeByteArray buf
+ return $ StringBuffer buf' size 0
appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
-appendStringBuffers sb1 sb2
- = do newBuf <- mallocForeignPtrArray (size+3)
- withForeignPtr newBuf $ \ptr ->
- withForeignPtr (buf sb1) $ \sb1Ptr ->
- withForeignPtr (buf sb2) $ \sb2Ptr ->
- do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len
- copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len
- pokeArray (ptr `advancePtr` size) [0,0,0]
- return (StringBuffer newBuf size 0)
- where sb1_len = calcLen sb1
- sb2_len = calcLen sb2
- calcLen sb = len sb - cur sb
- size = sb1_len + sb2_len
+appendStringBuffers sb1 sb2 = do
+ dst <- newPinnedMutableByteArray (size+3)
+ copyByteArray (buf sb1) (cur sb1) dst 0 sb1_len
+ copyByteArray (buf sb2) (cur sb2) dst sb1_len sb2_len
+ newUTF8StringBuffer dst size
+ where
+ sb1_len = calcLen sb1
+ sb2_len = calcLen sb2
+ calcLen sb = len sb - cur sb
+ size = sb1_len + sb2_len
+
+byteStringToStringBuffer :: BS.ByteString -> StringBuffer
+byteStringToStringBuffer bs = unsafePerformIO $ do
+ let size = BS.length bs
+ buf <- newPinnedMutableByteArray (size+3)
+ BS.unsafeUseAsCString bs (\p -> copyAddrToMutableByteArray p buf 0 size)
+ newUTF8StringBuffer buf size
-- | Encode a 'String' into a 'StringBuffer' as UTF-8. The resulting buffer
-- is automatically managed by the garbage collector.
stringToStringBuffer :: String -> StringBuffer
-stringToStringBuffer str =
- unsafePerformIO $ do
+stringToStringBuffer str = unsafePerformIO $ do
let size = utf8EncodedLength str
- buf <- mallocForeignPtrArray (size+3)
- withForeignPtr buf $ \ptr -> do
- utf8EncodeString ptr str
- pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
- -- sentinels for UTF-8 decoding
- return (StringBuffer buf size 0)
+ buf <- newPinnedMutableByteArray (size+3)
+ utf8EncodeString (unsafeMutableByteArrayContents buf) str
+ newUTF8StringBuffer buf size
-- -----------------------------------------------------------------------------
-- Grab a character
@@ -202,12 +214,10 @@ stringToStringBuffer str =
nextChar :: StringBuffer -> (Char,StringBuffer)
nextChar (StringBuffer buf len (I# cur#)) =
-- Getting our fingers dirty a little here, but this is performance-critical
- inlinePerformIO $
- withForeignPtr buf $ \(Ptr a#) ->
- case utf8DecodeCharAddr# (a# `plusAddr#` cur#) 0# of
- (# c#, nBytes# #) ->
- let cur' = I# (cur# +# nBytes#) in
- return (C# c#, StringBuffer buf len cur')
+ case utf8DecodeCharByteArray# (getByteArray buf) cur# of
+ (# c#, nBytes# #) ->
+ let cur' = I# (cur# +# nBytes#)
+ in (C# c#, StringBuffer buf len cur')
-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous
-- to 'Data.List.head'). __Warning:__ The behavior is undefined if the
@@ -219,10 +229,9 @@ currentChar = fst . nextChar
prevChar :: StringBuffer -> Char -> Char
prevChar (StringBuffer _ _ 0) deflt = deflt
prevChar (StringBuffer buf _ cur) _ =
- inlinePerformIO $
- withForeignPtr buf $ \p -> do
- p' <- utf8PrevChar (p `plusPtr` cur)
- return (fst (utf8DecodeChar p'))
+ let !(I# p') = utf8PrevChar (getByteArray buf) cur
+ !(# c, _ #) = utf8DecodeCharByteArray# (getByteArray buf) p'
+ in C# c
-- -----------------------------------------------------------------------------
-- Moving
@@ -257,18 +266,18 @@ atEnd (StringBuffer _ l c) = l == c
-- wanted line. Lines begin at 1.
atLine :: Int -> StringBuffer -> Maybe StringBuffer
atLine line sb@(StringBuffer buf len _) =
- inlinePerformIO $
- withForeignPtr buf $ \p -> do
- p' <- skipToLine line len p
- if p' == nullPtr
- then return Nothing
- else
- let
- delta = p' `minusPtr` p
- in return $ Just (sb { cur = delta
- , len = len - delta
- })
-
+ inlinePerformIO $ withByteArrayContents buf $ \p -> do
+ p' <- skipToLine line len p
+ if p' == nullPtr
+ then return Nothing
+ else
+ let !delta = p' `minusPtr` p
+ in return $ Just (sb { cur = delta
+ , len = len - delta
+ })
+
+-- | @skipToLine line len op0@ finds the byte offset to the beginning of
+-- the given line number.
skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
skipToLine !line !len !op0 = go 1 op0
where
@@ -300,39 +309,42 @@ lexemeToString :: StringBuffer
-> Int -- ^ @n@, the number of bytes
-> String
lexemeToString _ 0 = ""
-lexemeToString (StringBuffer buf _ cur) bytes =
- utf8DecodeStringLazy buf cur bytes
+lexemeToString (StringBuffer buf _ (I# cur#)) (I# bytes#) =
+ utf8DecodeByteArrayLazy# (getByteArray buf) cur# bytes#
lexemeToFastString :: StringBuffer
-> Int -- ^ @n@, the number of bytes
-> FastString
lexemeToFastString _ 0 = nilFS
lexemeToFastString (StringBuffer buf _ cur) len =
- inlinePerformIO $
- withForeignPtr buf $ \ptr ->
- return $! mkFastStringBytes (ptr `plusPtr` cur) len
+ inlinePerformIO $
+ withByteArrayContents buf $ \ptr ->
+ return $! mkFastStringBytes (ptr `plusPtr` cur) len
-- | Return the previous @n@ characters (or fewer if we are less than @n@
-- characters into the buffer.
decodePrevNChars :: Int -> StringBuffer -> String
-decodePrevNChars n (StringBuffer buf _ cur) =
- inlinePerformIO $ withForeignPtr buf $ \p0 ->
- go p0 n "" (p0 `plusPtr` (cur - 1))
+decodePrevNChars n (StringBuffer buf0 _ cur) =
+ go (getByteArray buf0) (min n (cur - 1)) "" (cur - 1)
where
- go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
- go buf0 n acc p | n == 0 || buf0 >= p = return acc
- go buf0 n acc p = do
- p' <- utf8PrevChar p
- let (c,_) = utf8DecodeChar p'
- go buf0 (n - 1) (c:acc) p'
+ go :: ByteArray# -> Int -> String -> Int -> String
+ go buf n acc ofs
+ | n == 0 = acc
+ | otherwise =
+ let !ofs'@(I# ofs'#) = utf8PrevChar buf ofs
+ !(# c,_ #) = utf8DecodeCharByteArray# buf ofs'#
+ in go buf (n - 1) (C# c:acc) ofs'
-- -----------------------------------------------------------------------------
-- Parsing integer strings in various bases
parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
-parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
- = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
- go i x | i == len = x
- | otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of
- '_' -> go (i + 1) x -- skip "_" (#14473)
- char -> go (i + 1) (x * radix + toInteger (char_to_int char))
- in go 0 0
+parseUnsignedInteger (StringBuffer buf _ (I# cur)) (I# len) radix char_to_int
+ = go (len +# cur) cur 0
+ where
+ go :: Int# -> Int# -> Integer -> Integer
+ go end i !acc
+ | isTrue# (i ==# end) = acc
+ | otherwise =
+ case utf8DecodeCharByteArray# (getByteArray buf) i of
+ (# '_'#, _ #) -> go end (i +# 1#) acc -- skip "_" (#14473)
+ (# char, _ #) -> go end (i +# 1#) (acc * radix + toInteger (char_to_int (C# char)))
=====================================
compiler/GHC/Utils/Binary.hs
=====================================
@@ -6,6 +6,7 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
-- We always optimise this, otherwise performance of a non-optimised
@@ -84,6 +85,7 @@ import Data.Array
import Data.ByteString (ByteString)
import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Unsafe as BS
+import GHC.ForeignPtr
import Data.IORef
import Data.Char ( ord, chr )
import Data.Time
@@ -96,7 +98,10 @@ import GHC.Real ( Ratio(..) )
type BinArray = ForeignPtr Word8
-
+#if !MIN_VERSION_base(4,15,0)
+unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
+unsafeWithForeignPtr = withForeignPtr
+#endif
---------------------------------------------------------------
-- BinData
@@ -111,14 +116,14 @@ instance Binary BinData where
put_ bh (BinData sz dat) = do
put_ bh sz
putPrim bh sz $ \dest ->
- withForeignPtr dat $ \orig ->
+ unsafeWithForeignPtr dat $ \orig ->
copyBytes dest orig sz
--
get bh = do
sz <- get bh
dat <- mallocForeignPtrBytes sz
getPrim bh sz $ \orig ->
- withForeignPtr dat $ \dest ->
+ unsafeWithForeignPtr dat $ \dest ->
copyBytes dest orig sz
return (BinData sz dat)
@@ -226,7 +231,7 @@ writeBinMem (BinMem _ ix_r _ arr_r) fn = do
h <- openBinaryFile fn WriteMode
arr <- readIORef arr_r
ix <- readFastMutInt ix_r
- withForeignPtr arr $ \p -> hPutBuf h p ix
+ unsafeWithForeignPtr arr $ \p -> hPutBuf h p ix
hClose h
readBinMem :: FilePath -> IO BinHandle
@@ -236,7 +241,7 @@ readBinMem filename = do
filesize' <- hFileSize h
let filesize = fromIntegral filesize'
arr <- mallocForeignPtrBytes filesize
- count <- withForeignPtr arr $ \p -> hGetBuf h p filesize
+ count <- unsafeWithForeignPtr arr $ \p -> hGetBuf h p filesize
when (count /= filesize) $
error ("Binary.readBinMem: only read " ++ show count ++ " bytes")
hClose h
@@ -280,7 +285,7 @@ putPrim h@(BinMem _ ix_r sz_r arr_r) size f = do
when (ix + size > sz) $
expandBin h (ix + size)
arr <- readIORef arr_r
- withForeignPtr arr $ \op -> f (op `plusPtr` ix)
+ unsafeWithForeignPtr arr $ \op -> f (op `plusPtr` ix)
writeFastMutInt ix_r (ix + size)
-- -- | Similar to putPrim but advances the index by the actual number of
@@ -302,7 +307,10 @@ getPrim (BinMem _ ix_r sz_r arr_r) size f = do
when (ix + size > sz) $
ioError (mkIOError eofErrorType "Data.Binary.getPrim" Nothing Nothing)
arr <- readIORef arr_r
- w <- withForeignPtr arr $ \op -> f (op `plusPtr` ix)
+ w <- f (unsafeForeignPtrToPtr arr `plusPtr` ix)
+ touchForeignPtr arr
+ -- This is safe WRT #17760 as we we guarantee that the above line doesn't
+ -- diverge
writeFastMutInt ix_r (ix + size)
return w
=====================================
compiler/ghc.cabal.in
=====================================
@@ -356,6 +356,7 @@ Library
GHC.Data.Bag
GHC.Data.Bitmap
GHC.Data.BooleanFormula
+ GHC.Data.ByteArray
GHC.Data.EnumSet
GHC.Data.FastMutInt
GHC.Data.FastString
=====================================
libraries/base/GHC/Event/Array.hs
=====================================
@@ -33,7 +33,7 @@ import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
import Foreign.Ptr (Ptr, nullPtr, plusPtr)
import Foreign.Storable (Storable(..))
import GHC.Base hiding (empty)
-import GHC.ForeignPtr (mallocPlainForeignPtrBytes, newForeignPtr_)
+import GHC.ForeignPtr (mallocPlainForeignPtrBytes, newForeignPtr_, unsafeWithForeignPtr)
import GHC.Num (Num(..))
import GHC.Real (fromIntegral)
import GHC.Show (show)
@@ -78,9 +78,9 @@ reallocArray p newSize oldSize = reallocHack undefined p
reallocHack dummy src = do
let size = sizeOf dummy
dst <- mallocPlainForeignPtrBytes (newSize * size)
- withForeignPtr src $ \s ->
+ unsafeWithForeignPtr src $ \s ->
when (s /= nullPtr && oldSize > 0) .
- withForeignPtr dst $ \d -> do
+ unsafeWithForeignPtr dst $ \d -> do
_ <- memcpy d s (fromIntegral (oldSize * size))
return ()
return dst
@@ -99,8 +99,8 @@ duplicate a = dupHack undefined a
dupHack dummy (Array ref) = do
AC es len cap <- readIORef ref
ary <- allocArray cap
- withForeignPtr ary $ \dest ->
- withForeignPtr es $ \src -> do
+ unsafeWithForeignPtr ary $ \dest ->
+ unsafeWithForeignPtr es $ \src -> do
_ <- memcpy dest src (fromIntegral (len * sizeOf dummy))
return ()
Array `fmap` newIORef (AC ary len cap)
@@ -119,8 +119,8 @@ unsafeRead :: Storable a => Array a -> Int -> IO a
unsafeRead (Array ref) ix = do
AC es _ cap <- readIORef ref
CHECK_BOUNDS("unsafeRead",cap,ix)
- withForeignPtr es $ \p ->
- peekElemOff p ix
+ unsafeWithForeignPtr es $ \ptr -> peekElemOff ptr ix
+ -- this is safe WRT #17760 as we assume that peekElemOff doesn't diverge
unsafeWrite :: Storable a => Array a -> Int -> a -> IO ()
unsafeWrite (Array ref) ix a = do
@@ -130,13 +130,15 @@ unsafeWrite (Array ref) ix a = do
unsafeWrite' :: Storable a => AC a -> Int -> a -> IO ()
unsafeWrite' (AC es _ cap) ix a =
CHECK_BOUNDS("unsafeWrite'",cap,ix)
- withForeignPtr es $ \p ->
- pokeElemOff p ix a
+ unsafeWithForeignPtr es $ \ptr -> pokeElemOff ptr ix a
+ -- this is safe WRT #17760 as we assume that peekElemOff doesn't diverge
+-- | Precondition: continuation must not diverge due to use of
+-- 'unsafeWithForeignPtr'.
unsafeLoad :: Array a -> (Ptr a -> Int -> IO Int) -> IO Int
unsafeLoad (Array ref) load = do
AC es _ cap <- readIORef ref
- len' <- withForeignPtr es $ \p -> load p cap
+ len' <- unsafeWithForeignPtr es $ \p -> load p cap
writeIORef ref (AC es len' cap)
return len'
@@ -146,7 +148,7 @@ unsafeCopyFromBuffer :: Storable a => Array a -> Ptr a -> Int -> IO ()
unsafeCopyFromBuffer (Array ref) sptr n =
readIORef ref >>= \(AC es _ cap) ->
CHECK_BOUNDS("unsafeCopyFromBuffer", cap, n)
- withForeignPtr es $ \pdest -> do
+ unsafeWithForeignPtr es $ \pdest -> do
let size = sizeOfPtr sptr undefined
_ <- memcpy pdest sptr (fromIntegral $ n * size)
writeIORef ref (AC es n cap)
@@ -198,7 +200,7 @@ forM_ ary g = forHack ary g undefined
AC es len _ <- readIORef ref
let size = sizeOf dummy
offset = len * size
- withForeignPtr es $ \p -> do
+ unsafeWithForeignPtr es $ \p -> do
let go n | n >= offset = return ()
| otherwise = do
f =<< peek (p `plusPtr` n)
@@ -269,8 +271,8 @@ copy' d dstart s sstart maxCount = copyHack d s undefined
then return dac
else do
AC dst dlen dcap <- ensureCapacity' dac (dstart + count)
- withForeignPtr dst $ \dptr ->
- withForeignPtr src $ \sptr -> do
+ unsafeWithForeignPtr dst $ \dptr ->
+ unsafeWithForeignPtr src $ \sptr -> do
_ <- memcpy (dptr `plusPtr` (dstart * size))
(sptr `plusPtr` (sstart * size))
(fromIntegral (count * size))
@@ -286,7 +288,7 @@ removeAt a i = removeHack a undefined
let size = sizeOf dummy
newLen = oldLen - 1
when (newLen > 0 && i < newLen) .
- withForeignPtr fp $ \ptr -> do
+ unsafeWithForeignPtr fp $ \ptr -> do
_ <- memmove (ptr `plusPtr` (size * i))
(ptr `plusPtr` (size * (i+1)))
(fromIntegral (size * (newLen-i)))
=====================================
libraries/base/GHC/IO/Buffer.hs
=====================================
@@ -73,6 +73,7 @@ import GHC.Show
import GHC.Real
import GHC.List
import GHC.ForeignPtr.Ops
+import GHC.ForeignPtr (unsafeWithForeignPtr)
import Foreign.C.Types
import Foreign.ForeignPtr
import Foreign.Storable
@@ -118,17 +119,17 @@ type CharBufElem = Char
type RawCharBuffer = RawBuffer CharBufElem
peekCharBuf :: RawCharBuffer -> Int -> IO Char
-peekCharBuf arr ix = withForeignPtr arr $ \p -> do
+peekCharBuf arr ix = unsafeWithForeignPtr arr $ \p -> do
(c,_) <- readCharBufPtr p ix
return c
{-# INLINE readCharBuf #-}
readCharBuf :: RawCharBuffer -> Int -> IO (Char, Int)
-readCharBuf arr ix = withForeignPtr arr $ \p -> readCharBufPtr p ix
+readCharBuf arr ix = unsafeWithForeignPtr arr $ \p -> readCharBufPtr p ix
{-# INLINE writeCharBuf #-}
writeCharBuf :: RawCharBuffer -> Int -> Char -> IO Int
-writeCharBuf arr ix c = withForeignPtr arr $ \p -> writeCharBufPtr p ix c
+writeCharBuf arr ix c = unsafeWithForeignPtr arr $ \p -> writeCharBufPtr p ix c
{-# INLINE readCharBufPtr #-}
readCharBufPtr :: Ptr CharBufElem -> Int -> IO (Char, Int)
=====================================
libraries/bytestring
=====================================
@@ -1 +1 @@
-Subproject commit e043aacfc4202a59ccae8b8c8cf0e1ad83a3f209
+Subproject commit 4cbf0cd6053411139a08ff67c7ec5eae1da87b03
=====================================
libraries/ghc-boot/GHC/Utils/Encoding.hs
=====================================
@@ -17,12 +17,15 @@
module GHC.Utils.Encoding (
-- * UTF-8
utf8DecodeCharAddr#,
+ utf8DecodeCharByteArray#,
utf8PrevChar,
utf8CharStart,
utf8DecodeChar,
utf8DecodeByteString,
+ utf8DecodeByteArray,
utf8DecodeShortByteString,
utf8CompareShortByteString,
+ utf8DecodeByteArrayLazy#,
utf8DecodeStringLazy,
utf8EncodeChar,
utf8EncodeString,
@@ -53,6 +56,7 @@ import Data.ByteString (ByteString)
import qualified Data.ByteString.Internal as BS
import Data.ByteString.Short.Internal (ShortByteString(..))
+import GHC.Word
import GHC.Exts
-- -----------------------------------------------------------------------------
@@ -131,15 +135,17 @@ utf8DecodeChar !(Ptr a#) =
-- the start of the current character is, given any position in a
-- stream. This function finds the start of the previous character,
-- assuming there *is* a previous character.
-utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8)
-utf8PrevChar p = utf8CharStart (p `plusPtr` (-1))
+utf8PrevChar :: ByteArray# -> Int -> Int
+utf8PrevChar arr ofs = utf8CharStart arr (ofs - 1)
-utf8CharStart :: Ptr Word8 -> IO (Ptr Word8)
-utf8CharStart p = go p
- where go p = do w <- peek p
- if w >= 0x80 && w < 0xC0
- then go (p `plusPtr` (-1))
- else return p
+utf8CharStart :: ByteArray# -> Int -> Int
+utf8CharStart = go
+ where
+ go arr ofs@(I# ofs#)
+ | w >= 0x80 && w < 0xC0 = go arr (ofs - 1)
+ | otherwise = ofs
+ where
+ w = W8# (indexWord8Array# arr ofs#)
{-# INLINE utf8DecodeLazy# #-}
utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char]
@@ -158,6 +164,12 @@ utf8DecodeByteString :: ByteString -> [Char]
utf8DecodeByteString (BS.PS fptr offset len)
= utf8DecodeStringLazy fptr offset len
+utf8DecodeByteArrayLazy# :: ByteArray# -> Int# -> Int# -> [Char]
+utf8DecodeByteArrayLazy# a# offset# len#
+ = unsafeDupablePerformIO $
+ let decodeChar i = utf8DecodeCharByteArray# a# (i +# offset#)
+ in utf8DecodeLazy# (return ()) decodeChar len#
+
utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
utf8DecodeStringLazy fp offset (I# len#)
= unsafeDupablePerformIO $ do
@@ -200,12 +212,15 @@ utf8CompareShortByteString (SBS a1) (SBS a2) = go 0# 0#
| isTrue# (b1_1 `ltWord#` b2_1) -> LT
| otherwise -> go (off1 +# 1#) (off2 +# 1#)
-utf8DecodeShortByteString :: ShortByteString -> [Char]
-utf8DecodeShortByteString (SBS ba#)
+utf8DecodeByteArray :: ByteArray# -> [Char]
+utf8DecodeByteArray ba#
= unsafeDupablePerformIO $
let len# = sizeofByteArray# ba# in
utf8DecodeLazy# (return ()) (utf8DecodeCharByteArray# ba#) len#
+utf8DecodeShortByteString :: ShortByteString -> [Char]
+utf8DecodeShortByteString (SBS ba#) = utf8DecodeByteArray ba#
+
countUTF8Chars :: ShortByteString -> IO Int
countUTF8Chars (SBS ba) = go 0# 0#
where
=====================================
testsuite/tests/parser/should_run/CountParserDeps.stdout
=====================================
@@ -1,4 +1,4 @@
-Found 235 parser module dependencies
+Found 236 parser module dependencies
GHC.Builtin.Names
GHC.Builtin.PrimOps
GHC.Builtin.Types
@@ -62,6 +62,7 @@ GHC.Core.Utils
GHC.CoreToIface
GHC.Data.Bag
GHC.Data.BooleanFormula
+GHC.Data.ByteArray
GHC.Data.EnumSet
GHC.Data.FastMutInt
GHC.Data.FastString
=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit 25fa8fde84701c010fa466c2648f8f6d10265e8f
+Subproject commit 81dcb5545c88a6113777c6d87cd687278356d3a3
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/918211013cced851784c1f5ec5c68f97317f0f13...0f4327d893252acf6a7e7d4914a8168b2f16944e
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/918211013cced851784c1f5ec5c68f97317f0f13...0f4327d893252acf6a7e7d4914a8168b2f16944e
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20201124/cbb50f05/attachment-0001.html>
More information about the ghc-commits
mailing list