[Git][ghc/ghc][wip/no-fptr] 7 commits: GHC.Data.ByteArray: Initial commit

Ben Gamari gitlab at gitlab.haskell.org
Wed Nov 25 02:06:39 UTC 2020



Ben Gamari pushed to branch wip/no-fptr at Glasgow Haskell Compiler / GHC


Commits:
ece52044 by Ben Gamari at 2020-11-24T21:06:18-05:00
GHC.Data.ByteArray: Initial commit

- - - - -
292bd5b4 by Ben Gamari at 2020-11-24T21:06:22-05:00
StringBuffer: Rid it of ForeignPtrs

Bumps haddock submodule.

- - - - -
8b53778c by Ben Gamari at 2020-11-24T21:06:22-05:00
GHC.Utils.Binary: Eliminate allocating withForeignPtr uses

- - - - -
a7f9997f by Ben Gamari at 2020-11-24T21:06:22-05:00
base: Eliminate allocating withForeignPtrs from GHC.Event.Array

- - - - -
389717fc by Ben Gamari at 2020-11-24T21:06:22-05:00
base: Use unsafeWithForeignPtr in GHC.IO.Buffer

- - - - -
240498b1 by Ben Gamari at 2020-11-24T21:06:22-05:00
GHC.Event.Array: Use unsafeWithForeignPtr

- - - - -
0f4327d8 by Ben Gamari at 2020-11-24T21:06:22-05:00
Bump bytestring submodule

Teach it to use unsafeWithForeignPtr where appropriate.

- - - - -


10 changed files:

- + compiler/GHC/Data/ByteArray.hs
- compiler/GHC/Data/StringBuffer.hs
- compiler/GHC/Utils/Binary.hs
- compiler/ghc.cabal.in
- libraries/base/GHC/Event/Array.hs
- libraries/base/GHC/IO/Buffer.hs
- libraries/bytestring
- libraries/ghc-boot/GHC/Utils/Encoding.hs
- testsuite/tests/parser/should_run/CountParserDeps.stdout
- utils/haddock


Changes:

=====================================
compiler/GHC/Data/ByteArray.hs
=====================================
@@ -0,0 +1,280 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE MagicHash #-}
+
+module GHC.Data.ByteArray
+  ( -- * Immutable byte arrays
+    ByteArray
+  , getByteArray
+  , unsafeByteArrayContents
+  , withByteArrayContents
+  , sizeofByteArray
+
+    -- * Mutable byte arrays
+  , MutableByteArray
+  , getMutableByteArray
+  , unsafeMutableByteArrayContents
+  , newMutableByteArray
+  , newPinnedMutableByteArray
+  , copyByteArray
+  , copyAddrToMutableByteArray
+  , unsafeFreezeByteArray
+
+    -- * Writing
+  , writeWordArray
+  , writeWord8Array
+  , writeWord16Array
+  , writeWord32Array
+  , writeWord64Array
+  , writeIntArray
+  , writeInt8Array
+  , writeInt16Array
+  , writeInt32Array
+  , writeInt64Array
+  , writeCharArray
+
+    -- * Reading
+  , readWordArray
+  , readWord8Array
+  , readWord16Array
+  , readWord32Array
+  , readWord64Array
+  , readIntArray
+  , readInt8Array
+  , readInt16Array
+  , readInt32Array
+  , readInt64Array
+  , readCharArray
+
+    -- * Immutable indexing
+  , indexWordArray
+  , indexWord8Array
+  , indexWord16Array
+  , indexWord32Array
+  , indexWord64Array
+  , indexIntArray
+  , indexInt8Array
+  , indexInt16Array
+  , indexInt32Array
+  , indexInt64Array
+  , indexCharArray
+  ) where
+
+import GHC.Base
+import GHC.Exts
+import GHC.Word
+import GHC.Int
+import Unsafe.Coerce
+
+data MutableByteArray = MutableByteArray { getMutableByteArray :: !(MutableByteArray# RealWorld) }
+
+data ByteArray = ByteArray { getByteArray :: !ByteArray# }
+
+unsafeByteArrayContents :: ByteArray -> Ptr a
+unsafeByteArrayContents (ByteArray ba) = Ptr (byteArrayContents# ba)
+
+unsafeMutableByteArrayContents :: MutableByteArray -> Ptr a
+unsafeMutableByteArrayContents = unsafeByteArrayContents . unsafeCoerce
+
+withByteArrayContents :: ByteArray -> (Ptr a -> IO b) -> IO b
+withByteArrayContents (ByteArray ba) f = do
+  r <- f $ Ptr (byteArrayContents# ba)
+  IO $ \s -> case touch# ba s of s' -> (# s', () #)
+  return r
+
+newMutableByteArray :: Int -> IO MutableByteArray
+newMutableByteArray (I# size) = IO $ \s ->
+  case newByteArray# size s of
+    (# s', mba #) -> (# s', MutableByteArray mba #)
+
+newPinnedMutableByteArray :: Int -> IO MutableByteArray
+newPinnedMutableByteArray (I# size) = IO $ \s ->
+  case newPinnedByteArray# size s of
+    (# s', mba #) -> (# s', MutableByteArray mba #)
+
+copyByteArray
+  :: ByteArray          -- ^ source
+  -> Int                -- ^ source offset
+  -> MutableByteArray   -- ^ destination
+  -> Int                -- ^ destination offset
+  -> Int                -- ^ length to copy
+  -> IO ()
+copyByteArray (ByteArray src) (I# src_ofs) (MutableByteArray dst) (I# dst_ofs) (I# len) =
+  IO $ \s ->
+    case copyByteArray# src src_ofs dst dst_ofs len s of
+      s' -> (# s', () #)
+
+copyAddrToMutableByteArray :: Ptr a -> MutableByteArray -> Int -> Int -> IO ()
+copyAddrToMutableByteArray (Ptr src) (MutableByteArray dst) (I# dst_ofs) (I# len) = IO $ \s ->
+  case copyAddrToByteArray# src dst dst_ofs len s of
+    s' -> (# s', () #)
+
+unsafeFreezeByteArray
+  :: MutableByteArray
+  -> IO ByteArray
+unsafeFreezeByteArray (MutableByteArray mba) = IO $ \s ->
+  case unsafeFreezeByteArray# mba s of
+    (# s', ba #) -> (# s', ByteArray ba #)
+
+sizeofByteArray :: ByteArray -> Int
+sizeofByteArray (ByteArray arr) = I# (sizeofByteArray# arr)
+
+
+readWordArray :: MutableByteArray -> Int -> IO Word
+readWordArray (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readWordArray# arr ix s0 of
+      (# s1, r #) -> (# s1, W# r #)
+
+readWord8Array :: MutableByteArray -> Int -> IO Word8
+readWord8Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readWord8Array# arr ix s0 of
+      (# s1, r #) -> (# s1, W8# r #)
+
+readWord16Array :: MutableByteArray -> Int -> IO Word16
+readWord16Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readWord16Array# arr ix s0 of
+      (# s1, r #) -> (# s1, W16# r #)
+
+readWord32Array :: MutableByteArray -> Int -> IO Word32
+readWord32Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readWord32Array# arr ix s0 of
+      (# s1, r #) -> (# s1, W32# r #)
+
+readWord64Array :: MutableByteArray -> Int -> IO Word64
+readWord64Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readWord64Array# arr ix s0 of
+      (# s1, r #) -> (# s1, W64# r #)
+
+readIntArray :: MutableByteArray -> Int -> IO Int
+readIntArray (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readIntArray# arr ix s0 of
+      (# s1, r #) -> (# s1, I# r #)
+
+readInt8Array :: MutableByteArray -> Int -> IO Int8
+readInt8Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readInt8Array# arr ix s0 of
+      (# s1, r #) -> (# s1, I8# r #)
+
+readInt16Array :: MutableByteArray -> Int -> IO Int16
+readInt16Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readInt16Array# arr ix s0 of
+      (# s1, r #) -> (# s1, I16# r #)
+
+readInt32Array :: MutableByteArray -> Int -> IO Int32
+readInt32Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readInt32Array# arr ix s0 of
+      (# s1, r #) -> (# s1, I32# r #)
+
+readInt64Array :: MutableByteArray -> Int -> IO Int64
+readInt64Array (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readInt64Array# arr ix s0 of
+      (# s1, r #) -> (# s1, I64# r #)
+
+readCharArray :: MutableByteArray -> Int -> IO Char
+readCharArray (MutableByteArray arr) (I# ix) = IO $ \s0 ->
+    case readCharArray# arr ix s0 of
+      (# s1, r #) -> (# s1, C# r #)
+
+
+
+writeWordArray :: MutableByteArray -> Int -> Word -> IO ()
+writeWordArray (MutableByteArray arr) (I# ix) (W# x) = IO $ \s0 ->
+    case writeWordArray# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+writeWord8Array :: MutableByteArray -> Int -> Word8 -> IO ()
+writeWord8Array (MutableByteArray arr) (I# ix) (W8# x) = IO $ \s0 ->
+    case writeWord8Array# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+writeWord16Array :: MutableByteArray -> Int -> Word16 -> IO ()
+writeWord16Array (MutableByteArray arr) (I# ix) (W16# x) = IO $ \s0 ->
+    case writeWord16Array# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+writeWord32Array :: MutableByteArray -> Int -> Word32 -> IO ()
+writeWord32Array (MutableByteArray arr) (I# ix) (W32# x) = IO $ \s0 ->
+    case writeWord32Array# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+writeWord64Array :: MutableByteArray -> Int -> Word64 -> IO ()
+writeWord64Array (MutableByteArray arr) (I# ix) (W64# x) = IO $ \s0 ->
+    case writeWord64Array# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+writeIntArray :: MutableByteArray -> Int -> Int -> IO ()
+writeIntArray (MutableByteArray arr) (I# ix) (I# x) = IO $ \s0 ->
+    case writeIntArray# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+writeInt8Array :: MutableByteArray -> Int -> Int8 -> IO ()
+writeInt8Array (MutableByteArray arr) (I# ix) (I8# x) = IO $ \s0 ->
+    case writeInt8Array# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+writeInt16Array :: MutableByteArray -> Int -> Int16 -> IO ()
+writeInt16Array (MutableByteArray arr) (I# ix) (I16# x) = IO $ \s0 ->
+    case writeInt16Array# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+writeInt32Array :: MutableByteArray -> Int -> Int32 -> IO ()
+writeInt32Array (MutableByteArray arr) (I# ix) (I32# x) = IO $ \s0 ->
+    case writeInt32Array# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+writeInt64Array :: MutableByteArray -> Int -> Int64 -> IO ()
+writeInt64Array (MutableByteArray arr) (I# ix) (I64# x) = IO $ \s0 ->
+    case writeInt64Array# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+writeCharArray :: MutableByteArray -> Int -> Char -> IO ()
+writeCharArray (MutableByteArray arr) (I# ix) (C# x) = IO $ \s0 ->
+    case writeCharArray# arr ix x s0 of
+      s1 -> (# s1, () #)
+
+
+
+indexWordArray :: ByteArray -> Int -> Word
+indexWordArray (ByteArray arr) (I# ix) =
+    W# (indexWordArray# arr ix)
+
+indexWord8Array :: ByteArray -> Int -> Word8
+indexWord8Array (ByteArray arr) (I# ix) =
+    W8# (indexWord8Array# arr ix)
+
+indexWord16Array :: ByteArray -> Int -> Word16
+indexWord16Array (ByteArray arr) (I# ix) =
+    W16# (indexWord16Array# arr ix)
+
+indexWord32Array :: ByteArray -> Int -> Word32
+indexWord32Array (ByteArray arr) (I# ix) =
+    W32# (indexWord32Array# arr ix)
+
+indexWord64Array :: ByteArray -> Int -> Word64
+indexWord64Array (ByteArray arr) (I# ix) =
+    W64# (indexWord64Array# arr ix)
+
+indexIntArray :: ByteArray -> Int -> Int
+indexIntArray (ByteArray arr) (I# ix) =
+    I# (indexIntArray# arr ix)
+
+indexInt8Array :: ByteArray -> Int -> Int8
+indexInt8Array (ByteArray arr) (I# ix) =
+    I8# (indexInt8Array# arr ix)
+
+indexInt16Array :: ByteArray -> Int -> Int16
+indexInt16Array (ByteArray arr) (I# ix) =
+    I16# (indexInt16Array# arr ix)
+
+indexInt32Array :: ByteArray -> Int -> Int32
+indexInt32Array (ByteArray arr) (I# ix) =
+    I32# (indexInt32Array# arr ix)
+
+indexInt64Array :: ByteArray -> Int -> Int64
+indexInt64Array (ByteArray arr) (I# ix) =
+    I64# (indexInt64Array# arr ix)
+
+indexCharArray :: ByteArray -> Int -> Char
+indexCharArray (ByteArray arr) (I# ix) =
+    C# (indexCharArray# arr ix)
+


=====================================
compiler/GHC/Data/StringBuffer.hs
=====================================
@@ -17,7 +17,7 @@ Buffers for scanning string input stored in external arrays.
 
 module GHC.Data.StringBuffer
        (
-        StringBuffer(..),
+        StringBuffer, len, cur,
         -- non-abstract for vs\/HaskellService
 
          -- * Creation\/destruction
@@ -26,6 +26,7 @@ module GHC.Data.StringBuffer
         hPutStringBuffer,
         appendStringBuffers,
         stringToStringBuffer,
+        byteStringToStringBuffer,
 
         -- * Inspection
         nextChar,
@@ -54,10 +55,13 @@ import GHC.Prelude
 
 import GHC.Utils.Encoding
 import GHC.Data.FastString
+import GHC.Data.ByteArray
 import GHC.Utils.IO.Unsafe
 import GHC.Utils.Panic.Plain
 import GHC.Utils.Misc
 
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Unsafe as BS
 import Data.Maybe
 import Control.Exception
 import System.IO
@@ -65,6 +69,7 @@ import System.IO.Unsafe         ( unsafePerformIO )
 import GHC.IO.Encoding.UTF8     ( mkUTF8 )
 import GHC.IO.Encoding.Failure  ( CodingFailureMode(IgnoreCodingFailure) )
 
+import GHC.Word
 import GHC.Exts
 
 import Foreign
@@ -81,7 +86,7 @@ import Foreign
 --
 data StringBuffer
  = StringBuffer {
-     buf :: {-# UNPACK #-} !(ForeignPtr Word8),
+     buf :: {-# UNPACK #-} !ByteArray,
      len :: {-# UNPACK #-} !Int,        -- length
      cur :: {-# UNPACK #-} !Int         -- current pos
   }
@@ -102,34 +107,35 @@ instance Show StringBuffer where
 -- managed by the garbage collector.
 hGetStringBuffer :: FilePath -> IO StringBuffer
 hGetStringBuffer fname = do
-   h <- openBinaryFile fname ReadMode
-   size_i <- hFileSize h
-   offset_i <- skipBOM h size_i 0  -- offset is 0 initially
-   let size = fromIntegral $ size_i - offset_i
-   buf <- mallocForeignPtrArray (size+3)
-   withForeignPtr buf $ \ptr -> do
-     r <- if size == 0 then return 0 else hGetBuf h ptr size
-     hClose h
-     if (r /= size)
-        then ioError (userError "short read of file")
-        else newUTF8StringBuffer buf ptr size
+  h <- openBinaryFile fname ReadMode
+  size_i <- hFileSize h
+  offset_i <- skipBOM h size_i 0  -- offset is 0 initially
+  let size = fromIntegral $ size_i - offset_i
+  buf <- newPinnedMutableByteArray (size+3)
+  r <- if size == 0
+          then return 0
+          else hGetBuf h (unsafeMutableByteArrayContents buf) size
+  hClose h
+  if r /= size
+    then ioError (userError "short read of file")
+    else newUTF8StringBuffer buf size
 
 hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
-hGetStringBufferBlock handle wanted
-    = do size_i <- hFileSize handle
-         offset_i <- hTell handle >>= skipBOM handle size_i
-         let size = min wanted (fromIntegral $ size_i-offset_i)
-         buf <- mallocForeignPtrArray (size+3)
-         withForeignPtr buf $ \ptr ->
-             do r <- if size == 0 then return 0 else hGetBuf handle ptr size
-                if r /= size
-                   then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
-                   else newUTF8StringBuffer buf ptr size
+hGetStringBufferBlock handle wanted = do
+  size_i <- hFileSize handle
+  offset_i <- hTell handle >>= skipBOM handle size_i
+  let size = min wanted (fromIntegral $ size_i-offset_i)
+  buf <- newPinnedMutableByteArray (size+3)
+  r <- if size == 0
+          then return 0
+          else hGetBuf handle (unsafeMutableByteArrayContents buf) size
+  if r /= size
+    then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
+    else newUTF8StringBuffer buf size
 
 hPutStringBuffer :: Handle -> StringBuffer -> IO ()
-hPutStringBuffer hdl (StringBuffer buf len cur)
-    = withForeignPtr (plusForeignPtr buf cur) $ \ptr ->
-          hPutBuf hdl ptr len
+hPutStringBuffer hdl (StringBuffer buf len cur) = do
+  withByteArrayContents buf $ \ptr -> hPutBuf hdl (ptr `plusPtr` cur) len
 
 -- | Skip the byte-order mark if there is one (see #1744 and #6016),
 -- and return the new position of the handle in bytes.
@@ -156,39 +162,45 @@ skipBOM h size offset =
   where
     safeEncoding = mkUTF8 IgnoreCodingFailure
 
-newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
-newUTF8StringBuffer buf ptr size = do
-  pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+-- | @newUTF8StringBuffer buf size@ creates a 'StringBuffer' from a
+-- 'MutableByteArray' of length @size+3@ containing UTF-8 encoded text. A three
+-- byte sentinel will be added to the end of the buffer.
+newUTF8StringBuffer :: MutableByteArray -> Int -> IO StringBuffer
+newUTF8StringBuffer buf size = do
   -- sentinels for UTF-8 decoding
-  return $ StringBuffer buf size 0
+  writeWord8Array buf (size+0) 0
+  writeWord8Array buf (size+1) 0
+  writeWord8Array buf (size+3) 0
+  buf' <- unsafeFreezeByteArray buf
+  return $ StringBuffer buf' size 0
 
 appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
-appendStringBuffers sb1 sb2
-    = do newBuf <- mallocForeignPtrArray (size+3)
-         withForeignPtr newBuf $ \ptr ->
-          withForeignPtr (buf sb1) $ \sb1Ptr ->
-           withForeignPtr (buf sb2) $ \sb2Ptr ->
-             do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len
-                copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len
-                pokeArray (ptr `advancePtr` size) [0,0,0]
-                return (StringBuffer newBuf size 0)
-    where sb1_len = calcLen sb1
-          sb2_len = calcLen sb2
-          calcLen sb = len sb - cur sb
-          size =  sb1_len + sb2_len
+appendStringBuffers sb1 sb2 = do
+  dst <- newPinnedMutableByteArray (size+3)
+  copyByteArray (buf sb1) (cur sb1) dst 0 sb1_len
+  copyByteArray (buf sb2) (cur sb2) dst sb1_len sb2_len
+  newUTF8StringBuffer dst size
+  where
+    sb1_len = calcLen sb1
+    sb2_len = calcLen sb2
+    calcLen sb = len sb - cur sb
+    size =  sb1_len + sb2_len
+
+byteStringToStringBuffer :: BS.ByteString -> StringBuffer
+byteStringToStringBuffer bs = unsafePerformIO $ do
+  let size = BS.length bs
+  buf <- newPinnedMutableByteArray (size+3)
+  BS.unsafeUseAsCString bs (\p -> copyAddrToMutableByteArray p buf 0 size)
+  newUTF8StringBuffer buf size
 
 -- | Encode a 'String' into a 'StringBuffer' as UTF-8.  The resulting buffer
 -- is automatically managed by the garbage collector.
 stringToStringBuffer :: String -> StringBuffer
-stringToStringBuffer str =
- unsafePerformIO $ do
+stringToStringBuffer str = unsafePerformIO $ do
   let size = utf8EncodedLength str
-  buf <- mallocForeignPtrArray (size+3)
-  withForeignPtr buf $ \ptr -> do
-    utf8EncodeString ptr str
-    pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
-    -- sentinels for UTF-8 decoding
-  return (StringBuffer buf size 0)
+  buf <- newPinnedMutableByteArray (size+3)
+  utf8EncodeString (unsafeMutableByteArrayContents buf) str
+  newUTF8StringBuffer buf size
 
 -- -----------------------------------------------------------------------------
 -- Grab a character
@@ -202,12 +214,10 @@ stringToStringBuffer str =
 nextChar :: StringBuffer -> (Char,StringBuffer)
 nextChar (StringBuffer buf len (I# cur#)) =
   -- Getting our fingers dirty a little here, but this is performance-critical
-  inlinePerformIO $
-    withForeignPtr buf $ \(Ptr a#) ->
-        case utf8DecodeCharAddr# (a# `plusAddr#` cur#) 0# of
-          (# c#, nBytes# #) ->
-             let cur' = I# (cur# +# nBytes#) in
-             return (C# c#, StringBuffer buf len cur')
+    case utf8DecodeCharByteArray# (getByteArray buf) cur# of
+      (# c#, nBytes# #) ->
+        let cur' = I# (cur# +# nBytes#)
+        in (C# c#, StringBuffer buf len cur')
 
 -- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous
 -- to 'Data.List.head').  __Warning:__ The behavior is undefined if the
@@ -219,10 +229,9 @@ currentChar = fst . nextChar
 prevChar :: StringBuffer -> Char -> Char
 prevChar (StringBuffer _   _   0)   deflt = deflt
 prevChar (StringBuffer buf _   cur) _     =
-  inlinePerformIO $
-    withForeignPtr buf $ \p -> do
-      p' <- utf8PrevChar (p `plusPtr` cur)
-      return (fst (utf8DecodeChar p'))
+    let !(I# p') = utf8PrevChar (getByteArray buf) cur
+        !(# c, _ #) = utf8DecodeCharByteArray# (getByteArray buf) p'
+    in C# c
 
 -- -----------------------------------------------------------------------------
 -- Moving
@@ -257,18 +266,18 @@ atEnd (StringBuffer _ l c) = l == c
 -- wanted line. Lines begin at 1.
 atLine :: Int -> StringBuffer -> Maybe StringBuffer
 atLine line sb@(StringBuffer buf len _) =
-  inlinePerformIO $
-    withForeignPtr buf $ \p -> do
-      p' <- skipToLine line len p
-      if p' == nullPtr
-        then return Nothing
-        else
-          let
-            delta = p' `minusPtr` p
-          in return $ Just (sb { cur = delta
-                               , len = len - delta
-                               })
-
+  inlinePerformIO $ withByteArrayContents buf $ \p -> do
+    p' <- skipToLine line len p
+    if p' == nullPtr
+      then return Nothing
+      else
+        let !delta = p' `minusPtr` p
+        in return $ Just (sb { cur = delta
+                             , len = len - delta
+                             })
+
+-- | @skipToLine line len op0@ finds the byte offset to the beginning of
+-- the given line number.
 skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
 skipToLine !line !len !op0 = go 1 op0
   where
@@ -300,39 +309,42 @@ lexemeToString :: StringBuffer
                -> Int                   -- ^ @n@, the number of bytes
                -> String
 lexemeToString _ 0 = ""
-lexemeToString (StringBuffer buf _ cur) bytes =
-  utf8DecodeStringLazy buf cur bytes
+lexemeToString (StringBuffer buf _ (I# cur#)) (I# bytes#) =
+  utf8DecodeByteArrayLazy# (getByteArray buf) cur# bytes#
 
 lexemeToFastString :: StringBuffer
                    -> Int               -- ^ @n@, the number of bytes
                    -> FastString
 lexemeToFastString _ 0 = nilFS
 lexemeToFastString (StringBuffer buf _ cur) len =
-   inlinePerformIO $
-     withForeignPtr buf $ \ptr ->
-       return $! mkFastStringBytes (ptr `plusPtr` cur) len
+  inlinePerformIO $
+    withByteArrayContents buf $ \ptr ->
+      return $! mkFastStringBytes (ptr `plusPtr` cur) len
 
 -- | Return the previous @n@ characters (or fewer if we are less than @n@
 -- characters into the buffer.
 decodePrevNChars :: Int -> StringBuffer -> String
-decodePrevNChars n (StringBuffer buf _ cur) =
-    inlinePerformIO $ withForeignPtr buf $ \p0 ->
-      go p0 n "" (p0 `plusPtr` (cur - 1))
+decodePrevNChars n (StringBuffer buf0 _ cur) =
+    go (getByteArray buf0) (min n (cur - 1)) "" (cur - 1)
   where
-    go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
-    go buf0 n acc p | n == 0 || buf0 >= p = return acc
-    go buf0 n acc p = do
-        p' <- utf8PrevChar p
-        let (c,_) = utf8DecodeChar p'
-        go buf0 (n - 1) (c:acc) p'
+    go :: ByteArray# -> Int -> String -> Int -> String
+    go buf n acc ofs
+      | n == 0 = acc
+      | otherwise =
+          let !ofs'@(I# ofs'#) = utf8PrevChar buf ofs
+              !(# c,_ #) = utf8DecodeCharByteArray# buf ofs'#
+          in go buf (n - 1) (C# c:acc) ofs'
 
 -- -----------------------------------------------------------------------------
 -- Parsing integer strings in various bases
 parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
-parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
-  = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
-    go i x | i == len  = x
-           | otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of
-               '_'  -> go (i + 1) x    -- skip "_" (#14473)
-               char -> go (i + 1) (x * radix + toInteger (char_to_int char))
-  in go 0 0
+parseUnsignedInteger (StringBuffer buf _ (I# cur)) (I# len) radix char_to_int
+  = go (len +# cur) cur 0
+  where
+    go :: Int# -> Int# -> Integer -> Integer
+    go end i !acc
+      | isTrue# (i ==# end) = acc
+      | otherwise =
+        case utf8DecodeCharByteArray# (getByteArray buf) i of
+          (# '_'#, _ #) -> go end (i +# 1#) acc    -- skip "_" (#14473)
+          (# char, _ #) -> go end (i +# 1#) (acc * radix + toInteger (char_to_int (C# char)))


=====================================
compiler/GHC/Utils/Binary.hs
=====================================
@@ -6,6 +6,7 @@
 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE StandaloneDeriving #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE UnboxedTuples #-}
 
 {-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
 -- We always optimise this, otherwise performance of a non-optimised
@@ -84,6 +85,7 @@ import Data.Array
 import Data.ByteString (ByteString)
 import qualified Data.ByteString.Internal as BS
 import qualified Data.ByteString.Unsafe   as BS
+import GHC.ForeignPtr
 import Data.IORef
 import Data.Char                ( ord, chr )
 import Data.Time
@@ -96,7 +98,10 @@ import GHC.Real                 ( Ratio(..) )
 
 type BinArray = ForeignPtr Word8
 
-
+#if !MIN_VERSION_base(4,15,0)
+unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
+unsafeWithForeignPtr = withForeignPtr
+#endif
 
 ---------------------------------------------------------------
 -- BinData
@@ -111,14 +116,14 @@ instance Binary BinData where
   put_ bh (BinData sz dat) = do
     put_ bh sz
     putPrim bh sz $ \dest ->
-      withForeignPtr dat $ \orig ->
+      unsafeWithForeignPtr dat $ \orig ->
         copyBytes dest orig sz
   --
   get bh = do
     sz <- get bh
     dat <- mallocForeignPtrBytes sz
     getPrim bh sz $ \orig ->
-      withForeignPtr dat $ \dest ->
+      unsafeWithForeignPtr dat $ \dest ->
         copyBytes dest orig sz
     return (BinData sz dat)
 
@@ -226,7 +231,7 @@ writeBinMem (BinMem _ ix_r _ arr_r) fn = do
   h <- openBinaryFile fn WriteMode
   arr <- readIORef arr_r
   ix  <- readFastMutInt ix_r
-  withForeignPtr arr $ \p -> hPutBuf h p ix
+  unsafeWithForeignPtr arr $ \p -> hPutBuf h p ix
   hClose h
 
 readBinMem :: FilePath -> IO BinHandle
@@ -236,7 +241,7 @@ readBinMem filename = do
   filesize' <- hFileSize h
   let filesize = fromIntegral filesize'
   arr <- mallocForeignPtrBytes filesize
-  count <- withForeignPtr arr $ \p -> hGetBuf h p filesize
+  count <- unsafeWithForeignPtr arr $ \p -> hGetBuf h p filesize
   when (count /= filesize) $
        error ("Binary.readBinMem: only read " ++ show count ++ " bytes")
   hClose h
@@ -280,7 +285,7 @@ putPrim h@(BinMem _ ix_r sz_r arr_r) size f = do
   when (ix + size > sz) $
     expandBin h (ix + size)
   arr <- readIORef arr_r
-  withForeignPtr arr $ \op -> f (op `plusPtr` ix)
+  unsafeWithForeignPtr arr $ \op -> f (op `plusPtr` ix)
   writeFastMutInt ix_r (ix + size)
 
 -- -- | Similar to putPrim but advances the index by the actual number of
@@ -302,7 +307,10 @@ getPrim (BinMem _ ix_r sz_r arr_r) size f = do
   when (ix + size > sz) $
       ioError (mkIOError eofErrorType "Data.Binary.getPrim" Nothing Nothing)
   arr <- readIORef arr_r
-  w <- withForeignPtr arr $ \op -> f (op `plusPtr` ix)
+  w <- f (unsafeForeignPtrToPtr arr `plusPtr` ix)
+  touchForeignPtr arr
+    -- This is safe WRT #17760 as we we guarantee that the above line doesn't
+    -- diverge
   writeFastMutInt ix_r (ix + size)
   return w
 


=====================================
compiler/ghc.cabal.in
=====================================
@@ -356,6 +356,7 @@ Library
         GHC.Data.Bag
         GHC.Data.Bitmap
         GHC.Data.BooleanFormula
+        GHC.Data.ByteArray
         GHC.Data.EnumSet
         GHC.Data.FastMutInt
         GHC.Data.FastString


=====================================
libraries/base/GHC/Event/Array.hs
=====================================
@@ -33,7 +33,7 @@ import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
 import Foreign.Ptr (Ptr, nullPtr, plusPtr)
 import Foreign.Storable (Storable(..))
 import GHC.Base hiding (empty)
-import GHC.ForeignPtr (mallocPlainForeignPtrBytes, newForeignPtr_)
+import GHC.ForeignPtr (mallocPlainForeignPtrBytes, newForeignPtr_, unsafeWithForeignPtr)
 import GHC.Num (Num(..))
 import GHC.Real (fromIntegral)
 import GHC.Show (show)
@@ -78,9 +78,9 @@ reallocArray p newSize oldSize = reallocHack undefined p
   reallocHack dummy src = do
       let size = sizeOf dummy
       dst <- mallocPlainForeignPtrBytes (newSize * size)
-      withForeignPtr src $ \s ->
+      unsafeWithForeignPtr src $ \s ->
         when (s /= nullPtr && oldSize > 0) .
-          withForeignPtr dst $ \d -> do
+          unsafeWithForeignPtr dst $ \d -> do
             _ <- memcpy d s (fromIntegral (oldSize * size))
             return ()
       return dst
@@ -99,8 +99,8 @@ duplicate a = dupHack undefined a
   dupHack dummy (Array ref) = do
     AC es len cap <- readIORef ref
     ary <- allocArray cap
-    withForeignPtr ary $ \dest ->
-      withForeignPtr es $ \src -> do
+    unsafeWithForeignPtr ary $ \dest ->
+      unsafeWithForeignPtr es $ \src -> do
         _ <- memcpy dest src (fromIntegral (len * sizeOf dummy))
         return ()
     Array `fmap` newIORef (AC ary len cap)
@@ -119,8 +119,8 @@ unsafeRead :: Storable a => Array a -> Int -> IO a
 unsafeRead (Array ref) ix = do
     AC es _ cap <- readIORef ref
     CHECK_BOUNDS("unsafeRead",cap,ix)
-      withForeignPtr es $ \p ->
-        peekElemOff p ix
+      unsafeWithForeignPtr es $ \ptr -> peekElemOff ptr ix
+        -- this is safe WRT #17760 as we assume that peekElemOff doesn't diverge
 
 unsafeWrite :: Storable a => Array a -> Int -> a -> IO ()
 unsafeWrite (Array ref) ix a = do
@@ -130,13 +130,15 @@ unsafeWrite (Array ref) ix a = do
 unsafeWrite' :: Storable a => AC a -> Int -> a -> IO ()
 unsafeWrite' (AC es _ cap) ix a =
     CHECK_BOUNDS("unsafeWrite'",cap,ix)
-      withForeignPtr es $ \p ->
-        pokeElemOff p ix a
+      unsafeWithForeignPtr es $ \ptr -> pokeElemOff ptr ix a
+        -- this is safe WRT #17760 as we assume that peekElemOff doesn't diverge
 
+-- | Precondition: continuation must not diverge due to use of
+-- 'unsafeWithForeignPtr'.
 unsafeLoad :: Array a -> (Ptr a -> Int -> IO Int) -> IO Int
 unsafeLoad (Array ref) load = do
     AC es _ cap <- readIORef ref
-    len' <- withForeignPtr es $ \p -> load p cap
+    len' <- unsafeWithForeignPtr es $ \p -> load p cap
     writeIORef ref (AC es len' cap)
     return len'
 
@@ -146,7 +148,7 @@ unsafeCopyFromBuffer :: Storable a => Array a -> Ptr a -> Int -> IO ()
 unsafeCopyFromBuffer (Array ref) sptr n =
     readIORef ref >>= \(AC es _ cap) ->
     CHECK_BOUNDS("unsafeCopyFromBuffer", cap, n)
-    withForeignPtr es $ \pdest -> do
+    unsafeWithForeignPtr es $ \pdest -> do
       let size = sizeOfPtr sptr undefined
       _ <- memcpy pdest sptr (fromIntegral $ n * size)
       writeIORef ref (AC es n cap)
@@ -198,7 +200,7 @@ forM_ ary g = forHack ary g undefined
       AC es len _ <- readIORef ref
       let size = sizeOf dummy
           offset = len * size
-      withForeignPtr es $ \p -> do
+      unsafeWithForeignPtr es $ \p -> do
         let go n | n >= offset = return ()
                  | otherwise = do
               f =<< peek (p `plusPtr` n)
@@ -269,8 +271,8 @@ copy' d dstart s sstart maxCount = copyHack d s undefined
       then return dac
       else do
         AC dst dlen dcap <- ensureCapacity' dac (dstart + count)
-        withForeignPtr dst $ \dptr ->
-          withForeignPtr src $ \sptr -> do
+        unsafeWithForeignPtr dst $ \dptr ->
+          unsafeWithForeignPtr src $ \sptr -> do
             _ <- memcpy (dptr `plusPtr` (dstart * size))
                         (sptr `plusPtr` (sstart * size))
                         (fromIntegral (count * size))
@@ -286,7 +288,7 @@ removeAt a i = removeHack a undefined
     let size   = sizeOf dummy
         newLen = oldLen - 1
     when (newLen > 0 && i < newLen) .
-      withForeignPtr fp $ \ptr -> do
+      unsafeWithForeignPtr fp $ \ptr -> do
         _ <- memmove (ptr `plusPtr` (size * i))
                      (ptr `plusPtr` (size * (i+1)))
                      (fromIntegral (size * (newLen-i)))


=====================================
libraries/base/GHC/IO/Buffer.hs
=====================================
@@ -73,6 +73,7 @@ import GHC.Show
 import GHC.Real
 import GHC.List
 import GHC.ForeignPtr.Ops
+import GHC.ForeignPtr  (unsafeWithForeignPtr)
 import Foreign.C.Types
 import Foreign.ForeignPtr
 import Foreign.Storable
@@ -118,17 +119,17 @@ type CharBufElem = Char
 type RawCharBuffer = RawBuffer CharBufElem
 
 peekCharBuf :: RawCharBuffer -> Int -> IO Char
-peekCharBuf arr ix = withForeignPtr arr $ \p -> do
+peekCharBuf arr ix = unsafeWithForeignPtr arr $ \p -> do
                         (c,_) <- readCharBufPtr p ix
                         return c
 
 {-# INLINE readCharBuf #-}
 readCharBuf :: RawCharBuffer -> Int -> IO (Char, Int)
-readCharBuf arr ix = withForeignPtr arr $ \p -> readCharBufPtr p ix
+readCharBuf arr ix = unsafeWithForeignPtr arr $ \p -> readCharBufPtr p ix
 
 {-# INLINE writeCharBuf #-}
 writeCharBuf :: RawCharBuffer -> Int -> Char -> IO Int
-writeCharBuf arr ix c = withForeignPtr arr $ \p -> writeCharBufPtr p ix c
+writeCharBuf arr ix c = unsafeWithForeignPtr arr $ \p -> writeCharBufPtr p ix c
 
 {-# INLINE readCharBufPtr #-}
 readCharBufPtr :: Ptr CharBufElem -> Int -> IO (Char, Int)


=====================================
libraries/bytestring
=====================================
@@ -1 +1 @@
-Subproject commit e043aacfc4202a59ccae8b8c8cf0e1ad83a3f209
+Subproject commit 4cbf0cd6053411139a08ff67c7ec5eae1da87b03


=====================================
libraries/ghc-boot/GHC/Utils/Encoding.hs
=====================================
@@ -17,12 +17,15 @@
 module GHC.Utils.Encoding (
         -- * UTF-8
         utf8DecodeCharAddr#,
+        utf8DecodeCharByteArray#,
         utf8PrevChar,
         utf8CharStart,
         utf8DecodeChar,
         utf8DecodeByteString,
+        utf8DecodeByteArray,
         utf8DecodeShortByteString,
         utf8CompareShortByteString,
+        utf8DecodeByteArrayLazy#,
         utf8DecodeStringLazy,
         utf8EncodeChar,
         utf8EncodeString,
@@ -53,6 +56,7 @@ import Data.ByteString (ByteString)
 import qualified Data.ByteString.Internal as BS
 import Data.ByteString.Short.Internal (ShortByteString(..))
 
+import GHC.Word
 import GHC.Exts
 
 -- -----------------------------------------------------------------------------
@@ -131,15 +135,17 @@ utf8DecodeChar !(Ptr a#) =
 -- the start of the current character is, given any position in a
 -- stream.  This function finds the start of the previous character,
 -- assuming there *is* a previous character.
-utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8)
-utf8PrevChar p = utf8CharStart (p `plusPtr` (-1))
+utf8PrevChar :: ByteArray# -> Int -> Int
+utf8PrevChar arr ofs = utf8CharStart arr (ofs - 1)
 
-utf8CharStart :: Ptr Word8 -> IO (Ptr Word8)
-utf8CharStart p = go p
- where go p = do w <- peek p
-                 if w >= 0x80 && w < 0xC0
-                        then go (p `plusPtr` (-1))
-                        else return p
+utf8CharStart :: ByteArray# -> Int -> Int
+utf8CharStart = go
+  where
+    go arr ofs@(I# ofs#)
+      | w >= 0x80 && w < 0xC0 = go arr (ofs - 1)
+      | otherwise             = ofs
+      where
+        w = W8# (indexWord8Array# arr ofs#)
 
 {-# INLINE utf8DecodeLazy# #-}
 utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char]
@@ -158,6 +164,12 @@ utf8DecodeByteString :: ByteString -> [Char]
 utf8DecodeByteString (BS.PS fptr offset len)
   = utf8DecodeStringLazy fptr offset len
 
+utf8DecodeByteArrayLazy# :: ByteArray# -> Int# -> Int# -> [Char]
+utf8DecodeByteArrayLazy# a# offset# len#
+  = unsafeDupablePerformIO $
+      let decodeChar i = utf8DecodeCharByteArray# a# (i +# offset#)
+      in utf8DecodeLazy# (return ()) decodeChar len#
+
 utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
 utf8DecodeStringLazy fp offset (I# len#)
   = unsafeDupablePerformIO $ do
@@ -200,12 +212,15 @@ utf8CompareShortByteString (SBS a1) (SBS a2) = go 0# 0#
                          | isTrue# (b1_1 `ltWord#` b2_1) -> LT
                          | otherwise                     -> go (off1 +# 1#) (off2 +# 1#)
 
-utf8DecodeShortByteString :: ShortByteString -> [Char]
-utf8DecodeShortByteString (SBS ba#)
+utf8DecodeByteArray :: ByteArray# -> [Char]
+utf8DecodeByteArray ba#
   = unsafeDupablePerformIO $
       let len# = sizeofByteArray# ba# in
       utf8DecodeLazy# (return ()) (utf8DecodeCharByteArray# ba#) len#
 
+utf8DecodeShortByteString :: ShortByteString -> [Char]
+utf8DecodeShortByteString (SBS ba#) = utf8DecodeByteArray ba#
+
 countUTF8Chars :: ShortByteString -> IO Int
 countUTF8Chars (SBS ba) = go 0# 0#
   where


=====================================
testsuite/tests/parser/should_run/CountParserDeps.stdout
=====================================
@@ -1,4 +1,4 @@
-Found 235 parser module dependencies
+Found 236 parser module dependencies
 GHC.Builtin.Names
 GHC.Builtin.PrimOps
 GHC.Builtin.Types
@@ -62,6 +62,7 @@ GHC.Core.Utils
 GHC.CoreToIface
 GHC.Data.Bag
 GHC.Data.BooleanFormula
+GHC.Data.ByteArray
 GHC.Data.EnumSet
 GHC.Data.FastMutInt
 GHC.Data.FastString


=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit 25fa8fde84701c010fa466c2648f8f6d10265e8f
+Subproject commit 81dcb5545c88a6113777c6d87cd687278356d3a3



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/918211013cced851784c1f5ec5c68f97317f0f13...0f4327d893252acf6a7e7d4914a8168b2f16944e

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/918211013cced851784c1f5ec5c68f97317f0f13...0f4327d893252acf6a7e7d4914a8168b2f16944e
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20201124/cbb50f05/attachment-0001.html>


More information about the ghc-commits mailing list