[Git][ghc/ghc][wip/unboxed-codebuffer] 2 commits: Replace the implementation of CodeBuffers with unboxed types

Josh Meredith (@JoshMeredith) gitlab at gitlab.haskell.org
Thu Apr 20 17:14:51 UTC 2023



Josh Meredith pushed to branch wip/unboxed-codebuffer at Glasgow Haskell Compiler / GHC


Commits:
6eccf82c by Josh Meredith at 2023-04-20T08:44:11+00:00
Replace the implementation of CodeBuffers with unboxed types

- - - - -
852820f4 by Josh Meredith at 2023-04-20T17:14:19+00:00
Use unboxed codebuffers in base

- - - - -


8 changed files:

- libraries/base/GHC/IO/Encoding.hs
- libraries/base/GHC/IO/Encoding/Failure.hs
- libraries/base/GHC/IO/Encoding/Iconv.hs
- libraries/base/GHC/IO/Encoding/Latin1.hs
- libraries/base/GHC/IO/Encoding/Types.hs
- libraries/base/GHC/IO/Encoding/UTF16.hs
- libraries/base/GHC/IO/Encoding/UTF32.hs
- libraries/base/GHC/IO/Encoding/UTF8.hs


Changes:

=====================================
libraries/base/GHC/IO/Encoding.hs
=====================================
@@ -1,5 +1,6 @@
 {-# LANGUAGE Trustworthy #-}
 {-# LANGUAGE CPP, NoImplicitPrelude #-}
+{-# LANGUAGE UnboxedTuples #-}
 {-# OPTIONS_GHC -funbox-strict-fields #-}
 
 -----------------------------------------------------------------------------
@@ -336,11 +337,13 @@ mkTextEncoding' cfm enc =
 
 
 latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
-latin1_encode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_encode input output -- unchecked, used for char8
+latin1_encode input output = IO $ \st -> case Latin1.latin1_encode input output st of
+  (# st', _why, input', output' #) -> (# st', (input', output') #) -- unchecked, used for char8
 --latin1_encode = unsafePerformIO $ do mkTextEncoder Iconv.latin1 >>= return.encode
 
 latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer)
-latin1_decode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_decode input output
+latin1_decode input output = IO $ \st -> case Latin1.latin1_decode input output st of
+  (# st', _why, input', output' #) -> (# st', (input',output') #)
 --latin1_decode = unsafePerformIO $ do mkTextDecoder Iconv.latin1 >>= return.encode
 
 unknownEncodingErr :: String -> IO a


=====================================
libraries/base/GHC/IO/Encoding/Failure.hs
=====================================
@@ -1,5 +1,7 @@
 {-# LANGUAGE Trustworthy #-}
 {-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE MagicHash #-}
 
 -----------------------------------------------------------------------------
 -- |
@@ -18,7 +20,8 @@
 module GHC.IO.Encoding.Failure (
     CodingFailureMode(..), codingFailureModeSuffix,
     isSurrogate,
-    recoverDecode, recoverEncode
+    recoverDecode, recoverEncode,
+    recoverDecode#, recoverEncode#,
   ) where
 
 import GHC.IO
@@ -142,6 +145,12 @@ unescapeRoundtripCharacterSurrogate c
     | otherwise                 = Nothing
   where x = ord c
 
+recoverDecode# :: CodingFailureMode -> Buffer Word8 -> Buffer Char
+               -> State# RealWorld -> (# State# RealWorld, Buffer Word8, Buffer Char #)
+recoverDecode# cfm input output st =
+  let !(# st', (bIn, bOut) #) = unIO (recoverDecode cfm input output) st
+  in (# st', bIn, bOut #)
+
 recoverDecode :: CodingFailureMode -> Buffer Word8 -> Buffer Char
               -> IO (Buffer Word8, Buffer Char)
 recoverDecode cfm input at Buffer{  bufRaw=iraw, bufL=ir, bufR=_  }
@@ -160,6 +169,12 @@ recoverDecode cfm input at Buffer{  bufRaw=iraw, bufL=ir, bufR=_  }
       ow' <- writeCharBuf oraw ow (escapeToRoundtripCharacterSurrogate b)
       return (input { bufL=ir+1 }, output { bufR=ow' })
 
+recoverEncode# :: CodingFailureMode -> Buffer Char -> Buffer Word8
+               -> State# RealWorld -> (# State# RealWorld, Buffer Char, Buffer Word8 #)
+recoverEncode# cfm input output st =
+  let !(# st', (bIn, bOut) #) = unIO (recoverEncode cfm input output) st
+  in (# st', bIn, bOut #)
+
 recoverEncode :: CodingFailureMode -> Buffer Char -> Buffer Word8
               -> IO (Buffer Char, Buffer Word8)
 recoverEncode cfm input at Buffer{  bufRaw=iraw, bufL=ir, bufR=_  }


=====================================
libraries/base/GHC/IO/Encoding/Iconv.hs
=====================================
@@ -2,6 +2,8 @@
 {-# LANGUAGE CPP
            , NoImplicitPrelude
            , NondecreasingIndentation
+           , UnboxedTuples
+           , MagicHash
   #-}
 {-# OPTIONS_HADDOCK not-home #-}
 
@@ -133,19 +135,24 @@ newIConv from to rec fn =
   withCAString to   $ \ to_str -> do
     iconvt <- throwErrnoIfMinus1 "mkTextEncoding" $ hs_iconv_open to_str from_str
     let iclose = throwErrnoIfMinus1_ "Iconv.close" $ hs_iconv_close iconvt
-    return BufferCodec{
-                encode = fn iconvt,
-                recover = rec,
-                close  = iclose,
+        fn_iconvt ibuf obuf st = case unIO (fn iconvt ibuf obuf) st of
+          (# st', (prog, ibuf', obuf') #) -> (# st', prog, ibuf', obuf' #)
+    return BufferCodec# {
+                encode#   = fn_iconvt,
+                recover#  = rec#,
+                close#    = iclose,
                 -- iconv doesn't supply a way to save/restore the state
-                getState = return (),
-                setState = const $ return ()
+                getState# = return (),
+                setState# = const $ return ()
                 }
+  where
+    rec# ibuf obuf st = case unIO (rec ibuf obuf) st of
+      (# st', (ibuf', obuf') #) -> (# st', ibuf', obuf' #)
 
-iconvDecode :: IConv -> DecodeBuffer
+iconvDecode :: IConv -> Buffer Word8 -> Buffer Char -> IO (CodingProgress, Buffer Word8, Buffer Char)
 iconvDecode iconv_t ibuf obuf = iconvRecode iconv_t ibuf 0 obuf char_shift
 
-iconvEncode :: IConv -> EncodeBuffer
+iconvEncode :: IConv -> Buffer Char -> Buffer Word8 -> IO (CodingProgress, Buffer Char, Buffer Word8)
 iconvEncode iconv_t ibuf obuf = iconvRecode iconv_t ibuf char_shift obuf 0
 
 iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int


=====================================
libraries/base/GHC/IO/Encoding/Latin1.hs
=====================================
@@ -2,6 +2,8 @@
 {-# LANGUAGE NoImplicitPrelude
            , BangPatterns
            , NondecreasingIndentation
+           , UnboxedTuples
+           , MagicHash
   #-}
 {-# OPTIONS_GHC  -funbox-strict-fields #-}
 
@@ -56,22 +58,22 @@ mkLatin1 cfm = TextEncoding { textEncodingName = "ISO-8859-1",
 
 latin1_DF :: CodingFailureMode -> IO (TextDecoder ())
 latin1_DF cfm =
-  return (BufferCodec {
-             encode   = latin1_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = latin1_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 latin1_EF :: CodingFailureMode -> IO (TextEncoder ())
 latin1_EF cfm =
-  return (BufferCodec {
-             encode   = latin1_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = latin1_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 latin1_checked :: TextEncoding
@@ -85,12 +87,12 @@ mkLatin1_checked cfm = TextEncoding { textEncodingName = "ISO-8859-1",
 
 latin1_checked_EF :: CodingFailureMode -> IO (TextEncoder ())
 latin1_checked_EF cfm =
-  return (BufferCodec {
-             encode   = latin1_checked_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = latin1_checked_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 -- -----------------------------------------------------------------------------
@@ -108,22 +110,22 @@ mkAscii cfm = TextEncoding { textEncodingName = "ASCII",
 
 ascii_DF :: CodingFailureMode -> IO (TextDecoder ())
 ascii_DF cfm =
-  return (BufferCodec {
-             encode   = ascii_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = ascii_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 ascii_EF :: CodingFailureMode -> IO (TextEncoder ())
 ascii_EF cfm =
-  return (BufferCodec {
-             encode   = ascii_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = ascii_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 
@@ -134,97 +136,115 @@ ascii_EF cfm =
 -- TODO: Eliminate code duplication between the checked and unchecked
 -- versions of the decoder or encoder (but don't change the Core!)
 
-latin1_decode :: DecodeBuffer
+latin1_decode :: DecodeBuffer#
 latin1_decode 
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let 
-       loop !ir !ow
-         | ow >= os = done OutputUnderflow ir ow
-         | ir >= iw = done InputUnderflow ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os = done OutputUnderflow ir ow st0
+         | ir >= iw = done InputUnderflow ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
-              loop (ir+1) ow'
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
+                  !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
+              loop (ir+1) ow' st2
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+             !ro = output{ bufR=ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-ascii_decode :: DecodeBuffer
+ascii_decode :: DecodeBuffer#
 ascii_decode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os = done OutputUnderflow ir ow
-         | ir >= iw = done InputUnderflow ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os = done OutputUnderflow ir ow st0
+         | ir >= iw = done InputUnderflow ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              if c0 > 0x7f then invalid else do
-              ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
-              loop (ir+1) ow'
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
+              if c0 > 0x7f then invalid st1 else do
+                let !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
+                loop (ir+1) ow' st2
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' = 
+         let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+             !ro = output{ bufR=ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-latin1_encode :: EncodeBuffer
+latin1_encode :: EncodeBuffer#
 latin1_encode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ow >= os = done OutputUnderflow ir ow
-        | ir >= iw = done InputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' = 
+        let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+            !ro = output{ bufR=ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ow >= os = done OutputUnderflow ir ow st0
+        | ir >= iw = done InputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
-           writeWord8Buf oraw ow (fromIntegral (ord c))
-           loop ir' (ow+1)
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+               !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral (ord c))) st1
+           loop ir' (ow+1) st2
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-latin1_checked_encode :: EncodeBuffer
+latin1_checked_encode :: EncodeBuffer#
 latin1_checked_encode input output
  = single_byte_checked_encode 0xff input output
 
-ascii_encode :: EncodeBuffer
+ascii_encode :: EncodeBuffer#
 ascii_encode input output
  = single_byte_checked_encode 0x7f input output
 
-single_byte_checked_encode :: Int -> EncodeBuffer
+single_byte_checked_encode :: Int -> EncodeBuffer#
 single_byte_checked_encode max_legal_char
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ow >= os = done OutputUnderflow ir ow
-        | ir >= iw = done InputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' = 
+        let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+            !ro = output{ bufR=ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ow >= os = done OutputUnderflow ir ow st0
+        | ir >= iw = done InputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
-           if ord c > max_legal_char then invalid else do
-           writeWord8Buf oraw ow (fromIntegral (ord c))
-           loop ir' (ow+1)
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+           if ord c > max_legal_char then invalid st1 else do
+             let (# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral (ord c))) st1
+             loop ir' (ow+1) st2
         where
-           invalid = done InvalidSequence ir ow
+          invalid :: EncodingBuffer#
+          invalid st' = done InvalidSequence ir ow st'
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 {-# INLINE single_byte_checked_encode #-}


=====================================
libraries/base/GHC/IO/Encoding/Types.hs
=====================================
@@ -1,6 +1,9 @@
 {-# LANGUAGE Trustworthy #-}
 {-# LANGUAGE NoImplicitPrelude, ExistentialQuantification #-}
 {-# OPTIONS_GHC -funbox-strict-fields #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
+{-# LANGUAGE UnboxedTuples, MagicHash #-}
 
 -----------------------------------------------------------------------------
 -- |
@@ -17,11 +20,13 @@
 -----------------------------------------------------------------------------
 
 module GHC.IO.Encoding.Types (
-    BufferCodec(..),
+    BufferCodec(.., BufferCodec, encode, recover, close, getState, setState),
     TextEncoding(..),
     TextEncoder, TextDecoder,
     CodeBuffer, EncodeBuffer, DecodeBuffer,
-    CodingProgress(..)
+    CodingProgress(..),
+    DecodeBuffer#, EncodeBuffer#,
+    DecodingBuffer#, EncodingBuffer#
   ) where
 
 import GHC.Base
@@ -33,8 +38,8 @@ import GHC.IO.Buffer
 -- -----------------------------------------------------------------------------
 -- Text encoders/decoders
 
-data BufferCodec from to state = BufferCodec {
-  encode :: CodeBuffer from to,
+data BufferCodec from to state = BufferCodec# {
+  encode# :: CodeBuffer# from to,
    -- ^ The @encode@ function translates elements of the buffer @from@
    -- to the buffer @to at .  It should translate as many elements as possible
    -- given the sizes of the buffers, including translating zero elements
@@ -50,7 +55,7 @@ data BufferCodec from to state = BufferCodec {
    -- library in order to report translation errors at the point they
    -- actually occur, rather than when the buffer is translated.
   
-  recover :: Buffer from -> Buffer to -> IO (Buffer from, Buffer to),
+  recover# :: Buffer from -> Buffer to -> State# RealWorld -> (# State# RealWorld, Buffer from, Buffer to #),
    -- ^ The @recover@ function is used to continue decoding
    -- in the presence of invalid or unrepresentable sequences. This includes
    -- both those detected by @encode@ returning @InvalidSequence@ and those
@@ -69,12 +74,12 @@ data BufferCodec from to state = BufferCodec {
    --
    -- @since 4.4.0.0
   
-  close  :: IO (),
+  close# :: IO (),
    -- ^ Resources associated with the encoding may now be released.
    -- The @encode@ function may not be called again after calling
    -- @close at .
 
-  getState :: IO state,
+  getState# :: IO state,
    -- ^ Return the current state of the codec.
    --
    -- Many codecs are not stateful, and in these case the state can be
@@ -87,14 +92,22 @@ data BufferCodec from to state = BufferCodec {
    -- beginning), and if not, whether to use the big or little-endian
    -- encoding.
 
-  setState :: state -> IO ()
+  setState# :: state -> IO ()
    -- restore the state of the codec using the state from a previous
    -- call to 'getState'.
  }
 
-type CodeBuffer from to = Buffer from -> Buffer to -> IO (CodingProgress, Buffer from, Buffer to)
-type DecodeBuffer = CodeBuffer Word8 Char
-type EncodeBuffer = CodeBuffer Char Word8
+type CodeBuffer      from to = Buffer from -> Buffer to -> IO (CodingProgress, Buffer from, Buffer to)
+type DecodeBuffer            = CodeBuffer Word8 Char
+type EncodeBuffer            = CodeBuffer Char Word8
+
+type CodeBuffer#     from to = Buffer from -> Buffer to -> State# RealWorld -> (# State# RealWorld, CodingProgress, Buffer from, Buffer to #)
+type DecodeBuffer#           = CodeBuffer# Word8 Char
+type EncodeBuffer#           = CodeBuffer# Char  Word8
+
+type CodingBuffer#   from to = State# RealWorld -> (# State# RealWorld, CodingProgress, Buffer from, Buffer to #)
+type DecodingBuffer#         = CodingBuffer# Word8 Char
+type EncodingBuffer#         = CodingBuffer# Char  Word8
 
 type TextDecoder state = BufferCodec Word8 CharBufElem state
 type TextEncoder state = BufferCodec CharBufElem Word8 state
@@ -132,3 +145,29 @@ data CodingProgress = InputUnderflow  -- ^ Stopped because the input contains in
                              , Show -- ^ @since 4.4.0.0
                              )
 
+pattern BufferCodec :: CodeBuffer from to
+                    -> (Buffer from -> Buffer to -> IO (Buffer from, Buffer to))
+                    -> IO ()
+                    -> IO state
+                    -> (state -> IO ())
+                    -> BufferCodec from to state
+pattern BufferCodec{encode, recover, close, getState, setState} <-
+    BufferCodec# (getEncode -> encode) (getRecover -> recover) close getState setState
+  where
+    BufferCodec e r c g s = BufferCodec# (mkEncode e) (mkRecover r) c g s
+
+getEncode :: CodeBuffer# from to -> CodeBuffer from to
+getEncode e i o = IO $ \st ->
+  let !(# st', prog, i', o' #) = e i o st in (# st', (prog, i', o') #)
+
+getRecover :: (Buffer from -> Buffer to -> State# RealWorld -> (# State# RealWorld, Buffer from, Buffer to #))
+           -> (Buffer from -> Buffer to -> IO (Buffer from, Buffer to))
+getRecover r i o = IO $ \st ->
+  let !(# st', i', o' #) = r i o st in (# st', (i', o') #)
+
+mkEncode :: CodeBuffer from to -> CodeBuffer# from to
+mkEncode e i o st = let !(# st', (prog, i', o') #) = unIO (e i o) st in (# st', prog, i', o' #)
+
+mkRecover :: (Buffer from -> Buffer to -> IO (Buffer from, Buffer to))
+          -> (Buffer from -> Buffer to -> State# RealWorld -> (# State# RealWorld, Buffer from, Buffer to #))
+mkRecover r i o st = let !(# st', (i', o') #) = unIO (r i o) st in (# st', i', o' #)


=====================================
libraries/base/GHC/IO/Encoding/UTF16.hs
=====================================
@@ -3,6 +3,7 @@
            , BangPatterns
            , NondecreasingIndentation
            , MagicHash
+           , UnboxedTuples
   #-}
 {-# OPTIONS_GHC  -funbox-strict-fields #-}
 
@@ -61,64 +62,66 @@ mkUTF16 cfm =  TextEncoding { textEncodingName = "UTF-16",
                               mkTextDecoder = utf16_DF cfm,
                               mkTextEncoder = utf16_EF cfm }
 
-utf16_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer))
+utf16_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer#))
 utf16_DF cfm = do
   seen_bom <- newIORef Nothing
-  return (BufferCodec {
-             encode   = utf16_decode seen_bom,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = readIORef seen_bom,
-             setState = writeIORef seen_bom
+  return (BufferCodec# {
+             encode#   = utf16_decode seen_bom,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = readIORef seen_bom,
+             setState# = writeIORef seen_bom
           })
 
 utf16_EF :: CodingFailureMode -> IO (TextEncoder Bool)
 utf16_EF cfm = do
   done_bom <- newIORef False
-  return (BufferCodec {
-             encode   = utf16_encode done_bom,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = readIORef done_bom,
-             setState = writeIORef done_bom
+  return (BufferCodec# {
+             encode#   = utf16_encode done_bom,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = readIORef done_bom,
+             setState# = writeIORef done_bom
           })
 
-utf16_encode :: IORef Bool -> EncodeBuffer
+utf16_encode :: IORef Bool -> EncodeBuffer#
 utf16_encode done_bom input
   output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+  st0
  = do
-  b <- readIORef done_bom
-  if b then utf16_native_encode input output
+  let !(# st1, b #) = unIO (readIORef done_bom) st0
+  if b then utf16_native_encode input output st1
        else if os - ow < 2
-               then return (OutputUnderflow,input,output)
+               then (# st1,OutputUnderflow,input,output #)
                else do
-                    writeIORef done_bom True
-                    writeWord8Buf oraw ow     bom1
-                    writeWord8Buf oraw (ow+1) bom2
-                    utf16_native_encode input output{ bufR = ow+2 }
+               let !(# st2, () #) = unIO (writeIORef done_bom True) st1
+                   !(# st3, () #) = unIO (writeWord8Buf oraw ow     bom1) st2
+                   !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom2) st3
+               utf16_native_encode input output{ bufR = ow+2 } st4
 
-utf16_decode :: IORef (Maybe DecodeBuffer) -> DecodeBuffer
+utf16_decode :: IORef (Maybe DecodeBuffer#) -> DecodeBuffer#
 utf16_decode seen_bom
   input at Buffer{  bufRaw=iraw, bufL=ir, bufR=iw,  bufSize=_  }
   output
+  st0
  = do
-   mb <- readIORef seen_bom
+   let !(# st1, mb #) = unIO (readIORef seen_bom) st0
    case mb of
-     Just decode -> decode input output
+     Just decode -> decode input output st1
      Nothing ->
-       if iw - ir < 2 then return (InputUnderflow,input,output) else do
-       c0 <- readWord8Buf iraw ir
-       c1 <- readWord8Buf iraw (ir+1)
+       if iw - ir < 2 then (# st1,InputUnderflow,input,output #) else do
+       let !(# st2, c0 #) = unIO (readWord8Buf iraw  ir   ) st1
+           !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
        case () of
-        _ | c0 == bomB && c1 == bomL -> do
-               writeIORef seen_bom (Just utf16be_decode)
-               utf16be_decode input{ bufL= ir+2 } output
-          | c0 == bomL && c1 == bomB -> do
-               writeIORef seen_bom (Just utf16le_decode)
-               utf16le_decode input{ bufL= ir+2 } output
-          | otherwise -> do
-               writeIORef seen_bom (Just utf16_native_decode)
-               utf16_native_decode input output
+        _ | c0 == bomB && c1 == bomL ->
+               let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16be_decode)) st3
+               in utf16be_decode input{ bufL= ir+2 } output st4
+          | c0 == bomL && c1 == bomB ->
+               let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16le_decode)) st3
+               in utf16le_decode input{ bufL= ir+2 } output st4
+          | otherwise ->
+               let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16_native_decode)) st3
+               in utf16_native_decode input output st4
 
 
 bomB, bomL, bom1, bom2 :: Word8
@@ -126,10 +129,10 @@ bomB = 0xfe
 bomL = 0xff
 
 -- choose UTF-16BE by default for UTF-16 output
-utf16_native_decode :: DecodeBuffer
+utf16_native_decode :: DecodeBuffer#
 utf16_native_decode = utf16be_decode
 
-utf16_native_encode :: EncodeBuffer
+utf16_native_encode :: EncodeBuffer#
 utf16_native_encode = utf16be_encode
 
 bom1 = bomB
@@ -149,22 +152,22 @@ mkUTF16be cfm = TextEncoding { textEncodingName = "UTF-16BE",
 
 utf16be_DF :: CodingFailureMode -> IO (TextDecoder ())
 utf16be_DF cfm =
-  return (BufferCodec {
-             encode   = utf16be_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf16be_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf16be_EF :: CodingFailureMode -> IO (TextEncoder ())
 utf16be_EF cfm =
-  return (BufferCodec {
-             encode   = utf16be_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf16be_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf16le :: TextEncoding
@@ -178,114 +181,127 @@ mkUTF16le cfm = TextEncoding { textEncodingName = "UTF16-LE",
 
 utf16le_DF :: CodingFailureMode -> IO (TextDecoder ())
 utf16le_DF cfm =
-  return (BufferCodec {
-             encode   = utf16le_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf16le_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf16le_EF :: CodingFailureMode -> IO (TextEncoder ())
 utf16le_EF cfm =
-  return (BufferCodec {
-             encode   = utf16le_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf16le_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 
-utf16be_decode :: DecodeBuffer
+utf16be_decode :: DecodeBuffer#
 utf16be_decode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os     = done OutputUnderflow ir ow
-         | ir >= iw     = done InputUnderflow ir ow
-         | ir + 1 == iw = done InputUnderflow ir ow
+       loop !ir !ow st0
+         | ow >= os     = done OutputUnderflow ir ow st0
+         | ir >= iw     = done InputUnderflow ir ow st0
+         | ir + 1 == iw = done InputUnderflow ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              c1 <- readWord8Buf iraw (ir+1)
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw  ir   ) st0
+                  !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
               let x1 = fromIntegral c0 `shiftL` 8 + fromIntegral c1
               if validate1 x1
-                 then do ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral x1))
-                         loop (ir+2) ow'
-                 else if iw - ir < 4 then done InputUnderflow ir ow else do
-                      c2 <- readWord8Buf iraw (ir+2)
-                      c3 <- readWord8Buf iraw (ir+3)
-                      let x2 = fromIntegral c2 `shiftL` 8 + fromIntegral c3
-                      if not (validate2 x1 x2) then invalid else do
-                      ow' <- writeCharBuf oraw ow (chr2 x1 x2)
-                      loop (ir+4) ow'
+                 then let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral x1))) st2
+                      in loop (ir+2) ow' st3
+                 else if iw - ir < 4 then done InputUnderflow ir ow st2 else do
+                      let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                          !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+                          x2 = fromIntegral c2 `shiftL` 8 + fromIntegral c3
+                      if not (validate2 x1 x2) then invalid st4 else do
+                      let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr2 x1 x2)) st4
+                      loop (ir+4) ow' st5
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input { bufL = 0, bufR = 0 } else input { bufL = ir }
+             !ro = output{ bufR = ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf16le_decode :: DecodeBuffer
+utf16le_decode :: DecodeBuffer#
 utf16le_decode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os     = done OutputUnderflow ir ow
-         | ir >= iw     = done InputUnderflow ir ow
-         | ir + 1 == iw = done InputUnderflow ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os     = done OutputUnderflow ir ow st0
+         | ir >= iw     = done InputUnderflow ir ow st0
+         | ir + 1 == iw = done InputUnderflow ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              c1 <- readWord8Buf iraw (ir+1)
-              let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw  ir   ) st0
+                  !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                  x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0
               if validate1 x1
-                 then do ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral x1))
-                         loop (ir+2) ow'
-                 else if iw - ir < 4 then done InputUnderflow ir ow else do
-                      c2 <- readWord8Buf iraw (ir+2)
-                      c3 <- readWord8Buf iraw (ir+3)
-                      let x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2
-                      if not (validate2 x1 x2) then invalid else do
-                      ow' <- writeCharBuf oraw ow (chr2 x1 x2)
-                      loop (ir+4) ow'
+                 then let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral x1))) st2
+                      in loop (ir+2) ow' st3
+                 else if iw - ir < 4 then done InputUnderflow ir ow st2 else do
+                      let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                          !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+                          x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2
+                      if not (validate2 x1 x2) then invalid st4 else do
+                      let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr2 x1 x2)) st4
+                      loop (ir+4) ow' st5
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+             !ro = output{ bufR = ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf16be_encode :: EncodeBuffer
+utf16be_encode :: EncodeBuffer#
 utf16be_encode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ir >= iw     =  done InputUnderflow ir ow
-        | os - ow < 2  =  done OutputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' =
+        let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL=ir }
+            !ro = output{ bufR=ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ir >= iw     =  done InputUnderflow ir ow st0
+        | os - ow < 2  =  done OutputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
            case ord c of
-             x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow else do
-                    writeWord8Buf oraw ow     (fromIntegral (x `shiftR` 8))
-                    writeWord8Buf oraw (ow+1) (fromIntegral x)
-                    loop ir' (ow+2)
+             x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+                    let !(# st2, () #) = unIO (writeWord8Buf oraw ow     (fromIntegral (x `shiftR` 8))) st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) (fromIntegral x))              st2
+                    loop ir' (ow+2) st3
                | otherwise -> do
-                    if os - ow < 4 then done OutputUnderflow ir ow else do
+                    if os - ow < 4 then done OutputUnderflow ir ow st1 else do
                     let
                          n1 = x - 0x10000
                          c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
@@ -294,35 +310,39 @@ utf16be_encode
                          c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
                          c4 = fromIntegral n2
                     --
-                    writeWord8Buf oraw ow     c1
-                    writeWord8Buf oraw (ow+1) c2
-                    writeWord8Buf oraw (ow+2) c3
-                    writeWord8Buf oraw (ow+3) c4
-                    loop ir' (ow+4)
+                         !(# st2, () #) = unIO (writeWord8Buf oraw ow     c1) st1
+                         !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+                         !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c3) st3
+                         !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c4) st4
+                    loop ir' (ow+4) st5
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf16le_encode :: EncodeBuffer
+utf16le_encode :: EncodeBuffer#
 utf16le_encode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ir >= iw     =  done InputUnderflow ir ow
-        | os - ow < 2  =  done OutputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' =
+        let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+            !ro = output{ bufR = ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ir >= iw     =  done InputUnderflow ir ow st0
+        | os - ow < 2  =  done OutputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
            case ord c of
-             x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow else do
-                    writeWord8Buf oraw ow     (fromIntegral x)
-                    writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8))
-                    loop ir' (ow+2)
+             x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+                    let !(# st2, () #) = unIO (writeWord8Buf oraw ow     (fromIntegral x)) st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8))) st2
+                    loop ir' (ow+2) st3
                | otherwise ->
-                    if os - ow < 4 then done OutputUnderflow ir ow else do
+                    if os - ow < 4 then done OutputUnderflow ir ow st1 else do
                     let
                          n1 = x - 0x10000
                          c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
@@ -331,13 +351,13 @@ utf16le_encode
                          c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
                          c4 = fromIntegral n2
                     --
-                    writeWord8Buf oraw ow     c2
-                    writeWord8Buf oraw (ow+1) c1
-                    writeWord8Buf oraw (ow+2) c4
-                    writeWord8Buf oraw (ow+3) c3
-                    loop ir' (ow+4)
+                         !(# st2, () #) = unIO (writeWord8Buf oraw ow     c2) st1
+                         !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c1) st2
+                         !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c4) st3
+                         !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c3) st4
+                    loop ir' (ow+4) st5
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
 chr2 :: Word16 -> Word16 -> Char
 chr2 (W16# a#) (W16# b#) = C# (chr# (upper# +# lower# +# 0x10000#))


=====================================
libraries/base/GHC/IO/Encoding/UTF32.hs
=====================================
@@ -3,6 +3,7 @@
            , BangPatterns
            , NondecreasingIndentation
            , MagicHash
+           , UnboxedTuples
   #-}
 {-# OPTIONS_GHC  -funbox-strict-fields #-}
 
@@ -61,68 +62,70 @@ mkUTF32 cfm = TextEncoding { textEncodingName = "UTF-32",
                              mkTextDecoder = utf32_DF cfm,
                              mkTextEncoder = utf32_EF cfm }
 
-utf32_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer))
+utf32_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer#))
 utf32_DF cfm = do
   seen_bom <- newIORef Nothing
-  return (BufferCodec {
-             encode   = utf32_decode seen_bom,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = readIORef seen_bom,
-             setState = writeIORef seen_bom
+  return (BufferCodec# {
+             encode#   = utf32_decode seen_bom,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = readIORef seen_bom,
+             setState# = writeIORef seen_bom
           })
 
 utf32_EF :: CodingFailureMode -> IO (TextEncoder Bool)
 utf32_EF cfm = do
   done_bom <- newIORef False
-  return (BufferCodec {
-             encode   = utf32_encode done_bom,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = readIORef done_bom,
-             setState = writeIORef done_bom
+  return (BufferCodec# {
+             encode#   = utf32_encode done_bom,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = readIORef done_bom,
+             setState# = writeIORef done_bom
           })
 
-utf32_encode :: IORef Bool -> EncodeBuffer
+utf32_encode :: IORef Bool -> EncodeBuffer#
 utf32_encode done_bom input
   output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+  st0
  = do
-  b <- readIORef done_bom
-  if b then utf32_native_encode input output
+  let !(# st1, b #) = unIO (readIORef done_bom) st0
+  if b then utf32_native_encode input output st1
        else if os - ow < 4
-               then return (OutputUnderflow, input,output)
+               then (# st1,OutputUnderflow,input,output #)
                else do
-                    writeIORef done_bom True
-                    writeWord8Buf oraw ow     bom0
-                    writeWord8Buf oraw (ow+1) bom1
-                    writeWord8Buf oraw (ow+2) bom2
-                    writeWord8Buf oraw (ow+3) bom3
-                    utf32_native_encode input output{ bufR = ow+4 }
-
-utf32_decode :: IORef (Maybe DecodeBuffer) -> DecodeBuffer
+               let !(# st2, () #) = unIO (writeIORef done_bom True) st1
+                   !(# st3, () #) = unIO (writeWord8Buf oraw ow     bom0) st2
+                   !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom1) st3
+                   !(# st5, () #) = unIO (writeWord8Buf oraw (ow+2) bom2) st4
+                   !(# st6, () #) = unIO (writeWord8Buf oraw (ow+3) bom3) st5
+               utf32_native_encode input output{ bufR = ow+4 } st6
+
+utf32_decode :: IORef (Maybe DecodeBuffer#) -> DecodeBuffer#
 utf32_decode seen_bom
   input at Buffer{  bufRaw=iraw, bufL=ir, bufR=iw,  bufSize=_  }
   output
+  st0
  = do
-   mb <- readIORef seen_bom
+   let !(# st1, mb #) = unIO (readIORef seen_bom) st0
    case mb of
-     Just decode -> decode input output
+     Just decode -> decode input output st1
      Nothing ->
-       if iw - ir < 4 then return (InputUnderflow, input,output) else do
-       c0 <- readWord8Buf iraw ir
-       c1 <- readWord8Buf iraw (ir+1)
-       c2 <- readWord8Buf iraw (ir+2)
-       c3 <- readWord8Buf iraw (ir+3)
+       if iw - ir < 4 then (# st1,InputUnderflow,input,output #) else do
+       let !(# st2, c0 #) = unIO (readWord8Buf iraw  ir   ) st1
+           !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
+           !(# st4, c2 #) = unIO (readWord8Buf iraw (ir+2)) st3
+           !(# st5, c3 #) = unIO (readWord8Buf iraw (ir+3)) st4
        case () of
-        _ | c0 == bom0 && c1 == bom1 && c2 == bom2 && c3 == bom3 -> do
-               writeIORef seen_bom (Just utf32be_decode)
-               utf32be_decode input{ bufL= ir+4 } output
-        _ | c0 == bom3 && c1 == bom2 && c2 == bom1 && c3 == bom0 -> do
-               writeIORef seen_bom (Just utf32le_decode)
-               utf32le_decode input{ bufL= ir+4 } output
-          | otherwise -> do
-               writeIORef seen_bom (Just utf32_native_decode)
-               utf32_native_decode input output
+        _ | c0 == bom0 && c1 == bom1 && c2 == bom2 && c3 == bom3 ->
+               let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32be_decode)) st5
+               in utf32be_decode input{ bufL= ir+4 } output st6
+        _ | c0 == bom3 && c1 == bom2 && c2 == bom1 && c3 == bom0 ->
+               let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32le_decode)) st5
+               in utf32le_decode input{ bufL= ir+4 } output st6
+          | otherwise ->
+               let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32_native_decode)) st5
+               in utf32_native_decode input output st6
 
 
 bom0, bom1, bom2, bom3 :: Word8
@@ -132,10 +135,10 @@ bom2 = 0xfe
 bom3 = 0xff
 
 -- choose UTF-32BE by default for UTF-32 output
-utf32_native_decode :: DecodeBuffer
+utf32_native_decode :: DecodeBuffer#
 utf32_native_decode = utf32be_decode
 
-utf32_native_encode :: EncodeBuffer
+utf32_native_encode :: EncodeBuffer#
 utf32_native_encode = utf32be_encode
 
 -- -----------------------------------------------------------------------------
@@ -152,22 +155,22 @@ mkUTF32be cfm = TextEncoding { textEncodingName = "UTF-32BE",
 
 utf32be_DF :: CodingFailureMode -> IO (TextDecoder ())
 utf32be_DF cfm =
-  return (BufferCodec {
-             encode   = utf32be_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf32be_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf32be_EF :: CodingFailureMode -> IO (TextEncoder ())
 utf32be_EF cfm =
-  return (BufferCodec {
-             encode   = utf32be_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf32be_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 
@@ -182,128 +185,145 @@ mkUTF32le cfm = TextEncoding { textEncodingName = "UTF-32LE",
 
 utf32le_DF :: CodingFailureMode -> IO (TextDecoder ())
 utf32le_DF cfm =
-  return (BufferCodec {
-             encode   = utf32le_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf32le_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf32le_EF :: CodingFailureMode -> IO (TextEncoder ())
 utf32le_EF cfm =
-  return (BufferCodec {
-             encode   = utf32le_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf32le_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 
-utf32be_decode :: DecodeBuffer
+utf32be_decode :: DecodeBuffer#
 utf32be_decode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os    = done OutputUnderflow ir ow
-         | iw - ir < 4 = done InputUnderflow  ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os    = done OutputUnderflow ir ow st0
+         | iw - ir < 4 = done InputUnderflow  ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              c1 <- readWord8Buf iraw (ir+1)
-              c2 <- readWord8Buf iraw (ir+2)
-              c3 <- readWord8Buf iraw (ir+3)
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir    ) st0
+                  !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                  !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                  !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
               let x1 = chr4 c0 c1 c2 c3
-              if not (validate x1) then invalid else do
-              ow' <- writeCharBuf oraw ow x1
-              loop (ir+4) ow'
+              if not (validate x1) then invalid st4 else do
+              let !(# st5, ow' #) = unIO (writeCharBuf oraw ow x1) st4
+              loop (ir+4) ow' st5
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+             !ro = output{ bufR=ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf32le_decode :: DecodeBuffer
+utf32le_decode :: DecodeBuffer#
 utf32le_decode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os    = done OutputUnderflow ir ow
-         | iw - ir < 4 = done InputUnderflow  ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os    = done OutputUnderflow ir ow st0
+         | iw - ir < 4 = done InputUnderflow  ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
-              c1 <- readWord8Buf iraw (ir+1)
-              c2 <- readWord8Buf iraw (ir+2)
-              c3 <- readWord8Buf iraw (ir+3)
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir    ) st0
+                  !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                  !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                  !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
               let x1 = chr4 c3 c2 c1 c0
-              if not (validate x1) then invalid else do
-              ow' <- writeCharBuf oraw ow x1
-              loop (ir+4) ow'
+              if not (validate x1) then invalid st4 else do
+              let (# st5, ow' #) = unIO (writeCharBuf oraw ow x1) st4
+              loop (ir+4) ow' st5
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+             !ro = output{ bufR=ow }
+         in  (# st', why, ri, ro #)
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf32be_encode :: EncodeBuffer
+utf32be_encode :: EncodeBuffer#
 utf32be_encode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ir >= iw    = done InputUnderflow  ir ow
-        | os - ow < 4 = done OutputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' =
+        let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+            !ro = output{ bufR=ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ir >= iw    = done InputUnderflow  ir ow st0
+        | os - ow < 4 = done OutputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
-           if isSurrogate c then done InvalidSequence ir ow else do
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+           if isSurrogate c then done InvalidSequence ir ow st1 else do
              let (c0,c1,c2,c3) = ord4 c
-             writeWord8Buf oraw ow     c0
-             writeWord8Buf oraw (ow+1) c1
-             writeWord8Buf oraw (ow+2) c2
-             writeWord8Buf oraw (ow+3) c3
-             loop ir' (ow+4)
+                 !(# st2, () #) = unIO (writeWord8Buf oraw ow     c0) st1
+                 !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c1) st2
+                 !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c2) st3
+                 !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c3) st4
+             loop ir' (ow+4) st5
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
-utf32le_encode :: EncodeBuffer
+utf32le_encode :: EncodeBuffer#
 utf32le_encode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ir >= iw    = done InputUnderflow  ir ow
-        | os - ow < 4 = done OutputUnderflow ir ow
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' =
+        let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+            !ro = output{ bufR=ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ir >= iw    = done InputUnderflow  ir ow st0
+        | os - ow < 4 = done OutputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
-           if isSurrogate c then done InvalidSequence ir ow else do
+           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+           if isSurrogate c then done InvalidSequence ir ow st1 else do
              let (c0,c1,c2,c3) = ord4 c
-             writeWord8Buf oraw ow     c3
-             writeWord8Buf oraw (ow+1) c2
-             writeWord8Buf oraw (ow+2) c1
-             writeWord8Buf oraw (ow+3) c0
-             loop ir' (ow+4)
+                 !(# st2, () #) = unIO (writeWord8Buf oraw ow     c3) st1
+                 !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+                 !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c1) st3
+                 !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c0) st4
+             loop ir' (ow+4) st5
     in
-    loop ir0 ow0
+    loop ir0 ow0 st
 
 chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
 chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =


=====================================
libraries/base/GHC/IO/Encoding/UTF8.hs
=====================================
@@ -3,6 +3,7 @@
            , BangPatterns
            , NondecreasingIndentation
            , MagicHash
+           , UnboxedTuples
   #-}
 {-# OPTIONS_GHC -funbox-strict-fields #-}
 
@@ -56,22 +57,22 @@ mkUTF8 cfm = TextEncoding { textEncodingName = "UTF-8",
 
 utf8_DF :: CodingFailureMode -> IO (TextDecoder ())
 utf8_DF cfm =
-  return (BufferCodec {
-             encode   = utf8_decode,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf8_decode,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf8_EF :: CodingFailureMode -> IO (TextEncoder ())
 utf8_EF cfm =
-  return (BufferCodec {
-             encode   = utf8_encode,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = return (),
-             setState = const $ return ()
+  return (BufferCodec# {
+             encode#   = utf8_encode,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = return (),
+             setState# = const $ return ()
           })
 
 utf8_bom :: TextEncoding
@@ -85,177 +86,188 @@ mkUTF8_bom cfm = TextEncoding { textEncodingName = "UTF-8BOM",
 utf8_bom_DF :: CodingFailureMode -> IO (TextDecoder Bool)
 utf8_bom_DF cfm = do
    ref <- newIORef True
-   return (BufferCodec {
-             encode   = utf8_bom_decode ref,
-             recover  = recoverDecode cfm,
-             close    = return (),
-             getState = readIORef ref,
-             setState = writeIORef ref
+   return (BufferCodec# {
+             encode#   = utf8_bom_decode ref,
+             recover#  = recoverDecode# cfm,
+             close#    = return (),
+             getState# = readIORef ref,
+             setState# = writeIORef ref
           })
 
 utf8_bom_EF :: CodingFailureMode -> IO (TextEncoder Bool)
 utf8_bom_EF cfm = do
    ref <- newIORef True
-   return (BufferCodec {
-             encode   = utf8_bom_encode ref,
-             recover  = recoverEncode cfm,
-             close    = return (),
-             getState = readIORef ref,
-             setState = writeIORef ref
+   return (BufferCodec# {
+             encode#   = utf8_bom_encode ref,
+             recover#  = recoverEncode# cfm,
+             close#    = return (),
+             getState# = readIORef ref,
+             setState# = writeIORef ref
           })
 
-utf8_bom_decode :: IORef Bool -> DecodeBuffer
+utf8_bom_decode :: IORef Bool -> DecodeBuffer#
 utf8_bom_decode ref
   input at Buffer{  bufRaw=iraw, bufL=ir, bufR=iw,  bufSize=_  }
   output
+  st0
  = do
-   first <- readIORef ref
+   let (# st1, first #) = unIO (readIORef ref) st0
    if not first
-      then utf8_decode input output
+      then utf8_decode input output st1
       else do
-       let no_bom = do writeIORef ref False; utf8_decode input output
-       if iw - ir < 1 then return (InputUnderflow,input,output) else do
-       c0 <- readWord8Buf iraw ir
+       let no_bom = let !(# st', () #) = unIO (writeIORef ref False) st1 in utf8_decode input output st'
+       if iw - ir < 1 then (# st1,InputUnderflow,input,output #) else do
+       let !(# st2, c0 #) = unIO (readWord8Buf iraw ir) st1
        if (c0 /= bom0) then no_bom else do
-       if iw - ir < 2 then return (InputUnderflow,input,output) else do
-       c1 <- readWord8Buf iraw (ir+1)
+       if iw - ir < 2 then (# st2,InputUnderflow,input,output #) else do
+       let !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
        if (c1 /= bom1) then no_bom else do
-       if iw - ir < 3 then return (InputUnderflow,input,output) else do
-       c2 <- readWord8Buf iraw (ir+2)
+       if iw - ir < 3 then (# st3,InputUnderflow,input,output #) else do
+       let !(# st4, c2 #) = unIO (readWord8Buf iraw (ir+2)) st3
        if (c2 /= bom2) then no_bom else do
        -- found a BOM, ignore it and carry on
-       writeIORef ref False
-       utf8_decode input{ bufL = ir + 3 } output
+       let !(# st5, () #) = unIO (writeIORef ref False) st4
+       utf8_decode input{ bufL = ir + 3 } output st5
 
-utf8_bom_encode :: IORef Bool -> EncodeBuffer
+utf8_bom_encode :: IORef Bool -> EncodeBuffer#
 utf8_bom_encode ref input
   output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+  st0
  = do
-  b <- readIORef ref
-  if not b then utf8_encode input output
+  let !(# st1, b #) = unIO (readIORef ref) st0
+  if not b then utf8_encode input output st1
            else if os - ow < 3
-                  then return (OutputUnderflow,input,output)
+                  then (# st1,OutputUnderflow,input,output #)
                   else do
-                    writeIORef ref False
-                    writeWord8Buf oraw ow     bom0
-                    writeWord8Buf oraw (ow+1) bom1
-                    writeWord8Buf oraw (ow+2) bom2
-                    utf8_encode input output{ bufR = ow+3 }
+                    let !(# st2, () #) = unIO (writeIORef ref False)           st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw ow     bom0) st2
+                        !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom1) st3
+                        !(# st5, () #) = unIO (writeWord8Buf oraw (ow+2) bom2) st4
+                    utf8_encode input output{ bufR = ow+3 } st5
 
 bom0, bom1, bom2 :: Word8
 bom0 = 0xef
 bom1 = 0xbb
 bom2 = 0xbf
 
-utf8_decode :: DecodeBuffer
+utf8_decode :: DecodeBuffer#
 utf8_decode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-       loop !ir !ow
-         | ow >= os = done OutputUnderflow ir ow
-         | ir >= iw = done InputUnderflow ir ow
+       loop :: Int -> Int -> DecodingBuffer#
+       loop !ir !ow st0
+         | ow >= os = done OutputUnderflow ir ow st0
+         | ir >= iw = done InputUnderflow ir ow st0
          | otherwise = do
-              c0 <- readWord8Buf iraw ir
+              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
               case c0 of
                 _ | c0 <= 0x7f -> do
-                           ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
-                           loop (ir+1) ow'
-                  | c0 >= 0xc0 && c0 <= 0xc1 -> invalid -- Overlong forms
+                           let !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
+                           loop (ir+1) ow' st2
+                  | c0 >= 0xc0 && c0 <= 0xc1 -> invalid st1 -- Overlong forms
                   | c0 >= 0xc2 && c0 <= 0xdf ->
-                           if iw - ir < 2 then done InputUnderflow ir ow else do
-                           c1 <- readWord8Buf iraw (ir+1)
-                           if (c1 < 0x80 || c1 >= 0xc0) then invalid else do
-                           ow' <- writeCharBuf oraw ow (chr2 c0 c1)
-                           loop (ir+2) ow'
+                           if iw - ir < 2 then done InputUnderflow ir ow st1 else do
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                           if (c1 < 0x80 || c1 >= 0xc0) then invalid st2 else do
+                           let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (chr2 c0 c1)) st2
+                           loop (ir+2) ow' st3
                   | c0 >= 0xe0 && c0 <= 0xef ->
                       case iw - ir of
-                        1 -> done InputUnderflow ir ow
+                        1 -> done InputUnderflow ir ow st1
                         2 -> do -- check for an error even when we don't have
                                 -- the full sequence yet (#3341)
-                           c1 <- readWord8Buf iraw (ir+1)
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
                            if not (validate3 c0 c1 0x80)
-                              then invalid else done InputUnderflow ir ow
+                              then invalid st2 else done InputUnderflow ir ow st2
                         _ -> do
-                           c1 <- readWord8Buf iraw (ir+1)
-                           c2 <- readWord8Buf iraw (ir+2)
-                           if not (validate3 c0 c1 c2) then invalid else do
-                           ow' <- writeCharBuf oraw ow (chr3 c0 c1 c2)
-                           loop (ir+3) ow'
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                           let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                           if not (validate3 c0 c1 c2) then invalid st3 else do
+                           let !(# st4, ow' #) = unIO (writeCharBuf oraw ow (chr3 c0 c1 c2)) st3
+                           loop (ir+3) ow' st4
                   | c0 >= 0xf0 ->
                       case iw - ir of
-                        1 -> done InputUnderflow ir ow
+                        1 -> done InputUnderflow ir ow st1
                         2 -> do -- check for an error even when we don't have
                                 -- the full sequence yet (#3341)
-                           c1 <- readWord8Buf iraw (ir+1)
+                           let (# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
                            if not (validate4 c0 c1 0x80 0x80)
-                              then invalid else done InputUnderflow ir ow
+                              then invalid st2 else done InputUnderflow ir ow st2
                         3 -> do
-                           c1 <- readWord8Buf iraw (ir+1)
-                           c2 <- readWord8Buf iraw (ir+2)
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                               !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
                            if not (validate4 c0 c1 c2 0x80)
-                              then invalid else done InputUnderflow ir ow
+                              then invalid st3 else done InputUnderflow ir ow st3
                         _ -> do
-                           c1 <- readWord8Buf iraw (ir+1)
-                           c2 <- readWord8Buf iraw (ir+2)
-                           c3 <- readWord8Buf iraw (ir+3)
-                           if not (validate4 c0 c1 c2 c3) then invalid else do
-                           ow' <- writeCharBuf oraw ow (chr4 c0 c1 c2 c3)
-                           loop (ir+4) ow'
+                           let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+                               !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+                               !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+                           if not (validate4 c0 c1 c2 c3) then invalid st4 else do
+                           let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr4 c0 c1 c2 c3)) st4
+                           loop (ir+4) ow' st5
                   | otherwise ->
-                           invalid
+                           invalid st1
          where
-           invalid = done InvalidSequence ir ow
+           invalid :: DecodingBuffer#
+           invalid st' = done InvalidSequence ir ow st'
 
        -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done why !ir !ow = return (why,
-                                  if ir == iw then input{ bufL=0, bufR=0 }
-                                              else input{ bufL=ir },
-                                  output{ bufR=ow })
+       {-# NOINLINE done #-}
+       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+       done why !ir !ow st' =
+         let !ri = if ir == iw then input{ bufL = 0, bufR = 0} else input{ bufL = ir }
+             !ro = output { bufR = ow }
+         in (# st', why, ri, ro #)
    in
-   loop ir0 ow0
+   loop ir0 ow0 st
 
-utf8_encode :: EncodeBuffer
+utf8_encode :: EncodeBuffer#
 utf8_encode
   input at Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
   output at Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
+  st
  = let
-      done why !ir !ow = return (why,
-                                 if ir == iw then input{ bufL=0, bufR=0 }
-                                             else input{ bufL=ir },
-                                 output{ bufR=ow })
-      loop !ir !ow
-        | ow >= os = done OutputUnderflow ir ow
-        | ir >= iw = done InputUnderflow ir ow
+      {-# NOINLINE done #-}
+      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+      done why !ir !ow st' =
+        let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+            !ro = output{ bufR = ow }
+        in  (# st', why, ri, ro #)
+      loop :: Int -> Int -> EncodingBuffer#
+      loop !ir !ow st0
+        | ow >= os = done OutputUnderflow ir ow st0
+        | ir >= iw = done InputUnderflow ir ow st0
         | otherwise = do
-           (c,ir') <- readCharBuf iraw ir
+           let (# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
            case ord c of
              x | x <= 0x7F   -> do
-                    writeWord8Buf oraw ow (fromIntegral x)
-                    loop ir' (ow+1)
+                     let !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral x)) st1
+                     loop ir' (ow+1) st2
                | x <= 0x07FF ->
-                    if os - ow < 2 then done OutputUnderflow ir ow else do
+                    if os - ow < 2 then done OutputUnderflow ir ow st1 else do
                     let (c1,c2) = ord2 c
-                    writeWord8Buf oraw ow     c1
-                    writeWord8Buf oraw (ow+1) c2
-                    loop ir' (ow+2)
-               | x <= 0xFFFF -> if isSurrogate c then done InvalidSequence ir ow else do
-                    if os - ow < 3 then done OutputUnderflow ir ow else do
+                        !(# st2, () #) = unIO (writeWord8Buf oraw ow     c1) st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+                    loop ir' (ow+2) st3
+               | x <= 0xFFFF -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+                    if os - ow < 3 then done OutputUnderflow ir ow st1 else do
                     let (c1,c2,c3) = ord3 c
-                    writeWord8Buf oraw ow     c1
-                    writeWord8Buf oraw (ow+1) c2
-                    writeWord8Buf oraw (ow+2) c3
-                    loop ir' (ow+3)
+                        !(# st2, () #) = unIO (writeWord8Buf oraw ow     c1) st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+                        !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c3) st3
+                    loop ir' (ow+3) st4
                | otherwise -> do
-                    if os - ow < 4 then done OutputUnderflow ir ow else do
+                    if os - ow < 4 then done OutputUnderflow ir ow st1 else do
                     let (c1,c2,c3,c4) = ord4 c
-                    writeWord8Buf oraw ow     c1
-                    writeWord8Buf oraw (ow+1) c2
-                    writeWord8Buf oraw (ow+2) c3
-                    writeWord8Buf oraw (ow+3) c4
-                    loop ir' (ow+4)
+                        !(# st2, () #) = unIO (writeWord8Buf oraw ow     c1) st1
+                        !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+                        !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c3) st3
+                        !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c4) st4
+                    loop ir' (ow+4) st5
    in
-   loop ir0 ow0
+   loop ir0 ow0 st
 
 -- -----------------------------------------------------------------------------
 -- UTF-8 primitives, lifted from Data.Text.Fusion.Utf8



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/b037bbb093c5d51520acdd332d5947811e36417e...852820f42fd5bd3196d46fb2ec2eaa1fc95a851d

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/b037bbb093c5d51520acdd332d5947811e36417e...852820f42fd5bd3196d46fb2ec2eaa1fc95a851d
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230420/0939a2e8/attachment-0001.html>


More information about the ghc-commits mailing list