[Git][ghc/ghc][wip/unboxed-codebuffer] Use unboxed codebuffers in base
Josh Meredith (@JoshMeredith)
gitlab at gitlab.haskell.org
Tue May 9 11:21:27 UTC 2023
Josh Meredith pushed to branch wip/unboxed-codebuffer at Glasgow Haskell Compiler / GHC
Commits:
536e0538 by Josh Meredith at 2023-05-09T11:19:39+00:00
Use unboxed codebuffers in base
Metric Decrease:
encodingAllocations
- - - - -
8 changed files:
- libraries/base/GHC/IO/Encoding.hs
- libraries/base/GHC/IO/Encoding/CodePage/API.hs
- libraries/base/GHC/IO/Encoding/Failure.hs
- libraries/base/GHC/IO/Encoding/Iconv.hs
- libraries/base/GHC/IO/Encoding/Latin1.hs
- libraries/base/GHC/IO/Encoding/UTF16.hs
- libraries/base/GHC/IO/Encoding/UTF32.hs
- libraries/base/GHC/IO/Encoding/UTF8.hs
Changes:
=====================================
libraries/base/GHC/IO/Encoding.hs
=====================================
@@ -1,5 +1,6 @@
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP, NoImplicitPrelude #-}
+{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
-----------------------------------------------------------------------------
@@ -336,11 +337,13 @@ mkTextEncoding' cfm enc =
latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
-latin1_encode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_encode input output -- unchecked, used for char8
+latin1_encode input output = IO $ \st -> case Latin1.latin1_encode input output st of
+ (# st', _why, input', output' #) -> (# st', (input', output') #) -- unchecked, used for char8
--latin1_encode = unsafePerformIO $ do mkTextEncoder Iconv.latin1 >>= return.encode
latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer)
-latin1_decode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_decode input output
+latin1_decode input output = IO $ \st -> case Latin1.latin1_decode input output st of
+ (# st', _why, input', output' #) -> (# st', (input',output') #)
--latin1_decode = unsafePerformIO $ do mkTextDecoder Iconv.latin1 >>= return.encode
unknownEncodingErr :: String -> IO a
=====================================
libraries/base/GHC/IO/Encoding/CodePage/API.hs
=====================================
@@ -1,6 +1,7 @@
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP, NoImplicitPrelude, NondecreasingIndentation,
- RecordWildCards, ScopedTypeVariables #-}
+ RecordWildCards, ScopedTypeVariables,
+ UnboxedTuples #-}
{-# OPTIONS_GHC -Wno-name-shadowing #-}
module GHC.IO.Encoding.CodePage.API (
@@ -157,11 +158,15 @@ newCP rec fn cp = do
utf16_native_encode' :: EncodeBuffer
utf16_native_decode' :: DecodeBuffer
#if defined(WORDS_BIGENDIAN)
-utf16_native_encode' = utf16be_encode
-utf16_native_decode' = utf16be_decode
+utf16_native_encode' i o = IO $ \st -> case utf16be_encode i o st of
+ (# st', c, i', o' #) -> (# st', (c, i', o') #)
+utf16_native_decode' i o = IO $ \st -> case utf16be_decode i o st of
+ (# st', c, i', o' #) -> (# st', (c, i', o') #)
#else
-utf16_native_encode' = utf16le_encode
-utf16_native_decode' = utf16le_decode
+utf16_native_encode' i o = IO $ \st -> case utf16le_encode i o st of
+ (# st', c, i', o' #) -> (# st', (c, i', o') #)
+utf16_native_decode' i o = IO $ \st -> case utf16le_decode i o st of
+ (# st', c, i', o' #) -> (# st', (c, i', o') #)
#endif
saner :: CodeBuffer from to
=====================================
libraries/base/GHC/IO/Encoding/Failure.hs
=====================================
@@ -1,5 +1,8 @@
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE BangPatterns #-}
-----------------------------------------------------------------------------
-- |
@@ -18,7 +21,8 @@
module GHC.IO.Encoding.Failure (
CodingFailureMode(..), codingFailureModeSuffix,
isSurrogate,
- recoverDecode, recoverEncode
+ recoverDecode, recoverEncode,
+ recoverDecode#, recoverEncode#,
) where
import GHC.IO
@@ -142,6 +146,12 @@ unescapeRoundtripCharacterSurrogate c
| otherwise = Nothing
where x = ord c
+recoverDecode# :: CodingFailureMode -> Buffer Word8 -> Buffer Char
+ -> State# RealWorld -> (# State# RealWorld, Buffer Word8, Buffer Char #)
+recoverDecode# cfm input output st =
+ let !(# st', (bIn, bOut) #) = unIO (recoverDecode cfm input output) st
+ in (# st', bIn, bOut #)
+
recoverDecode :: CodingFailureMode -> Buffer Word8 -> Buffer Char
-> IO (Buffer Word8, Buffer Char)
recoverDecode cfm input at Buffer{ bufRaw=iraw, bufL=ir, bufR=_ }
@@ -160,6 +170,12 @@ recoverDecode cfm input at Buffer{ bufRaw=iraw, bufL=ir, bufR=_ }
ow' <- writeCharBuf oraw ow (escapeToRoundtripCharacterSurrogate b)
return (input { bufL=ir+1 }, output { bufR=ow' })
+recoverEncode# :: CodingFailureMode -> Buffer Char -> Buffer Word8
+ -> State# RealWorld -> (# State# RealWorld, Buffer Char, Buffer Word8 #)
+recoverEncode# cfm input output st =
+ let !(# st', (bIn, bOut) #) = unIO (recoverEncode cfm input output) st
+ in (# st', bIn, bOut #)
+
recoverEncode :: CodingFailureMode -> Buffer Char -> Buffer Word8
-> IO (Buffer Char, Buffer Word8)
recoverEncode cfm input at Buffer{ bufRaw=iraw, bufL=ir, bufR=_ }
=====================================
libraries/base/GHC/IO/Encoding/Iconv.hs
=====================================
@@ -2,6 +2,8 @@
{-# LANGUAGE CPP
, NoImplicitPrelude
, NondecreasingIndentation
+ , UnboxedTuples
+ , MagicHash
#-}
{-# OPTIONS_HADDOCK not-home #-}
@@ -133,19 +135,24 @@ newIConv from to rec fn =
withCAString to $ \ to_str -> do
iconvt <- throwErrnoIfMinus1 "mkTextEncoding" $ hs_iconv_open to_str from_str
let iclose = throwErrnoIfMinus1_ "Iconv.close" $ hs_iconv_close iconvt
- return BufferCodec{
- encode = fn iconvt,
- recover = rec,
- close = iclose,
+ fn_iconvt ibuf obuf st = case unIO (fn iconvt ibuf obuf) st of
+ (# st', (prog, ibuf', obuf') #) -> (# st', prog, ibuf', obuf' #)
+ return BufferCodec# {
+ encode# = fn_iconvt,
+ recover# = rec#,
+ close# = iclose,
-- iconv doesn't supply a way to save/restore the state
- getState = return (),
- setState = const $ return ()
+ getState# = return (),
+ setState# = const $ return ()
}
+ where
+ rec# ibuf obuf st = case unIO (rec ibuf obuf) st of
+ (# st', (ibuf', obuf') #) -> (# st', ibuf', obuf' #)
-iconvDecode :: IConv -> DecodeBuffer
+iconvDecode :: IConv -> Buffer Word8 -> Buffer Char -> IO (CodingProgress, Buffer Word8, Buffer Char)
iconvDecode iconv_t ibuf obuf = iconvRecode iconv_t ibuf 0 obuf char_shift
-iconvEncode :: IConv -> EncodeBuffer
+iconvEncode :: IConv -> Buffer Char -> Buffer Word8 -> IO (CodingProgress, Buffer Char, Buffer Word8)
iconvEncode iconv_t ibuf obuf = iconvRecode iconv_t ibuf char_shift obuf 0
iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int
=====================================
libraries/base/GHC/IO/Encoding/Latin1.hs
=====================================
@@ -2,6 +2,8 @@
{-# LANGUAGE NoImplicitPrelude
, BangPatterns
, NondecreasingIndentation
+ , UnboxedTuples
+ , MagicHash
#-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
@@ -56,22 +58,22 @@ mkLatin1 cfm = TextEncoding { textEncodingName = "ISO-8859-1",
latin1_DF :: CodingFailureMode -> IO (TextDecoder ())
latin1_DF cfm =
- return (BufferCodec {
- encode = latin1_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = latin1_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
latin1_EF :: CodingFailureMode -> IO (TextEncoder ())
latin1_EF cfm =
- return (BufferCodec {
- encode = latin1_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = latin1_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
latin1_checked :: TextEncoding
@@ -85,12 +87,12 @@ mkLatin1_checked cfm = TextEncoding { textEncodingName = "ISO-8859-1",
latin1_checked_EF :: CodingFailureMode -> IO (TextEncoder ())
latin1_checked_EF cfm =
- return (BufferCodec {
- encode = latin1_checked_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = latin1_checked_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
-- -----------------------------------------------------------------------------
@@ -108,22 +110,22 @@ mkAscii cfm = TextEncoding { textEncodingName = "ASCII",
ascii_DF :: CodingFailureMode -> IO (TextDecoder ())
ascii_DF cfm =
- return (BufferCodec {
- encode = ascii_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = ascii_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
ascii_EF :: CodingFailureMode -> IO (TextEncoder ())
ascii_EF cfm =
- return (BufferCodec {
- encode = ascii_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = ascii_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
@@ -134,97 +136,115 @@ ascii_EF cfm =
-- TODO: Eliminate code duplication between the checked and unchecked
-- versions of the decoder or encoder (but don't change the Core!)
-latin1_decode :: DecodeBuffer
+latin1_decode :: DecodeBuffer#
latin1_decode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | ir >= iw = done InputUnderflow ir ow
+ loop :: Int -> Int -> DecodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
- ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
- loop (ir+1) ow'
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
+ !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
+ loop (ir+1) ow' st2
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-ascii_decode :: DecodeBuffer
+ascii_decode :: DecodeBuffer#
ascii_decode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | ir >= iw = done InputUnderflow ir ow
+ loop :: Int -> Int -> DecodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
- if c0 > 0x7f then invalid else do
- ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
- loop (ir+1) ow'
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
+ if c0 > 0x7f then invalid st1 else do
+ let !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
+ loop (ir+1) ow' st2
where
- invalid = done InvalidSequence ir ow
+ invalid :: DecodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-latin1_encode :: EncodeBuffer
+latin1_encode :: EncodeBuffer#
latin1_encode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | ir >= iw = done InputUnderflow ir ow
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
- writeWord8Buf oraw ow (fromIntegral (ord c))
- loop ir' (ow+1)
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral (ord c))) st1
+ loop ir' (ow+1) st2
in
- loop ir0 ow0
+ loop ir0 ow0 st
-latin1_checked_encode :: EncodeBuffer
+latin1_checked_encode :: EncodeBuffer#
latin1_checked_encode input output
= single_byte_checked_encode 0xff input output
-ascii_encode :: EncodeBuffer
+ascii_encode :: EncodeBuffer#
ascii_encode input output
= single_byte_checked_encode 0x7f input output
-single_byte_checked_encode :: Int -> EncodeBuffer
+single_byte_checked_encode :: Int -> EncodeBuffer#
single_byte_checked_encode max_legal_char
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | ir >= iw = done InputUnderflow ir ow
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
- if ord c > max_legal_char then invalid else do
- writeWord8Buf oraw ow (fromIntegral (ord c))
- loop ir' (ow+1)
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+ if ord c > max_legal_char then invalid st1 else do
+ let !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral (ord c))) st1
+ loop ir' (ow+1) st2
where
- invalid = done InvalidSequence ir ow
+ invalid :: EncodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
in
- loop ir0 ow0
+ loop ir0 ow0 st
{-# INLINE single_byte_checked_encode #-}
=====================================
libraries/base/GHC/IO/Encoding/UTF16.hs
=====================================
@@ -3,6 +3,7 @@
, BangPatterns
, NondecreasingIndentation
, MagicHash
+ , UnboxedTuples
#-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
@@ -61,64 +62,66 @@ mkUTF16 cfm = TextEncoding { textEncodingName = "UTF-16",
mkTextDecoder = utf16_DF cfm,
mkTextEncoder = utf16_EF cfm }
-utf16_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer))
+utf16_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer#))
utf16_DF cfm = do
seen_bom <- newIORef Nothing
- return (BufferCodec {
- encode = utf16_decode seen_bom,
- recover = recoverDecode cfm,
- close = return (),
- getState = readIORef seen_bom,
- setState = writeIORef seen_bom
+ return (BufferCodec# {
+ encode# = utf16_decode seen_bom,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = readIORef seen_bom,
+ setState# = writeIORef seen_bom
})
utf16_EF :: CodingFailureMode -> IO (TextEncoder Bool)
utf16_EF cfm = do
done_bom <- newIORef False
- return (BufferCodec {
- encode = utf16_encode done_bom,
- recover = recoverEncode cfm,
- close = return (),
- getState = readIORef done_bom,
- setState = writeIORef done_bom
+ return (BufferCodec# {
+ encode# = utf16_encode done_bom,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = readIORef done_bom,
+ setState# = writeIORef done_bom
})
-utf16_encode :: IORef Bool -> EncodeBuffer
+utf16_encode :: IORef Bool -> EncodeBuffer#
utf16_encode done_bom input
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+ st0
= do
- b <- readIORef done_bom
- if b then utf16_native_encode input output
+ let !(# st1, b #) = unIO (readIORef done_bom) st0
+ if b then utf16_native_encode input output st1
else if os - ow < 2
- then return (OutputUnderflow,input,output)
+ then (# st1,OutputUnderflow,input,output #)
else do
- writeIORef done_bom True
- writeWord8Buf oraw ow bom1
- writeWord8Buf oraw (ow+1) bom2
- utf16_native_encode input output{ bufR = ow+2 }
+ let !(# st2, () #) = unIO (writeIORef done_bom True) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw ow bom1) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom2) st3
+ utf16_native_encode input output{ bufR = ow+2 } st4
-utf16_decode :: IORef (Maybe DecodeBuffer) -> DecodeBuffer
+utf16_decode :: IORef (Maybe DecodeBuffer#) -> DecodeBuffer#
utf16_decode seen_bom
input at Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ }
output
+ st0
= do
- mb <- readIORef seen_bom
+ let !(# st1, mb #) = unIO (readIORef seen_bom) st0
case mb of
- Just decode -> decode input output
+ Just decode -> decode input output st1
Nothing ->
- if iw - ir < 2 then return (InputUnderflow,input,output) else do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
+ if iw - ir < 2 then (# st1,InputUnderflow,input,output #) else do
+ let !(# st2, c0 #) = unIO (readWord8Buf iraw ir ) st1
+ !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
case () of
- _ | c0 == bomB && c1 == bomL -> do
- writeIORef seen_bom (Just utf16be_decode)
- utf16be_decode input{ bufL= ir+2 } output
- | c0 == bomL && c1 == bomB -> do
- writeIORef seen_bom (Just utf16le_decode)
- utf16le_decode input{ bufL= ir+2 } output
- | otherwise -> do
- writeIORef seen_bom (Just utf16_native_decode)
- utf16_native_decode input output
+ _ | c0 == bomB && c1 == bomL ->
+ let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16be_decode)) st3
+ in utf16be_decode input{ bufL= ir+2 } output st4
+ | c0 == bomL && c1 == bomB ->
+ let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16le_decode)) st3
+ in utf16le_decode input{ bufL= ir+2 } output st4
+ | otherwise ->
+ let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16_native_decode)) st3
+ in utf16_native_decode input output st4
bomB, bomL, bom1, bom2 :: Word8
@@ -126,10 +129,10 @@ bomB = 0xfe
bomL = 0xff
-- choose UTF-16BE by default for UTF-16 output
-utf16_native_decode :: DecodeBuffer
+utf16_native_decode :: DecodeBuffer#
utf16_native_decode = utf16be_decode
-utf16_native_encode :: EncodeBuffer
+utf16_native_encode :: EncodeBuffer#
utf16_native_encode = utf16be_encode
bom1 = bomB
@@ -149,22 +152,22 @@ mkUTF16be cfm = TextEncoding { textEncodingName = "UTF-16BE",
utf16be_DF :: CodingFailureMode -> IO (TextDecoder ())
utf16be_DF cfm =
- return (BufferCodec {
- encode = utf16be_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf16be_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf16be_EF :: CodingFailureMode -> IO (TextEncoder ())
utf16be_EF cfm =
- return (BufferCodec {
- encode = utf16be_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf16be_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf16le :: TextEncoding
@@ -178,114 +181,127 @@ mkUTF16le cfm = TextEncoding { textEncodingName = "UTF16-LE",
utf16le_DF :: CodingFailureMode -> IO (TextDecoder ())
utf16le_DF cfm =
- return (BufferCodec {
- encode = utf16le_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf16le_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf16le_EF :: CodingFailureMode -> IO (TextEncoder ())
utf16le_EF cfm =
- return (BufferCodec {
- encode = utf16le_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf16le_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
-utf16be_decode :: DecodeBuffer
+utf16be_decode :: DecodeBuffer#
utf16be_decode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | ir >= iw = done InputUnderflow ir ow
- | ir + 1 == iw = done InputUnderflow ir ow
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | ir + 1 == iw = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir ) st0
+ !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
let x1 = fromIntegral c0 `shiftL` 8 + fromIntegral c1
if validate1 x1
- then do ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral x1))
- loop (ir+2) ow'
- else if iw - ir < 4 then done InputUnderflow ir ow else do
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
- let x2 = fromIntegral c2 `shiftL` 8 + fromIntegral c3
- if not (validate2 x1 x2) then invalid else do
- ow' <- writeCharBuf oraw ow (chr2 x1 x2)
- loop (ir+4) ow'
+ then let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral x1))) st2
+ in loop (ir+2) ow' st3
+ else if iw - ir < 4 then done InputUnderflow ir ow st2 else do
+ let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+ !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+ x2 = fromIntegral c2 `shiftL` 8 + fromIntegral c3
+ if not (validate2 x1 x2) then invalid st4 else do
+ let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr2 x1 x2)) st4
+ loop (ir+4) ow' st5
where
- invalid = done InvalidSequence ir ow
+ invalid :: DecodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input { bufL = 0, bufR = 0 } else input { bufL = ir }
+ !ro = output{ bufR = ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf16le_decode :: DecodeBuffer
+utf16le_decode :: DecodeBuffer#
utf16le_decode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | ir >= iw = done InputUnderflow ir ow
- | ir + 1 == iw = done InputUnderflow ir ow
+ loop :: Int -> Int -> DecodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | ir + 1 == iw = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
- let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir ) st0
+ !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+ x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0
if validate1 x1
- then do ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral x1))
- loop (ir+2) ow'
- else if iw - ir < 4 then done InputUnderflow ir ow else do
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
- let x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2
- if not (validate2 x1 x2) then invalid else do
- ow' <- writeCharBuf oraw ow (chr2 x1 x2)
- loop (ir+4) ow'
+ then let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral x1))) st2
+ in loop (ir+2) ow' st3
+ else if iw - ir < 4 then done InputUnderflow ir ow st2 else do
+ let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+ !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+ x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2
+ if not (validate2 x1 x2) then invalid st4 else do
+ let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr2 x1 x2)) st4
+ loop (ir+4) ow' st5
where
- invalid = done InvalidSequence ir ow
+ invalid :: DecodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+ !ro = output{ bufR = ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf16be_encode :: EncodeBuffer
+utf16be_encode :: EncodeBuffer#
utf16be_encode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ir >= iw = done InputUnderflow ir ow
- | os - ow < 2 = done OutputUnderflow ir ow
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | os - ow < 2 = done OutputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
case ord c of
- x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow else do
- writeWord8Buf oraw ow (fromIntegral (x `shiftR` 8))
- writeWord8Buf oraw (ow+1) (fromIntegral x)
- loop ir' (ow+2)
+ x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+ let !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral (x `shiftR` 8))) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) (fromIntegral x)) st2
+ loop ir' (ow+2) st3
| otherwise -> do
- if os - ow < 4 then done OutputUnderflow ir ow else do
+ if os - ow < 4 then done OutputUnderflow ir ow st1 else do
let
n1 = x - 0x10000
c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
@@ -294,35 +310,39 @@ utf16be_encode
c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
c4 = fromIntegral n2
--
- writeWord8Buf oraw ow c1
- writeWord8Buf oraw (ow+1) c2
- writeWord8Buf oraw (ow+2) c3
- writeWord8Buf oraw (ow+3) c4
- loop ir' (ow+4)
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c1) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c3) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c4) st4
+ loop ir' (ow+4) st5
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf16le_encode :: EncodeBuffer
+utf16le_encode :: EncodeBuffer#
utf16le_encode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ir >= iw = done InputUnderflow ir ow
- | os - ow < 2 = done OutputUnderflow ir ow
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+ !ro = output{ bufR = ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | os - ow < 2 = done OutputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
case ord c of
- x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow else do
- writeWord8Buf oraw ow (fromIntegral x)
- writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8))
- loop ir' (ow+2)
+ x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+ let !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral x)) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8))) st2
+ loop ir' (ow+2) st3
| otherwise ->
- if os - ow < 4 then done OutputUnderflow ir ow else do
+ if os - ow < 4 then done OutputUnderflow ir ow st1 else do
let
n1 = x - 0x10000
c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
@@ -331,13 +351,13 @@ utf16le_encode
c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
c4 = fromIntegral n2
--
- writeWord8Buf oraw ow c2
- writeWord8Buf oraw (ow+1) c1
- writeWord8Buf oraw (ow+2) c4
- writeWord8Buf oraw (ow+3) c3
- loop ir' (ow+4)
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c2) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c1) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c4) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c3) st4
+ loop ir' (ow+4) st5
in
- loop ir0 ow0
+ loop ir0 ow0 st
chr2 :: Word16 -> Word16 -> Char
chr2 (W16# a#) (W16# b#) = C# (chr# (upper# +# lower# +# 0x10000#))
=====================================
libraries/base/GHC/IO/Encoding/UTF32.hs
=====================================
@@ -3,6 +3,7 @@
, BangPatterns
, NondecreasingIndentation
, MagicHash
+ , UnboxedTuples
#-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
@@ -61,68 +62,70 @@ mkUTF32 cfm = TextEncoding { textEncodingName = "UTF-32",
mkTextDecoder = utf32_DF cfm,
mkTextEncoder = utf32_EF cfm }
-utf32_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer))
+utf32_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer#))
utf32_DF cfm = do
seen_bom <- newIORef Nothing
- return (BufferCodec {
- encode = utf32_decode seen_bom,
- recover = recoverDecode cfm,
- close = return (),
- getState = readIORef seen_bom,
- setState = writeIORef seen_bom
+ return (BufferCodec# {
+ encode# = utf32_decode seen_bom,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = readIORef seen_bom,
+ setState# = writeIORef seen_bom
})
utf32_EF :: CodingFailureMode -> IO (TextEncoder Bool)
utf32_EF cfm = do
done_bom <- newIORef False
- return (BufferCodec {
- encode = utf32_encode done_bom,
- recover = recoverEncode cfm,
- close = return (),
- getState = readIORef done_bom,
- setState = writeIORef done_bom
+ return (BufferCodec# {
+ encode# = utf32_encode done_bom,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = readIORef done_bom,
+ setState# = writeIORef done_bom
})
-utf32_encode :: IORef Bool -> EncodeBuffer
+utf32_encode :: IORef Bool -> EncodeBuffer#
utf32_encode done_bom input
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+ st0
= do
- b <- readIORef done_bom
- if b then utf32_native_encode input output
+ let !(# st1, b #) = unIO (readIORef done_bom) st0
+ if b then utf32_native_encode input output st1
else if os - ow < 4
- then return (OutputUnderflow, input,output)
+ then (# st1,OutputUnderflow,input,output #)
else do
- writeIORef done_bom True
- writeWord8Buf oraw ow bom0
- writeWord8Buf oraw (ow+1) bom1
- writeWord8Buf oraw (ow+2) bom2
- writeWord8Buf oraw (ow+3) bom3
- utf32_native_encode input output{ bufR = ow+4 }
-
-utf32_decode :: IORef (Maybe DecodeBuffer) -> DecodeBuffer
+ let !(# st2, () #) = unIO (writeIORef done_bom True) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw ow bom0) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom1) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+2) bom2) st4
+ !(# st6, () #) = unIO (writeWord8Buf oraw (ow+3) bom3) st5
+ utf32_native_encode input output{ bufR = ow+4 } st6
+
+utf32_decode :: IORef (Maybe DecodeBuffer#) -> DecodeBuffer#
utf32_decode seen_bom
input at Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ }
output
+ st0
= do
- mb <- readIORef seen_bom
+ let !(# st1, mb #) = unIO (readIORef seen_bom) st0
case mb of
- Just decode -> decode input output
+ Just decode -> decode input output st1
Nothing ->
- if iw - ir < 4 then return (InputUnderflow, input,output) else do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
+ if iw - ir < 4 then (# st1,InputUnderflow,input,output #) else do
+ let !(# st2, c0 #) = unIO (readWord8Buf iraw ir ) st1
+ !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
+ !(# st4, c2 #) = unIO (readWord8Buf iraw (ir+2)) st3
+ !(# st5, c3 #) = unIO (readWord8Buf iraw (ir+3)) st4
case () of
- _ | c0 == bom0 && c1 == bom1 && c2 == bom2 && c3 == bom3 -> do
- writeIORef seen_bom (Just utf32be_decode)
- utf32be_decode input{ bufL= ir+4 } output
- _ | c0 == bom3 && c1 == bom2 && c2 == bom1 && c3 == bom0 -> do
- writeIORef seen_bom (Just utf32le_decode)
- utf32le_decode input{ bufL= ir+4 } output
- | otherwise -> do
- writeIORef seen_bom (Just utf32_native_decode)
- utf32_native_decode input output
+ _ | c0 == bom0 && c1 == bom1 && c2 == bom2 && c3 == bom3 ->
+ let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32be_decode)) st5
+ in utf32be_decode input{ bufL= ir+4 } output st6
+ _ | c0 == bom3 && c1 == bom2 && c2 == bom1 && c3 == bom0 ->
+ let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32le_decode)) st5
+ in utf32le_decode input{ bufL= ir+4 } output st6
+ | otherwise ->
+ let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32_native_decode)) st5
+ in utf32_native_decode input output st6
bom0, bom1, bom2, bom3 :: Word8
@@ -132,10 +135,10 @@ bom2 = 0xfe
bom3 = 0xff
-- choose UTF-32BE by default for UTF-32 output
-utf32_native_decode :: DecodeBuffer
+utf32_native_decode :: DecodeBuffer#
utf32_native_decode = utf32be_decode
-utf32_native_encode :: EncodeBuffer
+utf32_native_encode :: EncodeBuffer#
utf32_native_encode = utf32be_encode
-- -----------------------------------------------------------------------------
@@ -152,22 +155,22 @@ mkUTF32be cfm = TextEncoding { textEncodingName = "UTF-32BE",
utf32be_DF :: CodingFailureMode -> IO (TextDecoder ())
utf32be_DF cfm =
- return (BufferCodec {
- encode = utf32be_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf32be_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf32be_EF :: CodingFailureMode -> IO (TextEncoder ())
utf32be_EF cfm =
- return (BufferCodec {
- encode = utf32be_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf32be_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
@@ -182,128 +185,145 @@ mkUTF32le cfm = TextEncoding { textEncodingName = "UTF-32LE",
utf32le_DF :: CodingFailureMode -> IO (TextDecoder ())
utf32le_DF cfm =
- return (BufferCodec {
- encode = utf32le_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf32le_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf32le_EF :: CodingFailureMode -> IO (TextEncoder ())
utf32le_EF cfm =
- return (BufferCodec {
- encode = utf32le_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf32le_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
-utf32be_decode :: DecodeBuffer
+utf32be_decode :: DecodeBuffer#
utf32be_decode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | iw - ir < 4 = done InputUnderflow ir ow
+ loop :: Int -> Int -> DecodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | iw - ir < 4 = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir ) st0
+ !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+ !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+ !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
let x1 = chr4 c0 c1 c2 c3
- if not (validate x1) then invalid else do
- ow' <- writeCharBuf oraw ow x1
- loop (ir+4) ow'
+ if not (validate x1) then invalid st4 else do
+ let !(# st5, ow' #) = unIO (writeCharBuf oraw ow x1) st4
+ loop (ir+4) ow' st5
where
- invalid = done InvalidSequence ir ow
+ invalid :: DecodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf32le_decode :: DecodeBuffer
+utf32le_decode :: DecodeBuffer#
utf32le_decode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | iw - ir < 4 = done InputUnderflow ir ow
+ loop :: Int -> Int -> DecodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | iw - ir < 4 = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir ) st0
+ !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+ !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+ !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
let x1 = chr4 c3 c2 c1 c0
- if not (validate x1) then invalid else do
- ow' <- writeCharBuf oraw ow x1
- loop (ir+4) ow'
+ if not (validate x1) then invalid st4 else do
+ let !(# st5, ow' #) = unIO (writeCharBuf oraw ow x1) st4
+ loop (ir+4) ow' st5
where
- invalid = done InvalidSequence ir ow
+ invalid :: DecodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf32be_encode :: EncodeBuffer
+utf32be_encode :: EncodeBuffer#
utf32be_encode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ir >= iw = done InputUnderflow ir ow
- | os - ow < 4 = done OutputUnderflow ir ow
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | os - ow < 4 = done OutputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
- if isSurrogate c then done InvalidSequence ir ow else do
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+ if isSurrogate c then done InvalidSequence ir ow st1 else do
let (c0,c1,c2,c3) = ord4 c
- writeWord8Buf oraw ow c0
- writeWord8Buf oraw (ow+1) c1
- writeWord8Buf oraw (ow+2) c2
- writeWord8Buf oraw (ow+3) c3
- loop ir' (ow+4)
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c0) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c1) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c2) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c3) st4
+ loop ir' (ow+4) st5
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf32le_encode :: EncodeBuffer
+utf32le_encode :: EncodeBuffer#
utf32le_encode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ir >= iw = done InputUnderflow ir ow
- | os - ow < 4 = done OutputUnderflow ir ow
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | os - ow < 4 = done OutputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
- if isSurrogate c then done InvalidSequence ir ow else do
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+ if isSurrogate c then done InvalidSequence ir ow st1 else do
let (c0,c1,c2,c3) = ord4 c
- writeWord8Buf oraw ow c3
- writeWord8Buf oraw (ow+1) c2
- writeWord8Buf oraw (ow+2) c1
- writeWord8Buf oraw (ow+3) c0
- loop ir' (ow+4)
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c3) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c1) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c0) st4
+ loop ir' (ow+4) st5
in
- loop ir0 ow0
+ loop ir0 ow0 st
chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =
=====================================
libraries/base/GHC/IO/Encoding/UTF8.hs
=====================================
@@ -3,6 +3,7 @@
, BangPatterns
, NondecreasingIndentation
, MagicHash
+ , UnboxedTuples
#-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
@@ -56,22 +57,22 @@ mkUTF8 cfm = TextEncoding { textEncodingName = "UTF-8",
utf8_DF :: CodingFailureMode -> IO (TextDecoder ())
utf8_DF cfm =
- return (BufferCodec {
- encode = utf8_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf8_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf8_EF :: CodingFailureMode -> IO (TextEncoder ())
utf8_EF cfm =
- return (BufferCodec {
- encode = utf8_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf8_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf8_bom :: TextEncoding
@@ -85,177 +86,188 @@ mkUTF8_bom cfm = TextEncoding { textEncodingName = "UTF-8BOM",
utf8_bom_DF :: CodingFailureMode -> IO (TextDecoder Bool)
utf8_bom_DF cfm = do
ref <- newIORef True
- return (BufferCodec {
- encode = utf8_bom_decode ref,
- recover = recoverDecode cfm,
- close = return (),
- getState = readIORef ref,
- setState = writeIORef ref
+ return (BufferCodec# {
+ encode# = utf8_bom_decode ref,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = readIORef ref,
+ setState# = writeIORef ref
})
utf8_bom_EF :: CodingFailureMode -> IO (TextEncoder Bool)
utf8_bom_EF cfm = do
ref <- newIORef True
- return (BufferCodec {
- encode = utf8_bom_encode ref,
- recover = recoverEncode cfm,
- close = return (),
- getState = readIORef ref,
- setState = writeIORef ref
+ return (BufferCodec# {
+ encode# = utf8_bom_encode ref,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = readIORef ref,
+ setState# = writeIORef ref
})
-utf8_bom_decode :: IORef Bool -> DecodeBuffer
+utf8_bom_decode :: IORef Bool -> DecodeBuffer#
utf8_bom_decode ref
input at Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ }
output
+ st0
= do
- first <- readIORef ref
+ let !(# st1, first #) = unIO (readIORef ref) st0
if not first
- then utf8_decode input output
+ then utf8_decode input output st1
else do
- let no_bom = do writeIORef ref False; utf8_decode input output
- if iw - ir < 1 then return (InputUnderflow,input,output) else do
- c0 <- readWord8Buf iraw ir
+ let no_bom = let !(# st', () #) = unIO (writeIORef ref False) st1 in utf8_decode input output st'
+ if iw - ir < 1 then (# st1,InputUnderflow,input,output #) else do
+ let !(# st2, c0 #) = unIO (readWord8Buf iraw ir) st1
if (c0 /= bom0) then no_bom else do
- if iw - ir < 2 then return (InputUnderflow,input,output) else do
- c1 <- readWord8Buf iraw (ir+1)
+ if iw - ir < 2 then (# st2,InputUnderflow,input,output #) else do
+ let !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
if (c1 /= bom1) then no_bom else do
- if iw - ir < 3 then return (InputUnderflow,input,output) else do
- c2 <- readWord8Buf iraw (ir+2)
+ if iw - ir < 3 then (# st3,InputUnderflow,input,output #) else do
+ let !(# st4, c2 #) = unIO (readWord8Buf iraw (ir+2)) st3
if (c2 /= bom2) then no_bom else do
-- found a BOM, ignore it and carry on
- writeIORef ref False
- utf8_decode input{ bufL = ir + 3 } output
+ let !(# st5, () #) = unIO (writeIORef ref False) st4
+ utf8_decode input{ bufL = ir + 3 } output st5
-utf8_bom_encode :: IORef Bool -> EncodeBuffer
+utf8_bom_encode :: IORef Bool -> EncodeBuffer#
utf8_bom_encode ref input
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+ st0
= do
- b <- readIORef ref
- if not b then utf8_encode input output
+ let !(# st1, b #) = unIO (readIORef ref) st0
+ if not b then utf8_encode input output st1
else if os - ow < 3
- then return (OutputUnderflow,input,output)
+ then (# st1,OutputUnderflow,input,output #)
else do
- writeIORef ref False
- writeWord8Buf oraw ow bom0
- writeWord8Buf oraw (ow+1) bom1
- writeWord8Buf oraw (ow+2) bom2
- utf8_encode input output{ bufR = ow+3 }
+ let !(# st2, () #) = unIO (writeIORef ref False) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw ow bom0) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom1) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+2) bom2) st4
+ utf8_encode input output{ bufR = ow+3 } st5
bom0, bom1, bom2 :: Word8
bom0 = 0xef
bom1 = 0xbb
bom2 = 0xbf
-utf8_decode :: DecodeBuffer
+utf8_decode :: DecodeBuffer#
utf8_decode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | ir >= iw = done InputUnderflow ir ow
+ loop :: Int -> Int -> DecodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
case c0 of
_ | c0 <= 0x7f -> do
- ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
- loop (ir+1) ow'
- | c0 >= 0xc0 && c0 <= 0xc1 -> invalid -- Overlong forms
+ let !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
+ loop (ir+1) ow' st2
+ | c0 >= 0xc0 && c0 <= 0xc1 -> invalid st1 -- Overlong forms
| c0 >= 0xc2 && c0 <= 0xdf ->
- if iw - ir < 2 then done InputUnderflow ir ow else do
- c1 <- readWord8Buf iraw (ir+1)
- if (c1 < 0x80 || c1 >= 0xc0) then invalid else do
- ow' <- writeCharBuf oraw ow (chr2 c0 c1)
- loop (ir+2) ow'
+ if iw - ir < 2 then done InputUnderflow ir ow st1 else do
+ let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+ if (c1 < 0x80 || c1 >= 0xc0) then invalid st2 else do
+ let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (chr2 c0 c1)) st2
+ loop (ir+2) ow' st3
| c0 >= 0xe0 && c0 <= 0xef ->
case iw - ir of
- 1 -> done InputUnderflow ir ow
+ 1 -> done InputUnderflow ir ow st1
2 -> do -- check for an error even when we don't have
-- the full sequence yet (#3341)
- c1 <- readWord8Buf iraw (ir+1)
+ let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
if not (validate3 c0 c1 0x80)
- then invalid else done InputUnderflow ir ow
+ then invalid st2 else done InputUnderflow ir ow st2
_ -> do
- c1 <- readWord8Buf iraw (ir+1)
- c2 <- readWord8Buf iraw (ir+2)
- if not (validate3 c0 c1 c2) then invalid else do
- ow' <- writeCharBuf oraw ow (chr3 c0 c1 c2)
- loop (ir+3) ow'
+ let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+ let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+ if not (validate3 c0 c1 c2) then invalid st3 else do
+ let !(# st4, ow' #) = unIO (writeCharBuf oraw ow (chr3 c0 c1 c2)) st3
+ loop (ir+3) ow' st4
| c0 >= 0xf0 ->
case iw - ir of
- 1 -> done InputUnderflow ir ow
+ 1 -> done InputUnderflow ir ow st1
2 -> do -- check for an error even when we don't have
-- the full sequence yet (#3341)
- c1 <- readWord8Buf iraw (ir+1)
+ let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
if not (validate4 c0 c1 0x80 0x80)
- then invalid else done InputUnderflow ir ow
+ then invalid st2 else done InputUnderflow ir ow st2
3 -> do
- c1 <- readWord8Buf iraw (ir+1)
- c2 <- readWord8Buf iraw (ir+2)
+ let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+ !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
if not (validate4 c0 c1 c2 0x80)
- then invalid else done InputUnderflow ir ow
+ then invalid st3 else done InputUnderflow ir ow st3
_ -> do
- c1 <- readWord8Buf iraw (ir+1)
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
- if not (validate4 c0 c1 c2 c3) then invalid else do
- ow' <- writeCharBuf oraw ow (chr4 c0 c1 c2 c3)
- loop (ir+4) ow'
+ let !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+ !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+ !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+ if not (validate4 c0 c1 c2 c3) then invalid st4 else do
+ let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr4 c0 c1 c2 c3)) st4
+ loop (ir+4) ow' st5
| otherwise ->
- invalid
+ invalid st1
where
- invalid = done InvalidSequence ir ow
+ invalid :: DecodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL = 0, bufR = 0} else input{ bufL = ir }
+ !ro = output { bufR = ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf8_encode :: EncodeBuffer
+utf8_encode :: EncodeBuffer#
utf8_encode
input at Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output at Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | ir >= iw = done InputUnderflow ir ow
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+ !ro = output{ bufR = ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
case ord c of
x | x <= 0x7F -> do
- writeWord8Buf oraw ow (fromIntegral x)
- loop ir' (ow+1)
+ let !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral x)) st1
+ loop ir' (ow+1) st2
| x <= 0x07FF ->
- if os - ow < 2 then done OutputUnderflow ir ow else do
+ if os - ow < 2 then done OutputUnderflow ir ow st1 else do
let (c1,c2) = ord2 c
- writeWord8Buf oraw ow c1
- writeWord8Buf oraw (ow+1) c2
- loop ir' (ow+2)
- | x <= 0xFFFF -> if isSurrogate c then done InvalidSequence ir ow else do
- if os - ow < 3 then done OutputUnderflow ir ow else do
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c1) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+ loop ir' (ow+2) st3
+ | x <= 0xFFFF -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+ if os - ow < 3 then done OutputUnderflow ir ow st1 else do
let (c1,c2,c3) = ord3 c
- writeWord8Buf oraw ow c1
- writeWord8Buf oraw (ow+1) c2
- writeWord8Buf oraw (ow+2) c3
- loop ir' (ow+3)
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c1) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c3) st3
+ loop ir' (ow+3) st4
| otherwise -> do
- if os - ow < 4 then done OutputUnderflow ir ow else do
+ if os - ow < 4 then done OutputUnderflow ir ow st1 else do
let (c1,c2,c3,c4) = ord4 c
- writeWord8Buf oraw ow c1
- writeWord8Buf oraw (ow+1) c2
- writeWord8Buf oraw (ow+2) c3
- writeWord8Buf oraw (ow+3) c4
- loop ir' (ow+4)
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c1) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c3) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c4) st4
+ loop ir' (ow+4) st5
in
- loop ir0 ow0
+ loop ir0 ow0 st
-- -----------------------------------------------------------------------------
-- UTF-8 primitives, lifted from Data.Text.Fusion.Utf8
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/536e0538a9256e52229bbf8e10159e35593a155f
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/536e0538a9256e52229bbf8e10159e35593a155f
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230509/0750f5fb/attachment-0001.html>
More information about the ghc-commits
mailing list