[commit: packages/binary] master: Remove quadratic behaviour in `ensureN`. (e46b7b6)

git at git.haskell.org git at git.haskell.org
Mon Jun 1 08:47:37 UTC 2015


Repository : ssh://git@git.haskell.org/binary

On branch  : master
Link       : http://git.haskell.org/packages/binary.git/commitdiff/e46b7b61111579befcc99954be8a0f48738f8ef7

>---------------------------------------------------------------

commit e46b7b61111579befcc99954be8a0f48738f8ef7
Author: Francesco Mazzoli <f at mazzo.li>
Date:   Thu May 28 00:16:53 2015 +0200

    Remove quadratic behaviour in `ensureN`.
    
    Chains of `B.append`s were being created by repeated calls to
    `demandInput`.
    
    Try the following program, which writes and read 100MB, to appreciate
    the difference:
    
    ```
    import qualified Data.ByteString as BS
    import qualified Data.ByteString.Lazy as BSL
    import Data.Binary (encode, decode)
    import Data.Char (ord)
    
    main :: IO ()
    main = do
    
      let inBs = BS.replicate 100000000 (fromIntegral $ ord 'a')
      BSL.writeFile "bs.bin" (encode inBs)
      putStrLn "writing done"
    
      bin <- BSL.readFile "bs.bin"
      -- This takes around 30 seconds and causes more than 10GB to be
      -- allocated.
      let outBs = decode bin
      print $ inBs == outBs
    ```


>---------------------------------------------------------------

e46b7b61111579befcc99954be8a0f48738f8ef7
 src/Data/Binary/Get/Internal.hs | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/Data/Binary/Get/Internal.hs b/src/Data/Binary/Get/Internal.hs
index 9b53831..4cb7f15 100644
--- a/src/Data/Binary/Get/Internal.hs
+++ b/src/Data/Binary/Get/Internal.hs
@@ -381,16 +381,20 @@ readN !n f = ensureN n >> unsafeReadN n f
 -- | Ensure that there are at least @n@ bytes available. If not, the
 -- computation will escape with 'Partial'.
 ensureN :: Int -> Get ()
-ensureN !n0 = C $ \inp ks -> do
-  if B.length inp >= n0
-    then ks inp ()
-    else runCont (go n0) inp ks
-  where -- might look a bit funny, but plays very well with GHC's inliner.
-        -- GHC won't inline recursive functions, so we make ensureN non-recursive
-    go n = C $ \inp ks -> do
-      if B.length inp >= n
-        then ks inp ()
-        else runCont (demandInput >> go n) inp ks
+ensureN !n0 = go n0 []
+  where
+    go !remaining0 bss0 = C $ \inp ks ->
+      let remaining = remaining0 - B.length inp
+          bss = inp : bss0
+      in if remaining <= 0
+        then ks (B.concat $ reverse bss) ()
+        else
+          Partial $ \mbBs -> case mbBs of
+            Just bs -> runCont (go remaining bss) bs ks
+            -- We keep the error message referencing @demandInput@,
+            -- for legacy reasons -- people have been seeing this for
+            -- years.
+            Nothing -> Fail (B.concat $ reverse bss) "demandInput: not enough bytes"
 {-# INLINE ensureN #-}
 
 unsafeReadN :: Int -> (B.ByteString -> a) -> Get a



More information about the ghc-commits mailing list