[Git][ghc/ghc][master] Fix a leak in `transpose`

Marge Bot gitlab at gitlab.haskell.org
Sun Nov 1 13:52:16 UTC 2020



 Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
ce1bb995 by Hécate at 2020-11-01T08:52:08-05:00
Fix a leak in `transpose`

This patch was authored by David Feuer <david.feuer at gmail.com>

- - - - -


4 changed files:

- libraries/base/Data/OldList.hs
- + libraries/base/tests/T18642.hs
- + libraries/base/tests/T18642.stdout
- libraries/base/tests/all.T


Changes:

=====================================
libraries/base/Data/OldList.hs
=====================================
@@ -547,19 +547,57 @@ intercalate xs xss = concat (intersperse xs xss)
 --
 -- >>> transpose [[10,11],[20],[],[30,31,32]]
 -- [[10,20,30],[11,31],[32]]
-transpose               :: [[a]] -> [[a]]
-transpose []             = []
-transpose ([]   : xss)   = transpose xss
-transpose ((x:xs) : xss) = (x : hds) : transpose (xs : tls)
+transpose :: [[a]] -> [[a]]
+transpose [] = []
+transpose ([] : xss) = transpose xss
+transpose ((x : xs) : xss) = combine x hds xs tls
   where
     -- We tie the calculations of heads and tails together
     -- to prevent heads from leaking into tails and vice versa.
     -- unzip makes the selector thunk arrangements we need to
     -- ensure everything gets cleaned up properly.
-    (hds, tls) = unzip [(hd, tl) | (hd:tl) <- xss]
+    (hds, tls) = unzip [(hd, tl) | hd : tl <- xss]
+    combine y h ys t = (y:h) : transpose (ys:t)
+    {-# NOINLINE combine #-}
+  {- Implementation note:
+  If the bottom part of the function was written as such:
+
+  ```
+  transpose ((x : xs) : xss) = (x:hds) : transpose (xs:tls)
+  where
+    (hds,tls) = hdstls
+    hdstls = unzip [(hd, tl) | hd : tl <- xss]
+    {-# NOINLINE hdstls #-}
+  ```
+  Here are the steps that would take place:
+
+  1. We allocate a thunk, `hdstls`, representing the result of unzipping.
+  2. We allocate selector thunks, `hds` and `tls`, that deconstruct `hdstls`.
+  3. Install `hds` as the tail of the result head and pass `xs:tls` to
+     the recursive call in the result tail.
+
+  Once optimised, this code would amount to:
+
+  ```
+  transpose ((x : xs) : xss) = (x:hds) : (let tls = snd hdstls in transpose (xs:tls))
+  where
+    hds = fst hdstls
+    hdstls = unzip [(hd, tl) | hd : tl <- xss]
+    {-# NOINLINE hdstls #-}
+  ```
+
+  In particular, GHC does not produce the `tls` selector thunk immediately;
+  rather, it waits to do so until the tail of the result is actually demanded.
+  So when `hds` is demanded, that does not resolve `snd hdstls`; the tail of the
+  result keeps `hdstls` alive.
+
+  By writing `combine` and making it NOINLINE, we prevent GHC from delaying
+  the selector thunk allocation, requiring that `hds` and `tls` are actually
+  allocated to be passed to `combine`.
+  -}
 
 
--- | The 'partition' function takes a predicate a list and returns
+-- | The 'partition' function takes a predicate and a list, and returns
 -- the pair of lists of elements which do and do not satisfy the
 -- predicate, respectively; i.e.,
 --


=====================================
libraries/base/tests/T18642.hs
=====================================
@@ -0,0 +1,27 @@
+{-# LANGUAGE NumericUnderscores #-}
+module Main where
+
+import Data.List (transpose, foldl')
+import GHC.Stats
+import System.Exit
+
+thingy :: [[[Int]]]
+thingy = [ [[1],[2]], [[1..10^7], [3]]]
+
+thingy2 :: [[[Int]]]
+thingy2 = [ [[1],[2]], [[3], [2..10^7+1]]]
+
+main = do
+  htr : ttr <- pure $ transpose thingy
+  print $ even $ foldl' (+) 0 . head . tail $ htr
+
+  htr2 : ttr2 <- pure $ transpose thingy2
+  print $ even $ foldl' (+) 0 . head . tail . head $ ttr2
+
+  maxLiveBytes <- max_live_bytes <$> getRTSStats
+  if (maxLiveBytes) < 200_000
+  then putStrLn "Test is running in the expected residency limit"
+  else do
+    putStrLn $ "Test is running with " <> show maxLiveBytes <> " bytes of residency!"
+    exitFailure
+


=====================================
libraries/base/tests/T18642.stdout
=====================================
@@ -0,0 +1,3 @@
+True
+True
+Test is running in the expected residency limit


=====================================
libraries/base/tests/all.T
=====================================
@@ -260,3 +260,4 @@ test('T16943b', normal, compile_and_run, [''])
 test('T17499', [collect_stats('bytes allocated',5)], compile_and_run, ['-O -w'])
 test('T16643', normal, compile_and_run, [''])
 test('clamp', normal, compile_and_run, [''])
+test('T18642', extra_run_opts('+RTS -T -RTS'), compile_and_run, ['-O2'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ce1bb9959e2465db1c3880f3c532ae7e1be39b41

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ce1bb9959e2465db1c3880f3c532ae7e1be39b41
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20201101/bfab2211/attachment-0001.html>


More information about the ghc-commits mailing list