[commit: ghc] master: Make Data.List.Inits fast (cde3a77)
git at git.haskell.org
git at git.haskell.org
Thu Oct 16 07:45:39 UTC 2014
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/cde3a77f9703966145cae481ee35f52dcca2cf7d/ghc
>---------------------------------------------------------------
commit cde3a77f9703966145cae481ee35f52dcca2cf7d
Author: David Feuer <David.Feuer at gmail.com>
Date: Thu Oct 16 09:42:27 2014 +0200
Make Data.List.Inits fast
Fixes #9345. Use a modified banker's queue to achieve amortized optimal
performance for inits. The previous implementation was extremely slow.
Reviewed By: nomeata, ekmett, austin
Differential Revision: https://phabricator.haskell.org/D329
>---------------------------------------------------------------
cde3a77f9703966145cae481ee35f52dcca2cf7d
libraries/base/Data/OldList.hs | 60 +++++++++++++++++++++++++++++++++++++++---
libraries/base/tests/all.T | 1 +
libraries/base/tests/inits.hs | 28 ++++++++++++++++++++
3 files changed, 86 insertions(+), 3 deletions(-)
diff --git a/libraries/base/Data/OldList.hs b/libraries/base/Data/OldList.hs
index 9b6a431..ad2c510 100644
--- a/libraries/base/Data/OldList.hs
+++ b/libraries/base/Data/OldList.hs
@@ -208,6 +208,7 @@ module Data.OldList
) where
import Data.Maybe
+import Data.Bits ( (.&.) )
import Data.Char ( isSpace )
import Data.Ord ( comparing )
import Data.Tuple ( fst, snd )
@@ -767,11 +768,16 @@ groupBy eq (x:xs) = (x:ys) : groupBy eq zs
-- > inits "abc" == ["","a","ab","abc"]
--
-- Note that 'inits' has the following strictness property:
+-- @inits (xs ++ _|_) = inits xs ++ _|_@
+--
+-- In particular,
-- @inits _|_ = [] : _|_@
inits :: [a] -> [[a]]
-inits xs = [] : case xs of
- [] -> []
- x : xs' -> map (x :) (inits xs')
+inits = map toListSB . scanl' snocSB emptySB
+{-# NOINLINE inits #-}
+-- We do not allow inits to inline, because it plays havoc with Call Arity
+-- if it fuses with a consumer, and it would generally lead to serious
+-- loss of sharing if allowed to fuse with a producer.
-- | The 'tails' function returns all final segments of the argument,
-- longest first. For example,
@@ -1130,3 +1136,51 @@ unwords [] = ""
unwords [w] = w
unwords (w:ws) = w ++ ' ' : unwords ws
#endif
+
+{- A "SnocBuilder" is a version of Chris Okasaki's banker's queue that supports
+toListSB instead of uncons. In single-threaded use, its performance
+characteristics are similar to John Hughes's functional difference lists, but
+likely somewhat worse. In heavily persistent settings, however, it does much
+better, because it takes advantage of sharing. The banker's queue guarantees
+(amortized) O(1) snoc and O(1) uncons, meaning that we can think of toListSB as
+an O(1) conversion to a list-like structure a constant factor slower than
+normal lists--we pay the O(n) cost incrementally as we consume the list. Using
+functional difference lists, on the other hand, we would have to pay the whole
+cost up front for each output list. -}
+
+{- We store a front list, a rear list, and the length of the queue. Because we
+only snoc onto the queue and never uncons, we know it's time to rotate when the
+length of the queue plus 1 is a power of 2. Note that we rely on the value of
+the length field only for performance. In the unlikely event of overflow, the
+performance will suffer but the semantics will remain correct. -}
+
+data SnocBuilder a = SnocBuilder {-# UNPACK #-} !Word [a] [a]
+
+{- Smart constructor that rotates the builder when lp is one minus a power of
+2. Does not rotate very small builders because doing so is not worth the
+trouble. The lp < 255 test goes first because the power-of-2 test gives awful
+branch prediction for very small n (there are 5 powers of 2 between 1 and
+16). Putting the well-predicted lp < 255 test first avoids branching on the
+power-of-2 test until powers of 2 have become sufficiently rare to be predicted
+well. -}
+
+{-# INLINE sb #-}
+sb :: Word -> [a] -> [a] -> SnocBuilder a
+sb lp f r
+ | lp < 255 || (lp .&. (lp + 1)) /= 0 = SnocBuilder lp f r
+ | otherwise = SnocBuilder lp (f ++ reverse r) []
+
+-- The empty builder
+
+emptySB :: SnocBuilder a
+emptySB = SnocBuilder 0 [] []
+
+-- Add an element to the end of a queue.
+
+snocSB :: SnocBuilder a -> a -> SnocBuilder a
+snocSB (SnocBuilder lp f r) x = sb (lp + 1) f (x:r)
+
+-- Convert a builder to a list
+
+toListSB :: SnocBuilder a -> [a]
+toListSB (SnocBuilder _ f r) = f ++ reverse r
diff --git a/libraries/base/tests/all.T b/libraries/base/tests/all.T
index 6520b21..f80f542 100644
--- a/libraries/base/tests/all.T
+++ b/libraries/base/tests/all.T
@@ -23,6 +23,7 @@ test('readInteger001', normal, compile_and_run, [''])
test('readFixed001', normal, compile_and_run, [''])
test('lex001', normal, compile_and_run, [''])
test('take001', extra_run_opts('1'), compile_and_run, [''])
+test('inits', normal, compile_and_run, [''])
test('genericNegative001', extra_run_opts('-1'), compile_and_run, [''])
test('ix001', normal, compile_and_run, [''])
diff --git a/libraries/base/tests/inits.hs b/libraries/base/tests/inits.hs
new file mode 100644
index 0000000..4474769
--- /dev/null
+++ b/libraries/base/tests/inits.hs
@@ -0,0 +1,28 @@
+{-# LANGUAGE RankNTypes #-}
+module Main (main) where
+
+import Data.List
+
+-- A simple implementation of inits that should be obviously correct.
+{-# NOINLINE initsR #-}
+initsR :: [a] -> [[a]]
+initsR = map reverse . scanl (flip (:)) []
+
+-- The inits implementation added in 7.10 uses a queue rotated around
+-- powers of 2, starting the rotation only at size 255, so we want to check
+-- around powers of 2 and around the switch.
+ranges :: [Int]
+ranges = [0..20] ++ [252..259] ++ [508..515]
+
+simple :: (forall a . [a] -> [[a]]) -> [[[Int]]]
+simple impl = [impl [1..n] | n <- ranges]
+
+-- We want inits (xs ++ undefined) = inits xs ++ undefined
+laziness :: Bool
+laziness = [take (n+1) (inits $ [1..n] ++ undefined) | n <- ranges]
+ == simple inits
+
+main :: IO ()
+main | simple initsR /= simple inits = error "inits failed simple test"
+ | not laziness = error "inits failed laziness test"
+ | otherwise = return ()
More information about the ghc-commits
mailing list