[commit: packages/containers] ghc-head: Improve {Map, Set}.fromDistinctAscList. (b518422)
git at git.haskell.org
git at git.haskell.org
Fri Aug 30 13:33:46 CEST 2013
Repository : ssh://git@git.haskell.org/containers
On branch : ghc-head
Link : http://git.haskell.org/?p=packages/containers.git;a=commit;h=b51842247a149368f963fa1ab242d8eccfff63b6
>---------------------------------------------------------------
commit b51842247a149368f963fa1ab242d8eccfff63b6
Author: Milan Straka <fox at ucw.cz>
Date: Thu Aug 30 17:28:49 2012 +0200
Improve {Map,Set}.fromDistinctAscList.
Benchmarks show 42% improvement for Map and 39% improvement for Set.
The new implementation builds the map on the fly, without needing to
know the number of elements in the list. It proceeds by building a map
of size 2^1-1, 2^2-1, 2^3-1, .... When the input list is empty, there is
at most log N trees left. These are joined from the smallest to the
largest.
>---------------------------------------------------------------
b51842247a149368f963fa1ab242d8eccfff63b6
Data/Map/Base.hs | 34 ++++++++++++++++++----------------
Data/Map/Strict.hs | 38 ++++++++++++++++++++------------------
Data/Set/Base.hs | 35 +++++++++++++++++++----------------
3 files changed, 57 insertions(+), 50 deletions(-)
diff --git a/Data/Map/Base.hs b/Data/Map/Base.hs
index dc71e9f..61bcfe6 100644
--- a/Data/Map/Base.hs
+++ b/Data/Map/Base.hs
@@ -263,6 +263,7 @@ module Data.Map.Base (
import Prelude hiding (lookup,map,filter,foldr,foldl,null)
import qualified Data.Set.Base as Set
import Data.StrictPair
+import Data.Bits (shiftL, shiftR)
import Data.Monoid (Monoid(..))
import Control.Applicative (Applicative(..), (<$>))
import Data.Traversable (Traversable(traverse))
@@ -2086,24 +2087,25 @@ fromAscListWithKey f xs
-- > valid (fromDistinctAscList [(3,"b"), (5,"a")]) == True
-- > valid (fromDistinctAscList [(3,"b"), (5,"a"), (5,"b")]) == False
+-- For some reason, when 'singleton' is used in fromDistinctAscList or in
+-- create, it is not inlined, so we inline it manually.
fromDistinctAscList :: [(k,a)] -> Map k a
-fromDistinctAscList xs
- = create const (length xs) xs
+fromDistinctAscList [] = Tip
+fromDistinctAscList ((kx0, x0) : xs0) = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
where
- -- 1) use continuations so that we use heap space instead of stack space.
- -- 2) special case for n==5 to create bushier trees.
- create c 0 xs' = c Tip xs'
- create c 5 xs' = case xs' of
- ((k1,x1):(k2,x2):(k3,x3):(k4,x4):(k5,x5):xx)
- -> c (bin k4 x4 (bin k2 x2 (singleton k1 x1) (singleton k3 x3)) (singleton k5 x5)) xx
- _ -> error "fromDistinctAscList create"
- create c n xs' = seq nr $ create (createR nr c) nl xs'
- where nl = n `div` 2
- nr = n - nl - 1
-
- createR n c l ((k,x):ys) = create (createB l k x c) n ys
- createR _ _ _ [] = error "fromDistinctAscList createR []"
- createB l k x c r zs = c (bin k x l r) zs
+ STRICT_1_OF_3(go)
+ go _ t [] = t
+ go s l ((kx, x) : xs) = case create s xs of
+ (r, ys) -> go (s `shiftL` 1) (join kx x l r) ys
+
+ STRICT_1_OF_2(create)
+ create _ [] = (Tip, [])
+ create s xs@(x' : xs')
+ | s == 1 = case x' of (kx, x) -> (Bin 1 kx x Tip Tip, xs')
+ | otherwise = case create (s `shiftR` 1) xs of
+ res@(_, []) -> res
+ (l, (ky, y):ys) -> case create (s `shiftR` 1) ys of
+ (r, zs) -> (join ky y l r, zs)
{--------------------------------------------------------------------
diff --git a/Data/Map/Strict.hs b/Data/Map/Strict.hs
index de1a366..4d8b81b 100644
--- a/Data/Map/Strict.hs
+++ b/Data/Map/Strict.hs
@@ -267,6 +267,7 @@ import Data.Map.Base hiding
)
import qualified Data.Set.Base as Set
import Data.StrictPair
+import Data.Bits (shiftL, shiftR)
-- Use macros to define strictness of functions. STRICT_x_OF_y
-- denotes an y-ary function strict in the x-th parameter. Similarly
@@ -274,6 +275,8 @@ import Data.StrictPair
-- y-th parameter. We do not use BangPatterns, because they are not
-- in any standard and we want the compilers to be compiled by as many
-- compilers as possible.
+#define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
+#define STRICT_1_OF_3(fn) fn arg _ _ | arg `seq` False = undefined
#define STRICT_2_OF_3(fn) fn _ arg _ | arg `seq` False = undefined
#define STRICT_1_2_OF_3(fn) fn arg1 arg2 _ | arg1 `seq` arg2 `seq` False = undefined
#define STRICT_2_3_OF_4(fn) fn _ arg1 arg2 _ | arg1 `seq` arg2 `seq` False = undefined
@@ -1119,23 +1122,22 @@ fromAscListWithKey f xs
-- > valid (fromDistinctAscList [(3,"b"), (5,"a")]) == True
-- > valid (fromDistinctAscList [(3,"b"), (5,"a"), (5,"b")]) == False
+-- For some reason, when 'singleton' is used in fromDistinctAscList or in
+-- create, it is not inlined, so we inline it manually.
fromDistinctAscList :: [(k,a)] -> Map k a
-fromDistinctAscList xs
- = create const (length xs) xs
+fromDistinctAscList [] = Tip
+fromDistinctAscList ((kx0, x0) : xs0) = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
where
- -- 1) use continuations so that we use heap space instead of stack space.
- -- 2) special case for n==5 to create bushier trees.
- create c 0 xs' = c Tip xs'
- create c 5 xs' = case xs' of
- ((k1,x1):(k2,x2):(k3,x3):(k4,x4):(k5,x5):xx)
- -> x1 `seq` x2 `seq` x3 `seq` x4 `seq` x5 `seq`
- c (bin k4 x4 (bin k2 x2 (singleton k1 x1) (singleton k3 x3))
- (singleton k5 x5)) xx
- _ -> error "fromDistinctAscList create"
- create c n xs' = seq nr $ create (createR nr c) nl xs'
- where nl = n `div` 2
- nr = n - nl - 1
-
- createR n c l ((k,x):ys) = x `seq` create (createB l k x c) n ys
- createR _ _ _ [] = error "fromDistinctAscList createR []"
- createB l k x c r zs = x `seq` c (bin k x l r) zs
+ STRICT_1_OF_3(go)
+ go _ t [] = t
+ go s l ((kx, x) : xs) = case create s xs of
+ (r, ys) -> x `seq` go (s `shiftL` 1) (join kx x l r) ys
+
+ STRICT_1_OF_2(create)
+ create _ [] = (Tip, [])
+ create s xs@(x' : xs')
+ | s == 1 = case x' of (kx, x) -> x `seq` (Bin 1 kx x Tip Tip, xs')
+ | otherwise = case create (s `shiftR` 1) xs of
+ res@(_, []) -> res
+ (l, (ky, y):ys) -> case create (s `shiftR` 1) ys of
+ (r, zs) -> y `seq` (join ky y l r, zs)
diff --git a/Data/Set/Base.hs b/Data/Set/Base.hs
index 600f3d2..f324c8e 100644
--- a/Data/Set/Base.hs
+++ b/Data/Set/Base.hs
@@ -177,6 +177,7 @@ module Data.Set.Base (
import Prelude hiding (filter,foldl,foldr,null,map)
import qualified Data.List as List
+import Data.Bits (shiftL, shiftR)
import Data.Monoid (Monoid(..))
import qualified Data.Foldable as Foldable
import Data.Typeable
@@ -815,24 +816,26 @@ fromAscList xs
-- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
-- /The precondition (input list is strictly ascending) is not checked./
+
+-- For some reason, when 'singleton' is used in fromDistinctAscList or in
+-- create, it is not inlined, so we inline it manually.
fromDistinctAscList :: [a] -> Set a
-fromDistinctAscList xs
- = create const (length xs) xs
+fromDistinctAscList [] = Tip
+fromDistinctAscList (x0 : xs0) = go (1::Int) (Bin 1 x0 Tip Tip) xs0
where
- -- 1) use continutations so that we use heap space instead of stack space.
- -- 2) special case for n==5 to create bushier trees.
- create c 0 xs' = c Tip xs'
- create c 5 xs' = case xs' of
- (x1:x2:x3:x4:x5:xx)
- -> c (bin x4 (bin x2 (singleton x1) (singleton x3)) (singleton x5)) xx
- _ -> error "fromDistinctAscList create 5"
- create c n xs' = seq nr $ create (createR nr c) nl xs'
- where nl = n `div` 2
- nr = n - nl - 1
-
- createR n c l (x:ys) = create (createB l x c) n ys
- createR _ _ _ [] = error "fromDistinctAscList createR []"
- createB l x c r zs = c (bin x l r) zs
+ STRICT_1_OF_3(go)
+ go _ t [] = t
+ go s l (x : xs) = case create s xs of
+ (r, ys) -> go (s `shiftL` 1) (join x l r) ys
+
+ STRICT_1_OF_2(create)
+ create _ [] = (Tip, [])
+ create s xs@(x : xs')
+ | s == 1 = (Bin 1 x Tip Tip, xs')
+ | otherwise = case create (s `shiftR` 1) xs of
+ res@(_, []) -> res
+ (l, y:ys) -> case create (s `shiftR` 1) ys of
+ (r, zs) -> (join y l r, zs)
{--------------------------------------------------------------------
Eq converts the set to a list. In a lazy setting, this
More information about the ghc-commits
mailing list