[commit: packages/containers] ghc-head: Improve {Map, Set}.fromDistinctAscList. (b518422)

git at git.haskell.org git at git.haskell.org
Fri Aug 30 13:33:46 CEST 2013


Repository : ssh://git@git.haskell.org/containers

On branch  : ghc-head
Link       : http://git.haskell.org/?p=packages/containers.git;a=commit;h=b51842247a149368f963fa1ab242d8eccfff63b6

>---------------------------------------------------------------

commit b51842247a149368f963fa1ab242d8eccfff63b6
Author: Milan Straka <fox at ucw.cz>
Date:   Thu Aug 30 17:28:49 2012 +0200

    Improve {Map,Set}.fromDistinctAscList.
    
    Benchmarks show 42% improvement for Map and 39% improvement for Set.
    
    The new implementation builds the map on the fly, without needing to
    know the number of elements in the list. It proceeds by building a map
    of size 2^1-1, 2^2-1, 2^3-1, .... When the input list is empty, there is
    at most log N trees left. These are joined from the smallest to the
    largest.


>---------------------------------------------------------------

b51842247a149368f963fa1ab242d8eccfff63b6
 Data/Map/Base.hs   |   34 ++++++++++++++++++----------------
 Data/Map/Strict.hs |   38 ++++++++++++++++++++------------------
 Data/Set/Base.hs   |   35 +++++++++++++++++++----------------
 3 files changed, 57 insertions(+), 50 deletions(-)

diff --git a/Data/Map/Base.hs b/Data/Map/Base.hs
index dc71e9f..61bcfe6 100644
--- a/Data/Map/Base.hs
+++ b/Data/Map/Base.hs
@@ -263,6 +263,7 @@ module Data.Map.Base (
 import Prelude hiding (lookup,map,filter,foldr,foldl,null)
 import qualified Data.Set.Base as Set
 import Data.StrictPair
+import Data.Bits (shiftL, shiftR)
 import Data.Monoid (Monoid(..))
 import Control.Applicative (Applicative(..), (<$>))
 import Data.Traversable (Traversable(traverse))
@@ -2086,24 +2087,25 @@ fromAscListWithKey f xs
 -- > valid (fromDistinctAscList [(3,"b"), (5,"a")])          == True
 -- > valid (fromDistinctAscList [(3,"b"), (5,"a"), (5,"b")]) == False
 
+-- For some reason, when 'singleton' is used in fromDistinctAscList or in
+-- create, it is not inlined, so we inline it manually.
 fromDistinctAscList :: [(k,a)] -> Map k a
-fromDistinctAscList xs
-  = create const (length xs) xs
+fromDistinctAscList [] = Tip
+fromDistinctAscList ((kx0, x0) : xs0) = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
   where
-    -- 1) use continuations so that we use heap space instead of stack space.
-    -- 2) special case for n==5 to create bushier trees.
-    create c 0 xs' = c Tip xs'
-    create c 5 xs' = case xs' of
-                       ((k1,x1):(k2,x2):(k3,x3):(k4,x4):(k5,x5):xx)
-                            -> c (bin k4 x4 (bin k2 x2 (singleton k1 x1) (singleton k3 x3)) (singleton k5 x5)) xx
-                       _ -> error "fromDistinctAscList create"
-    create c n xs' = seq nr $ create (createR nr c) nl xs'
-      where nl = n `div` 2
-            nr = n - nl - 1
-
-    createR n c l ((k,x):ys) = create (createB l k x c) n ys
-    createR _ _ _ []         = error "fromDistinctAscList createR []"
-    createB l k x c r zs     = c (bin k x l r) zs
+    STRICT_1_OF_3(go)
+    go _ t [] = t
+    go s l ((kx, x) : xs) = case create s xs of
+                              (r, ys) -> go (s `shiftL` 1) (join kx x l r) ys
+
+    STRICT_1_OF_2(create)
+    create _ [] = (Tip, [])
+    create s xs@(x' : xs')
+      | s == 1 = case x' of (kx, x) -> (Bin 1 kx x Tip Tip, xs')
+      | otherwise = case create (s `shiftR` 1) xs of
+                      res@(_, []) -> res
+                      (l, (ky, y):ys) -> case create (s `shiftR` 1) ys of
+                        (r, zs) -> (join ky y l r, zs)
 
 
 {--------------------------------------------------------------------
diff --git a/Data/Map/Strict.hs b/Data/Map/Strict.hs
index de1a366..4d8b81b 100644
--- a/Data/Map/Strict.hs
+++ b/Data/Map/Strict.hs
@@ -267,6 +267,7 @@ import Data.Map.Base hiding
     )
 import qualified Data.Set.Base as Set
 import Data.StrictPair
+import Data.Bits (shiftL, shiftR)
 
 -- Use macros to define strictness of functions.  STRICT_x_OF_y
 -- denotes an y-ary function strict in the x-th parameter. Similarly
@@ -274,6 +275,8 @@ import Data.StrictPair
 -- y-th parameter.  We do not use BangPatterns, because they are not
 -- in any standard and we want the compilers to be compiled by as many
 -- compilers as possible.
+#define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
+#define STRICT_1_OF_3(fn) fn arg _ _ | arg `seq` False = undefined
 #define STRICT_2_OF_3(fn) fn _ arg _ | arg `seq` False = undefined
 #define STRICT_1_2_OF_3(fn) fn arg1 arg2 _ | arg1 `seq` arg2 `seq` False = undefined
 #define STRICT_2_3_OF_4(fn) fn _ arg1 arg2 _ | arg1 `seq` arg2 `seq` False = undefined
@@ -1119,23 +1122,22 @@ fromAscListWithKey f xs
 -- > valid (fromDistinctAscList [(3,"b"), (5,"a")])          == True
 -- > valid (fromDistinctAscList [(3,"b"), (5,"a"), (5,"b")]) == False
 
+-- For some reason, when 'singleton' is used in fromDistinctAscList or in
+-- create, it is not inlined, so we inline it manually.
 fromDistinctAscList :: [(k,a)] -> Map k a
-fromDistinctAscList xs
-  = create const (length xs) xs
+fromDistinctAscList [] = Tip
+fromDistinctAscList ((kx0, x0) : xs0) = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
   where
-    -- 1) use continuations so that we use heap space instead of stack space.
-    -- 2) special case for n==5 to create bushier trees.
-    create c 0 xs' = c Tip xs'
-    create c 5 xs' = case xs' of
-                       ((k1,x1):(k2,x2):(k3,x3):(k4,x4):(k5,x5):xx)
-                            -> x1 `seq` x2 `seq` x3 `seq` x4 `seq` x5 `seq`
-                               c (bin k4 x4 (bin k2 x2 (singleton k1 x1) (singleton k3 x3))
-                                  (singleton k5 x5)) xx
-                       _ -> error "fromDistinctAscList create"
-    create c n xs' = seq nr $ create (createR nr c) nl xs'
-      where nl = n `div` 2
-            nr = n - nl - 1
-
-    createR n c l ((k,x):ys) = x `seq` create (createB l k x c) n ys
-    createR _ _ _ []         = error "fromDistinctAscList createR []"
-    createB l k x c r zs     = x `seq` c (bin k x l r) zs
+    STRICT_1_OF_3(go)
+    go _ t [] = t
+    go s l ((kx, x) : xs) = case create s xs of
+                              (r, ys) -> x `seq` go (s `shiftL` 1) (join kx x l r) ys
+
+    STRICT_1_OF_2(create)
+    create _ [] = (Tip, [])
+    create s xs@(x' : xs')
+      | s == 1 = case x' of (kx, x) -> x `seq` (Bin 1 kx x Tip Tip, xs')
+      | otherwise = case create (s `shiftR` 1) xs of
+                      res@(_, []) -> res
+                      (l, (ky, y):ys) -> case create (s `shiftR` 1) ys of
+                        (r, zs) -> y `seq` (join ky y l r, zs)
diff --git a/Data/Set/Base.hs b/Data/Set/Base.hs
index 600f3d2..f324c8e 100644
--- a/Data/Set/Base.hs
+++ b/Data/Set/Base.hs
@@ -177,6 +177,7 @@ module Data.Set.Base (
 
 import Prelude hiding (filter,foldl,foldr,null,map)
 import qualified Data.List as List
+import Data.Bits (shiftL, shiftR)
 import Data.Monoid (Monoid(..))
 import qualified Data.Foldable as Foldable
 import Data.Typeable
@@ -815,24 +816,26 @@ fromAscList xs
 
 -- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
 -- /The precondition (input list is strictly ascending) is not checked./
+
+-- For some reason, when 'singleton' is used in fromDistinctAscList or in
+-- create, it is not inlined, so we inline it manually.
 fromDistinctAscList :: [a] -> Set a
-fromDistinctAscList xs
-  = create const (length xs) xs
+fromDistinctAscList [] = Tip
+fromDistinctAscList (x0 : xs0) = go (1::Int) (Bin 1 x0 Tip Tip) xs0
   where
-    -- 1) use continutations so that we use heap space instead of stack space.
-    -- 2) special case for n==5 to create bushier trees.
-    create c 0 xs' = c Tip xs'
-    create c 5 xs' = case xs' of
-                       (x1:x2:x3:x4:x5:xx)
-                            -> c (bin x4 (bin x2 (singleton x1) (singleton x3)) (singleton x5)) xx
-                       _ -> error "fromDistinctAscList create 5"
-    create c n xs' = seq nr $ create (createR nr c) nl xs'
-      where nl = n `div` 2
-            nr = n - nl - 1
-
-    createR n c l (x:ys) = create (createB l x c) n ys
-    createR _ _ _ []     = error "fromDistinctAscList createR []"
-    createB l x c r zs   = c (bin x l r) zs
+    STRICT_1_OF_3(go)
+    go _ t [] = t
+    go s l (x : xs) = case create s xs of
+                        (r, ys) -> go (s `shiftL` 1) (join x l r) ys
+
+    STRICT_1_OF_2(create)
+    create _ [] = (Tip, [])
+    create s xs@(x : xs')
+      | s == 1 = (Bin 1 x Tip Tip, xs')
+      | otherwise = case create (s `shiftR` 1) xs of
+                      res@(_, []) -> res
+                      (l, y:ys) -> case create (s `shiftR` 1) ys of
+                        (r, zs) -> (join y l r, zs)
 
 {--------------------------------------------------------------------
   Eq converts the set to a list. In a lazy setting, this





More information about the ghc-commits mailing list