[commit: packages/containers] master: Revert "Optimize IntMap's withoutKeys" (6dcb45c)

git at git.haskell.org git at git.haskell.org
Mon Apr 17 21:48:38 UTC 2017


Repository : ssh://git@git.haskell.org/containers

On branch  : master
Link       : http://git.haskell.org/packages/containers.git/commitdiff/6dcb45c122674df71560d88d214eca23b14431de

>---------------------------------------------------------------

commit 6dcb45c122674df71560d88d214eca23b14431de
Author: wren romano <wren at community.haskell.org>
Date:   Mon Feb 20 11:12:14 2017 -0800

    Revert "Optimize IntMap's withoutKeys"


>---------------------------------------------------------------

6dcb45c122674df71560d88d214eca23b14431de
 Data/IntMap/Internal.hs | 88 ++++++++++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 52 deletions(-)

diff --git a/Data/IntMap/Internal.hs b/Data/IntMap/Internal.hs
index 620ffa8..38468f6 100644
--- a/Data/IntMap/Internal.hs
+++ b/Data/IntMap/Internal.hs
@@ -354,16 +354,6 @@ data IntMap a = Bin {-# UNPACK #-} !Prefix
 type Prefix = Int
 type Mask   = Int
 
-
--- Some stuff from "Data.IntSet.Internal", for 'restrictKeys' and
--- 'withoutKeys' to use.
-type IntSetPrefix = Int
-type IntSetBitMap = Word
-
-bitmapOf :: Int -> IntSetBitMap
-bitmapOf x = shiftLL 1 (x .&. IntSet.suffixBitMask)
-{-# INLINE bitmapOf #-}
-
 {--------------------------------------------------------------------
   Operators
 --------------------------------------------------------------------}
@@ -1046,9 +1036,7 @@ differenceWithKey :: (Key -> a -> b -> Maybe a) -> IntMap a -> IntMap b -> IntMa
 differenceWithKey f m1 m2
   = mergeWithKey f id (const Nil) m1 m2
 
-
--- TODO(wrengr): re-verify that asymptotic bound
--- | /O(n+m)/. Remove all the keys in a given set from a map.
+-- | Remove all the keys in a given set from a map.
 --
 -- @
 -- m `withoutKeys` s = 'filterWithKey' (\k _ -> k `'IntSet.notMember'` s) m
@@ -1070,14 +1058,8 @@ withoutKeys t1@(Bin p1 m1 l1 r1) t2@(IntSet.Bin p2 m2 l2 r2)
         | nomatch p1 p2 m2  = t1
         | zero p1 m2        = withoutKeys t1 l2
         | otherwise         = withoutKeys t1 r2
-withoutKeys t1@(Bin p1 m1 _ _) (IntSet.Tip p2 bm2) =
-    let minbit = bitmapOf p1
-        lt_minbit = minbit - 1
-        maxbit = bitmapOf (p1 .|. (m1 .|. (m1 - 1)))
-        gt_maxbit = maxbit `xor` complement (maxbit - 1)
-    -- TODO(wrengr): should we manually inline/unroll 'updatePrefix'
-    -- and 'withoutBM' here, in order to avoid redundant case analyses?
-    in updatePrefix p2 t1 $ withoutBM (bm2 .|. lt_minbit .|. gt_maxbit)
+withoutKeys t1@(Bin _ _ _ _) (IntSet.Tip p2 bm2) =
+    withoutBM t1 p2 bm2 (IntSet.suffixBitMask + 1)
 withoutKeys t1@(Bin _ _ _ _) IntSet.Nil = t1
 withoutKeys t1@(Tip k1 _) t2
     | k1 `IntSet.member` t2 = Nil
@@ -1085,33 +1067,30 @@ withoutKeys t1@(Tip k1 _) t2
 withoutKeys Nil _ = Nil
 
 
-updatePrefix
-    :: IntSetPrefix -> IntMap a -> (IntMap a -> IntMap a) -> IntMap a
-updatePrefix !kp t@(Bin p m l r) f
-    | m .&. IntSet.suffixBitMask /= 0 =
-        if p .&. IntSet.prefixBitMask == kp then f t else t
-    | nomatch kp p m = t
-    | zero kp m      = binCheckLeft p m (updatePrefix kp l f) r
-    | otherwise      = binCheckRight p m l (updatePrefix kp r f)
-updatePrefix kp t@(Tip kx _) f
-    | kx .&. IntSet.prefixBitMask == kp = f t
-    | otherwise = t
-updatePrefix _ Nil _ = Nil
-
-
-withoutBM :: IntSetBitMap -> IntMap a -> IntMap a
-withoutBM 0 t = t
-withoutBM bm (Bin p m l r) =
-    let leftBits = bitmapOf (p .|. m) - 1
-        bmL = bm .&. leftBits
-        bmR = bm `xor` bmL -- = (bm .&. complement leftBits)
-    in  bin p m (withoutBM bmL l) (withoutBM bmR r)
-withoutBM bm t@(Tip k _)
-    -- TODO(wrengr): need we manually inline 'IntSet.Member' here?
-    | k `IntSet.member` IntSet.Tip (k .&. IntSet.prefixBitMask) bm = Nil
-    | otherwise = t
-withoutBM _ Nil = Nil
-
+-- TODO(wrengr): Right now this is still pretty naive. It essentially
+-- unpacks the 'IntSetBitMap' into a tree-representation, and then
+-- calls 'delete' on each element of the set; thus, it is
+-- /O(min(m,W) * min(n,W)/. While technically that degenerates to
+-- /O(1)/ for a fixed /W/, it's morally equivalent to /O(m * log n)/.
+-- Really, we should be able to get this down to /O(n+m)/ just like
+-- 'difference' is. One way to do this would be to restrict @t@
+-- on the recursive calls, so that the 'lookup's are cheaper. But
+-- we should be able to do even better by avoiding the call to
+-- 'lookup' entirely.
+withoutBM :: IntMap a -> IntSetPrefix -> IntSetBitMap -> Key -> IntMap a
+withoutBM t !prefix !_ 0 = delete prefix t
+withoutBM t prefix bmask bits =
+    case intFromNat (natFromInt bits `shiftRL` 1) of
+    bits2
+      | bmask .&. (shiftLL 1 bits2 - 1) == 0 ->
+          withoutBM t (prefix + bits2) (shiftRL bmask bits2) bits2
+      | shiftRL bmask bits2 .&. (shiftLL 1 bits2 - 1) == 0 ->
+          withoutBM t prefix bmask bits2
+      | otherwise ->
+          -- withoutKeys t (bin prefix bits2 _ _)
+          withoutBM
+            (withoutBM t (prefix + bits2) (shiftRL bmask bits2) bits2)
+            prefix bmask bits2
 
 {--------------------------------------------------------------------
   Intersection
@@ -1124,8 +1103,6 @@ intersection :: IntMap a -> IntMap b -> IntMap a
 intersection m1 m2
   = mergeWithKey' bin const (const Nil) (const Nil) m1 m2
 
-
--- TODO(wrengr): re-verify that asymptotic bound
 -- | /O(n+m)/. The restriction of a map to the keys in a set.
 --
 -- @
@@ -1163,8 +1140,10 @@ restrictKeys t1@(Tip k1 _) t2
 restrictKeys Nil _ = Nil
 
 
--- | /O(min(n,W))/. Restrict to the sub-map with all keys matching
--- a key prefix.
+type IntSetPrefix = Int
+type IntSetBitMap = Word
+
+-- | Find the sub-tree of @t@ which matches the prefix @kp at .
 lookupPrefix :: IntSetPrefix -> IntMap a -> IntMap a
 lookupPrefix !kp t@(Bin p m l r)
     | m .&. IntSet.suffixBitMask /= 0 =
@@ -1192,6 +1171,11 @@ restrictBM bm t@(Tip k _)
 restrictBM _ Nil = Nil
 
 
+bitmapOf :: Int -> IntSetBitMap
+bitmapOf x = shiftLL 1 (x .&. IntSet.suffixBitMask)
+{-# INLINE bitmapOf #-}
+
+
 -- | /O(n+m)/. The intersection with a combining function.
 --
 -- > intersectionWith (++) (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "aA"



More information about the ghc-commits mailing list