[commit: packages/containers] master, revert-408-bugfix_394: greatly improved restrictBM. bitmapForBin is still buggy though. (b272994)

git at git.haskell.org git at git.haskell.org
Mon Apr 17 21:48:17 UTC 2017


Repository : ssh://git@git.haskell.org/containers

On branches: master,revert-408-bugfix_394
Link       : http://git.haskell.org/packages/containers.git/commitdiff/b27299481fff36cc91b16240e8755cc6ca70ae4b

>---------------------------------------------------------------

commit b27299481fff36cc91b16240e8755cc6ca70ae4b
Author: wren romano <wren at community.haskell.org>
Date:   Wed Feb 15 23:04:17 2017 -0800

    greatly improved restrictBM. bitmapForBin is still buggy though.


>---------------------------------------------------------------

b27299481fff36cc91b16240e8755cc6ca70ae4b
 Data/IntMap/Internal.hs | 96 ++++++++++++++++++++++++-------------------------
 1 file changed, 46 insertions(+), 50 deletions(-)

diff --git a/Data/IntMap/Internal.hs b/Data/IntMap/Internal.hs
index 8390a2a..cd0bb65 100644
--- a/Data/IntMap/Internal.hs
+++ b/Data/IntMap/Internal.hs
@@ -1125,13 +1125,9 @@ restrictKeys t1@(Bin p1 m1 l1 r1) t2@(IntSet.Bin p2 m2 l2 r2)
         | zero p1 m2        = restrictKeys t1 l2
         | otherwise         = restrictKeys t1 r2
 restrictKeys t1@(Bin _ _ _ _) (IntSet.Tip p2 bm2) =
-    case lookupPrefix p2 bm2 t1 of
-    t1'@(Bin p1 m1 _ _) ->
-        -- TODO(wrengr): start with a value for @bits@ based off @minkey@, so `restrictBM` can avoid needing to scan past the known-zero bits for too-small keys.
-        restrictBM t1' p2
-            (bm2 .&. bitmapForBin p1 m1)
-            (IntSet.suffixBitMask + 1)
-    t1' -> t1'
+    -- TODO(wrengr): should we manually inline/unroll 'lookupPrefix'
+    -- and 'restrictBM' here, in order to avoid redundant case analyses?
+    restrictBM bm2 (lookupPrefix p2 t1)
 restrictKeys (Bin _ _ _ _) IntSet.Nil = Nil
 restrictKeys t1@(Tip k1 _) t2
     | k1 `IntSet.member` t2 = t1
@@ -1143,22 +1139,38 @@ type IntSetPrefix = Int
 type IntSetBitMap = Word
 
 -- | Find the sub-tree of @t@ which matches the prefix @kp at .
-lookupPrefix :: IntSetPrefix -> IntSetBitMap -> IntMap a -> IntMap a
-lookupPrefix !kp !bm t@(Bin p m l r)
+lookupPrefix :: IntSetPrefix -> IntMap a -> IntMap a
+lookupPrefix !kp t@(Bin p m l r)
     | m .&. IntSet.suffixBitMask /= 0 =
         if p .&. IntSet.prefixBitMask == kp then t else Nil
     | nomatch kp p m = Nil
-    | zero kp m      = lookupPrefix kp bm l
-    | otherwise      = lookupPrefix kp bm r
-lookupPrefix kp bm t@(Tip kx _)
+    | zero kp m      = lookupPrefix kp l
+    | otherwise      = lookupPrefix kp r
+lookupPrefix kp t@(Tip kx _)
+    | (kx .&. IntSet.prefixBitMask) == kp = t
+    | otherwise = Nil
+lookupPrefix _ Nil = Nil
+
+
+restrictBM :: IntSetBitMap -> IntMap a -> IntMap a
+restrictBM 0 _ = Nil
+restrictBM bm (Bin p m l r) =
+    -- Assuming 'bitmapForBin' actually works correctly...
+    let m'  = intFromNat (natFromInt m `shiftRL` 1)
+        bmL = bitmapForBin p m'
+        bmR = bitmapForBin (p .|. m) m'
+    in bin p m (restrictBM bmL l) (restrictBM bmR r)
+restrictBM bm t@(Tip k _)
     -- TODO(wrengr): need we manually inline 'IntSet.Member' here?
-    | kx `IntSet.member` IntSet.Tip kp bm = t
+    | k `IntSet.member` IntSet.Tip (k .&. IntSet.prefixBitMask) bm = t
     | otherwise = Nil
-lookupPrefix _ _ Nil = Nil
+restrictBM _ Nil = Nil
 
 
+-- TODO(wrengr): this is buggy somehow.
 -- | Return an `IntSet`-bitmap for all keys that could possibly be
--- contained in an `IntMap`-`Bin`.
+-- contained in an `IntMap`-`Bin` with the given prefix and switching
+-- bit.
 bitmapForBin :: Prefix -> Mask -> IntSetBitMap
 bitmapForBin p m =
     largeEnough .&. smallEnough
@@ -1182,39 +1194,6 @@ bitmapForBin p m =
 {-# INLINE bitmapForBin #-}
 
 
--- TODO(wrengr): Right now this is still pretty naive. It essentially
--- unpacks the 'IntSetBitMap' into a tree-representation, and then
--- calls 'lookup' on each element of the set; thus, it is
--- /O(min(m,W) * min(n,W)/. While technically that degenerates to
--- /O(1)/ for a fixed /W/, it's morally equivalent to /O(m * log n)/.
--- Really, we should be able to get this down to /O(n+m)/ just like
--- 'intersection' is. One way to do this would be to restrict @t@
--- on the recursive calls, so that the 'lookup's are cheaper. But
--- we should be able to do even better by avoiding the call to
--- 'lookup' entirely.
---
--- | The initial value passed for the final argument should be
--- @(IntSet.suffixBitMask + 1)@. We don't set that here via
--- worker/wrapper, because this is the worker floated to the
--- top-level.
-restrictBM :: IntMap a -> IntSetPrefix -> IntSetBitMap -> Key -> IntMap a
-restrictBM t !prefix !_ 0 =
-    case lookup prefix t of
-    Nothing -> Nil
-    Just x -> Tip prefix x
-restrictBM t prefix bmask bits =
-    case intFromNat (natFromInt bits `shiftRL` 1) of
-    bits2
-      | bmask .&. (shiftLL 1 bits2 - 1) == 0 ->
-          restrictBM t (prefix + bits2) (shiftRL bmask bits2) bits2
-      | shiftRL bmask bits2 .&. (shiftLL 1 bits2 - 1) == 0 ->
-          restrictBM t prefix bmask bits2
-      | otherwise ->
-          bin prefix bits2
-            (restrictBM t prefix bmask bits2)
-            (restrictBM t (prefix + bits2) (shiftRL bmask bits2) bits2)
-
-
 -- | /O(n+m)/. The intersection with a combining function.
 --
 -- > intersectionWith (++) (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "aA"
@@ -3169,20 +3148,31 @@ binCheckRight p m l r   = Bin p m l r
 {--------------------------------------------------------------------
   Endian independent bit twiddling
 --------------------------------------------------------------------}
+
+-- | Should this key follow the left subtree of a 'Bin' with switching
+-- bit @m@? N.B., the answer is only valid when @match i p m@ is true.
 zero :: Key -> Mask -> Bool
 zero i m
   = (natFromInt i) .&. (natFromInt m) == 0
 {-# INLINE zero #-}
 
 nomatch,match :: Key -> Prefix -> Mask -> Bool
+
+-- | Does the key @i@ differ from the prefix @p@ before getting to
+-- the switching bit @m@?
 nomatch i p m
   = (mask i m) /= p
 {-# INLINE nomatch #-}
 
+-- | Does the key @i@ match the prefix @p@ (up to but not including
+-- bit @m@)?
 match i p m
   = (mask i m) == p
 {-# INLINE match #-}
 
+
+-- | The prefix of key @i@ up to (but not including) the switching
+-- bit @m at .
 mask :: Key -> Mask -> Prefix
 mask i m
   = maskW (natFromInt i) (natFromInt m)
@@ -3192,16 +3182,21 @@ mask i m
 {--------------------------------------------------------------------
   Big endian operations
 --------------------------------------------------------------------}
+
+-- | The prefix of key @i@ up to (but not including) the switching
+-- bit @m at .
 maskW :: Nat -> Nat -> Prefix
 maskW i m
   = intFromNat (i .&. (complement (m-1) `xor` m))
 {-# INLINE maskW #-}
 
+-- | Does the left switching bit specify a shorter prefix?
 shorter :: Mask -> Mask -> Bool
 shorter m1 m2
   = (natFromInt m1) > (natFromInt m2)
 {-# INLINE shorter #-}
 
+-- | The first switching bit where the two prefixes disagree.
 branchMask :: Prefix -> Prefix -> Mask
 branchMask p1 p2
   = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
@@ -3211,8 +3206,9 @@ branchMask p1 p2
   Utilities
 --------------------------------------------------------------------}
 
--- | /O(1)/.  Decompose a map into pieces based on the structure of the underlying
--- tree.  This function is useful for consuming a map in parallel.
+-- | /O(1)/.  Decompose a map into pieces based on the structure
+-- of the underlying tree. This function is useful for consuming a
+-- map in parallel.
 --
 -- No guarantee is made as to the sizes of the pieces; an internal, but
 -- deterministic process determines this.  However, it is guaranteed that the



More information about the ghc-commits mailing list