[commit: packages/containers] master: Fix insert worker/wrapper issues (#416) (1fd160a)
git at git.haskell.org
git at git.haskell.org
Mon Apr 17 21:48:56 UTC 2017
Repository : ssh://git@git.haskell.org/containers
On branch : master
Link : http://git.haskell.org/packages/containers.git/commitdiff/1fd160a481a7768f287e9f84d8525a354fa75092
>---------------------------------------------------------------
commit 1fd160a481a7768f287e9f84d8525a354fa75092
Author: David Feuer <David.Feuer at gmail.com>
Date: Wed Feb 22 16:56:49 2017 -0500
Fix insert worker/wrapper issues (#416)
The new pointer equality version of `insert` in `Data.Map` led to
a severe regression in the `last-piece` benchmark of `nofib`.
It turned out that worker/wrapper was doing absolutely horrible
things to `insert`, breaking the pointer equality tests and
also leading to completely unnecessary allocation. This commit
adds horrible hacks that seem to prevent this from happening.
>---------------------------------------------------------------
1fd160a481a7768f287e9f84d8525a354fa75092
Data/Map/Internal.hs | 49 +++++++++++++++++++++++++++++++++++++------------
Data/Set/Internal.hs | 24 +++++++++++++++---------
2 files changed, 52 insertions(+), 21 deletions(-)
diff --git a/Data/Map/Internal.hs b/Data/Map/Internal.hs
index d953722..55f8544 100644
--- a/Data/Map/Internal.hs
+++ b/Data/Map/Internal.hs
@@ -388,7 +388,7 @@ import Utils.Containers.Internal.BitUtil (wordSize)
#endif
#if __GLASGOW_HASKELL__
-import GHC.Exts (build)
+import GHC.Exts (build, lazy)
#if !MIN_VERSION_base(4,8,0)
import Data.Functor ((<$))
#endif
@@ -755,32 +755,51 @@ singleton k x = Bin 1 k x Tip Tip
-- > insert 5 'x' empty == singleton 5 'x'
-- See Note: Type of local 'go' function
+-- See Note: Avoiding worker/wrapper
insert :: Ord k => k -> a -> Map k a -> Map k a
-insert = go
+insert kx0 = go kx0 kx0
where
-- Unlike insertR, we only get sharing here
-- when the inserted value is at the same address
- -- as the present value. We try anyway. If we decide
- -- not to, then Data.Map.Strict should probably
- -- get its own union implementation.
- go :: Ord k => k -> a -> Map k a -> Map k a
- go !kx x Tip = singleton kx x
- go !kx x t@(Bin sz ky y l r) =
+ -- as the present value. We try anyway; this condition
+ -- seems particularly likely to occur in 'union'.
+ go :: Ord k => k -> k -> a -> Map k a -> Map k a
+ go orig !kx x Tip = singleton (lazy orig) x
+ go orig !kx x t@(Bin sz ky y l r) =
case compare kx ky of
LT | l' `ptrEq` l -> t
| otherwise -> balanceL ky y l' r
- where !l' = go kx x l
+ where !l' = go orig kx x l
GT | r' `ptrEq` r -> t
| otherwise -> balanceR ky y l r'
- where !r' = go kx x r
- EQ | kx `ptrEq` ky && x `ptrEq` y -> t
- | otherwise -> Bin sz kx x l r
+ where !r' = go orig kx x r
+ EQ | x `ptrEq` y && (lazy orig `seq` (orig `ptrEq` ky)) -> t
+ | otherwise -> Bin sz (lazy orig) x l r
#if __GLASGOW_HASKELL__
{-# INLINABLE insert #-}
#else
{-# INLINE insert #-}
#endif
+#ifndef __GLASGOW_HASKELL__
+lazy :: a -> a
+lazy a = a
+#endif
+
+-- [Note: Avoiding worker/wrapper]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- 'insert' has to go to great lengths to get pointer equality right and
+-- to prevent unnecessary allocation. The trouble is that GHC *really* wants
+-- to unbox the key and throw away the boxed one. This is bad for us, because
+-- we want to compare the pointer of the box we are given to the one already
+-- present if they compare EQ. It's also bad for us because it leads to the
+-- key being *reboxed* if it's actually stored in the map. Ugh! So we pass the
+-- 'go' function *two copies* of the key we're given. One of them we use for
+-- comparisons; the other we keep in our pocket. To prevent worker/wrapper from
+-- messing with the copy in our pocket, we sprinkle about calls to the magical
+-- function 'lazy'. This is all horrible, but it seems to work okay.
+
+
-- Insert a new key and value in the map if it is not already present.
-- Used by `union`.
@@ -1832,6 +1851,12 @@ unionWithKey f (Bin _ k1 x1 l1 r1) t2 = case splitLookup k1 t2 of
Difference
--------------------------------------------------------------------}
+-- We don't currently attempt to use any pointer equality tricks for
+-- 'difference'. To do so, we'd have to match on the first argument
+-- and split the second. Unfortunately, the proof of the time bound
+-- relies on doing it the way we do, and it's not clear whether that
+-- bound holds the other way.
+
-- | /O(m*log(n\/m + 1)), m <= n/. Difference of two maps.
-- Return elements of the first map not existing in the second map.
--
diff --git a/Data/Set/Internal.hs b/Data/Set/Internal.hs
index c2407f1..d0d4394 100644
--- a/Data/Set/Internal.hs
+++ b/Data/Set/Internal.hs
@@ -239,7 +239,7 @@ import Utils.Containers.Internal.StrictPair
import Utils.Containers.Internal.PtrEquality
#if __GLASGOW_HASKELL__
-import GHC.Exts ( build )
+import GHC.Exts ( build, lazy )
#if __GLASGOW_HASKELL__ >= 708
import qualified GHC.Exts as GHCExts
#endif
@@ -505,26 +505,32 @@ singleton x = Bin 1 x Tip Tip
-- it is replaced with the new value.
-- See Note: Type of local 'go' function
+-- See Note: Avoiding worker/wrapper (in Data.Map.Internal)
insert :: Ord a => a -> Set a -> Set a
-insert = go
+insert x0 = go x0 x0
where
- go :: Ord a => a -> Set a -> Set a
- go !x Tip = singleton x
- go !x t@(Bin sz y l r) = case compare x y of
+ go :: Ord a => a -> a -> Set a -> Set a
+ go orig !x Tip = singleton (lazy orig)
+ go orig !x t@(Bin sz y l r) = case compare x y of
LT | l' `ptrEq` l -> t
| otherwise -> balanceL y l' r
- where !l' = go x l
+ where !l' = go orig x l
GT | r' `ptrEq` r -> t
| otherwise -> balanceR y l r'
- where !r' = go x r
- EQ | x `ptrEq` y -> t
- | otherwise -> Bin sz x l r
+ where !r' = go orig x r
+ EQ | lazy orig `seq` (orig `ptrEq` y) -> t
+ | otherwise -> Bin sz (lazy orig) l r
#if __GLASGOW_HASKELL__
{-# INLINABLE insert #-}
#else
{-# INLINE insert #-}
#endif
+#ifndef __GLASGOW_HASKELL__
+lazy :: a -> a
+lazy a = a
+#endif
+
-- Insert an element to the set only if it is not in the set.
-- Used by `union`.
More information about the ghc-commits
mailing list