[Git][ghc/ghc][wip/romes/deterministic-label-map] cmm: Back LabelMap with UDFM

Rodrigo Mesquita (@alt-romes) gitlab at gitlab.haskell.org
Wed Jul 3 13:22:34 UTC 2024



Rodrigo Mesquita pushed to branch wip/romes/deterministic-label-map at Glasgow Haskell Compiler / GHC


Commits:
1f94ce0e by Rodrigo Mesquita at 2024-07-03T14:22:21+01:00
cmm: Back LabelMap with UDFM

Use a deterministic unique map to back the implementation of `LabelMap`.

This is necessary towards the goal of object code determinism in #12935.

Our intended solution requires renaming uniques in a deterministic
order (which will be the order in which they were created), but storing
them label map makes us lose this order. Backing it with a UDFM fixes
this issue.

- - - - -


2 changed files:

- compiler/GHC/Cmm/Dataflow/Label.hs
- compiler/GHC/Types/Unique/DFM.hs


Changes:

=====================================
compiler/GHC/Cmm/Dataflow/Label.hs
=====================================
@@ -45,17 +45,15 @@ module GHC.Cmm.Dataflow.Label
     , mapAlter
     , mapAdjust
     , mapUnion
-    , mapUnions
     , mapUnionWithKey
+    , mapUnions
     , mapDifference
     , mapIntersection
-    , mapIsSubmapOf
     , mapMap
     , mapMapWithKey
     , mapFoldl
     , mapFoldr
     , mapFoldlWithKey
-    , mapFoldMapWithKey
     , mapFilter
     , mapFilterWithKey
     , mapElems
@@ -69,7 +67,7 @@ import GHC.Prelude
 
 import GHC.Utils.Outputable
 
-import GHC.Types.Unique (Uniquable(..), mkUniqueGrimily)
+import GHC.Types.Unique (Uniquable(..), mkUniqueGrimily, getKey)
 
 -- The code generator will eventually be using all the labels stored in a
 -- LabelSet and LabelMap. For these reasons we use the strict variants of these
@@ -77,12 +75,12 @@ import GHC.Types.Unique (Uniquable(..), mkUniqueGrimily)
 -- to fire.
 import GHC.Data.Word64Set (Word64Set)
 import qualified GHC.Data.Word64Set as S
-import GHC.Data.Word64Map.Strict (Word64Map)
-import qualified GHC.Data.Word64Map.Strict as M
 import GHC.Data.TrieMap
+import GHC.Types.Unique.DFM
 
 import Data.Word (Word64)
 import Data.List (foldl1')
+import GHC.Data.Maybe (fromMaybe)
 
 
 -----------------------------------------------------------------------------
@@ -173,114 +171,109 @@ setFromList ks  = LS (S.fromList (map lblToUnique ks))
 -----------------------------------------------------------------------------
 -- LabelMap
 
-newtype LabelMap v = LM (Word64Map v)
-  deriving newtype (Eq, Ord, Show, Functor, Foldable)
+newtype LabelMap v = LM (UniqDFM Word64 v)
+  deriving newtype (Functor, Foldable)
   deriving stock   Traversable
 
 mapNull :: LabelMap a -> Bool
-mapNull (LM m) = M.null m
+mapNull (LM m) = isNullUDFM m
 
 {-# INLINE mapSize #-}
 mapSize :: LabelMap a -> Int
-mapSize (LM m) = M.size m
+mapSize (LM m) = sizeUDFM m
 
 mapMember :: Label -> LabelMap a -> Bool
-mapMember (Label k) (LM m) = M.member k m
+mapMember (Label k) (LM m) = elemUDFM k m
 
 mapLookup :: Label -> LabelMap a -> Maybe a
-mapLookup (Label k) (LM m) = M.lookup k m
+mapLookup (Label k) (LM m) = lookupUDFM m k
 
 mapFindWithDefault :: a -> Label -> LabelMap a -> a
-mapFindWithDefault def (Label k) (LM m) = M.findWithDefault def k m
+mapFindWithDefault def (Label k) (LM m) = fromMaybe def $ lookupUDFM m k
 
 mapEmpty :: LabelMap v
-mapEmpty = LM M.empty
+mapEmpty = LM emptyUDFM
 
 mapSingleton :: Label -> v -> LabelMap v
-mapSingleton (Label k) v = LM (M.singleton k v)
+mapSingleton (Label k) v = LM (unitUDFM k v)
 
 mapInsert :: Label -> v -> LabelMap v -> LabelMap v
-mapInsert (Label k) v (LM m) = LM (M.insert k v m)
+mapInsert (Label k) v (LM m) = LM (addToUDFM m k v)
 
 mapInsertWith :: (v -> v -> v) -> Label -> v -> LabelMap v -> LabelMap v
-mapInsertWith f (Label k) v (LM m) = LM (M.insertWith f k v m)
+mapInsertWith f (Label k) v (LM m) = LM (addToUDFM_C f m k v)
 
 mapDelete :: Label -> LabelMap v -> LabelMap v
-mapDelete (Label k) (LM m) = LM (M.delete k m)
+mapDelete (Label k) (LM m) = LM (delFromUDFM m k)
 
 mapAlter :: (Maybe v -> Maybe v) -> Label -> LabelMap v -> LabelMap v
-mapAlter f (Label k) (LM m) = LM (M.alter f k m)
+mapAlter f (Label k) (LM m) = LM (alterUDFM f m k)
 
 mapAdjust :: (v -> v) -> Label -> LabelMap v -> LabelMap v
-mapAdjust f (Label k) (LM m) = LM (M.adjust f k m)
+mapAdjust f (Label k) (LM m) = LM (adjustUDFM f m k)
 
 mapUnion :: LabelMap v -> LabelMap v -> LabelMap v
-mapUnion (LM x) (LM y) = LM (M.union x y)
+mapUnion (LM x) (LM y) = LM (plusUDFM x y)
+
+mapUnionWithKey :: (Label -> v -> v -> v) -> LabelMap v -> LabelMap v -> LabelMap v
+mapUnionWithKey f (LM x) (LM y) = LM (plusUDFM_CK (f . mkHooplLabel . getKey) x y)
 
 {-# INLINE mapUnions #-}
 mapUnions :: [LabelMap a] -> LabelMap a
 mapUnions [] = mapEmpty
 mapUnions maps = foldl1' mapUnion maps
 
-mapUnionWithKey :: (Label -> v -> v -> v) -> LabelMap v -> LabelMap v -> LabelMap v
-mapUnionWithKey f (LM x) (LM y) = LM (M.unionWithKey (f . mkHooplLabel) x y)
-
 mapDifference :: LabelMap v -> LabelMap b -> LabelMap v
-mapDifference (LM x) (LM y) = LM (M.difference x y)
+mapDifference (LM x) (LM y) = LM (minusUDFM x y)
 
-mapIntersection :: LabelMap v -> LabelMap b -> LabelMap v
-mapIntersection (LM x) (LM y) = LM (M.intersection x y)
-
-mapIsSubmapOf :: Eq a => LabelMap a -> LabelMap a -> Bool
-mapIsSubmapOf (LM x) (LM y) = M.isSubmapOf x y
+mapIntersection :: LabelMap v -> LabelMap v -> LabelMap v
+mapIntersection (LM x) (LM y) = LM (intersectUDFM x y)
 
 mapMap :: (a -> v) -> LabelMap a -> LabelMap v
-mapMap f (LM m) = LM (M.map f m)
+mapMap f (LM m) = LM (mapUDFM f m)
 
 mapMapWithKey :: (Label -> a -> v) -> LabelMap a -> LabelMap v
-mapMapWithKey f (LM m) = LM (M.mapWithKey (f . mkHooplLabel) m)
+mapMapWithKey f (LM m) = LM (mapWithInternalKeyUDFM (f . mkHooplLabel) m)
 
 {-# INLINE mapFoldl #-}
 mapFoldl :: (a -> b -> a) -> a -> LabelMap b -> a
-mapFoldl k z (LM m) = M.foldl k z m
+mapFoldl k z lm = mapFoldr (\b g x -> g (k x b)) id lm z -- foldl as foldr
+  -- REVIEW: Is this implementation bad for performance?
 
 {-# INLINE mapFoldr #-}
 mapFoldr :: (a -> b -> b) -> b -> LabelMap a -> b
-mapFoldr k z (LM m) = M.foldr k z m
+mapFoldr k z (LM m) = foldUDFM k z m
 
 {-# INLINE mapFoldlWithKey #-}
 mapFoldlWithKey :: (t -> Label -> b -> t) -> t -> LabelMap b -> t
-mapFoldlWithKey k z (LM m) = M.foldlWithKey (\a v -> k a (mkHooplLabel v)) z m
-
-mapFoldMapWithKey :: Monoid m => (Label -> t -> m) -> LabelMap t -> m
-mapFoldMapWithKey f (LM m) = M.foldMapWithKey (\k v -> f (mkHooplLabel k) v) m
+mapFoldlWithKey k z (LM m) = foldWithKeyUDFM (\t v acc -> k acc (mkHooplLabel $ getKey t) v) z m
 
 {-# INLINEABLE mapFilter #-}
 mapFilter :: (v -> Bool) -> LabelMap v -> LabelMap v
-mapFilter f (LM m) = LM (M.filter f m)
+mapFilter f (LM m) = LM (filterUDFM f m)
 
 {-# INLINEABLE mapFilterWithKey #-}
 mapFilterWithKey :: (Label -> v -> Bool) -> LabelMap v -> LabelMap v
-mapFilterWithKey f (LM m)  = LM (M.filterWithKey (f . mkHooplLabel) m)
+mapFilterWithKey f (LM m)  = LM (filterUDFM_Directly (f . mkHooplLabel . getKey) m)
 
 {-# INLINE mapElems #-}
 mapElems :: LabelMap a -> [a]
-mapElems (LM m) = M.elems m
+mapElems (LM m) = eltsUDFM m
 
 {-# INLINE mapKeys #-}
 mapKeys :: LabelMap a -> [Label]
-mapKeys (LM m) = map (mkHooplLabel . fst) (M.toList m)
+mapKeys (LM m) = map (mkHooplLabel . getKey . fst) (udfmToList m)
 
 {-# INLINE mapToList #-}
 mapToList :: LabelMap b -> [(Label, b)]
-mapToList (LM m) = [(mkHooplLabel k, v) | (k, v) <- M.toList m]
+mapToList (LM m) = [(mkHooplLabel $ getKey k, v) | (k, v) <- udfmToList m]
 
 {-# INLINE mapFromList #-}
 mapFromList :: [(Label, v)] -> LabelMap v
-mapFromList assocs = LM (M.fromList [(lblToUnique k, v) | (k, v) <- assocs])
+mapFromList assocs = LM (listToUDFM_Directly [(mkUniqueGrimily $ lblToUnique k, v) | (k, v) <- assocs])
 
 mapFromListWith :: (v -> v -> v) -> [(Label, v)] -> LabelMap v
-mapFromListWith f assocs = LM (M.fromListWith f [(lblToUnique k, v) | (k, v) <- assocs])
+mapFromListWith f assocs = LM (listToUDFM_C_Directly f [(mkUniqueGrimily $ lblToUnique k, v) | (k, v) <- assocs])
 
 -----------------------------------------------------------------------------
 -- Instances


=====================================
compiler/GHC/Types/Unique/DFM.hs
=====================================
@@ -40,12 +40,13 @@ module GHC.Types.Unique.DFM (
         adjustUDFM_Directly,
         alterUDFM,
         mapUDFM,
+        mapWithInternalKeyUDFM,
         mapMaybeUDFM,
         plusUDFM,
-        plusUDFM_C,
+        plusUDFM_C, plusUDFM_CK,
         lookupUDFM, lookupUDFM_Directly,
         elemUDFM,
-        foldUDFM,
+        foldUDFM, foldWithKeyUDFM,
         eltsUDFM,
         filterUDFM, filterUDFM_Directly,
         isNullUDFM,
@@ -55,6 +56,7 @@ module GHC.Types.Unique.DFM (
         equalKeysUDFM,
         minusUDFM,
         listToUDFM, listToUDFM_Directly,
+        listToUDFM_C_Directly,
         udfmMinusUFM, ufmMinusUDFM,
         partitionUDFM,
         udfmRestrictKeys,
@@ -83,6 +85,7 @@ import Data.Function (on)
 import GHC.Types.Unique.FM (UniqFM, nonDetUFMToList, ufmToIntMap, unsafeIntMapToUFM)
 import Unsafe.Coerce
 import qualified GHC.Data.Word64Set as W
+import GHC.Word
 
 -- Note [Deterministic UniqFM]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -223,6 +226,12 @@ addListToUDFM_Directly_C
 addListToUDFM_Directly_C f = foldl' (\m (k, v) -> addToUDFM_C_Directly f m k v)
 {-# INLINEABLE addListToUDFM_Directly_C #-}
 
+-- | Like 'addListToUDFM_Directly_C' but also passes the unique key to the combine function
+addListToUDFM_Directly_CK
+  :: (Unique -> elt -> elt -> elt) -> UniqDFM key elt -> [(Unique,elt)] -> UniqDFM key elt
+addListToUDFM_Directly_CK f = foldl' (\m (k, v) -> addToUDFM_C_Directly (f k) m k v)
+{-# INLINEABLE addListToUDFM_Directly_CK #-}
+
 delFromUDFM :: Uniquable key => UniqDFM key elt -> key -> UniqDFM key elt
 delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i
 
@@ -233,6 +242,15 @@ plusUDFM_C f udfml@(UDFM _ i) udfmr@(UDFM _ j)
   | i > j = insertUDFMIntoLeft_C f udfml udfmr
   | otherwise = insertUDFMIntoLeft_C f udfmr udfml
 
+-- | Like 'plusUDFM_C' but the combine function also receives the unique key
+plusUDFM_CK :: (Unique -> elt -> elt -> elt) -> UniqDFM key elt -> UniqDFM key elt -> UniqDFM key elt
+plusUDFM_CK f udfml@(UDFM _ i) udfmr@(UDFM _ j)
+  -- we will use the upper bound on the tag as a proxy for the set size,
+  -- to insert the smaller one into the bigger one
+  | i > j = insertUDFMIntoLeft_CK f udfml udfmr
+  | otherwise = insertUDFMIntoLeft_CK f udfmr udfml
+
+
 -- Note [Overflow on plusUDFM]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- There are multiple ways of implementing plusUDFM.
@@ -281,6 +299,12 @@ insertUDFMIntoLeft_C
 insertUDFMIntoLeft_C f udfml udfmr =
   addListToUDFM_Directly_C f udfml $ udfmToList udfmr
 
+-- | Like 'insertUDFMIntoLeft_C', but the merge function also receives the unique key
+insertUDFMIntoLeft_CK
+  :: (Unique -> elt -> elt -> elt) -> UniqDFM key elt -> UniqDFM key elt -> UniqDFM key elt
+insertUDFMIntoLeft_CK f udfml udfmr =
+  addListToUDFM_Directly_CK f udfml $ udfmToList udfmr
+
 lookupUDFM :: Uniquable key => UniqDFM key elt -> key -> Maybe elt
 lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m
 
@@ -297,6 +321,12 @@ foldUDFM :: (elt -> a -> a) -> a -> UniqDFM key elt -> a
 -- This INLINE prevents a regression in !10568
 foldUDFM k z m = foldr k z (eltsUDFM m)
 
+-- | Like 'foldUDFM' but the function also receives a key
+foldWithKeyUDFM :: (Unique -> elt -> a -> a) -> a -> UniqDFM key elt -> a
+{-# INLINE foldWithKeyUDFM #-}
+-- This INLINE was copied from foldUDFM
+foldWithKeyUDFM k z m = foldr (uncurry k) z (udfmToList m)
+
 -- | Performs a nondeterministic strict fold over the UniqDFM.
 -- It's O(n), same as the corresponding function on `UniqFM`.
 -- If you use this please provide a justification why it doesn't introduce
@@ -396,6 +426,9 @@ listToUDFM = foldl' (\m (k, v) -> addToUDFM m k v) emptyUDFM
 listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM key elt
 listToUDFM_Directly = foldl' (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM
 
+listToUDFM_C_Directly :: (elt -> elt -> elt) -> [(Unique, elt)] -> UniqDFM key elt
+listToUDFM_C_Directly f = foldl' (\m (u, v) -> addToUDFM_C_Directly f m u v) emptyUDFM
+
 -- | Apply a function to a particular element
 adjustUDFM :: Uniquable key => (elt -> elt) -> UniqDFM key elt -> key -> UniqDFM key elt
 adjustUDFM f (UDFM m i) k = UDFM (M.adjust (fmap f) (getKey $ getUnique k) m) i
@@ -429,6 +462,10 @@ mapUDFM f (UDFM m i) = UDFM (MS.map (fmap f) m) i
 -- in GHCi because all old ModDetails are retained (see pruneHomePackageTable).
 -- Modify with care.
 
+-- | Map a function over every value in a UniqDFM
+mapWithInternalKeyUDFM :: (Word64 -> elt1 -> elt2) -> UniqDFM key elt1 -> UniqDFM key elt2
+mapWithInternalKeyUDFM f (UDFM m i) = UDFM (MS.mapWithKey (fmap . f) m) i
+
 mapMaybeUDFM :: forall elt1 elt2 key.
                 (elt1 -> Maybe elt2) -> UniqDFM key elt1 -> UniqDFM key elt2
 mapMaybeUDFM f (UDFM m i) = UDFM (M.mapMaybe (traverse f) m) i



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/1f94ce0e5e66df3443668c02230a452c1d374a72

-- 
This project does not include diff previews in email notifications.
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/1f94ce0e5e66df3443668c02230a452c1d374a72
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240703/ef3d3590/attachment-0001.html>


More information about the ghc-commits mailing list