[Git][ghc/ghc][master] 2 commits: Replace `SizedSeq` with `FlatBag` for flattened structure
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Thu Apr 4 18:48:00 UTC 2024
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
5f085d3a by Fendor at 2024-04-04T14:47:33-04:00
Replace `SizedSeq` with `FlatBag` for flattened structure
LinkedLists are notoriously memory ineffiecient when all we do is
traversing a structure.
As 'UnlinkedBCO' has been identified as a data structure that impacts
the overall memory usage of GHCi sessions, we avoid linked lists and
prefer flattened structure for storing.
We introduce a new memory efficient representation of sequential
elements that has special support for the cases:
* Empty
* Singleton
* Tuple Elements
This improves sharing in the 'Empty' case and avoids the overhead of
'Array' until its constant overhead is justified.
- - - - -
82cfe10c by Fendor at 2024-04-04T14:47:33-04:00
Compact FlatBag array representation
`Array` contains three additional `Word`'s we do not need in `FlatBag`. Move
`FlatBag` to `SmallArray`.
Expand the API of SmallArray by `sizeofSmallArray` and add common
traversal functions, such as `mapSmallArray` and `foldMapSmallArray`.
Additionally, allow users to force the elements of a `SmallArray`
via `rnfSmallArray`.
- - - - -
8 changed files:
- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Linker.hs
- compiler/GHC/ByteCode/Types.hs
- + compiler/GHC/Data/FlatBag.hs
- compiler/GHC/Data/SmallArray.hs
- compiler/ghc.cabal.in
- testsuite/tests/count-deps/CountDepsAst.stdout
- testsuite/tests/count-deps/CountDepsParser.stdout
Changes:
=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -71,9 +71,9 @@ bcoFreeNames bco
where
bco_refs (UnlinkedBCO _ _ _ _ nonptrs ptrs)
= unionManyUniqDSets (
- mkUniqDSet [ n | BCOPtrName n <- ssElts ptrs ] :
- mkUniqDSet [ n | BCONPtrItbl n <- ssElts nonptrs ] :
- map bco_refs [ bco | BCOPtrBCO bco <- ssElts ptrs ]
+ mkUniqDSet [ n | BCOPtrName n <- elemsFlatBag ptrs ] :
+ mkUniqDSet [ n | BCONPtrItbl n <- elemsFlatBag nonptrs ] :
+ map bco_refs [ bco | BCOPtrBCO bco <- elemsFlatBag ptrs ]
)
-- -----------------------------------------------------------------------------
@@ -215,7 +215,7 @@ assembleBCO platform (ProtoBCO { protoBCOName = nm
let asm_insns = ssElts final_insns
insns_arr = Array.listArray (0, fromIntegral n_insns - 1) asm_insns
bitmap_arr = mkBitmapArray bsize bitmap
- ul_bco = UnlinkedBCO nm arity insns_arr bitmap_arr final_lits final_ptrs
+ ul_bco = UnlinkedBCO nm arity insns_arr bitmap_arr (fromSizedSeq final_lits) (fromSizedSeq final_ptrs)
-- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
-- objects, since they might get run too early. Disable this until
=====================================
compiler/GHC/ByteCode/Linker.hs
=====================================
@@ -60,10 +60,10 @@ linkBCO interp le bco_ix
(UnlinkedBCO _ arity insns bitmap lits0 ptrs0) = do
-- fromIntegral Word -> Word64 should be a no op if Word is Word64
-- otherwise it will result in a cast to longlong on 32bit systems.
- lits <- mapM (fmap fromIntegral . lookupLiteral interp le) (ssElts lits0)
- ptrs <- mapM (resolvePtr interp le bco_ix) (ssElts ptrs0)
+ lits <- mapM (fmap fromIntegral . lookupLiteral interp le) (elemsFlatBag lits0)
+ ptrs <- mapM (resolvePtr interp le bco_ix) (elemsFlatBag ptrs0)
return (ResolvedBCO isLittleEndian arity insns bitmap
- (listArray (0, fromIntegral (sizeSS lits0)-1) lits)
+ (listArray (0, fromIntegral (sizeFlatBag lits0)-1) lits)
(addListToSS emptySS ptrs))
lookupLiteral :: Interp -> LinkerEnv -> BCONPtr -> IO Word
=====================================
compiler/GHC/ByteCode/Types.hs
=====================================
@@ -18,12 +18,13 @@ module GHC.ByteCode.Types
, CgBreakInfo(..)
, ModBreaks (..), BreakIndex, emptyModBreaks
, CCostCentre
+ , FlatBag, sizeFlatBag, fromSizedSeq, elemsFlatBag
) where
import GHC.Prelude
import GHC.Data.FastString
-import GHC.Data.SizedSeq
+import GHC.Data.FlatBag
import GHC.Types.Name
import GHC.Types.Name.Env
import GHC.Utils.Outputable
@@ -154,8 +155,8 @@ data UnlinkedBCO
unlinkedBCOArity :: {-# UNPACK #-} !Int,
unlinkedBCOInstrs :: !(UArray Int Word16), -- insns
unlinkedBCOBitmap :: !(UArray Int Word64), -- bitmap
- unlinkedBCOLits :: !(SizedSeq BCONPtr), -- non-ptrs
- unlinkedBCOPtrs :: !(SizedSeq BCOPtr) -- ptrs
+ unlinkedBCOLits :: !(FlatBag BCONPtr), -- non-ptrs
+ unlinkedBCOPtrs :: !(FlatBag BCOPtr) -- ptrs
}
instance NFData UnlinkedBCO where
@@ -210,8 +211,8 @@ seqCgBreakInfo CgBreakInfo{..} =
instance Outputable UnlinkedBCO where
ppr (UnlinkedBCO nm _arity _insns _bitmap lits ptrs)
= sep [text "BCO", ppr nm, text "with",
- ppr (sizeSS lits), text "lits",
- ppr (sizeSS ptrs), text "ptrs" ]
+ ppr (sizeFlatBag lits), text "lits",
+ ppr (sizeFlatBag ptrs), text "ptrs" ]
instance Outputable CgBreakInfo where
ppr info = text "CgBreakInfo" <+>
=====================================
compiler/GHC/Data/FlatBag.hs
=====================================
@@ -0,0 +1,129 @@
+{-# LANGUAGE UnboxedTuples #-}
+module GHC.Data.FlatBag
+ ( FlatBag
+ , emptyFlatBag
+ , unitFlatBag
+ , sizeFlatBag
+ , elemsFlatBag
+ , mappendFlatBag
+ -- * Construction
+ , fromList
+ , fromSizedSeq
+ ) where
+
+import GHC.Prelude
+
+import GHC.Data.SizedSeq (SizedSeq, ssElts, sizeSS)
+
+import Control.DeepSeq
+
+import GHC.Data.SmallArray
+
+-- | Store elements in a flattened representation.
+--
+-- A 'FlatBag' is a data structure that stores an ordered list of elements
+-- in a flat structure, avoiding the overhead of a linked list.
+-- Use this data structure, if the code requires the following properties:
+--
+-- * Elements are stored in a long-lived object, and benefit from a flattened
+-- representation.
+-- * The 'FlatBag' will be traversed but not extended or filtered.
+-- * The number of elements should be known.
+-- * Sharing of the empty case improves memory behaviour.
+--
+-- A 'FlagBag' aims to have as little overhead as possible to store its elements.
+-- To achieve that, it distinguishes between the empty case, singleton, tuple
+-- and general case.
+-- Thus, we only pay for the additional three words of an 'Array' if we have at least
+-- three elements.
+data FlatBag a
+ = EmptyFlatBag
+ | UnitFlatBag !a
+ | TupleFlatBag !a !a
+ | FlatBag {-# UNPACK #-} !(SmallArray a)
+
+instance Functor FlatBag where
+ fmap _ EmptyFlatBag = EmptyFlatBag
+ fmap f (UnitFlatBag a) = UnitFlatBag $ f a
+ fmap f (TupleFlatBag a b) = TupleFlatBag (f a) (f b)
+ fmap f (FlatBag e) = FlatBag $ mapSmallArray f e
+
+instance Foldable FlatBag where
+ foldMap _ EmptyFlatBag = mempty
+ foldMap f (UnitFlatBag a) = f a
+ foldMap f (TupleFlatBag a b) = f a `mappend` f b
+ foldMap f (FlatBag arr) = foldMapSmallArray f arr
+
+ length = fromIntegral . sizeFlatBag
+
+instance Traversable FlatBag where
+ traverse _ EmptyFlatBag = pure EmptyFlatBag
+ traverse f (UnitFlatBag a) = UnitFlatBag <$> f a
+ traverse f (TupleFlatBag a b) = TupleFlatBag <$> f a <*> f b
+ traverse f fl@(FlatBag arr) = fromList (fromIntegral $ sizeofSmallArray arr) <$> traverse f (elemsFlatBag fl)
+
+instance NFData a => NFData (FlatBag a) where
+ rnf EmptyFlatBag = ()
+ rnf (UnitFlatBag a) = rnf a
+ rnf (TupleFlatBag a b) = rnf a `seq` rnf b
+ rnf (FlatBag arr) = rnfSmallArray arr
+
+-- | Create an empty 'FlatBag'.
+--
+-- The empty 'FlatBag' is shared over all instances.
+emptyFlatBag :: FlatBag a
+emptyFlatBag = EmptyFlatBag
+
+-- | Create a singleton 'FlatBag'.
+unitFlatBag :: a -> FlatBag a
+unitFlatBag = UnitFlatBag
+
+-- | Calculate the size of
+sizeFlatBag :: FlatBag a -> Word
+sizeFlatBag EmptyFlatBag = 0
+sizeFlatBag UnitFlatBag{} = 1
+sizeFlatBag TupleFlatBag{} = 2
+sizeFlatBag (FlatBag arr) = fromIntegral $ sizeofSmallArray arr
+
+-- | Get all elements that are stored in the 'FlatBag'.
+elemsFlatBag :: FlatBag a -> [a]
+elemsFlatBag EmptyFlatBag = []
+elemsFlatBag (UnitFlatBag a) = [a]
+elemsFlatBag (TupleFlatBag a b) = [a, b]
+elemsFlatBag (FlatBag arr) =
+ [indexSmallArray arr i | i <- [0 .. sizeofSmallArray arr - 1]]
+
+-- | Combine two 'FlatBag's.
+--
+-- The new 'FlatBag' contains all elements from both 'FlatBag's.
+--
+-- If one of the 'FlatBag's is empty, the old 'FlatBag' is reused.
+mappendFlatBag :: FlatBag a -> FlatBag a -> FlatBag a
+mappendFlatBag EmptyFlatBag b = b
+mappendFlatBag a EmptyFlatBag = a
+mappendFlatBag (UnitFlatBag a) (UnitFlatBag b) = TupleFlatBag a b
+mappendFlatBag a b =
+ fromList (sizeFlatBag a + sizeFlatBag b)
+ (elemsFlatBag a ++ elemsFlatBag b)
+
+-- | Store the list in a flattened memory representation, avoiding the memory overhead
+-- of a linked list.
+--
+-- The size 'n' needs to be smaller or equal to the length of the list.
+-- If it is smaller than the length of the list, overflowing elements are
+-- discarded. It is undefined behaviour to set 'n' to be bigger than the
+-- length of the list.
+fromList :: Word -> [a] -> FlatBag a
+fromList n elts =
+ case elts of
+ [] -> EmptyFlatBag
+ [a] -> UnitFlatBag a
+ [a, b] -> TupleFlatBag a b
+ xs ->
+ FlatBag (listToArray (fromIntegral n) fst snd (zip [0..] xs))
+
+-- | Convert a 'SizedSeq' into its flattened representation.
+-- A 'FlatBag a' is more memory efficient than '[a]', if no further modification
+-- is necessary.
+fromSizedSeq :: SizedSeq a -> FlatBag a
+fromSizedSeq s = fromList (sizeSS s) (ssElts s)
=====================================
compiler/GHC/Data/SmallArray.hs
=====================================
@@ -11,13 +11,18 @@ module GHC.Data.SmallArray
, freezeSmallArray
, unsafeFreezeSmallArray
, indexSmallArray
+ , sizeofSmallArray
, listToArray
+ , mapSmallArray
+ , foldMapSmallArray
+ , rnfSmallArray
)
where
import GHC.Exts
import GHC.Prelude
import GHC.ST
+import Control.DeepSeq
data SmallArray a = SmallArray (SmallArray# a)
@@ -64,6 +69,14 @@ unsafeFreezeSmallArray (SmallMutableArray ma) s =
case unsafeFreezeSmallArray# ma s of
(# s', a #) -> (# s', SmallArray a #)
+-- | Get the size of a 'SmallArray'
+sizeofSmallArray
+ :: SmallArray a
+ -> Int
+{-# INLINE sizeofSmallArray #-}
+sizeofSmallArray (SmallArray sa#) =
+ case sizeofSmallArray# sa# of
+ s -> I# s
-- | Index a small-array (no bounds checking!)
indexSmallArray
@@ -71,9 +84,51 @@ indexSmallArray
-> Int -- ^ index
-> a
{-# INLINE indexSmallArray #-}
-indexSmallArray (SmallArray sa#) (I# i) = case indexSmallArray# sa# i of
- (# v #) -> v
+indexSmallArray (SmallArray sa#) (I# i) =
+ case indexSmallArray# sa# i of
+ (# v #) -> v
+-- | Map a function over the elements of a 'SmallArray'
+--
+mapSmallArray :: (a -> b) -> SmallArray a -> SmallArray b
+{-# INLINE mapSmallArray #-}
+mapSmallArray f sa = runST $ ST $ \s ->
+ let
+ n = sizeofSmallArray sa
+ go !i saMut# state#
+ | i < n =
+ let
+ a = indexSmallArray sa i
+ newState# = writeSmallArray saMut# i (f a) state#
+ in
+ go (i + 1) saMut# newState#
+ | otherwise = state#
+ in
+ case newSmallArray n (error "SmallArray: internal error, uninitialised elements") s of
+ (# s', mutArr #) ->
+ case go 0 mutArr s' of
+ s'' -> unsafeFreezeSmallArray mutArr s''
+
+-- | Fold the values of a 'SmallArray' into a 'Monoid m' of choice
+foldMapSmallArray :: Monoid m => (a -> m) -> SmallArray a -> m
+{-# INLINE foldMapSmallArray #-}
+foldMapSmallArray f sa = go 0
+ where
+ n = sizeofSmallArray sa
+ go i
+ | i < n = f (indexSmallArray sa i) `mappend` go (i + 1)
+ | otherwise = mempty
+
+-- | Force the elements of the given 'SmallArray'
+--
+rnfSmallArray :: NFData a => SmallArray a -> ()
+{-# INLINE rnfSmallArray #-}
+rnfSmallArray sa = go 0
+ where
+ n = sizeofSmallArray sa
+ go !i
+ | i < n = rnf (indexSmallArray sa i) `seq` go (i + 1)
+ | otherwise = ()
-- | Convert a list into an array.
listToArray :: Int -> (e -> Int) -> (e -> a) -> [e] -> SmallArray a
=====================================
compiler/ghc.cabal.in
=====================================
@@ -414,6 +414,7 @@ Library
GHC.Data.FastString
GHC.Data.FastString.Env
GHC.Data.FiniteMap
+ GHC.Data.FlatBag
GHC.Data.Graph.Base
GHC.Data.Graph.Color
GHC.Data.Graph.Collapse
=====================================
testsuite/tests/count-deps/CountDepsAst.stdout
=====================================
@@ -63,6 +63,7 @@ GHC.Data.FastMutInt
GHC.Data.FastString
GHC.Data.FastString.Env
GHC.Data.FiniteMap
+GHC.Data.FlatBag
GHC.Data.Graph.Directed
GHC.Data.Graph.UnVar
GHC.Data.List.Infinite
@@ -70,6 +71,7 @@ GHC.Data.List.SetOps
GHC.Data.Maybe
GHC.Data.OrdList
GHC.Data.Pair
+GHC.Data.SmallArray
GHC.Data.Strict
GHC.Data.StringBuffer
GHC.Data.TrieMap
=====================================
testsuite/tests/count-deps/CountDepsParser.stdout
=====================================
@@ -64,6 +64,7 @@ GHC.Data.FastMutInt
GHC.Data.FastString
GHC.Data.FastString.Env
GHC.Data.FiniteMap
+GHC.Data.FlatBag
GHC.Data.Graph.Directed
GHC.Data.Graph.UnVar
GHC.Data.List.Infinite
@@ -71,6 +72,7 @@ GHC.Data.List.SetOps
GHC.Data.Maybe
GHC.Data.OrdList
GHC.Data.Pair
+GHC.Data.SmallArray
GHC.Data.Strict
GHC.Data.StringBuffer
GHC.Data.TrieMap
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/0c4a96862081f03e2946a2ed7e80c108f06205a1...82cfe10c8c3ec68e1b054e2d6b88e1a8830c60bf
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/0c4a96862081f03e2946a2ed7e80c108f06205a1...82cfe10c8c3ec68e1b054e2d6b88e1a8830c60bf
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240404/eee7677c/attachment-0001.html>
More information about the ghc-commits
mailing list