[Git][ghc/ghc][master] 2 commits: Replace `SizedSeq` with `FlatBag` for flattened structure

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Thu Apr 4 18:48:00 UTC 2024



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
5f085d3a by Fendor at 2024-04-04T14:47:33-04:00
Replace `SizedSeq` with `FlatBag` for flattened structure

LinkedLists are notoriously memory ineffiecient when all we do is
traversing a structure.
As 'UnlinkedBCO' has been identified as a data structure that impacts
the overall memory usage of GHCi sessions, we avoid linked lists and
prefer flattened structure for storing.

We introduce a new memory efficient representation of sequential
elements that has special support for the cases:

* Empty
* Singleton
* Tuple Elements

This improves sharing in the 'Empty' case and avoids the overhead of
'Array' until its constant overhead is justified.

- - - - -
82cfe10c by Fendor at 2024-04-04T14:47:33-04:00
Compact FlatBag array representation

`Array` contains three additional `Word`'s we do not need in `FlatBag`. Move
`FlatBag` to `SmallArray`.

Expand the API of SmallArray by `sizeofSmallArray` and add common
traversal functions, such as `mapSmallArray` and `foldMapSmallArray`.
Additionally, allow users to force the elements of a `SmallArray`
via `rnfSmallArray`.

- - - - -


8 changed files:

- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Linker.hs
- compiler/GHC/ByteCode/Types.hs
- + compiler/GHC/Data/FlatBag.hs
- compiler/GHC/Data/SmallArray.hs
- compiler/ghc.cabal.in
- testsuite/tests/count-deps/CountDepsAst.stdout
- testsuite/tests/count-deps/CountDepsParser.stdout


Changes:

=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -71,9 +71,9 @@ bcoFreeNames bco
   where
     bco_refs (UnlinkedBCO _ _ _ _ nonptrs ptrs)
         = unionManyUniqDSets (
-             mkUniqDSet [ n | BCOPtrName n <- ssElts ptrs ] :
-             mkUniqDSet [ n | BCONPtrItbl n <- ssElts nonptrs ] :
-             map bco_refs [ bco | BCOPtrBCO bco <- ssElts ptrs ]
+             mkUniqDSet [ n | BCOPtrName n <- elemsFlatBag ptrs ] :
+             mkUniqDSet [ n | BCONPtrItbl n <- elemsFlatBag nonptrs ] :
+             map bco_refs [ bco | BCOPtrBCO bco <- elemsFlatBag ptrs ]
           )
 
 -- -----------------------------------------------------------------------------
@@ -215,7 +215,7 @@ assembleBCO platform (ProtoBCO { protoBCOName       = nm
   let asm_insns = ssElts final_insns
       insns_arr = Array.listArray (0, fromIntegral n_insns - 1) asm_insns
       bitmap_arr = mkBitmapArray bsize bitmap
-      ul_bco = UnlinkedBCO nm arity insns_arr bitmap_arr final_lits final_ptrs
+      ul_bco = UnlinkedBCO nm arity insns_arr bitmap_arr (fromSizedSeq final_lits) (fromSizedSeq final_ptrs)
 
   -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
   -- objects, since they might get run too early.  Disable this until


=====================================
compiler/GHC/ByteCode/Linker.hs
=====================================
@@ -60,10 +60,10 @@ linkBCO interp le bco_ix
            (UnlinkedBCO _ arity insns bitmap lits0 ptrs0) = do
   -- fromIntegral Word -> Word64 should be a no op if Word is Word64
   -- otherwise it will result in a cast to longlong on 32bit systems.
-  lits <- mapM (fmap fromIntegral . lookupLiteral interp le) (ssElts lits0)
-  ptrs <- mapM (resolvePtr interp le bco_ix) (ssElts ptrs0)
+  lits <- mapM (fmap fromIntegral . lookupLiteral interp le) (elemsFlatBag lits0)
+  ptrs <- mapM (resolvePtr interp le bco_ix) (elemsFlatBag ptrs0)
   return (ResolvedBCO isLittleEndian arity insns bitmap
-              (listArray (0, fromIntegral (sizeSS lits0)-1) lits)
+              (listArray (0, fromIntegral (sizeFlatBag lits0)-1) lits)
               (addListToSS emptySS ptrs))
 
 lookupLiteral :: Interp -> LinkerEnv -> BCONPtr -> IO Word


=====================================
compiler/GHC/ByteCode/Types.hs
=====================================
@@ -18,12 +18,13 @@ module GHC.ByteCode.Types
   , CgBreakInfo(..)
   , ModBreaks (..), BreakIndex, emptyModBreaks
   , CCostCentre
+  , FlatBag, sizeFlatBag, fromSizedSeq, elemsFlatBag
   ) where
 
 import GHC.Prelude
 
 import GHC.Data.FastString
-import GHC.Data.SizedSeq
+import GHC.Data.FlatBag
 import GHC.Types.Name
 import GHC.Types.Name.Env
 import GHC.Utils.Outputable
@@ -154,8 +155,8 @@ data UnlinkedBCO
         unlinkedBCOArity  :: {-# UNPACK #-} !Int,
         unlinkedBCOInstrs :: !(UArray Int Word16),      -- insns
         unlinkedBCOBitmap :: !(UArray Int Word64),      -- bitmap
-        unlinkedBCOLits   :: !(SizedSeq BCONPtr),       -- non-ptrs
-        unlinkedBCOPtrs   :: !(SizedSeq BCOPtr)         -- ptrs
+        unlinkedBCOLits   :: !(FlatBag BCONPtr),       -- non-ptrs
+        unlinkedBCOPtrs   :: !(FlatBag BCOPtr)         -- ptrs
    }
 
 instance NFData UnlinkedBCO where
@@ -210,8 +211,8 @@ seqCgBreakInfo CgBreakInfo{..} =
 instance Outputable UnlinkedBCO where
    ppr (UnlinkedBCO nm _arity _insns _bitmap lits ptrs)
       = sep [text "BCO", ppr nm, text "with",
-             ppr (sizeSS lits), text "lits",
-             ppr (sizeSS ptrs), text "ptrs" ]
+             ppr (sizeFlatBag lits), text "lits",
+             ppr (sizeFlatBag ptrs), text "ptrs" ]
 
 instance Outputable CgBreakInfo where
    ppr info = text "CgBreakInfo" <+>


=====================================
compiler/GHC/Data/FlatBag.hs
=====================================
@@ -0,0 +1,129 @@
+{-# LANGUAGE UnboxedTuples #-}
+module GHC.Data.FlatBag
+  ( FlatBag
+  , emptyFlatBag
+  , unitFlatBag
+  , sizeFlatBag
+  , elemsFlatBag
+  , mappendFlatBag
+  -- * Construction
+  , fromList
+  , fromSizedSeq
+  ) where
+
+import GHC.Prelude
+
+import GHC.Data.SizedSeq (SizedSeq, ssElts, sizeSS)
+
+import Control.DeepSeq
+
+import GHC.Data.SmallArray
+
+-- | Store elements in a flattened representation.
+--
+-- A 'FlatBag' is a data structure that stores an ordered list of elements
+-- in a flat structure, avoiding the overhead of a linked list.
+-- Use this data structure, if the code requires the following properties:
+--
+-- * Elements are stored in a long-lived object, and benefit from a flattened
+--   representation.
+-- * The 'FlatBag' will be traversed but not extended or filtered.
+-- * The number of elements should be known.
+-- * Sharing of the empty case improves memory behaviour.
+--
+-- A 'FlagBag' aims to have as little overhead as possible to store its elements.
+-- To achieve that, it distinguishes between the empty case, singleton, tuple
+-- and general case.
+-- Thus, we only pay for the additional three words of an 'Array' if we have at least
+-- three elements.
+data FlatBag a
+  = EmptyFlatBag
+  | UnitFlatBag !a
+  | TupleFlatBag !a !a
+  | FlatBag {-# UNPACK #-} !(SmallArray a)
+
+instance Functor FlatBag where
+  fmap _ EmptyFlatBag = EmptyFlatBag
+  fmap f (UnitFlatBag a) = UnitFlatBag $ f a
+  fmap f (TupleFlatBag a b) = TupleFlatBag (f a) (f b)
+  fmap f (FlatBag e) = FlatBag $ mapSmallArray f e
+
+instance Foldable FlatBag where
+  foldMap _ EmptyFlatBag = mempty
+  foldMap f (UnitFlatBag a) = f a
+  foldMap f (TupleFlatBag a b) = f a `mappend` f b
+  foldMap f (FlatBag arr) = foldMapSmallArray f arr
+
+  length = fromIntegral . sizeFlatBag
+
+instance Traversable FlatBag where
+  traverse _ EmptyFlatBag = pure EmptyFlatBag
+  traverse f (UnitFlatBag a) = UnitFlatBag <$> f a
+  traverse f (TupleFlatBag a b) = TupleFlatBag <$> f a <*> f b
+  traverse f fl@(FlatBag arr) = fromList (fromIntegral $ sizeofSmallArray arr) <$> traverse f (elemsFlatBag fl)
+
+instance NFData a => NFData (FlatBag a) where
+  rnf EmptyFlatBag = ()
+  rnf (UnitFlatBag a) = rnf a
+  rnf (TupleFlatBag a b) = rnf a `seq` rnf b
+  rnf (FlatBag arr) = rnfSmallArray arr
+
+-- | Create an empty 'FlatBag'.
+--
+-- The empty 'FlatBag' is shared over all instances.
+emptyFlatBag :: FlatBag a
+emptyFlatBag = EmptyFlatBag
+
+-- | Create a singleton 'FlatBag'.
+unitFlatBag :: a -> FlatBag a
+unitFlatBag = UnitFlatBag
+
+-- | Calculate the size of
+sizeFlatBag :: FlatBag a -> Word
+sizeFlatBag EmptyFlatBag = 0
+sizeFlatBag UnitFlatBag{} = 1
+sizeFlatBag TupleFlatBag{} = 2
+sizeFlatBag (FlatBag arr) = fromIntegral $ sizeofSmallArray arr
+
+-- | Get all elements that are stored in the 'FlatBag'.
+elemsFlatBag :: FlatBag a -> [a]
+elemsFlatBag EmptyFlatBag = []
+elemsFlatBag (UnitFlatBag a) = [a]
+elemsFlatBag (TupleFlatBag a b) = [a, b]
+elemsFlatBag (FlatBag arr) =
+  [indexSmallArray arr i | i <- [0 .. sizeofSmallArray arr - 1]]
+
+-- | Combine two 'FlatBag's.
+--
+-- The new 'FlatBag' contains all elements from both 'FlatBag's.
+--
+-- If one of the 'FlatBag's is empty, the old 'FlatBag' is reused.
+mappendFlatBag :: FlatBag a -> FlatBag a -> FlatBag a
+mappendFlatBag EmptyFlatBag b = b
+mappendFlatBag a EmptyFlatBag = a
+mappendFlatBag (UnitFlatBag a) (UnitFlatBag b) = TupleFlatBag a b
+mappendFlatBag a b =
+  fromList (sizeFlatBag a + sizeFlatBag b)
+           (elemsFlatBag a ++ elemsFlatBag b)
+
+-- | Store the list in a flattened memory representation, avoiding the memory overhead
+-- of a linked list.
+--
+-- The size 'n' needs to be smaller or equal to the length of the list.
+-- If it is smaller than the length of the list, overflowing elements are
+-- discarded. It is undefined behaviour to set 'n' to be bigger than the
+-- length of the list.
+fromList :: Word -> [a] -> FlatBag a
+fromList n elts =
+  case elts of
+    [] -> EmptyFlatBag
+    [a] -> UnitFlatBag a
+    [a, b] -> TupleFlatBag a b
+    xs ->
+      FlatBag (listToArray (fromIntegral n) fst snd (zip [0..] xs))
+
+-- | Convert a 'SizedSeq' into its flattened representation.
+-- A 'FlatBag a' is more memory efficient than '[a]', if no further modification
+-- is necessary.
+fromSizedSeq :: SizedSeq a -> FlatBag a
+fromSizedSeq s = fromList (sizeSS s) (ssElts s)


=====================================
compiler/GHC/Data/SmallArray.hs
=====================================
@@ -11,13 +11,18 @@ module GHC.Data.SmallArray
   , freezeSmallArray
   , unsafeFreezeSmallArray
   , indexSmallArray
+  , sizeofSmallArray
   , listToArray
+  , mapSmallArray
+  , foldMapSmallArray
+  , rnfSmallArray
   )
 where
 
 import GHC.Exts
 import GHC.Prelude
 import GHC.ST
+import Control.DeepSeq
 
 data SmallArray a = SmallArray (SmallArray# a)
 
@@ -64,6 +69,14 @@ unsafeFreezeSmallArray (SmallMutableArray ma) s =
   case unsafeFreezeSmallArray# ma s of
     (# s', a #) -> (# s', SmallArray a #)
 
+-- | Get the size of a 'SmallArray'
+sizeofSmallArray
+  :: SmallArray a
+  -> Int
+{-# INLINE sizeofSmallArray #-}
+sizeofSmallArray (SmallArray sa#) =
+  case sizeofSmallArray# sa# of
+    s -> I# s
 
 -- | Index a small-array (no bounds checking!)
 indexSmallArray
@@ -71,9 +84,51 @@ indexSmallArray
   -> Int          -- ^ index
   -> a
 {-# INLINE indexSmallArray #-}
-indexSmallArray (SmallArray sa#) (I# i) = case indexSmallArray# sa# i of
-  (# v #) -> v
+indexSmallArray (SmallArray sa#) (I# i) =
+  case indexSmallArray# sa# i of
+    (# v #) -> v
 
+-- | Map a function over the elements of a 'SmallArray'
+--
+mapSmallArray :: (a -> b) -> SmallArray a -> SmallArray b
+{-# INLINE mapSmallArray #-}
+mapSmallArray f sa = runST $ ST $ \s ->
+  let
+    n = sizeofSmallArray sa
+    go !i saMut# state#
+      | i < n =
+        let
+          a = indexSmallArray sa i
+          newState# = writeSmallArray saMut# i (f a) state#
+        in
+          go (i + 1) saMut# newState#
+      | otherwise = state#
+  in
+  case newSmallArray n (error "SmallArray: internal error, uninitialised elements") s of
+    (# s', mutArr #) ->
+      case go 0 mutArr s' of
+        s'' -> unsafeFreezeSmallArray mutArr s''
+
+-- | Fold the values of a 'SmallArray' into a 'Monoid m' of choice
+foldMapSmallArray :: Monoid m => (a -> m) -> SmallArray a -> m
+{-# INLINE foldMapSmallArray #-}
+foldMapSmallArray f sa = go 0
+  where
+    n = sizeofSmallArray sa
+    go i
+      | i < n = f (indexSmallArray sa i) `mappend` go (i + 1)
+      | otherwise = mempty
+
+-- | Force the elements of the given 'SmallArray'
+--
+rnfSmallArray :: NFData a => SmallArray a -> ()
+{-# INLINE rnfSmallArray #-}
+rnfSmallArray sa = go 0
+  where
+    n = sizeofSmallArray sa
+    go !i
+      | i < n = rnf (indexSmallArray sa i) `seq` go (i + 1)
+      | otherwise = ()
 
 -- | Convert a list into an array.
 listToArray :: Int -> (e -> Int) -> (e -> a) -> [e] -> SmallArray a


=====================================
compiler/ghc.cabal.in
=====================================
@@ -414,6 +414,7 @@ Library
         GHC.Data.FastString
         GHC.Data.FastString.Env
         GHC.Data.FiniteMap
+        GHC.Data.FlatBag
         GHC.Data.Graph.Base
         GHC.Data.Graph.Color
         GHC.Data.Graph.Collapse


=====================================
testsuite/tests/count-deps/CountDepsAst.stdout
=====================================
@@ -63,6 +63,7 @@ GHC.Data.FastMutInt
 GHC.Data.FastString
 GHC.Data.FastString.Env
 GHC.Data.FiniteMap
+GHC.Data.FlatBag
 GHC.Data.Graph.Directed
 GHC.Data.Graph.UnVar
 GHC.Data.List.Infinite
@@ -70,6 +71,7 @@ GHC.Data.List.SetOps
 GHC.Data.Maybe
 GHC.Data.OrdList
 GHC.Data.Pair
+GHC.Data.SmallArray
 GHC.Data.Strict
 GHC.Data.StringBuffer
 GHC.Data.TrieMap


=====================================
testsuite/tests/count-deps/CountDepsParser.stdout
=====================================
@@ -64,6 +64,7 @@ GHC.Data.FastMutInt
 GHC.Data.FastString
 GHC.Data.FastString.Env
 GHC.Data.FiniteMap
+GHC.Data.FlatBag
 GHC.Data.Graph.Directed
 GHC.Data.Graph.UnVar
 GHC.Data.List.Infinite
@@ -71,6 +72,7 @@ GHC.Data.List.SetOps
 GHC.Data.Maybe
 GHC.Data.OrdList
 GHC.Data.Pair
+GHC.Data.SmallArray
 GHC.Data.Strict
 GHC.Data.StringBuffer
 GHC.Data.TrieMap



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/0c4a96862081f03e2946a2ed7e80c108f06205a1...82cfe10c8c3ec68e1b054e2d6b88e1a8830c60bf

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/0c4a96862081f03e2946a2ed7e80c108f06205a1...82cfe10c8c3ec68e1b054e2d6b88e1a8830c60bf
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240404/eee7677c/attachment-0001.html>


More information about the ghc-commits mailing list