[Git][ghc/ghc][wip/decode_cloned_stack] 3 commits: Add notes

Sven Tennie (@supersven) gitlab at gitlab.haskell.org
Sat Jan 28 17:40:49 UTC 2023



Sven Tennie pushed to branch wip/decode_cloned_stack at Glasgow Haskell Compiler / GHC


Commits:
2023ff1e by Sven Tennie at 2023-01-28T15:52:55+00:00
Add notes

- - - - -
35c79361 by Sven Tennie at 2023-01-28T16:05:08+00:00
Cleanup: Remove old stuff

- - - - -
2a75318f by Sven Tennie at 2023-01-28T17:39:55+00:00
closureSize

- - - - -


5 changed files:

- libraries/ghc-heap/GHC/Exts/DecodeStack.hs
- libraries/ghc-heap/GHC/Exts/Heap/Closures.hs
- libraries/ghc-heap/GHC/Exts/StackConstants.hsc
- libraries/ghc-heap/cbits/Stack.cmm
- libraries/ghc-heap/tests/stack_misc_closures.hs


Changes:

=====================================
libraries/ghc-heap/GHC/Exts/DecodeStack.hs
=====================================
@@ -27,7 +27,6 @@ import Data.Maybe
 import Debug.Trace
 import Foreign
 import GHC.Exts
-import GHC.Exts.DecodeHeap
 import GHC.Exts.Heap.ClosureTypes
 import GHC.Exts.Heap.Closures
 import GHC.Exts.Heap.Constants (wORD_SIZE_IN_BITS)
@@ -36,6 +35,66 @@ import GHC.Exts.StackConstants
 import GHC.Stack.CloneStack
 import Prelude
 
+{- Note [Decoding the stack]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The stack is represented by a chain of StgStack closures. Each of these closures
+is subject to garbage collection. I.e. they can be moved in memory (in a
+simplified perspective) at any time.
+
+The array of closures inside an StgStack (that makeup the execution stack; the
+stack frames) is moved as bare memory by the garbage collector. References
+(pointers) to stack frames are not updated.
+
+As the StgStack closure is moved as whole, the relative offsets inside it stay
+the same. (Though, the absolute addresses change!)
+
+Stack frame iterator
+====================
+
+A StackFrameIter consists of a StackSnapshot# and a relative offset into the the
+array of stack frames (StgStack->stack). The StackSnapshot# represents a
+StgStack closure. It is updated by the garbage collector when the stack closure
+is moved.
+
+The relative offset describes the location of a stack frame. As stack frames
+come in various sizes, one cannot simply step over the stack array with a
+constant offset.
+
+The head of the stack frame array has offset 0. To traverse the stack frames the
+latest stacke frame's offset is incremented by the closure size. The unit of the
+offset is machine words (32bit or 64bit).
+
+Boxes
+=====
+
+As references into thestack frame array aren't updated by the garbage collector,
+creating a Box with a pointer (address) to a stack frame would break as soon as
+the StgStack closure is moved.
+
+To deal with this another kind of Box is introduced: A DecodedBox contains a
+thunk for a decoded stack frame or the closure for the decoded stack frame
+itself. I.e. we're not boxing the closure, but the ghc-heap representation of
+it.
+
+Heap-represented closures referenced by stack frames are boxed the usual way,
+with a Box that contains a pointer to the closure.
+
+Technical details
+=================
+
+- All access to StgStack/StackSnapshot# closures is made through Cmm code. This
+  keeps the closure from being moved by the garbage collector during the
+  operation.
+
+- As StgStacks are mainly used in Cmm and C code, much of the decoding logic is
+  implemented in Cmm and C. It's just easier to reuse existing helper macros and
+  functions, than reinventing them in Haskell.
+
+- Offsets and sizes of closures are imported from DerivedConstants.h via HSC.
+  This keeps the code very portable.
+-}
+
 foreign import prim "derefStackWordzh" derefStackWord# :: StackSnapshot# -> Word# -> Word#
 
 derefStackWord :: StackFrameIter -> Word
@@ -46,12 +105,6 @@ foreign import prim "getUpdateFrameTypezh" getUpdateFrameType# :: StackSnapshot#
 getUpdateFrameType :: StackFrameIter -> UpdateFrameType
 getUpdateFrameType (StackFrameIter {..}) = (toEnum . fromInteger . toInteger) (W# (getUpdateFrameType# stackSnapshot# (wordOffsetToWord# index)))
 
--- TODO: This can be simplified if the offset is always full words
-foreign import prim "unpackClosureReferencedByFramezh" unpackClosureReferencedByFrame# :: Word# -> StackSnapshot# -> Word# -> (# Addr#, ByteArray#, Array# b #)
-
-unpackClosureReferencedByFrame :: WordOffset -> StackSnapshot# -> WordOffset -> (# Addr#, ByteArray#, Array# b #)
-unpackClosureReferencedByFrame wo1 ss# wo2 = unpackClosureReferencedByFrame# (wordOffsetToWord# wo1) ss# (wordOffsetToWord# wo2)
-
 foreign import prim "getUnderflowFrameNextChunkzh" getUnderflowFrameNextChunk# :: StackSnapshot# -> Word# -> StackSnapshot#
 
 getUnderflowFrameNextChunk :: StackFrameIter -> StackSnapshot
@@ -102,9 +155,9 @@ data StackFrameIter = StackFrameIter
     index :: WordOffset
   }
 
--- TODO: Remove this instance (debug only)
-instance Show StackFrameIter where
-  show (StackFrameIter {..}) = "StackFrameIter " ++ "(StackSnapshot _" ++ " " ++ show index
+-- -- TODO: Remove this instance (debug only)
+-- instance Show StackFrameIter where
+--   show (StackFrameIter {..}) = "StackFrameIter " ++ "(StackSnapshot _" ++ " " ++ show index
 
 -- | Get an interator starting with the top-most stack frame
 stackHead :: StackSnapshot -> StackFrameIter
@@ -125,7 +178,6 @@ data BitmapEntry = BitmapEntry
   { closureFrame :: StackFrameIter,
     isPrimitive :: Bool
   }
-  deriving (Show)
 
 wordsToBitmapEntries :: StackFrameIter -> [Word] -> Word -> [BitmapEntry]
 wordsToBitmapEntries _ [] 0 = []


=====================================
libraries/ghc-heap/GHC/Exts/Heap/Closures.hs
=====================================
@@ -53,6 +53,7 @@ import Numeric
 
 #if MIN_VERSION_base(4,17,0)
 import GHC.Stack.CloneStack (StackSnapshot(..))
+import GHC.Exts.StackConstants
 import Unsafe.Coerce (unsafeCoerce)
 #endif
 
@@ -603,6 +604,20 @@ allClosures _ = []
 -- Includes header and payload. Does not follow pointers.
 --
 -- @since 8.10.1
--- TODO: Handle PrimitiveWordHolder
 closureSize :: Box -> Int
 closureSize (Box x) = I# (closureSize# x)
+#if MIN_VERSION_base(4,17,0)
+closureSize (DecodedBox c) = case c of
+  UpdateFrame {} -> sizeStgUpdateFrame
+  CatchFrame {} -> sizeStgCatchFrame
+  CatchStmFrame {} -> sizeStgCatchSTMFrame
+  CatchRetryFrame {} -> sizeStgCatchRetryFrame
+  AtomicallyFrame {} -> sizeStgAtomicallyFrame
+  RetSmall {..} -> sizeStgClosure + length payload
+  RetBig {..} -> sizeStgClosure + length payload
+  RetFun {..} -> sizeStgRetFunFrame + length retFunPayload
+  -- The one additional word is a pointer to the StgBCO in the closure's payload
+  RetBCO {..} -> sizeStgClosure + 1 + length bcoArgs
+  -- TODO: What to do about other closure types?
+  _ -> error "Unexpected closure type"
+#endif


=====================================
libraries/ghc-heap/GHC/Exts/StackConstants.hsc
=====================================
@@ -26,21 +26,33 @@ offsetStgCatchFrameHandler = byteOffsetToWordOffset $ (#const OFFSET_StgCatchFra
 offsetStgCatchFrameExceptionsBlocked :: WordOffset
 offsetStgCatchFrameExceptionsBlocked = byteOffsetToWordOffset $ (#const OFFSET_StgCatchFrame_exceptions_blocked) + (#size StgHeader)
 
+sizeStgCatchFrame :: Int
+sizeStgCatchFrame = bytesToWords $ (#const SIZEOF_StgCatchFrame_NoHdr) + (#size StgHeader)
+
 offsetStgCatchSTMFrameCode :: WordOffset
 offsetStgCatchSTMFrameCode = byteOffsetToWordOffset $ (#const OFFSET_StgCatchSTMFrame_code) + (#size StgHeader)
 
 offsetStgCatchSTMFrameHandler :: WordOffset
 offsetStgCatchSTMFrameHandler = byteOffsetToWordOffset $ (#const OFFSET_StgCatchSTMFrame_handler) + (#size StgHeader)
 
+sizeStgCatchSTMFrame :: Int
+sizeStgCatchSTMFrame = bytesToWords $ (#const SIZEOF_StgCatchSTMFrame_NoHdr) + (#size StgHeader)
+
 offsetStgUpdateFrameUpdatee :: WordOffset
 offsetStgUpdateFrameUpdatee = byteOffsetToWordOffset $ (#const OFFSET_StgUpdateFrame_updatee) + (#size StgHeader)
 
+sizeStgUpdateFrame :: Int
+sizeStgUpdateFrame = bytesToWords $ (#const SIZEOF_StgUpdateFrame_NoHdr) + (#size StgHeader)
+
 offsetStgAtomicallyFrameCode :: WordOffset
 offsetStgAtomicallyFrameCode = byteOffsetToWordOffset $ (#const OFFSET_StgAtomicallyFrame_code) + (#size StgHeader)
 
 offsetStgAtomicallyFrameResult :: WordOffset
 offsetStgAtomicallyFrameResult = byteOffsetToWordOffset $ (#const OFFSET_StgAtomicallyFrame_result) + (#size StgHeader)
 
+sizeStgAtomicallyFrame :: Int
+sizeStgAtomicallyFrame = bytesToWords $ (#const SIZEOF_StgAtomicallyFrame_NoHdr) + (#size StgHeader)
+
 offsetStgCatchRetryFrameRunningAltCode :: WordOffset
 offsetStgCatchRetryFrameRunningAltCode = byteOffsetToWordOffset $ (#const OFFSET_StgCatchRetryFrame_running_alt_code) + (#size StgHeader)
 
@@ -50,6 +62,9 @@ offsetStgCatchRetryFrameRunningFirstCode = byteOffsetToWordOffset $ (#const OFFS
 offsetStgCatchRetryFrameAltCode :: WordOffset
 offsetStgCatchRetryFrameAltCode = byteOffsetToWordOffset $ (#const OFFSET_StgCatchRetryFrame_alt_code) + (#size StgHeader)
 
+sizeStgCatchRetryFrame :: Int
+sizeStgCatchRetryFrame = bytesToWords $ (#const SIZEOF_StgCatchRetryFrame_NoHdr) + (#size StgHeader)
+
 offsetStgRetFunFrameSize :: WordOffset
 -- StgRetFun has no header, but only a pointer to the info table at the beginning.
 offsetStgRetFunFrameSize = byteOffsetToWordOffset $ (#const OFFSET_StgRetFun_size)
@@ -60,6 +75,9 @@ offsetStgRetFunFrameFun = byteOffsetToWordOffset $ (#const OFFSET_StgRetFun_fun)
 offsetStgRetFunFramePayload :: WordOffset
 offsetStgRetFunFramePayload = byteOffsetToWordOffset $ (#const OFFSET_StgRetFun_payload)
 
+sizeStgRetFunFrame :: Int
+sizeStgRetFunFrame = bytesToWords (#const SIZEOF_StgRetFun)
+
 offsetStgBCOFrameInstrs :: ByteOffset
 offsetStgBCOFrameInstrs = (#const OFFSET_StgBCO_instrs) + (#size StgHeader)
 
@@ -78,12 +96,20 @@ offsetStgBCOFrameSize = (#const OFFSET_StgBCO_size) + (#size StgHeader)
 offsetStgClosurePayload :: WordOffset
 offsetStgClosurePayload = byteOffsetToWordOffset $ (#const OFFSET_StgClosure_payload) + (#size StgHeader)
 
+sizeStgClosure :: Int
+sizeStgClosure = bytesToWords (#size StgHeader)
+
 byteOffsetToWordOffset :: ByteOffset -> WordOffset
-byteOffsetToWordOffset bo = if bo `mod` bytesInWord == 0 then
-                              fromIntegral $ bo `div` bytesInWord
-                            else
-                              error "Unexpected struct alignment!"
-  where
-        bytesInWord = (#const SIZEOF_VOID_P)
+byteOffsetToWordOffset = WordOffset . bytesToWords . fromInteger . toInteger
+
+bytesToWords :: Int -> Int
+bytesToWords b =
+  if b `mod` bytesInWord == 0 then
+      fromIntegral $ b `div` bytesInWord
+    else
+      error "Unexpected struct alignment!"
+
+bytesInWord :: Int
+bytesInWord = (#const SIZEOF_VOID_P)
 
 #endif


=====================================
libraries/ghc-heap/cbits/Stack.cmm
=====================================
@@ -120,14 +120,6 @@ getRetFunLargeBitmapzh(P_ stack, W_ offsetWords){
   return (stgArrBytes, size);
 }
 
-unpackClosureReferencedByFramezh(W_ offsetWordsInFrame, P_ stack, W_ offsetWordsBase){
-  P_ closurePtrAddr, closurePtr;
-  closurePtrAddr = (StgStack_sp(stack) + WDS(offsetWordsBase) + WDS(offsetWordsInFrame));
-  closurePtr = P_[closurePtrAddr];
-  ASSERT(LOOKS_LIKE_CLOSURE_PTR(closurePtr));
-  jump stg_unpackClosurezh(closurePtr);
-}
-
 getUpdateFrameTypezh(P_ stack, W_ offsetWords){
   P_ c;
   c = StgStack_sp(stack) + WDS(offsetWords);


=====================================
libraries/ghc-heap/tests/stack_misc_closures.hs
=====================================
@@ -62,8 +62,8 @@ foreign import ccall "maxSmallBitmapBits" maxSmallBitmapBits_c :: Word
 
 foreign import ccall "belchStack" belchStack# :: StackSnapshot# -> IO ()
 
-{-
-__Test stategy:__
+{- Test stategy
+   ~~~~~~~~~~~~
 
 - Create @StgStack at s in C that contain two closures (as they are on stack they
 may also be called "frames"). A stop frame and the frame which's decoding should
@@ -98,6 +98,7 @@ main = do
         assertEqual knownUpdateFrameType NormalUpdateFrame
         assertEqual 1 =<< (getWordFromBlackhole =<< getBoxedClosureData updatee)
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_update_frame# 2
   test any_catch_frame# $
     \case
       CatchFrame {..} -> do
@@ -105,6 +106,7 @@ main = do
         assertEqual exceptions_blocked 1
         assertConstrClosure 1 =<< getBoxedClosureData handler
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_catch_frame# 3
   test any_catch_stm_frame# $
     \case
       CatchStmFrame {..} -> do
@@ -112,6 +114,7 @@ main = do
         assertConstrClosure 1 =<< getBoxedClosureData catchFrameCode
         assertConstrClosure 2 =<< getBoxedClosureData handler
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_catch_stm_frame# 3
   test any_catch_retry_frame# $
     \case
       CatchRetryFrame {..} -> do
@@ -120,6 +123,7 @@ main = do
         assertConstrClosure 1 =<< getBoxedClosureData first_code
         assertConstrClosure 2 =<< getBoxedClosureData alt_code
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_catch_retry_frame# 4
   test any_atomically_frame# $
     \case
       AtomicallyFrame {..} -> do
@@ -127,6 +131,7 @@ main = do
         assertConstrClosure 1 =<< getBoxedClosureData atomicallyFrameCode
         assertConstrClosure 2 =<< getBoxedClosureData result
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_atomically_frame# 3
   -- TODO: Test for UnderflowFrame once it points to a Box payload
   test any_ret_small_prim_frame# $
     \case
@@ -137,6 +142,7 @@ main = do
         assertEqual (length pCs) 1
         assertUnknownTypeWordSizedPrimitive 1 (head pCs)
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_ret_small_prim_frame# 2
   test any_ret_small_closure_frame# $
     \case
       RetSmall {..} -> do
@@ -146,6 +152,7 @@ main = do
         assertEqual (length pCs) 1
         assertConstrClosure 1 (head pCs)
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_ret_small_closure_frame# 2
   test any_ret_small_closures_frame# $
     \case
       RetSmall {..} -> do
@@ -156,6 +163,7 @@ main = do
         let wds = map getWordFromConstr01 pCs
         assertEqual wds [1 .. 58]
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_ret_small_closures_frame# (1 + fromIntegral maxSmallBitmapBits_c)
   test any_ret_small_prims_frame# $
     \case
       RetSmall {..} -> do
@@ -166,6 +174,7 @@ main = do
         let wds = map getWordFromUnknownTypeWordSizedPrimitive pCs
         assertEqual wds [1 .. 58]
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_ret_small_prims_frame# (1 + fromIntegral maxSmallBitmapBits_c)
   test any_ret_big_prims_min_frame# $
     \case
       RetBig {..} -> do
@@ -175,15 +184,7 @@ main = do
         let wds = map getWordFromUnknownTypeWordSizedPrimitive pCs
         assertEqual wds [1 .. 59]
       e -> error $ "Wrong closure type: " ++ show e
-  test any_ret_big_prims_min_frame# $
-    \case
-      RetBig {..} -> do
-        assertEqual (tipe info) RET_BIG
-        pCs <- mapM getBoxedClosureData payload
-        assertEqual (length pCs) 59
-        let wds = map getWordFromUnknownTypeWordSizedPrimitive pCs
-        assertEqual wds [1 .. 59]
-      e -> error $ "Wrong closure type: " ++ show e
+  testSize any_ret_big_prims_min_frame# (minBigBitmapBits + 1)
   test any_ret_big_closures_min_frame# $
     \case
       RetBig {..} -> do
@@ -193,15 +194,18 @@ main = do
         let wds = map getWordFromConstr01 pCs
         assertEqual wds [1 .. 59]
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_ret_big_closures_min_frame# (minBigBitmapBits + 1)
   test any_ret_big_closures_two_words_frame# $
     \case
       RetBig {..} -> do
         assertEqual (tipe info) RET_BIG
         pCs <- mapM getBoxedClosureData payload
-        assertEqual (length pCs) 65
+        let closureCount = 64 + 1
+        assertEqual (length pCs) closureCount
         let wds = map getWordFromConstr01 pCs
-        assertEqual wds [1 .. 65]
+        assertEqual wds [1 .. (fromIntegral closureCount)]
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_ret_big_closures_two_words_frame# (64 + 1 + 1)
   test any_ret_fun_arg_n_prim_framezh# $
     \case
       RetFun {..} -> do
@@ -232,6 +236,7 @@ main = do
         let wds = map getWordFromConstr01 pCs
         assertEqual wds [1 .. 9]
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_ret_fun_arg_gen_framezh# (3 + 9)
   test any_ret_fun_arg_gen_big_framezh# $
     \case
       RetFun {..} -> do
@@ -249,6 +254,7 @@ main = do
         assertEqual (length pCs) 59
         let wds = map getWordFromConstr01 pCs
         assertEqual wds [1 .. 59]
+  testSize any_ret_fun_arg_gen_big_framezh# (3 + 59)
   test any_bco_frame# $
     \case
       RetBCO {..} -> do
@@ -271,6 +277,7 @@ main = do
               ] bitmap
           e -> error $ "Wrong closure type: " ++ show e
       e -> error $ "Wrong closure type: " ++ show e
+  testSize any_bco_frame# 3
 
 type SetupFunction = State# RealWorld -> (# State# RealWorld, StackSnapshot# #)
 
@@ -300,6 +307,7 @@ test setup assertion = do
     assert sn stack = do
       assertStackInvariants sn stack
       assertEqual (length stack) 2
+      -- TODO: Isn't this also a stack invariant? (assertStackInvariants)
       assertThat
         "Last frame is stop frame"
         ( \case
@@ -309,6 +317,12 @@ test setup assertion = do
         (last stack)
       assertion $ head stack
 
+testSize :: HasCallStack => SetupFunction -> Int -> IO ()
+testSize setup expectedSize = do
+  sn <- getStackSnapshot setup
+  (SimpleStack boxedFrames) <- decodeStack sn
+  assertEqual expectedSize (closureSize (head boxedFrames))
+
 -- | Get a `StackSnapshot` from test setup
 --
 -- This function mostly resembles `cloneStack`. Though, it doesn't clone, but
@@ -375,6 +389,9 @@ assertUnknownTypeWordSizedPrimitive w c = case c of
 unboxSingletonTuple :: (# StackSnapshot# #) -> StackSnapshot#
 unboxSingletonTuple (# s# #) = s#
 
+minBigBitmapBits :: Num a => a
+minBigBitmapBits = 1 + fromIntegral maxSmallBitmapBits_c
+
 -- | A function with 59 arguments
 --
 -- A small bitmap has @64 - 6 = 58@ entries on 64bit machines. On 32bit machines



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/fc7e050bf60aa355f5d70cfd4608a317004391d6...2a75318f497dcf39956bc5c83c52886c234fede4

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/fc7e050bf60aa355f5d70cfd4608a317004391d6...2a75318f497dcf39956bc5c83c52886c234fede4
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230128/205baaaa/attachment-0001.html>


More information about the ghc-commits mailing list