[Git][ghc/ghc][wip/T22264] 6 commits: rts: Drop racy assertion
Ben Gamari (@bgamari)
gitlab at gitlab.haskell.org
Tue Dec 6 12:29:19 UTC 2022
Ben Gamari pushed to branch wip/T22264 at Glasgow Haskell Compiler / GHC
Commits:
b57fd39a by Ben Gamari at 2022-12-06T07:29:00-05:00
rts: Drop racy assertion
0e274c39bf836d5bb846f5fa08649c75f85326ac added an assertion in
`dirty_MUT_VAR` checking that the MUT_VAR being dirtied was clean.
However, this isn't necessarily the case since another thread may have
raced us to dirty the object.
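
A standalone sketch of the race in question (illustrative only, not RTS code; all names here are invented for the example). Two threads can both observe the object as clean before either one records it as dirty, so an assertion that the object "must still be clean" inside the dirty path can fire spuriously:

    // Not RTS code: a minimal model of two mutator threads racing to dirty
    // the same object. Both may load State::Clean before either store lands.
    #include <atomic>
    #include <cassert>
    #include <thread>

    enum class State { Clean, Dirty };
    static std::atomic<State> header{State::Clean};

    static void dirty_object() {
        // An assert(header.load() == State::Clean) at this point could fail:
        // the other thread may have stored Dirty between our caller's check
        // and this store. Dirtying is idempotent, so we just store.
        header.store(State::Dirty, std::memory_order_relaxed);
    }

    int main() {
        std::thread t1(dirty_object);
        std::thread t2(dirty_object);
        t1.join();
        t2.join();
        assert(header.load() == State::Dirty);
        return 0;
    }
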
- - - - -
4fc3d778 by Ben Gamari at 2022-12-06T07:29:00-05:00
rts: Drop SM spinlock
- - - - -
8334376c by Ben Gamari at 2022-12-06T07:29:00-05:00
rts: C++ typing issues
Make the RTS compilable with a C++ compiler by inserting necessary
casts.
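
For context, a minimal standalone sketch (not part of the patch) of why such casts are needed: C permits implicit conversion from an integer type to an enum, whereas C++ rejects it, which is what the getRecentActivity() hunk below works around. The enum values mirror the RTS's RecentActivity, and load_word stands in for RELAXED_LOAD_ALWAYS; both are assumptions made for this example.

    #include <stdint.h>

    // Shaped after the RTS's RecentActivity enum; values are illustrative.
    enum RecentActivity { ACTIVITY_YES, ACTIVITY_MAYBE_NO, ACTIVITY_INACTIVE, ACTIVITY_DONE_GC };

    // Stand-in for RELAXED_LOAD_ALWAYS, which yields an integer-typed value.
    static uint32_t load_word(const uint32_t *p) { return *p; }

    static enum RecentActivity get_recent_activity(const uint32_t *slot)
    {
        // return load_word(slot);                    // accepted by C, rejected by a C++ compiler
        return (enum RecentActivity) load_word(slot); // explicit cast compiles either way
    }

    int main(void)
    {
        uint32_t slot = ACTIVITY_DONE_GC;
        return get_recent_activity(&slot) == ACTIVITY_DONE_GC ? 0 : 1;
    }
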
- - - - -
d4209888 by Ben Gamari at 2022-12-06T07:29:00-05:00
CheckGC
- - - - -
9828dd44 by Ben Gamari at 2022-12-06T07:29:00-05:00
setNumCapabilities
- - - - -
14f1045d by Ben Gamari at 2022-12-06T07:29:00-05:00
nonmoving: Disable shortcutting
- - - - -
26 changed files:
- libraries/base/GHC/Conc/Sync.hs
- rts/Capability.h
- rts/CheckUnload.c
- rts/Schedule.c
- rts/Schedule.h
- rts/Sparks.h
- rts/Stats.c
- rts/Trace.h
- rts/include/rts/Threads.h
- rts/include/rts/storage/MBlock.h
- rts/rts.cabal.in
- rts/sm/BlockAlloc.c
- + rts/sm/CheckGc.cpp
- rts/sm/GC.c
- rts/sm/GC.h
- rts/sm/GCUtils.c
- rts/sm/GCUtils.h
- rts/sm/HeapAlloc.h
- rts/sm/MarkStack.h
- rts/sm/NonMoving.c
- rts/sm/NonMovingMark.c
- rts/sm/NonMovingMark.h
- rts/sm/NonMovingShortcut.c
- rts/sm/Sanity.h
- rts/sm/Scav.c
- rts/sm/Storage.c
Changes:
=====================================
libraries/base/GHC/Conc/Sync.hs
=====================================
@@ -380,10 +380,15 @@ to avoid contention with other processes in the machine.
setNumCapabilities :: Int -> IO ()
setNumCapabilities i
| i <= 0 = failIO $ "setNumCapabilities: Capability count ("++show i++") must be positive"
- | otherwise = c_setNumCapabilities (fromIntegral i)
+ | otherwise = do
+ ret <- c_setNumCapabilities (fromIntegral i)
+ case ret of
+ 0 -> return ()
+ 1 -> yield >> setNumCapabilities i
+ _ -> failIO $ "setNumCapabilities: Unknown result"
foreign import ccall safe "setNumCapabilities"
- c_setNumCapabilities :: CUInt -> IO ()
+ c_setNumCapabilities :: CUInt -> IO CInt
-- | Returns the number of CPUs that the machine has
--
=====================================
rts/Capability.h
=====================================
@@ -20,10 +20,10 @@
#pragma once
-#include "sm/GC.h" // for evac_fn
#include "Task.h"
#include "Sparks.h"
-#include "sm/NonMovingMark.h" // for MarkQueue
+#include "sm/GC.h" // for evac_fn
+#include "sm/NonMovingMark.h" // for UpdRemSet
#include "IOManager.h" // for CapIOManager
#include "BeginPrivate.h"
=====================================
rts/CheckUnload.c
=====================================
@@ -15,6 +15,7 @@
#include "Hash.h"
#include "LinkerInternals.h"
#include "CheckUnload.h"
+#include "sm/HeapAlloc.h"
#include "sm/Storage.h"
#include "sm/GCThread.h"
#include "sm/HeapUtils.h"
=====================================
rts/Schedule.c
=====================================
@@ -2225,9 +2225,12 @@ forkProcess(HsStablePtr *entry
* Finally we release the Capabilities we are holding, and start
* worker Tasks on the new Capabilities we created.
*
+ * One wrinkle here is that we must also ensure that we don't change the
+ * capability count while the nonmoving mark thread is active.
+ *
* ------------------------------------------------------------------------- */
-void
+int
setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
{
#if !defined(THREADED_RTS)
@@ -2247,11 +2250,15 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
Capability *old_capabilities = NULL;
uint32_t old_n_capabilities = n_capabilities;
+ if (RELAXED_LOAD(&concurrent_coll_running)) {
+ return 1;
+ }
+
if (new_n_capabilities == enabled_capabilities) {
- return;
+ return 0;
} else if (new_n_capabilities <= 0) {
errorBelch("setNumCapabilities: Capability count must be positive");
- return;
+ return 1;
}
@@ -2353,6 +2360,7 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
rts_unlock(cap);
+ return 0;
#endif // THREADED_RTS
}
=====================================
rts/Schedule.h
=====================================
@@ -131,7 +131,7 @@ setRecentActivity(enum RecentActivity new_value)
INLINE_HEADER enum RecentActivity
getRecentActivity(void)
{
- return RELAXED_LOAD_ALWAYS(&recent_activity);
+ return (enum RecentActivity) RELAXED_LOAD_ALWAYS(&recent_activity);
}
extern bool heap_overflow;
=====================================
rts/Sparks.h
=====================================
@@ -8,6 +8,7 @@
#pragma once
+#include "sm/GC.h" // for evac_fn
#include "WSDeque.h"
#include "BeginPrivate.h"
=====================================
rts/Stats.c
=====================================
@@ -963,11 +963,6 @@ static void report_summary(const RTSSummaryStats* sum)
, col_width[1], "SpinLock"
, col_width[2], "Spins"
, col_width[3], "Yields");
- statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
- , col_width[0], ""
- , col_width[1], "gc_alloc_block_sync"
- , col_width[2], gc_alloc_block_sync.spin
- , col_width[3], gc_alloc_block_sync.yield);
statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
, col_width[0], ""
, col_width[1], "whitehole_gc"
@@ -1142,10 +1137,6 @@ static void report_machine_readable (const RTSSummaryStats * sum)
// next, internal counters
#if defined(PROF_SPIN)
- MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
- MR_STAT("gc_alloc_block_sync_yield", FMT_Word64,
- gc_alloc_block_sync.yield);
- MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
MR_STAT("waitForGcThreads_spin", FMT_Word64, waitForGcThreads_spin);
MR_STAT("waitForGcThreads_yield", FMT_Word64,
waitForGcThreads_yield);
@@ -1572,9 +1563,6 @@ SpinLock:
Not all of these are actual SpinLocks, see the details below.
Actual SpinLocks:
-* gc_alloc_block:
- This SpinLock protects the block allocator and free list manager. See
- BlockAlloc.c.
* gen[g].sync:
These SpinLocks, one per generation, protect the generations[g] data
structure during garbage collection.
=====================================
rts/Trace.h
=====================================
@@ -33,7 +33,9 @@ void tracingAddCapapilities (uint32_t from, uint32_t to);
#endif /* TRACING */
typedef StgWord32 CapsetID;
+#if !defined(__cplusplus)
typedef StgWord16 CapsetType;
+#endif
enum CapsetType { CapsetTypeCustom = CAPSET_TYPE_CUSTOM,
CapsetTypeOsProcess = CAPSET_TYPE_OSPROCESS,
CapsetTypeClockdomain = CAPSET_TYPE_CLOCKDOMAIN };
=====================================
rts/include/rts/Threads.h
=====================================
@@ -85,4 +85,4 @@ extern Capability MainCapability;
// Change the number of capabilities (only supports increasing the
// current value at the moment).
//
-extern void setNumCapabilities (uint32_t new_);
+extern int setNumCapabilities (uint32_t new_);
=====================================
rts/include/rts/storage/MBlock.h
=====================================
@@ -26,7 +26,3 @@ extern void freeAllMBlocks(void);
extern void *getFirstMBlock(void **state);
extern void *getNextMBlock(void **state, void *mblock);
-#if defined(THREADED_RTS)
-// needed for HEAP_ALLOCED below
-extern SpinLock gc_alloc_block_sync;
-#endif
=====================================
rts/rts.cabal.in
=====================================
@@ -619,6 +619,7 @@ library
linker/elf_tlsgd.c
linker/elf_util.c
sm/BlockAlloc.c
+ sm/CheckGc.cpp
sm/CNF.c
sm/Compact.c
sm/Evac.c
=====================================
rts/sm/BlockAlloc.c
=====================================
@@ -438,6 +438,7 @@ alloc_mega_group (uint32_t node, StgWord mblocks)
bdescr *best, *bd;
StgWord n;
+ ASSERT_SM_LOCK();
n = MBLOCK_GROUP_BLOCKS(mblocks);
if(defer_mblock_frees)
@@ -496,6 +497,7 @@ allocGroupOnNode (uint32_t node, W_ n)
bdescr *bd, *rem;
StgWord ln;
+ ASSERT_SM_LOCK();
if (n == 0) barf("allocGroup: requested zero blocks");
if (n >= BLOCKS_PER_MBLOCK)
@@ -709,6 +711,7 @@ bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max)
{
bdescr *bd;
StgWord ln, lnmax;
+ ASSERT_SM_LOCK();
if (min >= BLOCKS_PER_MBLOCK) {
return allocGroupOnNode(node,max);
@@ -933,8 +936,7 @@ freeGroup(bdescr *p)
StgWord ln;
uint32_t node;
- // not true in multithreaded GC:
- // ASSERT_SM_LOCK();
+ ASSERT_SM_LOCK();
ASSERT(RELAXED_LOAD(&p->free) != (P_)-1);
=====================================
rts/sm/CheckGc.cpp
=====================================
@@ -0,0 +1,973 @@
+extern "C" {
+#include "Rts.h"
+#include "StableName.h" /* for FOR_EACH_STABLE_NAME */
+#include "StablePtr.h" /* for markStablePtrTable */
+#include "Schedule.h" /* for markScheduler */
+#include "Capability.h"
+#include "HeapAlloc.h"
+#include "STM.h"
+}
+
+#include <iostream>
+#include <fstream>
+#include <set>
+#include <vector>
+#include <queue>
+#include <unordered_set>
+
+class TaggedClosurePtr {
+ StgClosure *ptr;
+public:
+ TaggedClosurePtr(StgClosure* ptr) : ptr(ptr) {}
+ TaggedClosurePtr(StgClosure* ptr, uint8_t tag) : TaggedClosurePtr(TAG_CLOSURE(tag, ptr)) {}
+
+ StgClosure *get_tagged() const {
+ return ptr;
+ }
+ StgClosure *untag() const {
+ return UNTAG_CLOSURE(ptr);
+ }
+ uint8_t get_tag() const {
+ return (StgWord) ptr & TAG_MASK;
+ }
+
+ //inline StgClosure& operator->() { return *untag(); }
+
+ friend inline bool operator==(const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) {
+ return lhs.ptr == rhs.ptr;
+ }
+ friend inline bool operator!=(const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) { return !(lhs == rhs); }
+ friend inline bool operator< (const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) {
+ return lhs.ptr < rhs.ptr;
+ }
+ friend inline bool operator> (const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) { return rhs < lhs; }
+ friend inline bool operator<=(const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) { return !(lhs > rhs); }
+ friend inline bool operator>=(const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) { return !(lhs < rhs); }
+};
+
+template<>
+struct std::hash<TaggedClosurePtr> {
+ std::size_t operator()(TaggedClosurePtr const& p) const noexcept {
+ return std::hash<StgClosure*>{}(p.get_tagged());
+ }
+};
+
+class HeapVisitor {
+public:
+ // Visit an SRT
+ virtual void visit_srt(StgClosure* c);
+
+ // Visit a normal closure
+ virtual void visit_closure(TaggedClosurePtr c);
+
+ virtual void visit_thunk(StgThunk *thunk, size_t n_ptrs);
+ virtual void visit_fun(StgClosure *constr, size_t n_ptrs);
+ virtual void visit_constr(StgClosure *constr, size_t n_ptrs);
+ virtual void visit_array(StgMutArrPtrs *arr);
+ virtual void visit_small_array(StgSmallMutArrPtrs *arr);
+ virtual void visit_bytearray(StgArrBytes* arr);
+
+ virtual void visit_stack(StgPtr sp, StgPtr end);
+ virtual void visit_tso(StgTSO* tso);
+ virtual void visit_weak(StgWeak* w);
+ virtual void visit_mvar(StgMVar* mvar);
+ virtual void visit_tvar(StgTVar* tvar);
+ virtual void visit_trec_header(StgTRecHeader *trec);
+ virtual void visit_trec_chunk(StgTRecChunk* tc);
+ virtual void visit_continuation(StgContinuation* tc);
+
+ virtual void visit_small_bitmap(StgClosure *const *payload, StgWord bitmap, StgWord size);
+ virtual void visit_large_bitmap(StgClosure *const *payload, const StgLargeBitmap *bitmap, StgWord size);
+ void visit_pap_payload(StgClosure *fun, StgClosure **payload, StgWord n_args);
+
+ virtual void visit_invalid(StgClosure *const c);
+};
+
+void HeapVisitor::visit_thunk(StgThunk *thunk, size_t n_ptrs)
+{
+ const StgInfoTable *info = get_itbl((StgClosure *) thunk);
+ const StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(info);
+ if (thunk_info->i.srt) {
+ StgClosure *srt = (StgClosure*) GET_SRT(thunk_info);
+ visit_srt(srt);
+ };
+ for (size_t i=0; i < n_ptrs; i++) {
+ visit_closure(thunk->payload[i]);
+ }
+}
+
+void HeapVisitor::visit_fun(StgClosure *fun, size_t n_ptrs)
+{
+ const StgInfoTable *info = get_itbl(fun);
+ const StgFunInfoTable *fun_info = itbl_to_fun_itbl(info);
+ if (fun_info->i.srt) {
+ StgClosure *srt = (StgClosure*) GET_SRT(fun_info);
+ visit_srt(srt);
+ };
+ for (size_t i=0; i < n_ptrs; i++) {
+ visit_closure(fun->payload[i]);
+ }
+}
+
+void HeapVisitor::visit_constr(StgClosure *constr, size_t n_ptrs)
+{
+ for (size_t i=0; i < n_ptrs; i++) {
+ visit_closure(constr->payload[i]);
+ }
+}
+
+void HeapVisitor::visit_srt(StgClosure* c)
+{
+ visit_closure(c);
+}
+
+void HeapVisitor::visit_invalid(StgClosure *const _c)
+{
+ abort();
+}
+
+void HeapVisitor::visit_weak(StgWeak* w)
+{
+ visit_closure(w->key);
+ visit_closure(w->value);
+ visit_closure(w->finalizer);
+ visit_closure(w->cfinalizers);
+}
+
+void HeapVisitor::visit_mvar(StgMVar* mvar)
+{
+ visit_closure((StgClosure*) mvar->head);
+ visit_closure((StgClosure*) mvar->tail);
+ visit_closure(mvar->value);
+}
+
+void HeapVisitor::visit_small_array(StgSmallMutArrPtrs *arr)
+{
+ for (StgWord i=0; i < arr->ptrs; i++) {
+ visit_closure(arr->payload[i]);
+ }
+}
+
+void HeapVisitor::visit_array(StgMutArrPtrs *arr)
+{
+ for (StgWord i=0; i < arr->ptrs; i++) {
+ visit_closure(arr->payload[i]);
+ }
+}
+
+void HeapVisitor::visit_bytearray(StgArrBytes* _arr) { }
+
+void HeapVisitor::visit_tso(StgTSO *tso)
+{
+ if (tso->bound != NULL) {
+
+ visit_closure((StgClosure*) tso->bound->tso);
+ }
+ if (tso->label != NULL) {
+ visit_closure({(StgClosure*) tso->label});
+ }
+ visit_closure((StgClosure*) tso->blocked_exceptions);
+ visit_closure((StgClosure*) tso->bq);
+ visit_closure((StgClosure*) tso->stackobj);
+ visit_closure((StgClosure*) tso->_link);
+ visit_trec_header(tso->trec);
+
+ switch (tso->why_blocked) {
+ case BlockedOnMVar:
+ case BlockedOnMVarRead:
+ case BlockedOnBlackHole:
+ case BlockedOnMsgThrowTo:
+ case NotBlocked:
+ visit_closure(tso->block_info.closure);
+ break;
+ default:
+ break;
+ }
+}
+
+void HeapVisitor::visit_continuation(StgContinuation *cont)
+{
+ visit_stack(cont->stack, cont->stack + cont->stack_size);
+}
+
+void HeapVisitor::visit_tvar(StgTVar *tvar)
+{
+ visit_closure(tvar->current_value);
+ visit_closure((StgClosure*) tvar->first_watch_queue_entry);
+}
+
+void HeapVisitor::visit_trec_header(StgTRecHeader *trec)
+{
+ if (trec == NO_TREC) {
+ return;
+ }
+ visit_trec_chunk(trec->current_chunk);
+ visit_closure((StgClosure*) trec->enclosing_trec);
+}
+
+void HeapVisitor::visit_trec_chunk(StgTRecChunk *tc)
+{
+ if (tc->prev_chunk != END_STM_CHUNK_LIST) {
+ visit_closure((StgClosure*) tc->prev_chunk);
+ }
+
+ for (uint32_t i = 0; i < tc->next_entry_idx; i++) {
+ TRecEntry *e = &tc->entries[i];
+ visit_closure((StgClosure*)e->tvar);
+ visit_closure(e->expected_value);
+ visit_closure(e->new_value);
+ }
+}
+
+void HeapVisitor::visit_stack(StgPtr p, StgPtr stack_end)
+{
+ while (p < stack_end) {
+ const StgRetInfoTable* info = get_ret_itbl((StgClosure *) p);
+
+ auto add_srt_ptrs = [&] () {
+ if (info->i.srt) {
+ StgClosure *srt = (StgClosure*)GET_SRT(info);
+ visit_srt(srt);
+ }
+ };
+
+ switch (info->i.type) {
+
+ case UPDATE_FRAME:
+ {
+ StgUpdateFrame *frame = (StgUpdateFrame *)p;
+ visit_closure(frame->updatee);
+ p += sizeofW(StgUpdateFrame);
+ continue;
+ }
+
+ case CATCH_STM_FRAME:
+ case CATCH_RETRY_FRAME:
+ case ATOMICALLY_FRAME:
+ case UNDERFLOW_FRAME:
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case RET_SMALL:
+ {
+ StgWord bitmap = BITMAP_BITS(info->i.layout.bitmap);
+ StgWord size = BITMAP_SIZE(info->i.layout.bitmap);
+ // NOTE: the payload starts immediately after the info-ptr, we
+ // don't have an StgHeader in the same sense as a heap closure.
+ p++;
+ visit_small_bitmap((StgClosure**) p, bitmap, size);
+ p += size;
+ add_srt_ptrs();
+ continue;
+ }
+
+ case RET_BCO:
+ {
+ p++;
+ StgBCO *bco = (StgBCO *)*p;
+ visit_closure((StgClosure *) bco);
+ p++;
+ StgWord size = BCO_BITMAP_SIZE(bco);
+ visit_large_bitmap((StgClosure**) p, BCO_BITMAP(bco), size);
+ p += size;
+ continue;
+ }
+
+ case RET_BIG:
+ {
+ StgWord size = GET_LARGE_BITMAP(&info->i)->size;
+ p++;
+ visit_large_bitmap((StgClosure**) p, GET_LARGE_BITMAP(&info->i), size);
+ p += size;
+ // and don't forget to follow the SRT
+ add_srt_ptrs();
+ break;
+ }
+
+ case RET_FUN:
+ {
+ StgRetFun *ret_fun = (StgRetFun *)p;
+ visit_closure(ret_fun->fun);
+
+ const StgFunInfoTable *fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ {
+ StgWord bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ StgWord size = BITMAP_SIZE(fun_info->f.b.bitmap);
+ visit_small_bitmap(ret_fun->payload, bitmap, size);
+ p = (StgPtr) ((StgClosure **) &ret_fun->payload + size);
+ break;
+ }
+ case ARG_GEN_BIG:
+ {
+ StgWord size = GET_FUN_LARGE_BITMAP(fun_info)->size;
+ visit_large_bitmap(ret_fun->payload, GET_FUN_LARGE_BITMAP(fun_info), size);
+ p = (StgPtr) ((StgClosure **) &ret_fun->payload + size);
+ break;
+ }
+ default:
+ {
+ StgWord bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ StgWord size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
+ visit_small_bitmap(ret_fun->payload, bitmap, size);
+ p = (StgPtr) ((StgClosure **) &ret_fun->payload + size);
+ break;
+ }
+ }
+ add_srt_ptrs();
+ break;
+ }
+ default:
+ abort();
+ }
+ }
+}
+
+void HeapVisitor::visit_small_bitmap(
+ StgClosure *const *payload,
+ StgWord bitmap,
+ StgWord size)
+{
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ visit_closure(*payload);
+ }
+ payload++;
+ bitmap = bitmap >> 1;
+ size--;
+ }
+}
+
+void HeapVisitor::visit_large_bitmap(
+ StgClosure *const * payload,
+ const StgLargeBitmap *large_bitmap,
+ StgWord size)
+{
+ // Bitmap may have more bits than `size` when scavenging PAP payloads. See
+ // comments around StgPAP.
+ ASSERT(large_bitmap->size >= size);
+
+ uint32_t b = 0;
+ for (uint32_t i = 0; i < size; b++) {
+ StgWord bitmap = large_bitmap->bitmap[b];
+ uint32_t j = stg_min(size-i, BITS_IN(W_));
+ i += j;
+ for (; j > 0; j--, payload++) {
+ if ((bitmap & 1) == 0) {
+ visit_closure(*payload);
+ }
+ bitmap = bitmap >> 1;
+ }
+ }
+}
+
+void HeapVisitor::visit_pap_payload(
+ StgClosure *fun,
+ StgClosure **payload,
+ StgWord n_args)
+{
+ fun = UNTAG_CLOSURE(fun);
+ const StgFunInfoTable *fun_info = get_fun_itbl(fun);
+ ASSERT(fun_info->i.type != PAP);
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ visit_small_bitmap(payload, BITMAP_BITS(fun_info->f.b.bitmap), n_args);
+ break;
+ case ARG_GEN_BIG:
+ visit_large_bitmap(payload, GET_FUN_LARGE_BITMAP(fun_info), n_args);
+ break;
+ case ARG_BCO:
+ visit_large_bitmap(payload, BCO_BITMAP(fun), n_args);
+ break;
+ default:
+ {
+ StgWord bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ visit_small_bitmap(payload, bitmap, n_args);
+ }
+ }
+}
+
+void HeapVisitor::visit_closure(TaggedClosurePtr tagged)
+{
+ StgClosure *c = tagged.untag();
+ if (c->header.info == (StgInfoTable *) 0xaaaaaaaaaaaaaaaa || !LOOKS_LIKE_CLOSURE_PTR(c)) {
+ visit_invalid(c);
+ return;
+ }
+
+ const StgInfoTable *info = get_itbl(c);
+ auto generic_closure = [&] () {
+ for (StgClosure **p = &c->payload[0]; p < &c->payload[info->layout.payload.ptrs]; p++) {
+ visit_closure(*p);
+ }
+ };
+
+ switch (info->type) {
+
+ case MVAR_CLEAN:
+ case MVAR_DIRTY:
+ visit_mvar((StgMVar *) c);
+ break;
+ case TVAR:
+ visit_tvar((StgTVar *) c);
+ break;
+
+ case IND:
+ case IND_STATIC:
+ visit_closure(((StgInd *) c)->indirectee);
+ break;
+
+ case THUNK_0_1:
+ case THUNK_0_2:
+ visit_thunk((StgThunk*) c, 0);
+ break;
+ case THUNK_1_1:
+ case THUNK_1_0:
+ visit_thunk((StgThunk*) c, 1);
+ break;
+ case THUNK_2_0:
+ visit_thunk((StgThunk*) c, 2);
+ break;
+ case THUNK:
+ visit_thunk((StgThunk*) c, info->layout.payload.ptrs);
+ break;
+ case THUNK_STATIC:
+ visit_thunk((StgThunk*) c, 0);
+ break;
+
+ case FUN_1_0:
+ visit_fun(c, 1);
+ break;
+ case FUN_0_1:
+ case FUN_0_2:
+ visit_fun(c, 0);
+ break;
+ case FUN_1_1:
+ visit_fun(c, 1);
+ break;
+ case FUN_2_0:
+ visit_fun(c, 2);
+ break;
+ case FUN:
+ case FUN_STATIC:
+ visit_fun(c, info->layout.payload.ptrs);
+ break;
+
+ case CONSTR_0_1:
+ case CONSTR_0_2:
+ visit_constr(c, 0);
+ break;
+ case CONSTR_1_0:
+ case CONSTR_1_1:
+ visit_constr(c, 1);
+ break;
+ case CONSTR_2_0:
+ visit_constr(c, 2);
+ break;
+ case CONSTR:
+ case CONSTR_NOCAF:
+ visit_constr(c, info->layout.payload.ptrs);
+ break;
+
+ case PRIM:
+ generic_closure();
+ break;
+ case WEAK:
+ visit_weak((StgWeak*) c);
+ break;
+ case BCO:
+ {
+ StgBCO *bco = (StgBCO *)c;
+ visit_closure((StgClosure*) bco->instrs);
+ visit_closure((StgClosure*) bco->literals);
+ visit_closure((StgClosure*) bco->ptrs);
+ break;
+ }
+ case BLACKHOLE:
+ {
+ StgInd *ind = (StgInd*) c;
+ visit_closure(ind->indirectee);
+ break;
+ }
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ {
+ StgMutVar *mv = (StgMutVar*) c;
+ visit_closure(mv->var);
+ break;
+ }
+ case BLOCKING_QUEUE:
+ {
+ StgBlockingQueue *bq = (StgBlockingQueue *)c;
+ visit_closure((StgClosure*) bq->bh);
+ visit_closure((StgClosure*) bq->owner);
+ visit_closure((StgClosure*) bq->queue);
+ visit_closure((StgClosure*) bq->link);
+ break;
+ }
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)c;
+ visit_closure(s->selectee);
+ break;
+ }
+ case AP_STACK:
+ {
+ StgAP_STACK *ap = (StgAP_STACK *)c;
+ visit_closure(ap->fun);
+ visit_stack((StgPtr) ap->payload, (StgPtr) ap->payload + ap->size);
+ break;
+ }
+ case PAP:
+ {
+ StgPAP *pap = (StgPAP*) c;
+ visit_closure(pap->fun);
+ visit_pap_payload(pap->fun, (StgClosure**) pap->payload, pap->n_args);
+ break;
+ }
+ case AP:
+ {
+ StgAP *ap = (StgAP*) c;
+ visit_closure(ap->fun);
+ visit_pap_payload(ap->fun, (StgClosure**) ap->payload, ap->n_args);
+ break;
+ }
+ case ARR_WORDS:
+ visit_bytearray((StgArrBytes *) c);
+ break;
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ visit_array((StgMutArrPtrs *) c);
+ break;
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+ visit_small_array((StgSmallMutArrPtrs *) c);
+ break;
+ case TSO:
+ visit_tso((StgTSO *) c);
+ break;
+ case STACK:
+ {
+ StgStack *stack = (StgStack *) c;
+ visit_stack(stack->sp, stack->stack + stack->stack_size);
+ break;
+ }
+ case MUT_PRIM:
+ generic_closure();
+ break;
+ case TREC_CHUNK:
+ visit_trec_chunk((StgTRecChunk *) c);
+ break;
+ case CONTINUATION:
+ visit_continuation((StgContinuation *) c);
+ break;
+ default:
+ visit_invalid(c);
+ break;
+ }
+}
+
+class PredicatedHeapVisitor : HeapVisitor {
+ bool should_visit(StgClosure *);
+
+ virtual void visit_srt(StgClosure* c) {
+ if (should_visit(c)) { HeapVisitor::visit_srt(c); }
+ }
+
+ virtual void visit_closure(TaggedClosurePtr c) {
+ if (should_visit(c.untag())) { HeapVisitor::visit_closure(c); }
+ }
+};
+
+// Collect direct pointers
+struct CollectPointers : HeapVisitor {
+ std::set<TaggedClosurePtr> accum;
+ bool invalid;
+ CollectPointers() : accum(), invalid(false) {}
+ void visit_root(StgClosure *c) {
+ HeapVisitor::visit_closure(c);
+ }
+ void visit_closure(TaggedClosurePtr c) {
+ accum.insert(c);
+ }
+ void visit_invalid(StgClosure *const _c) { invalid = true; }
+};
+
+static std::set<TaggedClosurePtr> collect_pointers(StgClosure* c)
+{
+ CollectPointers v;
+ v.visit_root(c);
+ return v.accum;
+}
+
+
+
+struct Error {
+ StgClosure *closure;
+ std::string what;
+ Error(StgClosure *closure, std::string what) : closure(closure), what(what) {}
+};
+
+static std::ostream& operator<<(std::ostream& os, const Error& err) {
+ os << std::hex << "0x" << (StgWord) err.closure << ": " << err.what << "\n";
+ return os;
+}
+
+class CheckVisitor : HeapVisitor {
+ std::vector<Error> errors;
+ uint8_t tag;
+ void visit_constr(StgClosure* c) {
+ const StgInfoTable *info = get_itbl(c);
+ if (tag != 0) {
+ uint8_t constr_tag = info->srt; // zero-based
+ if (tag != std::min(TAG_MASK, constr_tag+1)) {
+ errors.push_back(Error(c, "invalid tag"));
+ }
+ }
+ }
+
+ void visit_closure(TaggedClosurePtr c) { }
+public:
+ const std::vector<Error>& get_errors() const { return errors; }
+
+ void check_closure(TaggedClosurePtr c) {
+ tag = c.get_tag();
+ HeapVisitor::visit_closure(c);
+ }
+};
+
+struct CheckGc {
+ std::queue<TaggedClosurePtr> queue;
+ std::unordered_set<TaggedClosurePtr> enqueued;
+
+ void enqueue(TaggedClosurePtr ptr) {
+ ASSERT(ptr != NULL);
+ if (!is_enqueued(ptr)) {
+ queue.push(ptr);
+ enqueued.insert(ptr);
+ }
+ }
+
+ bool finished() {
+ return queue.empty();
+ }
+
+ TaggedClosurePtr pop() {
+ TaggedClosurePtr p = queue.front();
+ queue.pop();
+ return p;
+ }
+
+ bool is_enqueued(TaggedClosurePtr ptr) {
+ return enqueued.find(ptr) != enqueued.end();
+ }
+};
+
+static void enqueue_root(void *user_data, StgClosure **root)
+{
+ CheckGc* env = (CheckGc*) user_data;
+ env->enqueue(*root);
+}
+
+static void enqueue_roots(CheckGc& env)
+{
+ FOR_EACH_STABLE_NAME(p, if (p->sn_obj) env.enqueue(p->sn_obj););
+ markStablePtrTable(enqueue_root, &env);
+ for (uint32_t n = 0; n < getNumCapabilities(); n++) {
+ markCapability(enqueue_root, (void*) &env, getCapability(n), false/*mark sparks*/);
+ }
+ markCAFs(enqueue_root, &env);
+ markScheduler(enqueue_root, &env);
+
+ for (StgWeak *w = nonmoving_weak_ptr_list; w != NULL; w = w->link) {
+ env.enqueue((StgClosure *) w);
+ }
+
+ for (uint32_t g = 0; g <= N; g++) {
+ generation *gen = &generations[g];
+ for (StgWeak *w = gen->weak_ptr_list; w != NULL; w = RELAXED_LOAD(&w->link)) {
+ env.enqueue((StgClosure *) w);
+ }
+ }
+}
+
+extern "C" {
+void check_gc();
+}
+
+struct NodeName {
+ const StgClosure *c;
+ NodeName(const StgClosure *c) : c(c) {}
+};
+
+static std::ostream& operator<<(std::ostream& os, const NodeName& n) {
+ os << std::hex << "\"" << n.c << "\"" << std::dec;
+ return os;
+}
+
+static void dump_heap(std::ofstream& of)
+{
+ of << "digraph {\n";
+ CheckGc env;
+ enqueue_roots(env);
+ while (!env.finished()) {
+ TaggedClosurePtr tagged = env.pop();
+ StgClosure* c = tagged.untag();
+ NodeName n(c);
+ if (c->header.info == (StgInfoTable *) 0xaaaaaaaaaaaaaaaa) {
+ of << n << " [type=invalid];\n";
+ continue;
+ }
+
+ const StgInfoTable *info = get_itbl(c);
+ switch (info->type) {
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_NOCAF:
+ {
+ const StgConInfoTable *con_info = get_con_itbl(c);
+ of << n << " [type=CONSTR constr=\"" << GET_CON_DESC(con_info) << "\"];\n";
+ break;
+ }
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+ of << n << " [type=FUN];\n";
+ break;
+ case FUN_STATIC:
+ of << n << " [type=FUN_STATIC];\n";
+ break;
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_2_0:
+ of << n << " [type=THUNK];\n";
+ break;
+ case THUNK_STATIC:
+ of << n << " [type=THUNK_STATIC];\n";
+ break;
+ case THUNK_SELECTOR:
+ of << n << " [type=THUNK_SEL];\n";
+ break;
+ case BCO:
+ of << n << " [type=BCO];\n";
+ break;
+ case AP:
+ of << n << " [type=AP];\n";
+ break;
+ case PAP:
+ of << n << " [type=PAP];\n";
+ break;
+ case AP_STACK:
+ of << n << " [type=AP_STACK];\n";
+ break;
+ case IND:
+ of << n << " [type=IND];\n";
+ break;
+ case IND_STATIC:
+ of << n << " [type=IND_STATIC];\n";
+ break;
+ case BLOCKING_QUEUE:
+ of << n << " [type=BLOCKING_QUEUE];\n";
+ break;
+ case BLACKHOLE:
+ of << n << " [type=BLACKHOLE];\n";
+ break;
+ case MVAR_CLEAN:
+ case MVAR_DIRTY:
+ of << n << " [type=MVAR];\n";
+ break;
+ case TVAR:
+ of << n << " [type=TVAR];\n";
+ break;
+ case ARR_WORDS:
+ of << n << " [type=ARR_WORDS];\n";
+ break;
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ of << n << " [type=MUT_ARR_PTRS];\n";
+ break;
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+ of << n << " [type=SMALL_MUT_ARR_PTRS];\n";
+ break;
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ of << n << " [type=MUT_VAR];\n";
+ break;
+ case WEAK:
+ of << n << " [type=WEAK];\n";
+ break;
+ case PRIM:
+ of << n << " [type=PRIM];\n";
+ break;
+ case MUT_PRIM:
+ of << n << " [type=MUT_PRIM];\n";
+ break;
+ case TSO:
+ of << n << " [type=TSO];\n";
+ break;
+ case STACK:
+ of << n << " [type=STACK];\n";
+ break;
+ case TREC_CHUNK:
+ of << n << " [type=TREC_CHUNK];\n";
+ break;
+ case WHITEHOLE:
+ of << n << " [type=WHITEHOLE];\n";
+ break;
+ case COMPACT_NFDATA:
+ of << n << " [type=COMPACT_NFDATA];\n";
+ break;
+ case CONTINUATION:
+ of << n << " [type=CONTINUATION];\n";
+ break;
+ default:
+ of << n << " [type=unknown];\n";
+ break;
+ }
+
+ if (!HEAP_ALLOCED((StgPtr) c)) {
+ of << n << " [static=yes];\n";
+ } else {
+ bdescr *bd = Bdescr((StgPtr) c);
+ of << n << " [gen=" << bd->gen_no << "];\n";
+ if (bd->flags & BF_EVACUATED) {
+ of << n << " [evacuated=yes];\n";
+ }
+ if (bd->flags & BF_PINNED) {
+ of << n << " [pinned=yes];\n";
+ }
+ if (bd->flags & BF_LARGE) {
+ of << n << " [large=yes];\n";
+ } else if (bd->flags & BF_NONMOVING) {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) c);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) c);
+ uint8_t mark = nonmovingGetMark(seg, block_idx);
+ StgClosure *snapshot_loc =
+ (StgClosure *) nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
+ if (c > snapshot_loc) {
+ of << n << " [nonmoving=yes new=yes mark=" << (StgWord) mark << "];\n";
+ } else {
+ of << n << " [nonmoving=yes mark=" << (StgWord) mark << "];\n";
+ }
+ } else {
+ of << n << " [moving=yes];\n";
+ }
+ }
+ for (TaggedClosurePtr p : collect_pointers(c)) {
+ of << n << " -> " << NodeName(p.untag()) << ";\n";
+ env.enqueue(p);
+ }
+ }
+ of << "}\n";
+}
+
+void dump_heap_to(const char *fname);
+void dump_heap_to(const char *fname)
+{
+ std::ofstream out(fname);
+ dump_heap(out);
+ out.flush();
+}
+
+void check_gc()
+{
+ CheckGc env;
+ enqueue_roots(env);
+ std::vector<Error> errors;
+
+ while (!env.finished()) {
+ TaggedClosurePtr tagged = env.pop();
+ StgClosure* c = tagged.untag();
+
+ {
+ CheckVisitor check;
+ check.check_closure(tagged);
+ for (const Error& e : check.get_errors()) {
+ errors.push_back(e);
+ }
+ }
+
+ for (TaggedClosurePtr p : collect_pointers(c)) {
+ env.enqueue(p);
+ }
+
+ if (c->header.info == (StgInfoTable *) 0xaaaaaaaaaaaaaaaa) {
+ errors.push_back(Error(c, "is invalid closure"));
+ continue;
+ }
+
+ const StgInfoTable *info = get_itbl(c);
+ if (!HEAP_ALLOCED((StgPtr) c)) {
+ switch (info->type) {
+ case THUNK_STATIC:
+ if (info->srt != 0) {
+
+ }
+ }
+ } else {
+ bdescr *bd = Bdescr((StgPtr) c);
+ if (bd->gen_no < 1) {
+ /* nothing to check as we are focused on post nonmoving-GC checking */
+ } else if (bd->flags & BF_NONMOVING && bd->flags & BF_LARGE) {
+ if (bd->flags & BF_NONMOVING_SWEEPING && !(bd->flags & BF_MARKED)) {
+ errors.push_back(Error(c, "is not marked yet being swept"));
+ }
+ } else if (bd->flags & BF_NONMOVING) {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) c);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) c);
+ uint8_t mark = nonmovingGetMark(seg, block_idx);
+ StgClosure *snapshot_loc =
+ (StgClosure *) nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
+ if (bd->flags & BF_NONMOVING_SWEEPING) {
+ /* in a swept segment */
+ if (mark != nonmovingMarkEpoch) {
+ errors.push_back(Error(c, "is unmarked nonmoving object being swept"));
+ }
+ } else if (c < snapshot_loc) {
+ /* not in a swept segment but in the snapshot */
+ if (mark != nonmovingMarkEpoch) {
+ errors.push_back(Error(c, "is unmarked nonmoving object in the snapshot"));
+ }
+ } else {
+ /* not in the snapshot; nothing to assert */
+ }
+ } else if (bd->flags & BF_LARGE) {
+ if (! (bd->flags & BF_MARKED)) {
+ errors.push_back(Error(c, "is unmarked large object"));
+ }
+ } else {
+ if (!(bd->flags & BF_EVACUATED)) {
+ //errors.push_back(Error(c, "is in from-space block"));
+ }
+ }
+ }
+ }
+
+ if (!errors.empty()) {
+ for (auto err : errors) {
+ std::cerr << err << "\n";
+ }
+ dump_heap_to("heap.dot");
+ abort();
+ }
+}
+
=====================================
rts/sm/GC.c
=====================================
@@ -312,8 +312,6 @@ GarbageCollect (uint32_t collect_gen,
CostCentreStack *save_CCS[getNumCapabilities()];
#endif
- ACQUIRE_SM_LOCK;
-
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
// block signals
@@ -589,9 +587,7 @@ GarbageCollect (uint32_t collect_gen,
// the current garbage collection, so we invoke LdvCensusForDead().
if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
|| RtsFlags.ProfFlags.bioSelector != NULL) {
- RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
LdvCensusForDead(N);
- ACQUIRE_SM_LOCK;
}
#endif
@@ -760,7 +756,7 @@ GarbageCollect (uint32_t collect_gen,
}
else // not compacted
{
- freeChain(gen->old_blocks);
+ freeChain_lock(gen->old_blocks);
}
gen->old_blocks = NULL;
@@ -771,7 +767,7 @@ GarbageCollect (uint32_t collect_gen,
* collection from large_objects. Any objects left on the
* large_objects list are therefore dead, so we free them here.
*/
- freeChain(gen->large_objects);
+ freeChain_lock(gen->large_objects);
gen->large_objects = gen->scavenged_large_objects;
gen->n_large_blocks = gen->n_scavenged_large_blocks;
gen->n_large_words = countOccupied(gen->large_objects);
@@ -890,7 +886,7 @@ GarbageCollect (uint32_t collect_gen,
if (mark_stack_top_bd != NULL) {
debugTrace(DEBUG_gc, "mark stack: %d blocks",
countBlocks(mark_stack_top_bd));
- freeChain(mark_stack_top_bd);
+ freeChain_lock(mark_stack_top_bd);
}
// Free any bitmaps.
@@ -942,9 +938,7 @@ GarbageCollect (uint32_t collect_gen,
// Start any pending finalizers. Must be after
// updateStableTables() and stableUnlock() (see #4221).
- RELEASE_SM_LOCK;
scheduleFinalizers(cap, dead_weak_ptr_list);
- ACQUIRE_SM_LOCK;
// check sanity after GC
// before resurrectThreads(), because that might overwrite some
@@ -959,9 +953,7 @@ GarbageCollect (uint32_t collect_gen,
// behind.
if (do_heap_census) {
debugTrace(DEBUG_sched, "performing heap census");
- RELEASE_SM_LOCK;
heapCensus(mut_time);
- ACQUIRE_SM_LOCK;
}
#if defined(TICKY_TICKY)
@@ -975,14 +967,14 @@ GarbageCollect (uint32_t collect_gen,
#endif
// send exceptions to any threads which were about to die
- RELEASE_SM_LOCK;
resurrectThreads(resurrected_threads);
- ACQUIRE_SM_LOCK;
// Finally free the deferred mblocks by sorting the deferred free list and
// merging it into the actual sorted free list. This needs to happen here so
// that the `returnMemoryToOS` call down below can successfully free memory.
+ ACQUIRE_SM_LOCK;
commitMBlockFreeing();
+ RELEASE_SM_LOCK;
if (major_gc) {
W_ need_prealloc, need_live, need, got;
@@ -1095,8 +1087,6 @@ GarbageCollect (uint32_t collect_gen,
}
#endif
- RELEASE_SM_LOCK;
-
SET_GCT(saved_gct);
}
@@ -1146,7 +1136,7 @@ new_gc_thread (uint32_t n, gc_thread *t)
// but can't, because it uses gct which isn't set up at this point.
// Hence, allocate a block for todo_bd manually:
{
- bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
+ bdescr *bd = allocBlockOnNode_lock(capNoToNumaNode(n));
// no lock, locks aren't initialised yet
initBdescr(bd, ws->gen, ws->gen->to);
bd->flags = BF_EVACUATED;
@@ -1605,7 +1595,7 @@ static void
stash_mut_list (Capability *cap, uint32_t gen_no)
{
cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
- RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_sync(cap->node));
+ RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_lock(cap->node));
}
/* ----------------------------------------------------------------------------
@@ -1632,9 +1622,9 @@ prepare_collected_gen (generation *gen)
// a check for NULL in recordMutable().
for (i = 0; i < getNumCapabilities(); i++) {
bdescr *old = RELAXED_LOAD(&getCapability(i)->mut_lists[g]);
- freeChain(old);
+ freeChain_lock(old);
- bdescr *new = allocBlockOnNode(capNoToNumaNode(i));
+ bdescr *new = allocBlockOnNode_lock(capNoToNumaNode(i));
RELAXED_STORE(&getCapability(i)->mut_lists[g], new);
}
}
@@ -1717,7 +1707,7 @@ prepare_collected_gen (generation *gen)
bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_);
if (bitmap_size > 0) {
- bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
+ bitmap_bdescr = allocGroup_lock((StgWord)BLOCK_ROUND_UP(bitmap_size)
/ BLOCK_SIZE);
gen->bitmap = bitmap_bdescr;
bitmap = bitmap_bdescr->start;
=====================================
rts/sm/GC.h
=====================================
@@ -13,8 +13,6 @@
#pragma once
-#include "HeapAlloc.h"
-
#include "BeginPrivate.h"
void GarbageCollect (uint32_t collect_gen,
=====================================
rts/sm/GCUtils.c
=====================================
@@ -26,38 +26,15 @@
#include "WSDeque.h"
#endif
-#if defined(THREADED_RTS)
-SpinLock gc_alloc_block_sync;
-#endif
-
static void push_todo_block(bdescr *bd, gen_workspace *ws);
-bdescr* allocGroup_sync(uint32_t n)
-{
- bdescr *bd;
- uint32_t node = capNoToNumaNode(gct->thread_index);
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- bd = allocGroupOnNode(node,n);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
- return bd;
-}
-
-bdescr* allocGroupOnNode_sync(uint32_t node, uint32_t n)
-{
- bdescr *bd;
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- bd = allocGroupOnNode(node,n);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
- return bd;
-}
-
static uint32_t
-allocBlocks_sync(uint32_t n, bdescr **hd)
+allocBlocks_lock(uint32_t n, bdescr **hd)
{
bdescr *bd;
uint32_t i;
uint32_t node = capNoToNumaNode(gct->thread_index);
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK;
bd = allocLargeChunkOnNode(node,1,n);
// NB. allocLargeChunk, rather than allocGroup(n), to allocate in a
// fragmentation-friendly way.
@@ -70,27 +47,11 @@ allocBlocks_sync(uint32_t n, bdescr **hd)
bd[n-1].link = NULL;
// We have to hold the lock until we've finished fiddling with the metadata,
// otherwise the block allocator can get confused.
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
*hd = bd;
return n;
}
-void
-freeChain_sync(bdescr *bd)
-{
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- freeChain(bd);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
-}
-
-void
-freeGroup_sync(bdescr *bd)
-{
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- freeGroup(bd);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
-}
-
/* -----------------------------------------------------------------------------
Workspace utilities
-------------------------------------------------------------------------- */
@@ -303,7 +264,7 @@ todo_block_full (uint32_t size, gen_workspace *ws)
// object. However, if the object we're copying is
// larger than a block, then we might have an empty
// block here.
- freeGroup_sync(bd);
+ freeGroup_lock(bd);
} else {
push_scanned_block(bd, ws);
}
@@ -343,14 +304,14 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)
else
{
if (size > BLOCK_SIZE_W) {
- bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_))
+ bd = allocGroup_lock((W_)BLOCK_ROUND_UP(size*sizeof(W_))
/ BLOCK_SIZE);
} else {
if (gct->free_blocks) {
bd = gct->free_blocks;
gct->free_blocks = bd->link;
} else {
- allocBlocks_sync(16, &bd);
+ allocBlocks_lock(16, &bd);
gct->free_blocks = bd->link;
}
}
=====================================
rts/sm/GCUtils.h
=====================================
@@ -17,22 +17,6 @@
#include "BeginPrivate.h"
-bdescr* allocGroup_sync(uint32_t n);
-bdescr* allocGroupOnNode_sync(uint32_t node, uint32_t n);
-
-INLINE_HEADER bdescr *allocBlock_sync(void)
-{
- return allocGroup_sync(1);
-}
-
-INLINE_HEADER bdescr *allocBlockOnNode_sync(uint32_t node)
-{
- return allocGroupOnNode_sync(node,1);
-}
-
-void freeChain_sync(bdescr *bd);
-void freeGroup_sync(bdescr *bd);
-
void push_scanned_block (bdescr *bd, gen_workspace *ws);
StgPtr todo_block_full (uint32_t size, gen_workspace *ws);
StgPtr alloc_todo_block (gen_workspace *ws, uint32_t size);
@@ -62,7 +46,7 @@ recordMutableGen_GC (StgClosure *p, uint32_t gen_no)
bd = gct->mut_lists[gen_no];
if (bd->free >= bd->start + BLOCK_SIZE_W) {
bdescr *new_bd;
- new_bd = allocBlock_sync();
+ new_bd = allocBlock_lock();
new_bd->link = bd;
bd = new_bd;
gct->mut_lists[gen_no] = bd;
=====================================
rts/sm/HeapAlloc.h
=====================================
@@ -10,6 +10,8 @@
#include "BeginPrivate.h"
+#include "Storage.h"
+
/* -----------------------------------------------------------------------------
The HEAP_ALLOCED() test.
@@ -210,9 +212,9 @@ StgBool HEAP_ALLOCED_GC(const void *p)
} else {
// putting the rest out of line turned out to be a slight
// performance improvement:
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK; // TODO: this may be too expensive
b = HEAP_ALLOCED_miss(mblock,p);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
return b;
}
}
=====================================
rts/sm/MarkStack.h
=====================================
@@ -32,7 +32,7 @@ push_mark_stack(StgPtr p)
}
else
{
- bd = allocBlock_sync();
+ bd = allocBlock_lock();
bd->link = mark_stack_bd;
bd->u.back = NULL;
mark_stack_bd->u.back = bd; // double-link the new block on
=====================================
rts/sm/NonMoving.c
=====================================
@@ -597,14 +597,10 @@ static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
// Nothing in the free list, allocate a new segment...
if (ret == NULL) {
- // Take gc spinlock: another thread may be scavenging a moving
- // generation and call `todo_block_full`
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
// See Note [Live data accounting in nonmoving collector].
oldest_gen->n_blocks += bd->blocks;
oldest_gen->n_words += BLOCK_SIZE_W * bd->blocks;
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
for (StgWord32 i = 0; i < bd->blocks; ++i) {
initBdescr(&bd[i], oldest_gen, oldest_gen);
@@ -667,7 +663,7 @@ static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *al
}
}
-/* Allocate a block in the nonmoving heap. Caller must hold SM_MUTEX. sz is in words */
+/* Allocate a block in the nonmoving heap. sz is in words */
GNUC_ATTR_HOT
void *nonmovingAllocate(Capability *cap, StgWord sz)
{
@@ -707,7 +703,9 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
// there are no active segments, allocate new segment
if (new_current == NULL) {
+ ACQUIRE_SM_LOCK;
new_current = nonmovingAllocSegment(cap->node);
+ RELEASE_SM_LOCK;
nonmovingInitSegment(new_current, log_block_size);
}
@@ -790,14 +788,13 @@ void nonmovingExit(void)
/*
* Assumes that no garbage collector or mutator threads are running to safely
* resize the nonmoving_allocators.
- *
- * Must hold sm_mutex.
*/
void nonmovingAddCapabilities(uint32_t new_n_caps)
{
unsigned int old_n_caps = nonmovingHeap.n_caps;
struct NonmovingAllocator **allocs = nonmovingHeap.allocators;
+ ACQUIRE_SM_LOCK;
for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
struct NonmovingAllocator *old = allocs[i];
allocs[i] = alloc_nonmoving_allocator(new_n_caps);
@@ -819,6 +816,7 @@ void nonmovingAddCapabilities(uint32_t new_n_caps)
}
}
nonmovingHeap.n_caps = new_n_caps;
+ RELEASE_SM_LOCK;
}
void nonmovingClearBitmap(struct NonmovingSegment *seg)
@@ -1238,6 +1236,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
#if defined(THREADED_RTS) && defined(NONCONCURRENT_SWEEP)
#if defined(DEBUG)
+ check_gc();
checkNonmovingHeap(&nonmovingHeap);
checkSanity(true, true);
#endif
=====================================
rts/sm/NonMovingMark.c
=====================================
@@ -295,10 +295,8 @@ static void nonmovingAddUpdRemSetBlocks_lock(MarkQueue *rset)
nonmovingAddUpdRemSetBlocks_(rset);
// Reset the state of the remembered set.
- ACQUIRE_SM_LOCK;
init_mark_queue_(rset);
rset->is_upd_rem_set = true;
- RELEASE_SM_LOCK;
}
/*
@@ -472,9 +470,7 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
}
/* A variant of push to be used by the minor GC when it encounters a reference
- * to an object in the non-moving heap. In contrast to the other push
- * operations this uses the gc_alloc_block_sync spinlock instead of the
- * SM_LOCK to allocate new blocks in the event that the mark queue is full.
+ * to an object in the non-moving heap.
*/
void
markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
@@ -495,13 +491,13 @@ markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
// Yes, this block is full.
// allocate a fresh block.
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK;
bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
bd->link = q->blocks;
q->blocks = bd;
q->top = (MarkQueueBlock *) bd->start;
q->top->head = 0;
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
}
MarkQueueEnt ent = {
@@ -932,7 +928,7 @@ static MarkQueueEnt markQueuePop (MarkQueue *q)
/* Must hold sm_mutex. */
static void init_mark_queue_ (MarkQueue *queue)
{
- bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
+ bdescr *bd = allocGroup_lock(MARK_QUEUE_BLOCKS);
ASSERT(queue->blocks == NULL);
queue->blocks = bd;
queue->top = (MarkQueueBlock *) bd->start;
@@ -943,14 +939,12 @@ static void init_mark_queue_ (MarkQueue *queue)
#endif
}
-/* Must hold sm_mutex. */
void initMarkQueue (MarkQueue *queue)
{
init_mark_queue_(queue);
queue->is_upd_rem_set = false;
}
-/* Must hold sm_mutex. */
void nonmovingInitUpdRemSet (UpdRemSet *rset)
{
init_mark_queue_(&rset->queue);
@@ -1610,7 +1604,7 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
// selectee unreachable. However, we must mark the selectee regardless
// to satisfy the snapshot invariant.
PUSH_FIELD(sel, selectee);
- nonmoving_eval_thunk_selector(queue, sel, origin);
+ //nonmoving_eval_thunk_selector(queue, sel, origin);
break;
}
=====================================
rts/sm/NonMovingMark.h
=====================================
@@ -9,10 +9,11 @@
#pragma once
#include "Task.h"
-#include "NonMoving.h"
#include "BeginPrivate.h"
+struct NonMovingHeap;
+
enum EntryType {
NULL_ENTRY = 0,
MARK_CLOSURE = 1,
@@ -63,7 +64,7 @@ INLINE_HEADER enum EntryType nonmovingMarkQueueEntryType(MarkQueueEnt *ent)
{
uintptr_t tag = (uintptr_t) ent->null_entry.p & TAG_MASK;
ASSERT(tag <= MARK_ARRAY);
- return tag;
+ return (enum EntryType) tag;
}
typedef struct {
@@ -155,7 +156,7 @@ void markQueueAddRoot(MarkQueue* q, StgClosure** root);
void initMarkQueue(MarkQueue *queue);
void freeMarkQueue(MarkQueue *queue);
-void nonmovingMark(struct MarkQueue_ *restrict queue);
+void nonmovingMark(struct MarkQueue_ *__restrict__ queue);
void nonmovingMarkWeakPtrList(struct MarkQueue_ *queue);
bool nonmovingTidyWeaks(struct MarkQueue_ *queue);
=====================================
rts/sm/NonMovingShortcut.c
=====================================
@@ -10,6 +10,7 @@
#include "Rts.h"
#include "GC.h"
#include "SMPClosureOps.h"
+#include "NonMoving.h"
#include "NonMovingMark.h"
#include "NonMovingShortcut.h"
#include "Printer.h"
=====================================
rts/sm/Sanity.h
=====================================
@@ -19,6 +19,8 @@
# define MAX_SLOTS 100000
# endif
+struct NonmovingHeap;
+
/* debugging routines */
void checkSanity ( bool after_gc, bool major_gc );
void checkNurserySanity ( nursery *nursery );
=====================================
rts/sm/Scav.c
=====================================
@@ -1730,7 +1730,7 @@ scavenge_capability_mut_lists (Capability *cap)
if (RtsFlags.GcFlags.useNonmoving && major_gc) {
uint32_t g = oldest_gen->no;
scavenge_mutable_list(cap->saved_mut_lists[g], oldest_gen);
- freeChain_sync(cap->saved_mut_lists[g]);
+ freeChain_lock(cap->saved_mut_lists[g]);
cap->saved_mut_lists[g] = NULL;
return;
}
@@ -1743,7 +1743,7 @@ scavenge_capability_mut_lists (Capability *cap)
*/
for (uint32_t g = RtsFlags.GcFlags.generations-1; g > N; g--) {
scavenge_mutable_list(cap->saved_mut_lists[g], &generations[g]);
- freeChain_sync(cap->saved_mut_lists[g]);
+ freeChain_lock(cap->saved_mut_lists[g]);
cap->saved_mut_lists[g] = NULL;
}
}
=====================================
rts/sm/Storage.c
=====================================
@@ -193,14 +193,13 @@ initStorage (void)
initMutex(&sm_mutex);
#endif
- ACQUIRE_SM_LOCK;
-
/* allocate generation info array */
generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
* sizeof(struct generation_),
"initStorage: gens");
/* Initialise all generations */
+ ACQUIRE_SM_LOCK;
for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
initGeneration(&generations[g], g);
}
@@ -214,16 +213,11 @@ initStorage (void)
generations[g].to = &generations[g+1];
}
oldest_gen->to = oldest_gen;
+ RELEASE_SM_LOCK;
// Nonmoving heap uses oldest_gen so initialize it after initializing oldest_gen
nonmovingInit();
-#if defined(THREADED_RTS)
- // nonmovingAddCapabilities allocates segments, which requires taking the gc
- // sync lock, so initialize it before nonmovingAddCapabilities
- initSpinLock(&gc_alloc_block_sync);
-#endif
-
if (RtsFlags.GcFlags.useNonmoving)
nonmovingAddCapabilities(getNumCapabilities());
@@ -261,8 +255,6 @@ initStorage (void)
IF_DEBUG(gc, statDescribeGens());
- RELEASE_SM_LOCK;
-
traceInitEvent(traceHeapInfo);
}
@@ -314,12 +306,14 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
assignNurseriesToCapabilities(from,to);
// allocate a block for each mut list
+ ACQUIRE_SM_LOCK;
for (n = from; n < to; n++) {
for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
getCapability(n)->mut_lists[g] =
allocBlockOnNode(capNoToNumaNode(n));
}
}
+ RELEASE_SM_LOCK;
// Initialize NonmovingAllocators and UpdRemSets
if (RtsFlags.GcFlags.useNonmoving) {
@@ -565,9 +559,7 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
// Allocate the blackhole indirection closure
if (RtsFlags.GcFlags.useNonmoving) {
// See Note [Static objects under the nonmoving collector].
- ACQUIRE_SM_LOCK;
bh = (StgInd *)nonmovingAllocate(cap, sizeofW(*bh));
- RELEASE_SM_LOCK;
recordMutableCap((StgClosure*)bh,
regTableToCapability(reg), oldest_gen->no);
} else {
@@ -725,6 +717,7 @@ allocNursery (uint32_t node, bdescr *tail, W_ blocks)
// automatic prefetching works across nursery blocks. This is a
// tiny optimisation (~0.5%), but it's free.
+ ACQUIRE_SM_LOCK;
while (blocks > 0) {
n = stg_min(BLOCKS_PER_MBLOCK, blocks);
// allocLargeChunk will prefer large chunks, but will pick up
@@ -760,6 +753,7 @@ allocNursery (uint32_t node, bdescr *tail, W_ blocks)
tail = &bd[0];
}
+ RELEASE_SM_LOCK;
return &bd[0];
}
@@ -879,7 +873,7 @@ resizeNurseriesEach (W_ blocks)
next_bd = bd->link;
next_bd->u.back = NULL;
nursery_blocks -= bd->blocks; // might be a large block
- freeGroup(bd);
+ freeGroup_lock(bd);
bd = next_bd;
}
nursery->blocks = bd;
@@ -1300,9 +1294,7 @@ allocatePinned (Capability *cap, W_ n /*words*/, W_ alignment /*bytes*/, W_ alig
if (bd == NULL) {
// The pinned block list is empty: allocate a fresh block (we can't fail
// here).
- ACQUIRE_SM_LOCK;
bd = allocNursery(cap->node, NULL, PINNED_EMPTY_SIZE);
- RELEASE_SM_LOCK;
}
// Bump up the nursery pointer to avoid the pathological situation
@@ -1405,8 +1397,6 @@ allocatePinned (Capability *cap, W_ n /*words*/, W_ alignment /*bytes*/, W_ alig
void
dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mvar, StgClosure *old)
{
- ASSERT(RELAXED_LOAD(&mvar->header.info) == &stg_MUT_VAR_CLEAN_info);
-
Capability *cap = regTableToCapability(reg);
// No barrier required here as no other heap object fields are read. See
// Note [Heap memory barriers] in SMP.h.
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/4b4207f32fd4c0a6995e2be1a59bf2b3eff53d51...14f1045d178417cd7d508c3b72cd70ce5b681d1a