[Git][ghc/ghc][wip/T22264] 35 commits: rts: Drop SM spinlock
Ben Gamari (@bgamari)
gitlab at gitlab.haskell.org
Sun Dec 4 19:06:58 UTC 2022
Ben Gamari pushed to branch wip/T22264 at Glasgow Haskell Compiler / GHC
Commits:
be9950df by Ben Gamari at 2022-11-19T20:24:58-05:00
rts: Drop SM spinlock
- - - - -
8b8cd0fe by Ben Gamari at 2022-11-23T19:41:35-05:00
Various
- - - - -
562dd80c by Ben Gamari at 2022-11-23T19:41:43-05:00
Take SM mutex in setNumCapabilities
- - - - -
4b023123 by Ben Gamari at 2022-11-23T19:42:02-05:00
setRecentActivities
- - - - -
68b4a6ab by Ben Gamari at 2022-11-23T19:42:13-05:00
Fix type issues in Sparks.h
- - - - -
f205a5d3 by Ben Gamari at 2022-11-23T19:42:56-05:00
locking
- - - - -
07134f76 by Ben Gamari at 2022-11-23T19:43:20-05:00
Various
- - - - -
2b6f9bd4 by Ben Gamari at 2022-11-23T19:43:48-05:00
C++ typing issues
- - - - -
dfa7f23d by Ben Gamari at 2022-11-23T19:44:18-05:00
Sanity: Look at nonmoving saved_filled lists
- - - - -
6b911ba7 by Ben Gamari at 2022-11-23T19:44:39-05:00
CheckGC
- - - - -
a0111bbd by Ben Gamari at 2022-11-23T21:25:32-05:00
CheckGc: Refactor
- - - - -
da615ef3 by Ben Gamari at 2022-11-23T22:36:16-05:00
Fix it
- - - - -
7850a894 by Ben Gamari at 2022-11-25T14:00:28-05:00
check-gc
- - - - -
be510990 by Ben Gamari at 2022-11-25T17:48:25-05:00
Check gc
- - - - -
cff00edb by Ben Gamari at 2022-11-25T18:52:46-05:00
Check Gc
- - - - -
c25eb12f by Ben Gamari at 2022-11-25T18:52:56-05:00
locking
- - - - -
0ad4d2fb by Ben Gamari at 2022-11-25T20:09:30-05:00
Locking
- - - - -
230d8760 by Ben Gamari at 2022-11-26T21:27:12-05:00
locking
- - - - -
da5bdfb6 by Ben Gamari at 2022-11-26T21:27:29-05:00
Check GC
- - - - -
4dcea2c5 by Ben Gamari at 2022-11-26T21:27:46-05:00
Locking
- - - - -
6ed7e21a by Ben Gamari at 2022-11-26T21:28:08-05:00
Locking
- - - - -
0c097193 by Ben Gamari at 2022-11-26T21:28:20-05:00
FIX THE DAMN THING
- - - - -
cac0caeb by Ben Gamari at 2022-11-29T23:09:48-05:00
Evac: Squash data race in eval_selector_chain
- - - - -
9fe222a6 by Ben Gamari at 2022-11-29T23:11:01-05:00
nonmoving: Paranoia
- - - - -
a2daf6bf by Ben Gamari at 2022-11-29T23:11:26-05:00
nonmoving: Fix weaks
- - - - -
5fef104b by Ben Gamari at 2022-11-30T15:43:05-05:00
nonmoving: Fix selectors
- - - - -
f0161e29 by Ben Gamari at 2022-11-30T15:43:36-05:00
nonmoving: Avoid n_caps race
- - - - -
3f4f06e4 by Ben Gamari at 2022-11-30T15:44:16-05:00
nonmoving: Paranoia
- - - - -
571ac83f by Ben Gamari at 2022-11-30T15:44:41-05:00
nonmoving: Assert weak pointer consistency
- - - - -
b56776c3 by Ben Gamari at 2022-12-02T17:03:56-05:00
CheckGc: Distinguish tagged pointers
- - - - -
4b2dfabe by Ben Gamari at 2022-12-02T17:04:19-05:00
CheckGc: Flush
- - - - -
d382a58c by Ben Gamari at 2022-12-02T17:04:48-05:00
rts/gc: Add missing write barriers in selector optimisation
- - - - -
4f45b245 by Ben Gamari at 2022-12-02T17:06:42-05:00
CheckGc: Add missing SRT
- - - - -
fb45fba5 by Ben Gamari at 2022-12-02T17:21:32-05:00
CheckGc: Factor out constructor, fun, thunk traversal
- - - - -
f095454b by Ben Gamari at 2022-12-02T17:50:04-05:00
CheckGc: More sanity checking
- - - - -
26 changed files:
- rts/Capability.h
- rts/CheckUnload.c
- rts/Proftimer.c
- rts/Schedule.h
- rts/Sparks.h
- rts/Stats.c
- rts/Trace.h
- rts/include/rts/storage/MBlock.h
- rts/rts.cabal.in
- rts/sm/BlockAlloc.c
- + rts/sm/CheckGc.cpp
- rts/sm/Evac.c
- rts/sm/GC.c
- rts/sm/GC.h
- rts/sm/GCUtils.c
- rts/sm/GCUtils.h
- rts/sm/HeapAlloc.h
- rts/sm/MarkStack.h
- rts/sm/NonMoving.c
- rts/sm/NonMovingMark.c
- rts/sm/NonMovingMark.h
- rts/sm/NonMovingShortcut.c
- rts/sm/Sanity.c
- rts/sm/Sanity.h
- rts/sm/Scav.c
- rts/sm/Storage.c
Changes:
=====================================
rts/Capability.h
=====================================
@@ -20,10 +20,10 @@
#pragma once
-#include "sm/GC.h" // for evac_fn
#include "Task.h"
#include "Sparks.h"
-#include "sm/NonMovingMark.h" // for MarkQueue
+#include "sm/GC.h" // for evac_fn
+#include "sm/NonMovingMark.h" // for UpdRemSet
#include "BeginPrivate.h"
=====================================
rts/CheckUnload.c
=====================================
@@ -15,6 +15,7 @@
#include "Hash.h"
#include "LinkerInternals.h"
#include "CheckUnload.h"
+#include "sm/HeapAlloc.h"
#include "sm/Storage.h"
#include "sm/GCThread.h"
#include "sm/HeapUtils.h"
=====================================
rts/Proftimer.c
=====================================
@@ -123,7 +123,7 @@ handleProfTick(void)
if (RELAXED_LOAD_ALWAYS(&do_prof_ticks)) {
uint32_t n;
for (n=0; n < getNumCapabilities(); n++) {
- Capability *cap = getCapbility(n);
+ Capability *cap = getCapability(n);
cap->r.rCCCS->time_ticks++;
traceProfSampleCostCentre(cap, cap->r.rCCCS, total_ticks);
}
=====================================
rts/Schedule.h
=====================================
@@ -132,7 +132,7 @@ setRecentActivity(enum RecentActivity new_value)
INLINE_HEADER enum RecentActivity
getRecentActivity(void)
{
- return RELAXED_LOAD_ALWAYS(&recent_activity);
+ return (enum RecentActivity) RELAXED_LOAD_ALWAYS(&recent_activity);
}
/* Thread queues.
=====================================
rts/Sparks.h
=====================================
@@ -8,6 +8,7 @@
#pragma once
+#include "sm/GC.h" // for evac_fn
#include "WSDeque.h"
#include "BeginPrivate.h"
@@ -56,7 +57,7 @@ INLINE_HEADER long sparkPoolSize (SparkPool *pool);
INLINE_HEADER StgClosure* reclaimSpark(SparkPool *pool)
{
- return popWSDeque(pool);
+ return (StgClosure*) popWSDeque(pool);
}
INLINE_HEADER bool looksEmpty(SparkPool* deque)
@@ -89,7 +90,7 @@ INLINE_HEADER void discardSparks (SparkPool *pool)
INLINE_HEADER StgClosure * tryStealSpark (SparkPool *pool)
{
- return stealWSDeque_(pool);
+ return (StgClosure *) stealWSDeque_(pool);
// use the no-loopy version, stealWSDeque_(), since if we get a
// spurious NULL here the caller may want to try stealing from
// other pools before trying again.
=====================================
rts/Stats.c
=====================================
@@ -963,11 +963,6 @@ static void report_summary(const RTSSummaryStats* sum)
, col_width[1], "SpinLock"
, col_width[2], "Spins"
, col_width[3], "Yields");
- statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
- , col_width[0], ""
- , col_width[1], "gc_alloc_block_sync"
- , col_width[2], gc_alloc_block_sync.spin
- , col_width[3], gc_alloc_block_sync.yield);
statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
, col_width[0], ""
, col_width[1], "whitehole_gc"
@@ -1142,10 +1137,6 @@ static void report_machine_readable (const RTSSummaryStats * sum)
// next, internal counters
#if defined(PROF_SPIN)
- MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
- MR_STAT("gc_alloc_block_sync_yield", FMT_Word64,
- gc_alloc_block_sync.yield);
- MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
MR_STAT("waitForGcThreads_spin", FMT_Word64, waitForGcThreads_spin);
MR_STAT("waitForGcThreads_yield", FMT_Word64,
waitForGcThreads_yield);
@@ -1572,9 +1563,6 @@ SpinLock:
Not all of these are actual SpinLocks, see the details below.
Actual SpinLocks:
-* gc_alloc_block:
- This SpinLock protects the block allocator and free list manager. See
- BlockAlloc.c.
* gen[g].sync:
These SpinLocks, one per generation, protect the generations[g] data
structure during garbage collection.
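For reference, a minimal sketch (not part of the patch) of the PROF_SPIN pattern the remaining counters still follow — the spin/yield fields live on the SpinLock itself, which is why dropping gc_alloc_block_sync also drops its report lines above; gen_sync here is a stand-in for generations[g].sync:

    SpinLock gen_sync;
    initSpinLock(&gen_sync);
    ACQUIRE_SPIN_LOCK(&gen_sync);   // under PROF_SPIN this accounts spins/yields
    /* ... touch generations[g] metadata ... */
    RELEASE_SPIN_LOCK(&gen_sync);
    statsPrintf("spins=%" FMT_Word64 " yields=%" FMT_Word64 "\n",
                gen_sync.spin, gen_sync.yield);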
=====================================
rts/Trace.h
=====================================
@@ -33,7 +33,9 @@ void tracingAddCapapilities (uint32_t from, uint32_t to);
#endif /* TRACING */
typedef StgWord32 CapsetID;
+#if !defined(__cplusplus)
typedef StgWord16 CapsetType;
+#endif
enum CapsetType { CapsetTypeCustom = CAPSET_TYPE_CUSTOM,
CapsetTypeOsProcess = CAPSET_TYPE_OSPROCESS,
CapsetTypeClockdomain = CAPSET_TYPE_CLOCKDOMAIN };
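The new guard is needed because C++ (unlike C) resolves the enum name and the typedef name in the same scope, so a C++ translation unit such as CheckGc.cpp cannot see both declarations. A minimal sketch of the conflict, assuming nothing beyond the two declarations above:

    typedef StgWord16 CapsetType;           // fine in C, and in C++ on its own
    enum CapsetType { CapsetTypeCustom };   // C: lives in the separate tag namespace
                                            // C++: error, 'CapsetType' redeclared as
                                            //      a different kind of entity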
=====================================
rts/include/rts/storage/MBlock.h
=====================================
@@ -26,7 +26,3 @@ extern void freeAllMBlocks(void);
extern void *getFirstMBlock(void **state);
extern void *getNextMBlock(void **state, void *mblock);
-#if defined(THREADED_RTS)
-// needed for HEAP_ALLOCED below
-extern SpinLock gc_alloc_block_sync;
-#endif
=====================================
rts/rts.cabal.in
=====================================
@@ -448,6 +448,9 @@ library
-- AutoApply is generated
AutoApply.cmm
+ -- Debugging
+ c-sources: sm/CheckGc.cpp
+
-- Adjustor stuff
if flag(libffi-adjustors)
c-sources: adjustor/LibffiAdjustor.c
=====================================
rts/sm/BlockAlloc.c
=====================================
@@ -438,6 +438,7 @@ alloc_mega_group (uint32_t node, StgWord mblocks)
bdescr *best, *bd;
StgWord n;
+ ASSERT_SM_LOCK();
n = MBLOCK_GROUP_BLOCKS(mblocks);
if(defer_mblock_frees)
@@ -496,6 +497,7 @@ allocGroupOnNode (uint32_t node, W_ n)
bdescr *bd, *rem;
StgWord ln;
+ ASSERT_SM_LOCK();
if (n == 0) barf("allocGroup: requested zero blocks");
if (n >= BLOCKS_PER_MBLOCK)
@@ -709,6 +711,7 @@ bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max)
{
bdescr *bd;
StgWord ln, lnmax;
+ ASSERT_SM_LOCK();
if (min >= BLOCKS_PER_MBLOCK) {
return allocGroupOnNode(node,max);
@@ -933,8 +936,7 @@ freeGroup(bdescr *p)
StgWord ln;
uint32_t node;
- // not true in multithreaded GC:
- // ASSERT_SM_LOCK();
+ ASSERT_SM_LOCK();
ASSERT(RELAXED_LOAD(&p->free) != (P_)-1);
=====================================
rts/sm/CheckGc.cpp
=====================================
@@ -0,0 +1,973 @@
+extern "C" {
+#include "Rts.h"
+#include "StableName.h" /* for FOR_EACH_STABLE_NAME */
+#include "StablePtr.h" /* for markStablePtrTable */
+#include "Schedule.h" /* for markScheduler */
+#include "Capability.h"
+#include "HeapAlloc.h"
+#include "STM.h"
+}
+
+#include <iostream>
+#include <fstream>
+#include <set>
+#include <vector>
+#include <queue>
+#include <unordered_set>
+
+class TaggedClosurePtr {
+ StgClosure *ptr;
+public:
+ TaggedClosurePtr(StgClosure* ptr) : ptr(ptr) {}
+ TaggedClosurePtr(StgClosure* ptr, uint8_t tag) : TaggedClosurePtr(TAG_CLOSURE(tag, ptr)) {}
+
+ StgClosure *get_tagged() const {
+ return ptr;
+ }
+ StgClosure *untag() const {
+ return UNTAG_CLOSURE(ptr);
+ }
+ uint8_t get_tag() const {
+ return (StgWord) ptr & TAG_MASK;
+ }
+
+ //inline StgClosure& operator->() { return *untag(); }
+
+ friend inline bool operator==(const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) {
+ return lhs.ptr == rhs.ptr;
+ }
+ friend inline bool operator!=(const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) { return !(lhs == rhs); }
+ friend inline bool operator< (const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) {
+ return lhs.ptr < rhs.ptr;
+ }
+ friend inline bool operator> (const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) { return rhs < lhs; }
+ friend inline bool operator<=(const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) { return !(lhs > rhs); }
+ friend inline bool operator>=(const TaggedClosurePtr& lhs, const TaggedClosurePtr& rhs) { return !(lhs < rhs); }
+};
+
+template<>
+struct std::hash<TaggedClosurePtr> {
+ std::size_t operator()(TaggedClosurePtr const& p) const noexcept {
+ return std::hash<StgClosure*>{}(p.get_tagged());
+ }
+};
+
+class HeapVisitor {
+public:
+ // Visit an SRT
+ virtual void visit_srt(StgClosure* c);
+
+ // Visit a normal closure
+ virtual void visit_closure(TaggedClosurePtr c);
+
+ virtual void visit_thunk(StgThunk *thunk, size_t n_ptrs);
+ virtual void visit_fun(StgClosure *constr, size_t n_ptrs);
+ virtual void visit_constr(StgClosure *constr, size_t n_ptrs);
+ virtual void visit_array(StgMutArrPtrs *arr);
+ virtual void visit_small_array(StgSmallMutArrPtrs *arr);
+ virtual void visit_bytearray(StgArrBytes* arr);
+
+ virtual void visit_stack(StgPtr sp, StgPtr end);
+ virtual void visit_tso(StgTSO* tso);
+ virtual void visit_weak(StgWeak* w);
+ virtual void visit_mvar(StgMVar* mvar);
+ virtual void visit_tvar(StgTVar* tvar);
+ virtual void visit_trec_header(StgTRecHeader *trec);
+ virtual void visit_trec_chunk(StgTRecChunk* tc);
+ virtual void visit_continuation(StgContinuation* tc);
+
+ virtual void visit_small_bitmap(StgClosure *const *payload, StgWord bitmap, StgWord size);
+ virtual void visit_large_bitmap(StgClosure *const *payload, const StgLargeBitmap *bitmap, StgWord size);
+ void visit_pap_payload(StgClosure *fun, StgClosure **payload, StgWord n_args);
+
+ virtual void visit_invalid(StgClosure *const c);
+};
+
+void HeapVisitor::visit_thunk(StgThunk *thunk, size_t n_ptrs)
+{
+ const StgInfoTable *info = get_itbl((StgClosure *) thunk);
+ const StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(info);
+ if (thunk_info->i.srt) {
+ StgClosure *srt = (StgClosure*) GET_SRT(thunk_info);
+ visit_srt(srt);
+ };
+ for (size_t i=0; i < n_ptrs; i++) {
+ visit_closure(thunk->payload[i]);
+ }
+}
+
+void HeapVisitor::visit_fun(StgClosure *fun, size_t n_ptrs)
+{
+ const StgInfoTable *info = get_itbl(fun);
+ const StgFunInfoTable *fun_info = itbl_to_fun_itbl(info);
+ if (fun_info->i.srt) {
+ StgClosure *srt = (StgClosure*) GET_SRT(fun_info);
+ visit_srt(srt);
+ };
+ for (size_t i=0; i < n_ptrs; i++) {
+ visit_closure(fun->payload[i]);
+ }
+}
+
+void HeapVisitor::visit_constr(StgClosure *constr, size_t n_ptrs)
+{
+ for (size_t i=0; i < n_ptrs; i++) {
+ visit_closure(constr->payload[i]);
+ }
+}
+
+void HeapVisitor::visit_srt(StgClosure* c)
+{
+ visit_closure(c);
+}
+
+void HeapVisitor::visit_invalid(StgClosure *const _c)
+{
+ abort();
+}
+
+void HeapVisitor::visit_weak(StgWeak* w)
+{
+ visit_closure(w->key);
+ visit_closure(w->value);
+ visit_closure(w->finalizer);
+ visit_closure(w->cfinalizers);
+}
+
+void HeapVisitor::visit_mvar(StgMVar* mvar)
+{
+ visit_closure((StgClosure*) mvar->head);
+ visit_closure((StgClosure*) mvar->tail);
+ visit_closure(mvar->value);
+}
+
+void HeapVisitor::visit_small_array(StgSmallMutArrPtrs *arr)
+{
+ for (StgWord i=0; i < arr->ptrs; i++) {
+ visit_closure(arr->payload[i]);
+ }
+}
+
+void HeapVisitor::visit_array(StgMutArrPtrs *arr)
+{
+ for (StgWord i=0; i < arr->ptrs; i++) {
+ visit_closure(arr->payload[i]);
+ }
+}
+
+void HeapVisitor::visit_bytearray(StgArrBytes* _arr) { }
+
+void HeapVisitor::visit_tso(StgTSO *tso)
+{
+ if (tso->bound != NULL) {
+
+ visit_closure((StgClosure*) tso->bound->tso);
+ }
+ if (tso->label != NULL) {
+ visit_closure({(StgClosure*) tso->label});
+ }
+ visit_closure((StgClosure*) tso->blocked_exceptions);
+ visit_closure((StgClosure*) tso->bq);
+ visit_closure((StgClosure*) tso->stackobj);
+ visit_closure((StgClosure*) tso->_link);
+ visit_trec_header(tso->trec);
+
+ switch (tso->why_blocked) {
+ case BlockedOnMVar:
+ case BlockedOnMVarRead:
+ case BlockedOnBlackHole:
+ case BlockedOnMsgThrowTo:
+ case NotBlocked:
+ visit_closure(tso->block_info.closure);
+ break;
+ default:
+ break;
+ }
+}
+
+void HeapVisitor::visit_continuation(StgContinuation *cont)
+{
+ visit_stack(cont->stack, cont->stack + cont->stack_size);
+}
+
+void HeapVisitor::visit_tvar(StgTVar *tvar)
+{
+ visit_closure(tvar->current_value);
+ visit_closure((StgClosure*) tvar->first_watch_queue_entry);
+}
+
+void HeapVisitor::visit_trec_header(StgTRecHeader *trec)
+{
+ if (trec == NO_TREC) {
+ return;
+ }
+ visit_trec_chunk(trec->current_chunk);
+ visit_closure((StgClosure*) trec->enclosing_trec);
+}
+
+void HeapVisitor::visit_trec_chunk(StgTRecChunk *tc)
+{
+ if (tc->prev_chunk != END_STM_CHUNK_LIST) {
+ visit_closure((StgClosure*) tc->prev_chunk);
+ }
+
+ for (uint32_t i = 0; i < tc->next_entry_idx; i++) {
+ TRecEntry *e = &tc->entries[i];
+ visit_closure((StgClosure*)e->tvar);
+ visit_closure(e->expected_value);
+ visit_closure(e->new_value);
+ }
+}
+
+void HeapVisitor::visit_stack(StgPtr p, StgPtr stack_end)
+{
+ while (p < stack_end) {
+ const StgRetInfoTable* info = get_ret_itbl((StgClosure *) p);
+
+ auto add_srt_ptrs = [&] () {
+ if (info->i.srt) {
+ StgClosure *srt = (StgClosure*)GET_SRT(info);
+ visit_srt(srt);
+ }
+ };
+
+ switch (info->i.type) {
+
+ case UPDATE_FRAME:
+ {
+ StgUpdateFrame *frame = (StgUpdateFrame *)p;
+ visit_closure(frame->updatee);
+ p += sizeofW(StgUpdateFrame);
+ continue;
+ }
+
+ case CATCH_STM_FRAME:
+ case CATCH_RETRY_FRAME:
+ case ATOMICALLY_FRAME:
+ case UNDERFLOW_FRAME:
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case RET_SMALL:
+ {
+ StgWord bitmap = BITMAP_BITS(info->i.layout.bitmap);
+ StgWord size = BITMAP_SIZE(info->i.layout.bitmap);
+ // NOTE: the payload starts immediately after the info-ptr, we
+ // don't have an StgHeader in the same sense as a heap closure.
+ p++;
+ visit_small_bitmap((StgClosure**) p, bitmap, size);
+ p += size;
+ add_srt_ptrs();
+ continue;
+ }
+
+ case RET_BCO:
+ {
+ p++;
+ StgBCO *bco = (StgBCO *)*p;
+ visit_closure((StgClosure *) bco);
+ p++;
+ StgWord size = BCO_BITMAP_SIZE(bco);
+ visit_large_bitmap((StgClosure**) p, BCO_BITMAP(bco), size);
+ p += size;
+ continue;
+ }
+
+ case RET_BIG:
+ {
+ StgWord size = GET_LARGE_BITMAP(&info->i)->size;
+ p++;
+ visit_large_bitmap((StgClosure**) p, GET_LARGE_BITMAP(&info->i), size);
+ p += size;
+ // and don't forget to follow the SRT
+ add_srt_ptrs();
+ break;
+ }
+
+ case RET_FUN:
+ {
+ StgRetFun *ret_fun = (StgRetFun *)p;
+ visit_closure(ret_fun->fun);
+
+ const StgFunInfoTable *fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ {
+ StgWord bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ StgWord size = BITMAP_SIZE(fun_info->f.b.bitmap);
+ visit_small_bitmap(ret_fun->payload, bitmap, size);
+ p = (StgPtr) ((StgClosure **) &ret_fun->payload + size);
+ break;
+ }
+ case ARG_GEN_BIG:
+ {
+ StgWord size = GET_FUN_LARGE_BITMAP(fun_info)->size;
+ visit_large_bitmap(ret_fun->payload, GET_FUN_LARGE_BITMAP(fun_info), size);
+ p = (StgPtr) ((StgClosure **) &ret_fun->payload + size);
+ break;
+ }
+ default:
+ {
+ StgWord bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ StgWord size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
+ visit_small_bitmap(ret_fun->payload, bitmap, size);
+ p = (StgPtr) ((StgClosure **) &ret_fun->payload + size);
+ break;
+ }
+ }
+ add_srt_ptrs();
+ break;
+ }
+ default:
+ abort();
+ }
+ }
+}
+
+void HeapVisitor::visit_small_bitmap(
+ StgClosure *const *payload,
+ StgWord bitmap,
+ StgWord size)
+{
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ visit_closure(*payload);
+ }
+ payload++;
+ bitmap = bitmap >> 1;
+ size--;
+ }
+}
+
+void HeapVisitor::visit_large_bitmap(
+ StgClosure *const * payload,
+ const StgLargeBitmap *large_bitmap,
+ StgWord size)
+{
+ // Bitmap may have more bits than `size` when scavenging PAP payloads. See
+ // comments around StgPAP.
+ ASSERT(large_bitmap->size >= size);
+
+ uint32_t b = 0;
+ for (uint32_t i = 0; i < size; b++) {
+ StgWord bitmap = large_bitmap->bitmap[b];
+ uint32_t j = stg_min(size-i, BITS_IN(W_));
+ i += j;
+ for (; j > 0; j--, payload++) {
+ if ((bitmap & 1) == 0) {
+ visit_closure(*payload);
+ }
+ bitmap = bitmap >> 1;
+ }
+ }
+}
+
+void HeapVisitor::visit_pap_payload(
+ StgClosure *fun,
+ StgClosure **payload,
+ StgWord n_args)
+{
+ fun = UNTAG_CLOSURE(fun);
+ const StgFunInfoTable *fun_info = get_fun_itbl(fun);
+ ASSERT(fun_info->i.type != PAP);
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ visit_small_bitmap(payload, BITMAP_BITS(fun_info->f.b.bitmap), n_args);
+ break;
+ case ARG_GEN_BIG:
+ visit_large_bitmap(payload, GET_FUN_LARGE_BITMAP(fun_info), n_args);
+ break;
+ case ARG_BCO:
+ visit_large_bitmap(payload, BCO_BITMAP(fun), n_args);
+ break;
+ default:
+ {
+ StgWord bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ visit_small_bitmap(payload, bitmap, n_args);
+ }
+ }
+}
+
+void HeapVisitor::visit_closure(TaggedClosurePtr tagged)
+{
+ StgClosure *c = tagged.untag();
+ if (c->header.info == (StgInfoTable *) 0xaaaaaaaaaaaaaaaa || !LOOKS_LIKE_CLOSURE_PTR(c)) {
+ visit_invalid(c);
+ return;
+ }
+
+ const StgInfoTable *info = get_itbl(c);
+ auto generic_closure = [&] () {
+ for (StgClosure **p = &c->payload[0]; p < &c->payload[info->layout.payload.ptrs]; p++) {
+ visit_closure(*p);
+ }
+ };
+
+ switch (info->type) {
+
+ case MVAR_CLEAN:
+ case MVAR_DIRTY:
+ visit_mvar((StgMVar *) c);
+ break;
+ case TVAR:
+ visit_tvar((StgTVar *) c);
+ break;
+
+ case IND:
+ case IND_STATIC:
+ visit_closure(((StgInd *) c)->indirectee);
+ break;
+
+ case THUNK_0_1:
+ case THUNK_0_2:
+ visit_thunk((StgThunk*) c, 0);
+ break;
+ case THUNK_1_1:
+ case THUNK_1_0:
+ visit_thunk((StgThunk*) c, 1);
+ break;
+ case THUNK_2_0:
+ visit_thunk((StgThunk*) c, 2);
+ break;
+ case THUNK:
+ visit_thunk((StgThunk*) c, info->layout.payload.ptrs);
+ break;
+ case THUNK_STATIC:
+ visit_thunk((StgThunk*) c, 0);
+ break;
+
+ case FUN_1_0:
+ visit_fun(c, 1);
+ break;
+ case FUN_0_1:
+ case FUN_0_2:
+ visit_fun(c, 0);
+ break;
+ case FUN_1_1:
+ visit_fun(c, 1);
+ break;
+ case FUN_2_0:
+ visit_fun(c, 2);
+ break;
+ case FUN:
+ case FUN_STATIC:
+ visit_fun(c, info->layout.payload.ptrs);
+ break;
+
+ case CONSTR_0_1:
+ case CONSTR_0_2:
+ visit_constr(c, 0);
+ break;
+ case CONSTR_1_0:
+ case CONSTR_1_1:
+ visit_constr(c, 1);
+ break;
+ case CONSTR_2_0:
+ visit_constr(c, 2);
+ break;
+ case CONSTR:
+ case CONSTR_NOCAF:
+ visit_constr(c, info->layout.payload.ptrs);
+ break;
+
+ case PRIM:
+ generic_closure();
+ break;
+ case WEAK:
+ visit_weak((StgWeak*) c);
+ break;
+ case BCO:
+ {
+ StgBCO *bco = (StgBCO *)c;
+ visit_closure((StgClosure*) bco->instrs);
+ visit_closure((StgClosure*) bco->literals);
+ visit_closure((StgClosure*) bco->ptrs);
+ break;
+ }
+ case BLACKHOLE:
+ {
+ StgInd *ind = (StgInd*) c;
+ visit_closure(ind->indirectee);
+ break;
+ }
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ {
+ StgMutVar *mv = (StgMutVar*) c;
+ visit_closure(mv->var);
+ break;
+ }
+ case BLOCKING_QUEUE:
+ {
+ StgBlockingQueue *bq = (StgBlockingQueue *)c;
+ visit_closure((StgClosure*) bq->bh);
+ visit_closure((StgClosure*) bq->owner);
+ visit_closure((StgClosure*) bq->queue);
+ visit_closure((StgClosure*) bq->link);
+ break;
+ }
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)c;
+ visit_closure(s->selectee);
+ break;
+ }
+ case AP_STACK:
+ {
+ StgAP_STACK *ap = (StgAP_STACK *)c;
+ visit_closure(ap->fun);
+ visit_stack((StgPtr) ap->payload, (StgPtr) ap->payload + ap->size);
+ break;
+ }
+ case PAP:
+ {
+ StgPAP *pap = (StgPAP*) c;
+ visit_closure(pap->fun);
+ visit_pap_payload(pap->fun, (StgClosure**) pap->payload, pap->n_args);
+ break;
+ }
+ case AP:
+ {
+ StgAP *ap = (StgAP*) c;
+ visit_closure(ap->fun);
+ visit_pap_payload(ap->fun, (StgClosure**) ap->payload, ap->n_args);
+ break;
+ }
+ case ARR_WORDS:
+ visit_bytearray((StgArrBytes *) c);
+ break;
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ visit_array((StgMutArrPtrs *) c);
+ break;
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+ visit_small_array((StgSmallMutArrPtrs *) c);
+ break;
+ case TSO:
+ visit_tso((StgTSO *) c);
+ break;
+ case STACK:
+ {
+ StgStack *stack = (StgStack *) c;
+ visit_stack(stack->sp, stack->stack + stack->stack_size);
+ break;
+ }
+ case MUT_PRIM:
+ generic_closure();
+ break;
+ case TREC_CHUNK:
+ visit_trec_chunk((StgTRecChunk *) c);
+ break;
+ case CONTINUATION:
+ visit_continuation((StgContinuation *) c);
+ break;
+ default:
+ visit_invalid(c);
+ break;
+ }
+}
+
+class PredicatedHeapVisitor : HeapVisitor {
+ bool should_visit(StgClosure *);
+
+ virtual void visit_srt(StgClosure* c) {
+ if (should_visit(c)) { HeapVisitor::visit_srt(c); }
+ }
+
+ virtual void visit_closure(TaggedClosurePtr c) {
+ if (should_visit(c.untag())) { HeapVisitor::visit_closure(c); }
+ }
+};
+
+// Collect direct pointers
+struct CollectPointers : HeapVisitor {
+ std::set<TaggedClosurePtr> accum;
+ bool invalid;
+ CollectPointers() : accum(), invalid(false) {}
+ void visit_root(StgClosure *c) {
+ HeapVisitor::visit_closure(c);
+ }
+ void visit_closure(TaggedClosurePtr c) {
+ accum.insert(c);
+ }
+ void visit_invalid(StgClosure *const _c) { invalid = true; }
+};
+
+static std::set<TaggedClosurePtr> collect_pointers(StgClosure* c)
+{
+ CollectPointers v;
+ v.visit_root(c);
+ return v.accum;
+}
+
+
+
+struct Error {
+ StgClosure *closure;
+ std::string what;
+ Error(StgClosure *closure, std::string what) : closure(closure), what(what) {}
+};
+
+static std::ostream& operator<<(std::ostream& os, const Error& err) {
+ os << std::hex << "0x" << (StgWord) err.closure << ": " << err.what << "\n";
+ return os;
+}
+
+class CheckVisitor : HeapVisitor {
+ std::vector<Error> errors;
+ uint8_t tag;
+ void visit_constr(StgClosure* c) {
+ const StgInfoTable *info = get_itbl(c);
+ if (tag != 0) {
+ uint8_t constr_tag = info->srt; // zero-based
+ if (tag != std::min(TAG_MASK, constr_tag+1)) {
+ errors.push_back(Error(c, "invalid tag"));
+ }
+ }
+ }
+
+ void visit_closure(TaggedClosurePtr c) { }
+public:
+ const std::vector<Error>& get_errors() const { return errors; }
+
+ void check_closure(TaggedClosurePtr c) {
+ tag = c.get_tag();
+ HeapVisitor::visit_closure(c);
+ }
+};
+
+struct CheckGc {
+ std::queue<TaggedClosurePtr> queue;
+ std::unordered_set<TaggedClosurePtr> enqueued;
+
+ void enqueue(TaggedClosurePtr ptr) {
+ ASSERT(ptr != NULL);
+ if (!is_enqueued(ptr)) {
+ queue.push(ptr);
+ enqueued.insert(ptr);
+ }
+ }
+
+ bool finished() {
+ return queue.empty();
+ }
+
+ TaggedClosurePtr pop() {
+ TaggedClosurePtr p = queue.front();
+ queue.pop();
+ return p;
+ }
+
+ bool is_enqueued(TaggedClosurePtr ptr) {
+ return enqueued.find(ptr) != enqueued.end();
+ }
+};
+
+static void enqueue_root(void *user_data, StgClosure **root)
+{
+ CheckGc* env = (CheckGc*) user_data;
+ env->enqueue(*root);
+}
+
+static void enqueue_roots(CheckGc& env)
+{
+ FOR_EACH_STABLE_NAME(p, if (p->sn_obj) env.enqueue(p->sn_obj););
+ markStablePtrTable(enqueue_root, &env);
+ for (uint32_t n = 0; n < getNumCapabilities(); n++) {
+ markCapability(enqueue_root, (void*) &env, getCapability(n), false/*mark sparks*/);
+ }
+ markCAFs(enqueue_root, &env);
+ markScheduler(enqueue_root, &env);
+
+ for (StgWeak *w = nonmoving_weak_ptr_list; w != NULL; w = w->link) {
+ env.enqueue((StgClosure *) w);
+ }
+
+ for (uint32_t g = 0; g <= N; g++) {
+ generation *gen = &generations[g];
+ for (StgWeak *w = gen->weak_ptr_list; w != NULL; w = RELAXED_LOAD(&w->link)) {
+ env.enqueue((StgClosure *) w);
+ }
+ }
+}
+
+extern "C" {
+void check_gc();
+}
+
+struct NodeName {
+ const StgClosure *c;
+ NodeName(const StgClosure *c) : c(c) {}
+};
+
+static std::ostream& operator<<(std::ostream& os, const NodeName& n) {
+ os << std::hex << "\"" << n.c << "\"" << std::dec;
+ return os;
+}
+
+static void dump_heap(std::ofstream& of)
+{
+ of << "digraph {\n";
+ CheckGc env;
+ enqueue_roots(env);
+ while (!env.finished()) {
+ TaggedClosurePtr tagged = env.pop();
+ StgClosure* c = tagged.untag();
+ NodeName n(c);
+ if (c->header.info == (StgInfoTable *) 0xaaaaaaaaaaaaaaaa) {
+ of << n << " [type=invalid];\n";
+ continue;
+ }
+
+ const StgInfoTable *info = get_itbl(c);
+ switch (info->type) {
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_NOCAF:
+ {
+ const StgConInfoTable *con_info = get_con_itbl(c);
+ of << n << " [type=CONSTR constr=\"" << GET_CON_DESC(con_info) << "\"];\n";
+ break;
+ }
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+ of << n << " [type=FUN];\n";
+ break;
+ case FUN_STATIC:
+ of << n << " [type=FUN_STATIC];\n";
+ break;
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_2_0:
+ of << n << " [type=THUNK];\n";
+ break;
+ case THUNK_STATIC:
+ of << n << " [type=THUNK_STATIC];\n";
+ break;
+ case THUNK_SELECTOR:
+ of << n << " [type=THUNK_SEL];\n";
+ break;
+ case BCO:
+ of << n << " [type=BCO];\n";
+ break;
+ case AP:
+ of << n << " [type=AP];\n";
+ break;
+ case PAP:
+ of << n << " [type=PAP];\n";
+ break;
+ case AP_STACK:
+ of << n << " [type=AP_STACK];\n";
+ break;
+ case IND:
+ of << n << " [type=IND];\n";
+ break;
+ case IND_STATIC:
+ of << n << " [type=IND_STATIC];\n";
+ break;
+ case BLOCKING_QUEUE:
+ of << n << " [type=BLOCKING_QUEUE];\n";
+ break;
+ case BLACKHOLE:
+ of << n << " [type=BLACKHOLE];\n";
+ break;
+ case MVAR_CLEAN:
+ case MVAR_DIRTY:
+ of << n << " [type=MVAR];\n";
+ break;
+ case TVAR:
+ of << n << " [type=TVAR];\n";
+ break;
+ case ARR_WORDS:
+ of << n << " [type=ARR_WORDS];\n";
+ break;
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ of << n << " [type=MUT_ARR_PTRS];\n";
+ break;
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+ of << n << " [type=SMALL_MUT_ARR_PTRS];\n";
+ break;
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ of << n << " [type=MUT_VAR];\n";
+ break;
+ case WEAK:
+ of << n << " [type=WEAK];\n";
+ break;
+ case PRIM:
+ of << n << " [type=PRIM];\n";
+ break;
+ case MUT_PRIM:
+ of << n << " [type=MUT_PRIM];\n";
+ break;
+ case TSO:
+ of << n << " [type=TSO];\n";
+ break;
+ case STACK:
+ of << n << " [type=STACK];\n";
+ break;
+ case TREC_CHUNK:
+ of << n << " [type=TREC_CHUNK];\n";
+ break;
+ case WHITEHOLE:
+ of << n << " [type=WHITEHOLE];\n";
+ break;
+ case COMPACT_NFDATA:
+ of << n << " [type=COMPACT_NFDATA];\n";
+ break;
+ case CONTINUATION:
+ of << n << " [type=CONTINUATION];\n";
+ break;
+ default:
+ of << n << " [type=unknown];\n";
+ break;
+ }
+
+ if (!HEAP_ALLOCED((StgPtr) c)) {
+ of << n << " [static=yes];\n";
+ } else {
+ bdescr *bd = Bdescr((StgPtr) c);
+ of << n << " [gen=" << bd->gen_no << "];\n";
+ if (bd->flags & BF_EVACUATED) {
+ of << n << " [evacuated=yes];\n";
+ }
+ if (bd->flags & BF_PINNED) {
+ of << n << " [pinned=yes];\n";
+ }
+ if (bd->flags & BF_LARGE) {
+ of << n << " [large=yes];\n";
+ } else if (bd->flags & BF_NONMOVING) {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) c);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) c);
+ uint8_t mark = nonmovingGetMark(seg, block_idx);
+ StgClosure *snapshot_loc =
+ (StgClosure *) nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
+ if (c > snapshot_loc) {
+ of << n << " [nonmoving=yes new=yes mark=" << (StgWord) mark << "];\n";
+ } else {
+ of << n << " [nonmoving=yes mark=" << (StgWord) mark << "];\n";
+ }
+ } else {
+ of << n << " [moving=yes];\n";
+ }
+ }
+ for (TaggedClosurePtr p : collect_pointers(c)) {
+ of << n << " -> " << NodeName(p.untag()) << ";\n";
+ env.enqueue(p);
+ }
+ }
+ of << "}\n";
+}
+
+void dump_heap_to(const char *fname);
+void dump_heap_to(const char *fname)
+{
+ std::ofstream out(fname);
+ dump_heap(out);
+ out.flush();
+}
+
+void check_gc()
+{
+ CheckGc env;
+ enqueue_roots(env);
+ std::vector<Error> errors;
+
+ while (!env.finished()) {
+ TaggedClosurePtr tagged = env.pop();
+ StgClosure* c = tagged.untag();
+
+ {
+ CheckVisitor check;
+ check.check_closure(tagged);
+ for (const Error& e : check.get_errors()) {
+ errors.push_back(e);
+ }
+ }
+
+ for (TaggedClosurePtr p : collect_pointers(c)) {
+ env.enqueue(p);
+ }
+
+ if (c->header.info == (StgInfoTable *) 0xaaaaaaaaaaaaaaaa) {
+ errors.push_back(Error(c, "is invalid closure"));
+ continue;
+ }
+
+ const StgInfoTable *info = get_itbl(c);
+ if (!HEAP_ALLOCED((StgPtr) c)) {
+ switch (info->type) {
+ case THUNK_STATIC:
+ if (info->srt != 0) {
+
+ }
+ }
+ } else {
+ bdescr *bd = Bdescr((StgPtr) c);
+ if (bd->gen_no < 1) {
+ /* nothing to check as we are focused on post nonmoving-GC checking */
+ } else if (bd->flags & BF_NONMOVING && bd->flags & BF_LARGE) {
+ if (bd->flags & BF_NONMOVING_SWEEPING && !(bd->flags & BF_MARKED)) {
+ errors.push_back(Error(c, "is not marked yet being swept"));
+ }
+ } else if (bd->flags & BF_NONMOVING) {
+ struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) c);
+ nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) c);
+ uint8_t mark = nonmovingGetMark(seg, block_idx);
+ StgClosure *snapshot_loc =
+ (StgClosure *) nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
+ if (bd->flags & BF_NONMOVING_SWEEPING) {
+ /* in a swept segment */
+ if (mark != nonmovingMarkEpoch) {
+ errors.push_back(Error(c, "is unmarked nonmoving object being swept"));
+ }
+ } else if (c < snapshot_loc) {
+ /* not in a swept segment but in the snapshot */
+ if (mark != nonmovingMarkEpoch) {
+ errors.push_back(Error(c, "is unmarked nonmoving object in the snapshot"));
+ }
+ } else {
+ /* not in the snapshot; nothing to assert */
+ }
+ } else if (bd->flags & BF_LARGE) {
+ if (! (bd->flags & BF_MARKED)) {
+ errors.push_back(Error(c, "is unmarked large object"));
+ }
+ } else {
+ if (!(bd->flags & BF_EVACUATED)) {
+ //errors.push_back(Error(c, "is in from-space block"));
+ }
+ }
+ }
+ }
+
+ if (!errors.empty()) {
+ for (auto err : errors) {
+ std::cerr << err << "\n";
+ }
+ dump_heap_to("heap.dot");
+ abort();
+ }
+}
+
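As a usage note (not part of the patch): HeapVisitor is intended to be subclassed, with CollectPointers and CheckVisitor above as the two in-tree instances, and check_gc() or dump_heap_to("heap.dot") can also be invoked by hand from a debugger while the RTS is stopped. A hypothetical further visitor that counts reachable closures might look like this (CountClosures and its members are illustrative names only):

    struct CountClosures : HeapVisitor {
        size_t n = 0;
        std::unordered_set<TaggedClosurePtr> seen;
        void visit_closure(TaggedClosurePtr c) override {
            if (seen.insert(c).second) {
                n++;
                // recurse into this closure's fields via the base traversal
                HeapVisitor::visit_closure(c);
            }
        }
        void visit_invalid(StgClosure *const) override { /* tolerate junk */ }
    };

Unlike CheckGc's explicit work queue, this sketch recurses on the C++ stack, so it is only suitable for shallow heaps.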
=====================================
rts/sm/Evac.c
=====================================
@@ -1232,7 +1232,7 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val)
-------------------------------------------------------------------------- */
static void
-eval_thunk_selector (StgClosure **q, StgSelector *p, bool evac)
+eval_thunk_selector (StgClosure **const q, StgSelector *p, bool evac)
// NB. for legacy reasons, p & q are swapped around :(
{
uint32_t field;
@@ -1252,13 +1252,22 @@ selector_chain:
bd = Bdescr((StgPtr)p);
if (HEAP_ALLOCED_GC(p)) {
+ const uint16_t flags = RELAXED_LOAD(&bd->flags);
+
+ // We should never see large objects here
+ ASSERT(!(flags & BF_LARGE));
+
// If the THUNK_SELECTOR is in to-space or in a generation that we
// are not collecting, then bale out early. We won't be able to
// save any space in any case, and updating with an indirection is
// trickier in a non-collected gen: we would have to update the
// mutable list.
- if (RELAXED_LOAD(&bd->flags) & (BF_EVACUATED | BF_NONMOVING)) {
+ if (flags & (BF_EVACUATED | BF_NONMOVING)) {
unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
+ if (flags & BF_NONMOVING) {
+ // See Note [Non-moving GC: Marking evacuated objects].
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure*) p);
+ }
*q = (StgClosure *)p;
// shortcut, behave as for: if (evac) evacuate(q);
if (evac && bd->gen_no < gct->evac_gen_no) {
@@ -1273,7 +1282,7 @@ selector_chain:
// (scavenge_mark_stack doesn't deal with IND). BEWARE! This
// bit is very tricky to get right. If you make changes
// around here, test by compiling stage 3 with +RTS -c -RTS.
- if (bd->flags & BF_MARKED) {
+ if (flags & BF_MARKED) {
// must call evacuate() to mark this closure if evac==true
*q = (StgClosure *)p;
if (evac) evacuate(q);
@@ -1313,6 +1322,12 @@ selector_chain:
// - undo the chain we've built to point to p.
SET_INFO((StgClosure *)p, (const StgInfoTable *)info_ptr);
RELEASE_STORE(q, (StgClosure *) p);
+ if (Bdescr((StgPtr)p)->flags & BF_NONMOVING) {
+ // See Note [Non-moving GC: Marking evacuated objects].
+ // TODO: This really shouldn't be necessary since whoever won
+ // the race should have pushed
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure*) p);
+ }
if (evac) evacuate(q);
unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
return;
@@ -1403,6 +1418,11 @@ selector_loop:
case THUNK_SELECTOR:
// Use payload to make a list of thunk selectors, to be
// used in unchain_thunk_selectors
+ //
+ // FIXME: This seems racy; should we lock this selector to
+ // ensure that another thread doesn't clobber this node
+ // of the chain. This would result in some previous
+ // selectors not being updated when we unchain.
RELAXED_STORE(&((StgClosure*)p)->payload[0], (StgClosure *)prev_thunk_selector);
prev_thunk_selector = p;
p = (StgSelector*)val;
@@ -1427,6 +1447,12 @@ selector_loop:
// eval_thunk_selector(), because we know val is not
// a THUNK_SELECTOR.
if (evac) evacuate(q);
+
+ if (isNonmovingClosure(*q)) {
+ // See Note [Non-moving GC: Marking evacuated objects].
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure*) *q);
+ }
+
return;
}
@@ -1471,6 +1497,10 @@ selector_loop:
// recurse indefinitely, so we impose a depth bound.
// See Note [Selector optimisation depth limit].
if (gct->thunk_selector_depth >= MAX_THUNK_SELECTOR_DEPTH) {
+ if (isNonmovingClosure((StgClosure *) p)) {
+ // See Note [Non-moving GC: Marking evacuated objects].
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure*) p);
+ }
goto bale_out;
}
@@ -1517,5 +1547,9 @@ bale_out:
if (evac) {
copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest_no);
}
+ if (isNonmovingClosure(*q)) {
+ // See Note [Non-moving GC: Marking evacuated objects].
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, *q);
+ }
unchain_thunk_selectors(prev_thunk_selector, *q);
}
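Each hunk above adds the same shape of barrier; a condensed sketch of the pattern (per Note [Non-moving GC: Marking evacuated objects]; "result" is a stand-in for whichever closure is about to be returned through *q):

    if (isNonmovingClosure(result)) {
        // The copying GC will not move this object, but the concurrent mark
        // still has to see it to keep the snapshot complete, so push it to
        // the update remembered set of the GC thread's capability.
        markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, result);
    }
    *q = result;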
=====================================
rts/sm/GC.c
=====================================
@@ -312,8 +312,6 @@ GarbageCollect (uint32_t collect_gen,
CostCentreStack *save_CCS[getNumCapabilities()];
#endif
- ACQUIRE_SM_LOCK;
-
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
// block signals
@@ -591,9 +589,7 @@ GarbageCollect (uint32_t collect_gen,
// the current garbage collection, so we invoke LdvCensusForDead().
if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
|| RtsFlags.ProfFlags.bioSelector != NULL) {
- RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
LdvCensusForDead(N);
- ACQUIRE_SM_LOCK;
}
#endif
@@ -762,7 +758,7 @@ GarbageCollect (uint32_t collect_gen,
}
else // not compacted
{
- freeChain(gen->old_blocks);
+ freeChain_lock(gen->old_blocks);
}
gen->old_blocks = NULL;
@@ -773,7 +769,7 @@ GarbageCollect (uint32_t collect_gen,
* collection from large_objects. Any objects left on the
* large_objects list are therefore dead, so we free them here.
*/
- freeChain(gen->large_objects);
+ freeChain_lock(gen->large_objects);
gen->large_objects = gen->scavenged_large_objects;
gen->n_large_blocks = gen->n_scavenged_large_blocks;
gen->n_large_words = countOccupied(gen->large_objects);
@@ -892,7 +888,7 @@ GarbageCollect (uint32_t collect_gen,
if (mark_stack_top_bd != NULL) {
debugTrace(DEBUG_gc, "mark stack: %d blocks",
countBlocks(mark_stack_top_bd));
- freeChain(mark_stack_top_bd);
+ freeChain_lock(mark_stack_top_bd);
}
// Free any bitmaps.
@@ -944,9 +940,7 @@ GarbageCollect (uint32_t collect_gen,
// Start any pending finalizers. Must be after
// updateStableTables() and stableUnlock() (see #4221).
- RELEASE_SM_LOCK;
scheduleFinalizers(cap, dead_weak_ptr_list);
- ACQUIRE_SM_LOCK;
// check sanity after GC
// before resurrectThreads(), because that might overwrite some
@@ -961,9 +955,7 @@ GarbageCollect (uint32_t collect_gen,
// behind.
if (do_heap_census) {
debugTrace(DEBUG_sched, "performing heap census");
- RELEASE_SM_LOCK;
heapCensus(mut_time);
- ACQUIRE_SM_LOCK;
}
#if defined(TICKY_TICKY)
@@ -977,14 +969,14 @@ GarbageCollect (uint32_t collect_gen,
#endif
// send exceptions to any threads which were about to die
- RELEASE_SM_LOCK;
resurrectThreads(resurrected_threads);
- ACQUIRE_SM_LOCK;
// Finally free the deferred mblocks by sorting the deferred free list and
// merging it into the actual sorted free list. This needs to happen here so
// that the `returnMemoryToOS` call down below can successfully free memory.
+ ACQUIRE_SM_LOCK;
commitMBlockFreeing();
+ RELEASE_SM_LOCK;
if (major_gc) {
W_ need_prealloc, need_live, need, got;
@@ -1097,8 +1089,6 @@ GarbageCollect (uint32_t collect_gen,
}
#endif
- RELEASE_SM_LOCK;
-
SET_GCT(saved_gct);
}
@@ -1148,7 +1138,7 @@ new_gc_thread (uint32_t n, gc_thread *t)
// but can't, because it uses gct which isn't set up at this point.
// Hence, allocate a block for todo_bd manually:
{
- bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
+ bdescr *bd = allocBlockOnNode_lock(capNoToNumaNode(n));
// no lock, locks aren't initialised yet
initBdescr(bd, ws->gen, ws->gen->to);
bd->flags = BF_EVACUATED;
@@ -1607,7 +1597,7 @@ static void
stash_mut_list (Capability *cap, uint32_t gen_no)
{
cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
- RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_sync(cap->node));
+ RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_lock(cap->node));
}
/* ----------------------------------------------------------------------------
@@ -1634,9 +1624,9 @@ prepare_collected_gen (generation *gen)
// a check for NULL in recordMutable().
for (i = 0; i < getNumCapabilities(); i++) {
bdescr *old = RELAXED_LOAD(&getCapability(i)->mut_lists[g]);
- freeChain(old);
+ freeChain_lock(old);
- bdescr *new = allocBlockOnNode(capNoToNumaNode(i));
+ bdescr *new = allocBlockOnNode_lock(capNoToNumaNode(i));
RELAXED_STORE(&getCapability(i)->mut_lists[g], new);
}
}
@@ -1719,7 +1709,7 @@ prepare_collected_gen (generation *gen)
bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_);
if (bitmap_size > 0) {
- bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
+ bitmap_bdescr = allocGroup_lock((StgWord)BLOCK_ROUND_UP(bitmap_size)
/ BLOCK_SIZE);
gen->bitmap = bitmap_bdescr;
bitmap = bitmap_bdescr->start;
=====================================
rts/sm/GC.h
=====================================
@@ -13,8 +13,6 @@
#pragma once
-#include "HeapAlloc.h"
-
#include "BeginPrivate.h"
void GarbageCollect (uint32_t collect_gen,
=====================================
rts/sm/GCUtils.c
=====================================
@@ -26,38 +26,15 @@
#include "WSDeque.h"
#endif
-#if defined(THREADED_RTS)
-SpinLock gc_alloc_block_sync;
-#endif
-
static void push_todo_block(bdescr *bd, gen_workspace *ws);
-bdescr* allocGroup_sync(uint32_t n)
-{
- bdescr *bd;
- uint32_t node = capNoToNumaNode(gct->thread_index);
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- bd = allocGroupOnNode(node,n);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
- return bd;
-}
-
-bdescr* allocGroupOnNode_sync(uint32_t node, uint32_t n)
-{
- bdescr *bd;
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- bd = allocGroupOnNode(node,n);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
- return bd;
-}
-
static uint32_t
-allocBlocks_sync(uint32_t n, bdescr **hd)
+allocBlocks_lock(uint32_t n, bdescr **hd)
{
bdescr *bd;
uint32_t i;
uint32_t node = capNoToNumaNode(gct->thread_index);
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK;
bd = allocLargeChunkOnNode(node,1,n);
// NB. allocLargeChunk, rather than allocGroup(n), to allocate in a
// fragmentation-friendly way.
@@ -70,27 +47,11 @@ allocBlocks_sync(uint32_t n, bdescr **hd)
bd[n-1].link = NULL;
// We have to hold the lock until we've finished fiddling with the metadata,
// otherwise the block allocator can get confused.
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
*hd = bd;
return n;
}
-void
-freeChain_sync(bdescr *bd)
-{
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- freeChain(bd);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
-}
-
-void
-freeGroup_sync(bdescr *bd)
-{
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- freeGroup(bd);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
-}
-
/* -----------------------------------------------------------------------------
Workspace utilities
-------------------------------------------------------------------------- */
@@ -303,7 +264,7 @@ todo_block_full (uint32_t size, gen_workspace *ws)
// object. However, if the object we're copying is
// larger than a block, then we might have an empty
// block here.
- freeGroup_sync(bd);
+ freeGroup_lock(bd);
} else {
push_scanned_block(bd, ws);
}
@@ -343,14 +304,14 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)
else
{
if (size > BLOCK_SIZE_W) {
- bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_))
+ bd = allocGroup_lock((W_)BLOCK_ROUND_UP(size*sizeof(W_))
/ BLOCK_SIZE);
} else {
if (gct->free_blocks) {
bd = gct->free_blocks;
gct->free_blocks = bd->link;
} else {
- allocBlocks_sync(16, &bd);
+ allocBlocks_lock(16, &bd);
gct->free_blocks = bd->link;
}
}
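The deleted _sync helpers above were thin wrappers around the block allocator taking the gc_alloc_block_sync spinlock; their call sites now use the pre-existing _lock variants, which (roughly, assuming the usual definitions in the storage manager) take the SM mutex instead:

    bdescr *allocGroup_lock(W_ n)
    {
        bdescr *bd;
        ACQUIRE_SM_LOCK;     // sm_mutex replaces the gc_alloc_block_sync spinlock
        bd = allocGroup(n);
        RELEASE_SM_LOCK;
        return bd;
    }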
=====================================
rts/sm/GCUtils.h
=====================================
@@ -17,22 +17,6 @@
#include "BeginPrivate.h"
-bdescr* allocGroup_sync(uint32_t n);
-bdescr* allocGroupOnNode_sync(uint32_t node, uint32_t n);
-
-INLINE_HEADER bdescr *allocBlock_sync(void)
-{
- return allocGroup_sync(1);
-}
-
-INLINE_HEADER bdescr *allocBlockOnNode_sync(uint32_t node)
-{
- return allocGroupOnNode_sync(node,1);
-}
-
-void freeChain_sync(bdescr *bd);
-void freeGroup_sync(bdescr *bd);
-
void push_scanned_block (bdescr *bd, gen_workspace *ws);
StgPtr todo_block_full (uint32_t size, gen_workspace *ws);
StgPtr alloc_todo_block (gen_workspace *ws, uint32_t size);
@@ -62,7 +46,7 @@ recordMutableGen_GC (StgClosure *p, uint32_t gen_no)
bd = gct->mut_lists[gen_no];
if (bd->free >= bd->start + BLOCK_SIZE_W) {
bdescr *new_bd;
- new_bd = allocBlock_sync();
+ new_bd = allocBlock_lock();
new_bd->link = bd;
bd = new_bd;
gct->mut_lists[gen_no] = bd;
=====================================
rts/sm/HeapAlloc.h
=====================================
@@ -10,6 +10,8 @@
#include "BeginPrivate.h"
+#include "Storage.h"
+
/* -----------------------------------------------------------------------------
The HEAP_ALLOCED() test.
@@ -210,9 +212,9 @@ StgBool HEAP_ALLOCED_GC(const void *p)
} else {
// putting the rest out of line turned out to be a slight
// performance improvement:
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK; // TODO: this may be too expensive
b = HEAP_ALLOCED_miss(mblock,p);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
return b;
}
}
=====================================
rts/sm/MarkStack.h
=====================================
@@ -32,7 +32,7 @@ push_mark_stack(StgPtr p)
}
else
{
- bd = allocBlock_sync();
+ bd = allocBlock_lock();
bd->link = mark_stack_bd;
bd->u.back = NULL;
mark_stack_bd->u.back = bd; // double-link the new block on
=====================================
rts/sm/NonMoving.c
=====================================
@@ -598,14 +598,10 @@ static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
// Nothing in the free list, allocate a new segment...
if (ret == NULL) {
- // Take gc spinlock: another thread may be scavenging a moving
- // generation and call `todo_block_full`
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
// See Note [Live data accounting in nonmoving collector].
oldest_gen->n_blocks += bd->blocks;
oldest_gen->n_words += BLOCK_SIZE_W * bd->blocks;
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
for (StgWord32 i = 0; i < bd->blocks; ++i) {
initBdescr(&bd[i], oldest_gen, oldest_gen);
@@ -668,7 +664,7 @@ static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *al
}
}
-/* Allocate a block in the nonmoving heap. Caller must hold SM_MUTEX. sz is in words */
+/* Allocate a block in the nonmoving heap. sz is in words */
GNUC_ATTR_HOT
void *nonmovingAllocate(Capability *cap, StgWord sz)
{
@@ -708,7 +704,9 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
// there are no active segments, allocate new segment
if (new_current == NULL) {
+ ACQUIRE_SM_LOCK;
new_current = nonmovingAllocSegment(cap->node);
+ RELEASE_SM_LOCK;
nonmovingInitSegment(new_current, log_block_size);
}
@@ -791,14 +789,13 @@ void nonmovingExit(void)
/*
* Assumes that no garbage collector or mutator threads are running to safely
* resize the nonmoving_allocators.
- *
- * Must hold sm_mutex.
*/
void nonmovingAddCapabilities(uint32_t new_n_caps)
{
unsigned int old_n_caps = nonmovingHeap.n_caps;
struct NonmovingAllocator **allocs = nonmovingHeap.allocators;
+ ACQUIRE_SM_LOCK;
for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
struct NonmovingAllocator *old = allocs[i];
allocs[i] = alloc_nonmoving_allocator(new_n_caps);
@@ -820,6 +817,7 @@ void nonmovingAddCapabilities(uint32_t new_n_caps)
}
}
nonmovingHeap.n_caps = new_n_caps;
+ RELEASE_SM_LOCK;
}
void nonmovingClearBitmap(struct NonmovingSegment *seg)
@@ -844,13 +842,14 @@ static void nonmovingPrepareMark(void)
struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
// Update current segments' snapshot pointers
- for (uint32_t cap_n = 0; cap_n < getNumCapabilities(); ++cap_n) {
+ for (uint32_t cap_n = 0; cap_n < nonmovingHeap.n_caps; ++cap_n) {
struct NonmovingSegment *seg = alloca->current[cap_n];
nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
}
// Save the filled segments for later processing during the concurrent
// mark phase.
+ ASSERT(alloca->saved_filled == NULL);
alloca->saved_filled = alloca->filled;
alloca->filled = NULL;
@@ -926,6 +925,7 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
ASSERT(n_nonmoving_marked_compact_blocks == 0);
MarkQueue *mark_queue = stgMallocBytes(sizeof(MarkQueue), "mark queue");
+ mark_queue->blocks = NULL;
initMarkQueue(mark_queue);
current_mark_queue = mark_queue;
@@ -1090,6 +1090,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
seg->link = nonmovingHeap.sweep_list;
nonmovingHeap.sweep_list = filled;
}
+ nonmovingHeap.allocators[alloca_idx]->saved_filled = NULL;
}
// Mark Weak#s
@@ -1183,7 +1184,8 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
nonmoving_old_threads = END_TSO_QUEUE;
}
- nonmoving_weak_ptr_list = nonmoving_old_weak_ptr_list;
+ // At this point any weak that remains on nonmoving_old_weak_ptr_list
+ // has a dead key.
nonmoving_old_weak_ptr_list = NULL;
// Prune spark lists
@@ -1236,6 +1238,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
#if defined(THREADED_RTS) && defined(NONCONCURRENT_SWEEP)
#if defined(DEBUG)
+ check_gc();
checkNonmovingHeap(&nonmovingHeap);
checkSanity(true, true);
#endif
@@ -1293,7 +1296,7 @@ void assert_in_nonmoving_heap(StgPtr p)
for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
// Search current segments
- for (uint32_t cap_idx = 0; cap_idx < getNumCapabilities(); ++cap_idx) {
+ for (uint32_t cap_idx = 0; cap_idx < nonmovingHeap.n_caps; ++cap_idx) {
struct NonmovingSegment *seg = alloca->current[cap_idx];
if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
return;
@@ -1365,7 +1368,7 @@ void nonmovingPrintAllocator(struct NonmovingAllocator *alloc)
debugBelch("%p ", (void*)seg);
}
debugBelch("\nCurrent segments:\n");
- for (uint32_t i = 0; i < getNumCapabilities(); ++i) {
+ for (uint32_t i = 0; i < nonmovingHeap.n_caps; ++i) {
debugBelch("%p ", alloc->current[i]);
}
debugBelch("\n");
@@ -1376,7 +1379,7 @@ void locate_object(P_ obj)
// Search allocators
for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
- for (uint32_t cap = 0; cap < getNumCapabilities(); ++cap) {
+ for (uint32_t cap = 0; cap < nonmovingHeap.n_caps; ++cap) {
struct NonmovingSegment *seg = alloca->current[cap];
if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
debugBelch("%p is in current segment of capability %d of allocator %d at %p\n", obj, cap, alloca_idx, (void*)seg);
=====================================
rts/sm/NonMovingMark.c
=====================================
@@ -37,6 +37,7 @@ static void trace_PAP_payload (MarkQueue *queue,
StgClosure *fun,
StgClosure **payload,
StgWord size);
+static bool isNonmovingWeak(StgWeak *weak);
// How many Array# entries to add to the mark queue at once?
#define MARK_ARRAY_CHUNK_LENGTH 128
@@ -270,6 +271,7 @@ static void nonmovingAddUpdRemSetBlocks_(MarkQueue *rset)
bdescr *end = start;
while (end->link != NULL)
end = end->link;
+ rset->blocks = NULL;
// add the blocks to the global remembered set
ACQUIRE_LOCK(&upd_rem_set_lock);
@@ -291,10 +293,8 @@ static void nonmovingAddUpdRemSetBlocks_lock(MarkQueue *rset)
nonmovingAddUpdRemSetBlocks_(rset);
// Reset the state of the remembered set.
- ACQUIRE_SM_LOCK;
init_mark_queue_(rset);
rset->is_upd_rem_set = true;
- RELEASE_SM_LOCK;
}
/*
@@ -468,9 +468,7 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
}
/* A variant of push to be used by the minor GC when it encounters a reference
- * to an object in the non-moving heap. In contrast to the other push
- * operations this uses the gc_alloc_block_sync spinlock instead of the
- * SM_LOCK to allocate new blocks in the event that the mark queue is full.
+ * to an object in the non-moving heap.
*/
void
markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
@@ -491,13 +489,13 @@ markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
// Yes, this block is full.
// allocate a fresh block.
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK;
bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
bd->link = q->blocks;
q->blocks = bd;
q->top = (MarkQueueBlock *) bd->start;
q->top->head = 0;
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
}
MarkQueueEnt ent = {
@@ -647,6 +645,16 @@ void updateRemembSetPushThunkEager(Capability *cap,
}
break;
}
+ case THUNK_SELECTOR:
+ {
+ StgSelector *sel = (StgSelector *) thunk;
+ if (check_in_nonmoving_heap(sel->selectee)) {
+ // Don't bother to push origin; it makes the barrier needlessly
+ // expensive with little benefit.
+ push_closure(queue, sel->selectee, NULL);
+ }
+ break;
+ }
case AP:
{
StgAP *ap = (StgAP *) thunk;
@@ -656,9 +664,11 @@ void updateRemembSetPushThunkEager(Capability *cap,
trace_PAP_payload(queue, ap->fun, ap->payload, ap->n_args);
break;
}
- case THUNK_SELECTOR:
+ // We may end up here if a thunk update races with another update.
+ // In this case there is nothing to do as the other thread will have
+ // already pushed the updated thunk's free variables to the update
+ // remembered set.
case BLACKHOLE:
- // TODO: This is right, right?
break;
// The selector optimization performed by the nonmoving mark may have
// overwritten a thunk which we are updating with an indirection.
@@ -916,7 +926,8 @@ static MarkQueueEnt markQueuePop (MarkQueue *q)
/* Must hold sm_mutex. */
static void init_mark_queue_ (MarkQueue *queue)
{
- bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
+ bdescr *bd = allocGroup_lock(MARK_QUEUE_BLOCKS);
+ ASSERT(queue->blocks == NULL);
queue->blocks = bd;
queue->top = (MarkQueueBlock *) bd->start;
queue->top->head = 0;
@@ -926,14 +937,12 @@ static void init_mark_queue_ (MarkQueue *queue)
#endif
}
-/* Must hold sm_mutex. */
void initMarkQueue (MarkQueue *queue)
{
init_mark_queue_(queue);
queue->is_upd_rem_set = false;
}
-/* Must hold sm_mutex. */
void nonmovingInitUpdRemSet (UpdRemSet *rset)
{
init_mark_queue_(&rset->queue);
@@ -1522,10 +1531,12 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
break;
}
+ case WEAK:
+ ASSERT(isNonmovingWeak((StgWeak*) p));
+ // fallthrough
gen_obj:
case CONSTR:
case CONSTR_NOCAF:
- case WEAK:
case PRIM:
{
for (StgWord i = 0; i < info->layout.payload.ptrs; i++) {
@@ -1582,8 +1593,15 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
}
case THUNK_SELECTOR:
- nonmoving_eval_thunk_selector(queue, (StgSelector*)p, origin);
+ {
+ StgSelector *sel = (StgSelector *) p;
+ // We may be able to evaluate this selector which may render the
+ // selectee unreachable. However, we must mark the selectee regardless
+ // to satisfy the snapshot invariant.
+ PUSH_FIELD(sel, selectee);
+ nonmoving_eval_thunk_selector(queue, sel, origin);
break;
+ }
case AP_STACK: {
StgAP_STACK *ap = (StgAP_STACK *)p;
@@ -1915,6 +1933,17 @@ void nonmovingMarkWeakPtrList (struct MarkQueue_ *queue)
}
}
+static bool isNonmovingWeak(StgWeak *weak)
+{
+ for (StgWeak *w = nonmoving_old_weak_ptr_list; w != NULL; w = w->link) {
+ if (w == weak) return true;
+ }
+ for (StgWeak *w = nonmoving_weak_ptr_list; w != NULL; w = w->link) {
+ if (w == weak) return true;
+ }
+ return false;
+}
+
// Non-moving heap variant of `tidyWeakList`
bool nonmovingTidyWeaks (struct MarkQueue_ *queue)
{
@@ -1947,7 +1976,7 @@ bool nonmovingTidyWeaks (struct MarkQueue_ *queue)
*last_w = w->link;
next_w = w->link;
- // and put it on the weak ptr list
+ // and put it on nonmoving_weak_ptr_list
w->link = nonmoving_weak_ptr_list;
nonmoving_weak_ptr_list = w;
} else {
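For context on the updateRemembSetPushThunkEager changes above: the function is reached from the mutator-side write barrier that runs before a thunk is overwritten with an indirection. A rough sketch of that caller, assuming the usual entry point updateRemembSetPushThunk and the nonmoving_write_barrier_enabled flag (updateThunk_barrier is an illustrative name, not an RTS function):

    static void updateThunk_barrier(Capability *cap, StgThunk *thunk)
    {
        if (nonmoving_write_barrier_enabled) {
            // Push the thunk's free variables (or, for a THUNK_SELECTOR,
            // just its selectee) so the snapshot still covers everything
            // the thunk could reach before being updated.
            updateRemembSetPushThunk(cap, thunk);
        }
        /* ... then overwrite the thunk with an indirection ... */
    }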
=====================================
rts/sm/NonMovingMark.h
=====================================
@@ -9,10 +9,11 @@
#pragma once
#include "Task.h"
-#include "NonMoving.h"
#include "BeginPrivate.h"
+struct NonMovingHeap;
+
enum EntryType {
NULL_ENTRY = 0,
MARK_CLOSURE = 1,
@@ -63,7 +64,7 @@ INLINE_HEADER enum EntryType nonmovingMarkQueueEntryType(MarkQueueEnt *ent)
{
uintptr_t tag = (uintptr_t) ent->null_entry.p & TAG_MASK;
ASSERT(tag <= MARK_ARRAY);
- return tag;
+ return (enum EntryType) tag;
}
typedef struct {
@@ -155,7 +156,7 @@ void markQueueAddRoot(MarkQueue* q, StgClosure** root);
void initMarkQueue(MarkQueue *queue);
void freeMarkQueue(MarkQueue *queue);
-void nonmovingMark(struct MarkQueue_ *restrict queue);
+void nonmovingMark(struct MarkQueue_ *__restrict__ queue);
void nonmovingMarkWeakPtrList(struct MarkQueue_ *queue);
bool nonmovingTidyWeaks(struct MarkQueue_ *queue);
=====================================
rts/sm/NonMovingShortcut.c
=====================================
@@ -10,6 +10,7 @@
#include "Rts.h"
#include "GC.h"
#include "SMPClosureOps.h"
+#include "NonMoving.h"
#include "NonMovingMark.h"
#include "NonMovingShortcut.h"
#include "Printer.h"
=====================================
rts/sm/Sanity.c
=====================================
@@ -639,6 +639,7 @@ void checkNonmovingHeap (const struct NonmovingHeap *heap)
for (unsigned int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
const struct NonmovingAllocator *alloc = heap->allocators[i];
checkNonmovingSegments(alloc->filled);
+ checkNonmovingSegments(alloc->saved_filled);
checkNonmovingSegments(alloc->active);
for (unsigned int cap=0; cap < getNumCapabilities(); cap++) {
checkNonmovingSegments(alloc->current[cap]);
@@ -1071,6 +1072,7 @@ findMemoryLeak (void)
for (i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
struct NonmovingAllocator *alloc = nonmovingHeap.allocators[i];
markNonMovingSegments(alloc->filled);
+ markNonMovingSegments(alloc->saved_filled);
markNonMovingSegments(alloc->active);
for (j = 0; j < getNumCapabilities(); j++) {
markNonMovingSegments(alloc->current[j]);
=====================================
rts/sm/Sanity.h
=====================================
@@ -19,6 +19,8 @@
# define MAX_SLOTS 100000
# endif
+struct NonmovingHeap;
+
/* debugging routines */
void checkSanity ( bool after_gc, bool major_gc );
void checkNurserySanity ( nursery *nursery );
=====================================
rts/sm/Scav.c
=====================================
@@ -1730,7 +1730,7 @@ scavenge_capability_mut_lists (Capability *cap)
if (RtsFlags.GcFlags.useNonmoving && major_gc) {
uint32_t g = oldest_gen->no;
scavenge_mutable_list(cap->saved_mut_lists[g], oldest_gen);
- freeChain_sync(cap->saved_mut_lists[g]);
+ freeChain_lock(cap->saved_mut_lists[g]);
cap->saved_mut_lists[g] = NULL;
return;
}
@@ -1743,7 +1743,7 @@ scavenge_capability_mut_lists (Capability *cap)
*/
for (uint32_t g = RtsFlags.GcFlags.generations-1; g > N; g--) {
scavenge_mutable_list(cap->saved_mut_lists[g], &generations[g]);
- freeChain_sync(cap->saved_mut_lists[g]);
+ freeChain_lock(cap->saved_mut_lists[g]);
cap->saved_mut_lists[g] = NULL;
}
}
=====================================
rts/sm/Storage.c
=====================================
@@ -193,14 +193,13 @@ initStorage (void)
initMutex(&sm_mutex);
#endif
- ACQUIRE_SM_LOCK;
-
/* allocate generation info array */
generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
* sizeof(struct generation_),
"initStorage: gens");
/* Initialise all generations */
+ ACQUIRE_SM_LOCK;
for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
initGeneration(&generations[g], g);
}
@@ -214,16 +213,11 @@ initStorage (void)
generations[g].to = &generations[g+1];
}
oldest_gen->to = oldest_gen;
+ RELEASE_SM_LOCK;
// Nonmoving heap uses oldest_gen so initialize it after initializing oldest_gen
nonmovingInit();
-#if defined(THREADED_RTS)
- // nonmovingAddCapabilities allocates segments, which requires taking the gc
- // sync lock, so initialize it before nonmovingAddCapabilities
- initSpinLock(&gc_alloc_block_sync);
-#endif
-
if (RtsFlags.GcFlags.useNonmoving)
nonmovingAddCapabilities(getNumCapabilities());
@@ -261,8 +255,6 @@ initStorage (void)
IF_DEBUG(gc, statDescribeGens());
- RELEASE_SM_LOCK;
-
traceInitEvent(traceHeapInfo);
}
@@ -314,17 +306,20 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
assignNurseriesToCapabilities(from,to);
// allocate a block for each mut list
+ ACQUIRE_SM_LOCK;
for (n = from; n < to; n++) {
for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
getCapability(n)->mut_lists[g] =
allocBlockOnNode(capNoToNumaNode(n));
}
}
+ RELEASE_SM_LOCK;
// Initialize NonmovingAllocators and UpdRemSets
if (RtsFlags.GcFlags.useNonmoving) {
nonmovingAddCapabilities(to);
- for (i = 0; i < to; ++i) {
+ for (i = from; i < to; i++) {
+ getCapability(i)->upd_rem_set.queue.blocks = NULL;
nonmovingInitUpdRemSet(&getCapability(i)->upd_rem_set);
}
}
@@ -564,9 +559,7 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
// Allocate the blackhole indirection closure
if (RtsFlags.GcFlags.useNonmoving) {
// See Note [Static objects under the nonmoving collector].
- ACQUIRE_SM_LOCK;
bh = (StgInd *)nonmovingAllocate(cap, sizeofW(*bh));
- RELEASE_SM_LOCK;
recordMutableCap((StgClosure*)bh,
regTableToCapability(reg), oldest_gen->no);
} else {
@@ -724,6 +717,7 @@ allocNursery (uint32_t node, bdescr *tail, W_ blocks)
// automatic prefetching works across nursery blocks. This is a
// tiny optimisation (~0.5%), but it's free.
+ ACQUIRE_SM_LOCK;
while (blocks > 0) {
n = stg_min(BLOCKS_PER_MBLOCK, blocks);
// allocLargeChunk will prefer large chunks, but will pick up
@@ -759,6 +753,7 @@ allocNursery (uint32_t node, bdescr *tail, W_ blocks)
tail = &bd[0];
}
+ RELEASE_SM_LOCK;
return &bd[0];
}
@@ -878,7 +873,7 @@ resizeNurseriesEach (W_ blocks)
next_bd = bd->link;
next_bd->u.back = NULL;
nursery_blocks -= bd->blocks; // might be a large block
- freeGroup(bd);
+ freeGroup_lock(bd);
bd = next_bd;
}
nursery->blocks = bd;
@@ -1299,9 +1294,7 @@ allocatePinned (Capability *cap, W_ n /*words*/, W_ alignment /*bytes*/, W_ alig
if (bd == NULL) {
// The pinned block list is empty: allocate a fresh block (we can't fail
// here).
- ACQUIRE_SM_LOCK;
bd = allocNursery(cap->node, NULL, PINNED_EMPTY_SIZE);
- RELEASE_SM_LOCK;
}
// Bump up the nursery pointer to avoid the pathological situation
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1fdffe157170bb42741c44bb03d44f61ed5f8822...f095454b5bcf0d604e62c784931f80b5d7f0fb88