[Git][ghc/ghc][wip/tsan/storage] 7 commits: rts/GC: Use atomics
Ben Gamari
gitlab at gitlab.haskell.org
Fri Oct 30 04:41:28 UTC 2020
Ben Gamari pushed to branch wip/tsan/storage at Glasgow Haskell Compiler / GHC
Commits:
aad1f803 by Ben Gamari at 2020-10-30T00:41:14-04:00
rts/GC: Use atomics
- - - - -
d0bc0517 by Ben Gamari at 2020-10-30T00:41:14-04:00
rts: Use RELEASE ordering in unlockClosure
- - - - -
d44f5232 by Ben Gamari at 2020-10-30T00:41:14-04:00
rts/Storage: Accept races on heap size counters
- - - - -
4e4a7386 by Ben Gamari at 2020-10-30T00:41:14-04:00
rts: Join to concurrent mark thread during shutdown
Previously we would take all capabilities but fail to join on the thread
itself, potentially resulting in a leaked thread.
- - - - -
a80cc857 by GHC GitLab CI at 2020-10-30T00:41:14-04:00
rts: Fix race in GC CPU time accounting
Ensure that the GC leader synchronizes with workers before calling
stat_endGC.
- - - - -
7dcd5f53 by Ben Gamari at 2020-10-30T00:41:14-04:00
rts/SpinLock: Separate out slow path
Not only is this in general a good idea, but it turns out that GCC
unrolls the retry loop, resulting in massive code bloat in critical
parts of the RTS (e.g. `evacuate`).
- - - - -
96f8bde7 by Ben Gamari at 2020-10-30T00:41:14-04:00
rts: Use relaxed ordering on spinlock counters
- - - - -
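Most of the hunks below replace plain memory accesses with the RELAXED_LOAD/RELAXED_STORE/RELAXED_ADD, ACQUIRE_LOAD, and RELEASE_STORE macros from includes/stg/SMP.h, which wrap the GCC/Clang __atomic builtins. As a rough, standalone orientation (not part of these commits; the function and variable names below are made up for illustration), the pattern they express looks like this:

/* Illustrative sketch only. A writer publishes a structure with a
 * release store; a reader picks it up with an acquire load, which
 * guarantees it also sees the writer's earlier plain stores. A
 * statistics counter is bumped with relaxed ordering: atomic, but with
 * no ordering constraints, matching how the spinlock profiling
 * counters are treated in these commits. */
#include <stdint.h>
#include <stddef.h>

struct payload { uint64_t field; };

static struct payload *shared_ptr;    /* hypothetical published pointer */
static uint64_t        stat_counter;  /* hypothetical profiling counter */

void publish(struct payload *p)
{
    p->field = 42;                                       /* initialise first */
    __atomic_store_n(&shared_ptr, p, __ATOMIC_RELEASE);  /* then publish     */
}

uint64_t consume(void)
{
    struct payload *p = __atomic_load_n(&shared_ptr, __ATOMIC_ACQUIRE);
    __atomic_add_fetch(&stat_counter, 1, __ATOMIC_RELAXED);
    return p != NULL ? p->field : 0;
}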
21 changed files:
- includes/rts/OSThreads.h
- includes/rts/SpinLock.h
- includes/rts/storage/GC.h
- includes/stg/SMP.h
- rts/Capability.h
- rts/SMPClosureOps.h
- rts/Schedule.c
- + rts/SpinLock.c
- rts/posix/OSThreads.c
- rts/rts.cabal.in
- rts/sm/Evac.c
- rts/sm/GC.c
- rts/sm/GCAux.c
- rts/sm/GCUtils.c
- rts/sm/GCUtils.h
- rts/sm/MarkWeak.c
- rts/sm/NonMoving.c
- rts/sm/Scav.c
- rts/sm/Storage.c
- rts/sm/Storage.h
- rts/win32/OSThreads.c
Changes:
=====================================
includes/rts/OSThreads.h
=====================================
@@ -164,7 +164,8 @@ typedef void* OSThreadProcAttr OSThreadProc(void *);
extern int createOSThread ( OSThreadId* tid, char *name,
OSThreadProc *startProc, void *param);
extern bool osThreadIsAlive ( OSThreadId id );
-extern void interruptOSThread (OSThreadId id);
+extern void interruptOSThread ( OSThreadId id );
+extern void joinOSThread ( OSThreadId id );
//
// Condition Variables
=====================================
includes/rts/SpinLock.h
=====================================
@@ -39,19 +39,14 @@ typedef struct SpinLock_
#define IF_PROF_SPIN(x)
#endif
+void acquire_spin_lock_slow_path(SpinLock * p);
+
// acquire spin lock
INLINE_HEADER void ACQUIRE_SPIN_LOCK(SpinLock * p)
{
- do {
- for (uint32_t i = 0; i < SPIN_COUNT; i++) {
- StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0);
- if (r != 0) return;
- IF_PROF_SPIN(__atomic_fetch_add(&p->spin, 1, __ATOMIC_RELAXED));
- busy_wait_nop();
- }
- IF_PROF_SPIN(__atomic_fetch_add(&p->yield, 1, __ATOMIC_RELAXED));
- yieldThread();
- } while (1);
+ StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0);
+ if (RTS_UNLIKELY(r != 0))
+ acquire_spin_lock_slow_path(p);
}
// release spin lock
=====================================
includes/rts/storage/GC.h
=====================================
@@ -247,9 +247,9 @@ extern bool keepCAFs;
INLINE_HEADER void initBdescr(bdescr *bd, generation *gen, generation *dest)
{
- bd->gen = gen;
- bd->gen_no = gen->no;
- bd->dest_no = dest->no;
+ RELAXED_STORE(&bd->gen, gen);
+ RELAXED_STORE(&bd->gen_no, gen->no);
+ RELAXED_STORE(&bd->dest_no, dest->no);
#if !IN_STG_CODE
/* See Note [RtsFlags is a pointer in STG code] */
=====================================
includes/stg/SMP.h
=====================================
@@ -440,6 +440,7 @@ load_load_barrier(void) {
// Relaxed atomic operations.
#define RELAXED_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_RELAXED)
#define RELAXED_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELAXED)
+#define RELAXED_ADD(ptr,val) __atomic_add_fetch(ptr, val, __ATOMIC_RELAXED)
// Acquire/release atomic operations
#define ACQUIRE_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE)
@@ -466,6 +467,7 @@ EXTERN_INLINE void load_load_barrier () {} /* nothing */
// Relaxed atomic operations
#define RELAXED_LOAD(ptr) *ptr
#define RELAXED_STORE(ptr,val) *ptr = val
+#define RELAXED_ADD(ptr,val) *ptr += val
// Acquire/release atomic operations
#define ACQUIRE_LOAD(ptr) *ptr
=====================================
rts/Capability.h
=====================================
@@ -419,14 +419,16 @@ recordMutableCap (const StgClosure *p, Capability *cap, uint32_t gen)
// ASSERT(cap->running_task == myTask());
// NO: assertion is violated by performPendingThrowTos()
bd = cap->mut_lists[gen];
- if (bd->free >= bd->start + BLOCK_SIZE_W) {
+ if (RELAXED_LOAD(&bd->free) >= bd->start + BLOCK_SIZE_W) {
bdescr *new_bd;
new_bd = allocBlockOnNode_lock(cap->node);
new_bd->link = bd;
+ new_bd->free = new_bd->start;
bd = new_bd;
cap->mut_lists[gen] = bd;
}
- *bd->free++ = (StgWord)p;
+ RELAXED_STORE(bd->free, (StgWord) p);
+ NONATOMIC_ADD(&bd->free, 1);
}
EXTERN_INLINE void
=====================================
rts/SMPClosureOps.h
=====================================
@@ -119,9 +119,8 @@ tryLockClosure(StgClosure *p)
EXTERN_INLINE void unlockClosure(StgClosure *p, const StgInfoTable *info)
{
- // This is a strictly ordered write, so we need a write_barrier():
- write_barrier();
- p->header.info = info;
+ // This is a strictly ordered write, so we need a RELEASE ordering.
+ RELEASE_STORE(&p->header.info, info);
}
#endif /* CMINUSMINUS */
=====================================
rts/Schedule.c
=====================================
@@ -433,7 +433,7 @@ run_thread:
cap->interrupt = 0;
cap->in_haskell = true;
- cap->idle = 0;
+ RELAXED_STORE(&cap->idle, false);
dirty_TSO(cap,t);
dirty_STACK(cap,t->stackobj);
@@ -1780,7 +1780,7 @@ scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
debugTrace(DEBUG_sched, "%d idle caps", n_idle_caps);
for (i=0; i < n_capabilities; i++) {
- capabilities[i]->idle++;
+ NONATOMIC_ADD(&capabilities[i]->idle, 1);
}
// For all capabilities participating in this GC, wait until
=====================================
rts/SpinLock.c
=====================================
@@ -0,0 +1,36 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2006-2009
+ *
+ * Spin locks
+ *
+ * These are simple spin-only locks as opposed to Mutexes which
+ * probably spin for a while before blocking in the kernel. We use
+ * these when we are sure that all our threads are actively running on
+ * a CPU, e.g. in the GC.
+ *
+ * TODO: measure whether we really need these, or whether Mutexes
+ * would do (and be a bit safer if a CPU becomes loaded).
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#include "Rts.h"
+
+void acquire_spin_lock_slow_path(SpinLock * p)
+{
+ do {
+ for (uint32_t i = 0; i < SPIN_COUNT; i++) {
+ StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0);
+ if (r != 0) return;
+ IF_PROF_SPIN(RELAXED_ADD(&p->spin, 1));
+ busy_wait_nop();
+ }
+ IF_PROF_SPIN(RELAXED_ADD(&p->yield, 1));
+ yieldThread();
+ } while (1);
+}
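The point of the split is that the inline ACQUIRE_SPIN_LOCK now contains a single cas, and the SPIN_COUNT retry loop is compiled exactly once here rather than being unrolled by GCC at every call site. A minimal usage sketch follows; only ACQUIRE_SPIN_LOCK and acquire_spin_lock_slow_path appear in this diff, while initSpinLock and RELEASE_SPIN_LOCK are assumed to exist unchanged.

/* Sketch of a hypothetical caller of the split spinlock. */
#include "Rts.h"

static SpinLock example_sync;              /* hypothetical lock */

static void example_init(void)
{
    initSpinLock(&example_sync);           /* assumed initialiser, not in this diff */
}

static void example_critical_section(void)
{
    ACQUIRE_SPIN_LOCK(&example_sync);      /* one cas inline; slow path only on contention */
    /* ... touch shared GC state ... */
    RELEASE_SPIN_LOCK(&example_sync);      /* release the lock */
}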
=====================================
rts/posix/OSThreads.c
=====================================
@@ -398,6 +398,14 @@ interruptOSThread (OSThreadId id)
pthread_kill(id, SIGPIPE);
}
+void
+joinOSThread (OSThreadId id)
+{
+ if (pthread_join(id, NULL) != 0) {
+ sysErrorBelch("joinOSThread: error %d", errno);
+ }
+}
+
KernelThreadId kernelThreadId (void)
{
#if defined(linux_HOST_OS)
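Note that pthread_join reports failure through its return value and does not set errno; a standalone sketch of capturing that value (illustration only, not the committed code) would look like:

/* Sketch: pthread_join returns 0 on success or an error number
 * directly, so the return value is what should be reported. */
#include <pthread.h>
#include <stdio.h>

static void join_thread_sketch(pthread_t tid)
{
    int ret = pthread_join(tid, NULL);
    if (ret != 0) {
        fprintf(stderr, "joinOSThread: error %d\n", ret);
    }
}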
=====================================
rts/rts.cabal.in
=====================================
@@ -462,6 +462,7 @@ library
STM.c
Schedule.c
Sparks.c
+ SpinLock.c
StableName.c
StablePtr.c
StaticPtrTable.c
=====================================
rts/sm/Evac.c
=====================================
@@ -171,7 +171,11 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
#endif
return evacuate(p); // does the failed_to_evac stuff
} else {
- *p = TAG_CLOSURE(tag,(StgClosure*)to);
+ // This doesn't need to have RELEASE ordering since we are guaranteed
+ // to scavenge the to-space object on the current core, therefore
+ // no-one else will follow this pointer (FIXME: Is this true in
+ // light of the selector optimization?).
+ RELEASE_STORE(p, TAG_CLOSURE(tag,(StgClosure*)to));
}
}
#else
@@ -206,9 +210,9 @@ copy_tag_nolock(StgClosure **p, const StgInfoTable *info,
// if somebody else reads the forwarding pointer, we better make
// sure there's a closure at the end of it.
- write_barrier();
- *p = TAG_CLOSURE(tag,(StgClosure*)to);
- src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
+ RELEASE_STORE(p, TAG_CLOSURE(tag,(StgClosure*)to));
+ RELEASE_STORE(&src->header.info, \
+ (const StgInfoTable *)MK_FORWARDING_PTR(to));
// if (to+size+2 < bd->start + BLOCK_SIZE_W) {
// __builtin_prefetch(to + size + 2, 1);
@@ -245,7 +249,7 @@ spin:
goto spin;
}
if (IS_FORWARDING_PTR(info)) {
- src->header.info = (const StgInfoTable *)info;
+ RELEASE_STORE(&src->header.info, (const StgInfoTable *)info);
evacuate(p); // does the failed_to_evac stuff
return false;
}
@@ -261,9 +265,8 @@ spin:
to[i] = from[i];
}
- write_barrier();
- *p = (StgClosure *)to;
- src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to);
+ RELEASE_STORE(p, (StgClosure *) to);
+ RELEASE_STORE(&src->header.info, (const StgInfoTable*)MK_FORWARDING_PTR(to));
#if defined(PROFILING)
// We store the size of the just evacuated object in the LDV word so that
@@ -306,12 +309,12 @@ evacuate_large(StgPtr p)
gen_workspace *ws;
bd = Bdescr(p);
- gen = bd->gen;
- gen_no = bd->gen_no;
+ gen = RELAXED_LOAD(&bd->gen);
+ gen_no = RELAXED_LOAD(&bd->gen_no);
ACQUIRE_SPIN_LOCK(&gen->sync);
// already evacuated?
- if (bd->flags & BF_EVACUATED) {
+ if (RELAXED_LOAD(&bd->flags) & BF_EVACUATED) {
/* Don't forget to set the gct->failed_to_evac flag if we didn't get
* the desired destination (see comments in evacuate()).
*/
@@ -344,9 +347,9 @@ evacuate_large(StgPtr p)
ws = &gct->gens[new_gen_no];
new_gen = &generations[new_gen_no];
- bd->flags |= BF_EVACUATED;
+ __atomic_fetch_or(&bd->flags, BF_EVACUATED, __ATOMIC_ACQ_REL);
if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen)) {
- bd->flags |= BF_NONMOVING;
+ __atomic_fetch_or(&bd->flags, BF_NONMOVING, __ATOMIC_ACQ_REL);
}
initBdescr(bd, new_gen, new_gen->to);
@@ -354,7 +357,7 @@ evacuate_large(StgPtr p)
// these objects, because they aren't allowed to contain any outgoing
// pointers. For these blocks, we skip the scavenge stage and put
// them straight on the scavenged_large_objects list.
- if (bd->flags & BF_PINNED) {
+ if (RELAXED_LOAD(&bd->flags) & BF_PINNED) {
ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS);
if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); }
@@ -389,7 +392,7 @@ evacuate_static_object (StgClosure **link_field, StgClosure *q)
return;
}
- StgWord link = (StgWord)*link_field;
+ StgWord link = RELAXED_LOAD((StgWord*) link_field);
// See Note [STATIC_LINK fields] for how the link field bits work
if (((link & STATIC_BITS) | prev_static_flag) != 3) {
@@ -435,7 +438,7 @@ evacuate_compact (StgPtr p)
bd = Bdescr((StgPtr)str);
gen_no = bd->gen_no;
- if (bd->flags & BF_NONMOVING) {
+ if (RELAXED_LOAD(&bd->flags) & BF_NONMOVING) {
// We may have evacuated the block to the nonmoving generation. If so
// we need to make sure it is added to the mark queue since the only
// reference to it may be from the moving heap.
@@ -500,7 +503,7 @@ evacuate_compact (StgPtr p)
// in the GC, and that should never see blocks other than the first)
bd->flags |= BF_EVACUATED;
if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen)) {
- bd->flags |= BF_NONMOVING;
+ __atomic_fetch_or(&bd->flags, BF_NONMOVING, __ATOMIC_RELAXED);
}
initBdescr(bd, new_gen, new_gen->to);
@@ -581,7 +584,7 @@ evacuate(StgClosure **p)
const StgInfoTable *info;
StgWord tag;
- q = *p;
+ q = RELAXED_LOAD(p);
loop:
/* The tag and the pointer are split, to be merged after evacing */
@@ -638,10 +641,11 @@ loop:
bd = Bdescr((P_)q);
- if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT | BF_NONMOVING)) != 0) {
+ uint16_t flags = RELAXED_LOAD(&bd->flags);
+ if ((flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT | BF_NONMOVING)) != 0) {
// Pointer to non-moving heap. Non-moving heap is collected using
// mark-sweep so this object should be marked and then retained in sweep.
- if (RTS_UNLIKELY(bd->flags & BF_NONMOVING)) {
+ if (RTS_UNLIKELY(RELAXED_LOAD(&bd->flags) & BF_NONMOVING)) {
// NOTE: large objects in nonmoving heap are also marked with
// BF_NONMOVING. Those are moved to scavenged_large_objects list in
// mark phase.
@@ -656,11 +660,11 @@ loop:
// happen often, but allowing it makes certain things a bit
// easier; e.g. scavenging an object is idempotent, so it's OK to
// have an object on the mutable list multiple times.
- if (bd->flags & BF_EVACUATED) {
+ if (flags & BF_EVACUATED) {
// We aren't copying this object, so we have to check
// whether it is already in the target generation. (this is
// the write barrier).
- if (bd->gen_no < gct->evac_gen_no) {
+ if (RELAXED_LOAD(&bd->gen_no) < gct->evac_gen_no) {
gct->failed_to_evac = true;
TICK_GC_FAILED_PROMOTION();
}
@@ -671,20 +675,20 @@ loop:
// right thing for objects that are half way in the middle of the first
// block of a compact (and would be treated as large objects even though
// they are not)
- if (bd->flags & BF_COMPACT) {
+ if (flags & BF_COMPACT) {
evacuate_compact((P_)q);
return;
}
/* evacuate large objects by re-linking them onto a different list.
*/
- if (bd->flags & BF_LARGE) {
+ if (flags & BF_LARGE) {
evacuate_large((P_)q);
// We may have evacuated the block to the nonmoving generation. If so
// we need to make sure it is added to the mark queue since the only
// reference to it may be from the moving heap.
- if (major_gc && bd->flags & BF_NONMOVING && !deadlock_detect_gc) {
+ if (major_gc && flags & BF_NONMOVING && !deadlock_detect_gc) {
markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
}
return;
@@ -702,7 +706,7 @@ loop:
gen_no = bd->dest_no;
- info = q->header.info;
+ info = ACQUIRE_LOAD(&q->header.info);
if (IS_FORWARDING_PTR(info))
{
/* Already evacuated, just return the forwarding address.
@@ -722,9 +726,12 @@ loop:
* check if gen is too low.
*/
StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
- *p = TAG_CLOSURE(tag,e);
+ RELAXED_STORE(p, TAG_CLOSURE(tag,e));
if (gen_no < gct->evac_gen_no) { // optimisation
- if (Bdescr((P_)e)->gen_no < gct->evac_gen_no) {
+ // The ACQUIRE here is necessary to ensure that we see gen_no if the
+ // evacuated object lives in a block newly-allocated by a GC thread on
+ // another core.
+ if (ACQUIRE_LOAD(&Bdescr((P_)e)->gen_no) < gct->evac_gen_no) {
gct->failed_to_evac = true;
TICK_GC_FAILED_PROMOTION();
}
@@ -752,15 +759,17 @@ loop:
if (info == Czh_con_info &&
// unsigned, so always true: (StgChar)w >= MIN_CHARLIKE &&
(StgChar)w <= MAX_CHARLIKE) {
- *p = TAG_CLOSURE(tag,
- (StgClosure *)CHARLIKE_CLOSURE((StgChar)w)
- );
+ RELAXED_STORE(p, \
+ TAG_CLOSURE(tag, \
+ (StgClosure *)CHARLIKE_CLOSURE((StgChar)w)
+ ));
}
else if (info == Izh_con_info &&
(StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) {
- *p = TAG_CLOSURE(tag,
- (StgClosure *)INTLIKE_CLOSURE((StgInt)w)
- );
+ RELAXED_STORE(p, \
+ TAG_CLOSURE(tag, \
+ (StgClosure *)INTLIKE_CLOSURE((StgInt)w)
+ ));
}
else {
copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_no,tag);
@@ -814,10 +823,10 @@ loop:
const StgInfoTable *i;
r = ((StgInd*)q)->indirectee;
if (GET_CLOSURE_TAG(r) == 0) {
- i = r->header.info;
+ i = ACQUIRE_LOAD(&r->header.info);
if (IS_FORWARDING_PTR(i)) {
r = (StgClosure *)UN_FORWARDING_PTR(i);
- i = r->header.info;
+ i = ACQUIRE_LOAD(&r->header.info);
}
if (i == &stg_TSO_info
|| i == &stg_WHITEHOLE_info
@@ -842,7 +851,7 @@ loop:
ASSERT(i != &stg_IND_info);
}
q = r;
- *p = r;
+ RELEASE_STORE(p, r);
goto loop;
}
@@ -868,8 +877,8 @@ loop:
case IND:
// follow chains of indirections, don't evacuate them
- q = ((StgInd*)q)->indirectee;
- *p = q;
+ q = RELAXED_LOAD(&((StgInd*)q)->indirectee);
+ RELAXED_STORE(p, q);
goto loop;
case RET_BCO:
@@ -983,11 +992,12 @@ evacuate_BLACKHOLE(StgClosure **p)
ASSERT(GET_CLOSURE_TAG(q) == 0);
bd = Bdescr((P_)q);
+ const uint16_t flags = RELAXED_LOAD(&bd->flags);
// blackholes can't be in a compact
- ASSERT((bd->flags & BF_COMPACT) == 0);
+ ASSERT((flags & BF_COMPACT) == 0);
- if (RTS_UNLIKELY(bd->flags & BF_NONMOVING)) {
+ if (RTS_UNLIKELY(RELAXED_LOAD(&bd->flags) & BF_NONMOVING)) {
if (major_gc && !deadlock_detect_gc)
markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
return;
@@ -996,18 +1006,18 @@ evacuate_BLACKHOLE(StgClosure **p)
// blackholes *can* be in a large object: when raiseAsync() creates an
// AP_STACK the payload might be large enough to create a large object.
// See #14497.
- if (bd->flags & BF_LARGE) {
+ if (flags & BF_LARGE) {
evacuate_large((P_)q);
return;
}
- if (bd->flags & BF_EVACUATED) {
+ if (flags & BF_EVACUATED) {
if (bd->gen_no < gct->evac_gen_no) {
gct->failed_to_evac = true;
TICK_GC_FAILED_PROMOTION();
}
return;
}
- if (bd->flags & BF_MARKED) {
+ if (flags & BF_MARKED) {
if (!is_marked((P_)q,bd)) {
mark((P_)q,bd);
push_mark_stack((P_)q);
@@ -1015,13 +1025,13 @@ evacuate_BLACKHOLE(StgClosure **p)
return;
}
gen_no = bd->dest_no;
- info = q->header.info;
+ info = ACQUIRE_LOAD(&q->header.info);
if (IS_FORWARDING_PTR(info))
{
StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
*p = e;
if (gen_no < gct->evac_gen_no) { // optimisation
- if (Bdescr((P_)e)->gen_no < gct->evac_gen_no) {
+ if (ACQUIRE_LOAD(&Bdescr((P_)e)->gen_no) < gct->evac_gen_no) {
gct->failed_to_evac = true;
TICK_GC_FAILED_PROMOTION();
}
@@ -1090,13 +1100,11 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val)
// XXX we do not have BLACKHOLEs any more; replace with
// a THUNK_SELECTOR again. This will go into a loop if it is
// entered, and should result in a NonTermination exception.
- ((StgThunk *)p)->payload[0] = val;
- write_barrier();
- SET_INFO((StgClosure *)p, &stg_sel_0_upd_info);
+ RELAXED_STORE(&((StgThunk *)p)->payload[0], val);
+ SET_INFO_RELEASE((StgClosure *)p, &stg_sel_0_upd_info);
} else {
- ((StgInd *)p)->indirectee = val;
- write_barrier();
- SET_INFO((StgClosure *)p, &stg_IND_info);
+ RELAXED_STORE(&((StgInd *)p)->indirectee, val);
+ SET_INFO_RELEASE((StgClosure *)p, &stg_IND_info);
}
// For the purposes of LDV profiling, we have created an
@@ -1143,7 +1151,7 @@ selector_chain:
// save any space in any case, and updating with an indirection is
// trickier in a non-collected gen: we would have to update the
// mutable list.
- if (bd->flags & (BF_EVACUATED | BF_NONMOVING)) {
+ if (RELAXED_LOAD(&bd->flags) & (BF_EVACUATED | BF_NONMOVING)) {
unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
*q = (StgClosure *)p;
// shortcut, behave as for: if (evac) evacuate(q);
@@ -1198,8 +1206,7 @@ selector_chain:
// need the write-barrier stuff.
// - undo the chain we've built to point to p.
SET_INFO((StgClosure *)p, (const StgInfoTable *)info_ptr);
- write_barrier();
- *q = (StgClosure *)p;
+ RELEASE_STORE(q, (StgClosure *) p);
if (evac) evacuate(q);
unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
return;
@@ -1225,7 +1232,7 @@ selector_loop:
// from-space during marking, for example. We rely on the property
// that evacuate() doesn't mind if it gets passed a to-space pointer.
- info = (StgInfoTable*)selectee->header.info;
+ info = RELAXED_LOAD((StgInfoTable**) &selectee->header.info);
if (IS_FORWARDING_PTR(info)) {
// We don't follow pointers into to-space; the constructor
@@ -1252,7 +1259,7 @@ selector_loop:
info->layout.payload.nptrs));
// Select the right field from the constructor
- StgClosure *val = selectee->payload[field];
+ StgClosure *val = RELAXED_LOAD(&selectee->payload[field]);
#if defined(PROFILING)
// For the purposes of LDV profiling, we have destroyed
@@ -1278,19 +1285,19 @@ selector_loop:
// evaluating until we find the real value, and then
// update the whole chain to point to the value.
val_loop:
- info_ptr = (StgWord)UNTAG_CLOSURE(val)->header.info;
+ info_ptr = ACQUIRE_LOAD((StgWord*) &UNTAG_CLOSURE(val)->header.info);
if (!IS_FORWARDING_PTR(info_ptr))
{
info = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr);
switch (info->type) {
case IND:
case IND_STATIC:
- val = ((StgInd *)val)->indirectee;
+ val = RELAXED_LOAD(&((StgInd *)val)->indirectee);
goto val_loop;
case THUNK_SELECTOR:
// Use payload to make a list of thunk selectors, to be
// used in unchain_thunk_selectors
- ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector;
+ RELAXED_STORE(&((StgClosure*)p)->payload[0], (StgClosure *)prev_thunk_selector);
prev_thunk_selector = p;
p = (StgSelector*)val;
goto selector_chain;
@@ -1298,7 +1305,7 @@ selector_loop:
break;
}
}
- ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector;
+ RELAXED_STORE(&((StgClosure*)p)->payload[0], (StgClosure *)prev_thunk_selector);
prev_thunk_selector = p;
*q = val;
@@ -1320,22 +1327,22 @@ selector_loop:
case IND:
case IND_STATIC:
// Again, we might need to untag a constructor.
- selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee );
+ selectee = UNTAG_CLOSURE( RELAXED_LOAD(&((StgInd *)selectee)->indirectee) );
goto selector_loop;
case BLACKHOLE:
{
StgClosure *r;
const StgInfoTable *i;
- r = ((StgInd*)selectee)->indirectee;
+ r = ACQUIRE_LOAD(&((StgInd*)selectee)->indirectee);
// establish whether this BH has been updated, and is now an
// indirection, as in evacuate().
if (GET_CLOSURE_TAG(r) == 0) {
- i = r->header.info;
+ i = ACQUIRE_LOAD(&r->header.info);
if (IS_FORWARDING_PTR(i)) {
r = (StgClosure *)UN_FORWARDING_PTR(i);
- i = r->header.info;
+ i = RELAXED_LOAD(&r->header.info);
}
if (i == &stg_TSO_info
|| i == &stg_WHITEHOLE_info
@@ -1346,7 +1353,7 @@ selector_loop:
ASSERT(i != &stg_IND_info);
}
- selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee );
+ selectee = UNTAG_CLOSURE( RELAXED_LOAD(&((StgInd *)selectee)->indirectee) );
goto selector_loop;
}
=====================================
rts/sm/GC.c
=====================================
@@ -537,37 +537,37 @@ GarbageCollect (uint32_t collect_gen,
const gc_thread* thread;
for (i=0; i < n_gc_threads; i++) {
- copied += gc_threads[i]->copied;
+ copied += RELAXED_LOAD(&gc_threads[i]->copied);
}
for (i=0; i < n_gc_threads; i++) {
thread = gc_threads[i];
if (n_gc_threads > 1) {
debugTrace(DEBUG_gc,"thread %d:", i);
debugTrace(DEBUG_gc," copied %ld",
- thread->copied * sizeof(W_));
+ RELAXED_LOAD(&thread->copied) * sizeof(W_));
debugTrace(DEBUG_gc," scanned %ld",
- thread->scanned * sizeof(W_));
+ RELAXED_LOAD(&thread->scanned) * sizeof(W_));
debugTrace(DEBUG_gc," any_work %ld",
- thread->any_work);
+ RELAXED_LOAD(&thread->any_work));
debugTrace(DEBUG_gc," no_work %ld",
- thread->no_work);
+ RELAXED_LOAD(&thread->no_work));
debugTrace(DEBUG_gc," scav_find_work %ld",
- thread->scav_find_work);
+ RELAXED_LOAD(&thread->scav_find_work));
#if defined(THREADED_RTS) && defined(PROF_SPIN)
- gc_spin_spin += thread->gc_spin.spin;
- gc_spin_yield += thread->gc_spin.yield;
- mut_spin_spin += thread->mut_spin.spin;
- mut_spin_yield += thread->mut_spin.yield;
+ gc_spin_spin += RELAXED_LOAD(&thread->gc_spin.spin);
+ gc_spin_yield += RELAXED_LOAD(&thread->gc_spin.yield);
+ mut_spin_spin += RELAXED_LOAD(&thread->mut_spin.spin);
+ mut_spin_yield += RELAXED_LOAD(&thread->mut_spin.yield);
#endif
- any_work += thread->any_work;
- no_work += thread->no_work;
- scav_find_work += thread->scav_find_work;
+ any_work += RELAXED_LOAD(&thread->any_work);
+ no_work += RELAXED_LOAD(&thread->no_work);
+ scav_find_work += RELAXED_LOAD(&thread->scav_find_work);
- par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
+ par_max_copied = stg_max(RELAXED_LOAD(&thread->copied), par_max_copied);
par_balanced_copied_acc +=
- stg_min(n_gc_threads * gc_threads[i]->copied, copied);
+ stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied);
}
}
if (n_gc_threads > 1) {
@@ -1130,7 +1130,7 @@ inc_running (void)
static StgWord
dec_running (void)
{
- ASSERT(gc_running_threads != 0);
+ ASSERT(RELAXED_LOAD(&gc_running_threads) != 0);
return atomic_dec(&gc_running_threads);
}
@@ -1140,7 +1140,7 @@ any_work (void)
int g;
gen_workspace *ws;
- gct->any_work++;
+ NONATOMIC_ADD(&gct->any_work, 1);
write_barrier();
@@ -1173,7 +1173,7 @@ any_work (void)
}
#endif
- gct->no_work++;
+ __atomic_fetch_add(&gct->no_work, 1, __ATOMIC_RELAXED);
#if defined(THREADED_RTS)
yieldThread();
#endif
@@ -1214,7 +1214,7 @@ loop:
debugTrace(DEBUG_gc, "%d GC threads still running", r);
- while (gc_running_threads != 0) {
+ while (SEQ_CST_LOAD(&gc_running_threads) != 0) {
// usleep(1);
if (any_work()) {
inc_running();
@@ -1251,7 +1251,7 @@ gcWorkerThread (Capability *cap)
// measurements more accurate on Linux, perhaps because it syncs
// the CPU time across the multiple cores. Without this, CPU time
// is heavily skewed towards GC rather than MUT.
- gct->wakeup = GC_THREAD_STANDING_BY;
+ SEQ_CST_STORE(&gct->wakeup, GC_THREAD_STANDING_BY);
debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
ACQUIRE_SPIN_LOCK(&gct->gc_spin);
@@ -1278,10 +1278,13 @@ gcWorkerThread (Capability *cap)
// Wait until we're told to continue
RELEASE_SPIN_LOCK(&gct->gc_spin);
- gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
gct->thread_index);
stat_endGCWorker (cap, gct);
+ // This must come *after* stat_endGCWorker since it serves to
+ // synchronize us with the GC leader, which will later aggregate the
+ // GC statistics.
+ SEQ_CST_STORE(&gct->wakeup, GC_THREAD_WAITING_TO_CONTINUE);
ACQUIRE_SPIN_LOCK(&gct->mut_spin);
debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
@@ -1306,7 +1309,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
while(retry) {
for (i=0; i < n_threads; i++) {
if (i == me || idle_cap[i]) continue;
- if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+ if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
prodCapability(capabilities[i], cap->running_task);
}
}
@@ -1316,7 +1319,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
if (i == me || idle_cap[i]) continue;
write_barrier();
interruptCapability(capabilities[i]);
- if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+ if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
retry = true;
}
}
@@ -1373,10 +1376,10 @@ wakeup_gc_threads (uint32_t me USED_IF_THREADS,
if (i == me || idle_cap[i]) continue;
inc_running();
debugTrace(DEBUG_gc, "waking up gc thread %d", i);
- if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY)
+ if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY)
barf("wakeup_gc_threads");
- gc_threads[i]->wakeup = GC_THREAD_RUNNING;
+ SEQ_CST_STORE(&gc_threads[i]->wakeup, GC_THREAD_RUNNING);
ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
}
@@ -1397,9 +1400,8 @@ shutdown_gc_threads (uint32_t me USED_IF_THREADS,
for (i=0; i < n_gc_threads; i++) {
if (i == me || idle_cap[i]) continue;
- while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
+ while (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE) {
busy_wait_nop();
- write_barrier();
}
}
#endif
@@ -1414,10 +1416,10 @@ releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
uint32_t i;
for (i=0; i < n_threads; i++) {
if (i == me || idle_cap[i]) continue;
- if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
+ if (RELAXED_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE)
barf("releaseGCThreads");
- gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
+ RELAXED_STORE(&gc_threads[i]->wakeup, GC_THREAD_INACTIVE);
ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
}
@@ -1433,7 +1435,7 @@ static void
stash_mut_list (Capability *cap, uint32_t gen_no)
{
cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
- cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
+ RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_sync(cap->node));
}
/* ----------------------------------------------------------------------------
@@ -1459,9 +1461,11 @@ prepare_collected_gen (generation *gen)
// mutable list always has at least one block; this means we can avoid
// a check for NULL in recordMutable().
for (i = 0; i < n_capabilities; i++) {
- freeChain(capabilities[i]->mut_lists[g]);
- capabilities[i]->mut_lists[g] =
- allocBlockOnNode(capNoToNumaNode(i));
+ bdescr *old = RELAXED_LOAD(&capabilities[i]->mut_lists[g]);
+ freeChain(old);
+
+ bdescr *new = allocBlockOnNode(capNoToNumaNode(i));
+ RELAXED_STORE(&capabilities[i]->mut_lists[g], new);
}
}
@@ -1675,7 +1679,7 @@ collect_pinned_object_blocks (void)
bdescr *last = NULL;
if (use_nonmoving && gen == oldest_gen) {
// Mark objects as belonging to the nonmoving heap
- for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
+ for (bdescr *bd = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks); bd != NULL; bd = bd->link) {
bd->flags |= BF_NONMOVING;
bd->gen = oldest_gen;
bd->gen_no = oldest_gen->no;
@@ -1694,8 +1698,8 @@ collect_pinned_object_blocks (void)
if (gen->large_objects != NULL) {
gen->large_objects->u.back = last;
}
- gen->large_objects = capabilities[n]->pinned_object_blocks;
- capabilities[n]->pinned_object_blocks = NULL;
+ g0->large_objects = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks);
+ RELAXED_STORE(&capabilities[n]->pinned_object_blocks, NULL);
}
}
}
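The reordering of the GC_THREAD_WAITING_TO_CONTINUE store after stat_endGCWorker above, paired with the SEQ_CST load in shutdown_gc_threads, is what lets the leader aggregate worker statistics only after each worker has finished recording them. A simplified, self-contained sketch of that handshake (using C11 atomics rather than the RTS macros; the names are illustrative):

/* Simplified leader/worker handshake: the worker records its statistic
 * with a plain store and then sets a flag with sequentially consistent
 * ordering; the leader spins on the flag before reading the statistic. */
#include <stdatomic.h>

static atomic_int worker_done;
static long       worker_cpu_ns;      /* hypothetical statistic */

void worker_finish(long cpu_ns)
{
    worker_cpu_ns = cpu_ns;           /* record statistics first           */
    atomic_store(&worker_done, 1);    /* then signal the leader (seq_cst)  */
}

long leader_collect(void)
{
    while (atomic_load(&worker_done) == 0)
        ;                             /* wait until the worker has signalled */
    return worker_cpu_ns;             /* now safe to aggregate              */
}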
=====================================
rts/sm/GCAux.c
=====================================
@@ -83,7 +83,7 @@ isAlive(StgClosure *p)
return p;
}
- info = q->header.info;
+ info = RELAXED_LOAD(&q->header.info);
if (IS_FORWARDING_PTR(info)) {
// alive!
=====================================
rts/sm/GCUtils.c
=====================================
@@ -249,8 +249,8 @@ todo_block_full (uint32_t size, gen_workspace *ws)
return p;
}
- gct->copied += ws->todo_free - bd->free;
- bd->free = ws->todo_free;
+ gct->copied += ws->todo_free - RELAXED_LOAD(&bd->free);
+ RELAXED_STORE(&bd->free, ws->todo_free);
ASSERT(bd->u.scan >= bd->start && bd->u.scan <= bd->free);
@@ -330,10 +330,11 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)
gct->free_blocks = bd->link;
}
}
- // blocks in to-space get the BF_EVACUATED flag.
- bd->flags = BF_EVACUATED;
- bd->u.scan = bd->start;
initBdescr(bd, ws->gen, ws->gen->to);
+ RELAXED_STORE(&bd->u.scan, RELAXED_LOAD(&bd->start));
+ // blocks in to-space get the BF_EVACUATED flag.
+ // RELEASE here to ensure that bd->gen is visible to other cores.
+ RELEASE_STORE(&bd->flags, BF_EVACUATED);
}
bd->link = NULL;
@@ -345,7 +346,7 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)
// See Note [big objects]
debugTrace(DEBUG_gc, "alloc new todo block %p for gen %d",
- bd->free, ws->gen->no);
+ RELAXED_LOAD(&bd->free), ws->gen->no);
return ws->todo_free;
}
=====================================
rts/sm/GCUtils.h
=====================================
@@ -67,7 +67,9 @@ recordMutableGen_GC (StgClosure *p, uint32_t gen_no)
bd = new_bd;
gct->mut_lists[gen_no] = bd;
}
- *bd->free++ = (StgWord)p;
+ *bd->free++ = (StgWord) p;
+ // N.B. we are allocating into our Capability-local mut_list, therefore
+ // we don't need an atomic increment.
}
#include "EndPrivate.h"
=====================================
rts/sm/MarkWeak.c
=====================================
@@ -414,7 +414,7 @@ markWeakPtrList ( void )
StgWeak *w, **last_w;
last_w = &gen->weak_ptr_list;
- for (w = gen->weak_ptr_list; w != NULL; w = w->link) {
+ for (w = gen->weak_ptr_list; w != NULL; w = RELAXED_LOAD(&w->link)) {
// w might be WEAK, EVACUATED, or DEAD_WEAK (actually CON_STATIC) here
#if defined(DEBUG)
=====================================
rts/sm/NonMoving.c
=====================================
@@ -726,6 +726,7 @@ void nonmovingStop(void)
"waiting for nonmoving collector thread to terminate");
ACQUIRE_LOCK(&concurrent_coll_finished_lock);
waitCondition(&concurrent_coll_finished, &concurrent_coll_finished_lock);
+ joinOSThread(mark_thread);
}
#endif
}
=====================================
rts/sm/Scav.c
=====================================
@@ -203,9 +203,9 @@ scavenge_compact(StgCompactNFData *str)
gct->eager_promotion = saved_eager;
if (gct->failed_to_evac) {
- ((StgClosure *)str)->header.info = &stg_COMPACT_NFDATA_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *)str)->header.info, &stg_COMPACT_NFDATA_DIRTY_info);
} else {
- ((StgClosure *)str)->header.info = &stg_COMPACT_NFDATA_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *)str)->header.info, &stg_COMPACT_NFDATA_CLEAN_info);
}
}
@@ -466,9 +466,9 @@ scavenge_block (bdescr *bd)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- mvar->header.info = &stg_MVAR_DIRTY_info;
+ RELEASE_STORE(&mvar->header.info, &stg_MVAR_DIRTY_info);
} else {
- mvar->header.info = &stg_MVAR_CLEAN_info;
+ RELEASE_STORE(&mvar->header.info, &stg_MVAR_CLEAN_info);
}
p += sizeofW(StgMVar);
break;
@@ -483,9 +483,9 @@ scavenge_block (bdescr *bd)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- tvar->header.info = &stg_TVAR_DIRTY_info;
+ RELEASE_STORE(&tvar->header.info, &stg_TVAR_DIRTY_info);
} else {
- tvar->header.info = &stg_TVAR_CLEAN_info;
+ RELEASE_STORE(&tvar->header.info, &stg_TVAR_CLEAN_info);
}
p += sizeofW(StgTVar);
break;
@@ -617,9 +617,9 @@ scavenge_block (bdescr *bd)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_CLEAN_info);
}
p += sizeofW(StgMutVar);
break;
@@ -636,9 +636,9 @@ scavenge_block (bdescr *bd)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+ RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_DIRTY_info);
} else {
- bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info;
+ RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_CLEAN_info);
}
p += sizeofW(StgBlockingQueue);
break;
@@ -688,9 +688,9 @@ scavenge_block (bdescr *bd)
p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);
}
gct->eager_promotion = saved_eager_promotion;
@@ -705,9 +705,9 @@ scavenge_block (bdescr *bd)
p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info);
}
break;
}
@@ -730,9 +730,9 @@ scavenge_block (bdescr *bd)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_CLEAN_info);
}
gct->failed_to_evac = true; // always put it on the mutable list.
@@ -751,9 +751,9 @@ scavenge_block (bdescr *bd)
}
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info);
}
break;
}
@@ -836,7 +836,7 @@ scavenge_block (bdescr *bd)
if (p > bd->free) {
gct->copied += ws->todo_free - bd->free;
- bd->free = p;
+ RELEASE_STORE(&bd->free, p);
}
debugTrace(DEBUG_gc, " scavenged %ld bytes",
@@ -891,9 +891,9 @@ scavenge_mark_stack(void)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- mvar->header.info = &stg_MVAR_DIRTY_info;
+ RELEASE_STORE(&mvar->header.info, &stg_MVAR_DIRTY_info);
} else {
- mvar->header.info = &stg_MVAR_CLEAN_info;
+ RELEASE_STORE(&mvar->header.info, &stg_MVAR_CLEAN_info);
}
break;
}
@@ -907,9 +907,9 @@ scavenge_mark_stack(void)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- tvar->header.info = &stg_TVAR_DIRTY_info;
+ RELEASE_STORE(&tvar->header.info, &stg_TVAR_DIRTY_info);
} else {
- tvar->header.info = &stg_TVAR_CLEAN_info;
+ RELEASE_STORE(&tvar->header.info, &stg_TVAR_CLEAN_info);
}
break;
}
@@ -1013,9 +1013,9 @@ scavenge_mark_stack(void)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_CLEAN_info);
}
break;
}
@@ -1032,9 +1032,9 @@ scavenge_mark_stack(void)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+ RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_DIRTY_info);
} else {
- bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info;
+ RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_CLEAN_info);
}
break;
}
@@ -1080,9 +1080,9 @@ scavenge_mark_stack(void)
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);
}
gct->eager_promotion = saved_eager_promotion;
@@ -1099,9 +1099,9 @@ scavenge_mark_stack(void)
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info);
}
break;
}
@@ -1126,9 +1126,9 @@ scavenge_mark_stack(void)
gct->eager_promotion = saved_eager;
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_CLEAN_info);
}
gct->failed_to_evac = true; // mutable anyhow.
@@ -1147,9 +1147,9 @@ scavenge_mark_stack(void)
}
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info);
}
break;
}
@@ -1253,9 +1253,9 @@ scavenge_one(StgPtr p)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- mvar->header.info = &stg_MVAR_DIRTY_info;
+ RELEASE_STORE(&mvar->header.info, &stg_MVAR_DIRTY_info);
} else {
- mvar->header.info = &stg_MVAR_CLEAN_info;
+ RELEASE_STORE(&mvar->header.info, &stg_MVAR_CLEAN_info);
}
break;
}
@@ -1269,9 +1269,9 @@ scavenge_one(StgPtr p)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- tvar->header.info = &stg_TVAR_DIRTY_info;
+ RELEASE_STORE(&tvar->header.info, &stg_TVAR_DIRTY_info);
} else {
- tvar->header.info = &stg_TVAR_CLEAN_info;
+ RELEASE_STORE(&tvar->header.info, &stg_TVAR_CLEAN_info);
}
break;
}
@@ -1333,9 +1333,9 @@ scavenge_one(StgPtr p)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_MUT_VAR_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_MUT_VAR_CLEAN_info);
}
break;
}
@@ -1352,9 +1352,9 @@ scavenge_one(StgPtr p)
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
- bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+ RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_DIRTY_info);
} else {
- bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info;
+ RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_CLEAN_info);
}
break;
}
@@ -1400,9 +1400,9 @@ scavenge_one(StgPtr p)
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
if (gct->failed_to_evac) {
- ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);
} else {
- ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);
}
gct->eager_promotion = saved_eager_promotion;
@@ -1417,9 +1417,9 @@ scavenge_one(StgPtr p)
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
if (gct->failed_to_evac) {
- ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
} else {
- ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info);
}
break;
}
@@ -1444,9 +1444,9 @@ scavenge_one(StgPtr p)
gct->eager_promotion = saved_eager;
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_CLEAN_info);
}
gct->failed_to_evac = true;
@@ -1465,9 +1465,9 @@ scavenge_one(StgPtr p)
}
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info);
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info);
}
break;
}
@@ -1653,9 +1653,9 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
scavenge_mut_arr_ptrs_marked((StgMutArrPtrs *)p);
if (gct->failed_to_evac) {
- ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+ RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);
} else {
- ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+ RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);
}
gct->eager_promotion = saved_eager_promotion;
@@ -1753,8 +1753,9 @@ scavenge_static(void)
/* Take this object *off* the static_objects list,
* and put it on the scavenged_static_objects list.
*/
- gct->static_objects = *STATIC_LINK(info,p);
- *STATIC_LINK(info,p) = gct->scavenged_static_objects;
+ StgClosure **link = STATIC_LINK(info,p);
+ gct->static_objects = RELAXED_LOAD(link);
+ RELAXED_STORE(link, gct->scavenged_static_objects);
gct->scavenged_static_objects = flagged_p;
switch (info -> type) {
=====================================
rts/sm/Storage.c
=====================================
@@ -1032,8 +1032,8 @@ allocateMightFail (Capability *cap, W_ n)
g0->n_new_large_words += n;
RELEASE_SM_LOCK;
initBdescr(bd, g0, g0);
- bd->flags = BF_LARGE;
- bd->free = bd->start + n;
+ RELAXED_STORE(&bd->flags, BF_LARGE);
+ RELAXED_STORE(&bd->free, bd->start + n);
cap->total_allocated += n;
return bd->start;
}
@@ -1561,10 +1561,13 @@ calcNeeded (bool force_major, memcount *blocks_needed)
for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
generation *gen = &generations[g];
-
W_ blocks = gen->live_estimate ? (gen->live_estimate / BLOCK_SIZE_W) : gen->n_blocks;
- blocks += gen->n_large_blocks
- + gen->n_compact_blocks;
+
+ // This can race with allocate() and compactAllocateBlockInternal()
+ // but only needs to be approximate
+ TSAN_ANNOTATE_BENIGN_RACE(&gen->n_large_blocks, "n_large_blocks");
+ blocks += RELAXED_LOAD(&gen->n_large_blocks)
+ + RELAXED_LOAD(&gen->n_compact_blocks);
// we need at least this much space
needed += blocks;
=====================================
rts/sm/Storage.h
=====================================
@@ -72,8 +72,11 @@ bool getNewNursery (Capability *cap);
INLINE_HEADER
bool doYouWantToGC(Capability *cap)
{
+ // This is necessarily approximate since otherwise we would need to take
+ // SM_LOCK to safely look at n_new_large_words.
+ TSAN_ANNOTATE_BENIGN_RACE(&g0->n_new_large_words, "doYouWantToGC(n_new_large_words)");
return ((cap->r.rCurrentNursery->link == NULL && !getNewNursery(cap)) ||
- g0->n_new_large_words >= large_alloc_lim);
+ RELAXED_LOAD(&g0->n_new_large_words) >= large_alloc_lim);
}
/* -----------------------------------------------------------------------------
@@ -91,7 +94,7 @@ INLINE_HEADER void finishedNurseryBlock (Capability *cap, bdescr *bd) {
}
INLINE_HEADER void newNurseryBlock (bdescr *bd) {
- bd->free = bd->start;
+ RELAXED_STORE(&bd->free, bd->start);
}
void updateNurseriesStats (void);
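TSAN_ANNOTATE_BENIGN_RACE itself is not defined in this diff. Under ThreadSanitizer such macros are conventionally built on the dynamic-annotations interface; the following is a sketch of what a definition could look like (an assumption, not taken from these commits; the real macro lives elsewhere in the tree and may differ):

/* Sketch of a conventional definition; TSAN_ENABLED and the exact
 * prototype are assumptions here, not GHC's actual header. */
#if defined(TSAN_ENABLED)
void AnnotateBenignRaceSized(const char *file, int line,
                             const volatile void *addr,
                             long size, const char *desc);
#define TSAN_ANNOTATE_BENIGN_RACE_SIZED(addr, size, desc) \
    AnnotateBenignRaceSized(__FILE__, __LINE__, (addr), (size), (desc))
#define TSAN_ANNOTATE_BENIGN_RACE(addr, desc) \
    TSAN_ANNOTATE_BENIGN_RACE_SIZED((addr), sizeof(*(addr)), (desc))
#else
#define TSAN_ANNOTATE_BENIGN_RACE(addr, desc) /* nothing */
#endif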
=====================================
rts/win32/OSThreads.c
=====================================
@@ -444,6 +444,15 @@ interruptOSThread (OSThreadId id)
CloseHandle(hdl);
}
+void
+joinOSThread (OSThreadId id)
+{
+ int ret = WaitForSingleObject(id, INFINITE);
+ if (ret != WAIT_OBJECT_0) {
+ sysErrorBelch("joinOSThread: error %d", ret);
+ }
+}
+
void setThreadNode (uint32_t node)
{
if (osNumaAvailable())
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/eaa95417b69729783995c4ebd36b1537d75e0fff...96f8bde7efa53078238ddb0736795b3f98a8f1ce