[Git][ghc/ghc][wip/memory-barriers] 3 commits: Account for Simon's comments
Ben Gamari
gitlab at gitlab.haskell.org
Fri Jun 14 16:08:37 UTC 2019
Ben Gamari pushed to branch wip/memory-barriers at Glasgow Haskell Compiler / GHC
Commits:
35300a47 by Ben Gamari at 2019-06-14T15:15:09Z
Account for Simon's comments
- - - - -
266cc558 by Ben Gamari at 2019-06-14T15:16:25Z
rts: Assert that LDV profiling isn't used with parallel GC
I'm not entirely sure we are careful about ensuring this; this is a
last-ditch check.
- - - - -
1ce4268e by Ben Gamari at 2019-06-14T16:08:27Z
More comments from Simon
- - - - -
15 changed files:
- includes/Cmm.h
- includes/stg/SMP.h
- rts/Apply.cmm
- rts/Compact.cmm
- rts/Interpreter.c
- rts/Messages.c
- rts/PrimOps.cmm
- rts/RaiseAsync.c
- rts/StgMiscClosures.cmm
- rts/ThreadPaused.c
- rts/sm/Compact.c
- rts/sm/Evac.c
- rts/sm/GC.c
- rts/sm/GCAux.c
- rts/sm/Scav.c
Changes:
=====================================
includes/Cmm.h
=====================================
@@ -633,6 +633,9 @@
#define OVERWRITING_CLOSURE_OFS(c,n) /* nothing */
#endif
+// Memory barriers.
+// For discussion of how these are used to fence heap object
+// accesses see Note [Heap memory barriers] in SMP.h.
#if defined(THREADED_RTS)
#define prim_read_barrier prim %read_barrier()
#else
=====================================
includes/stg/SMP.h
=====================================
@@ -123,24 +123,25 @@ EXTERN_INLINE void load_load_barrier(void);
* has jumped off into the abyss. Speculative execution can also cause us
* issues, consider:
*
- * - An observer is about to case on a value in closure's info table.
- * - The observer speculatively reads one or more of closure's fields.
- * - An updater writes to closure's info table.
- * - The observer takes a branch based on the new info table value, but with the
+ * - an observer is about to case on a value in closure's info table.
+ * - the observer speculatively reads one or more of closure's fields.
+ * - an updater writes to closure's info table.
+ * - the observer takes a branch based on the new info table value, but with the
* old closure fields!
- * - The updater writes to the closure's other fields, but its too late.
+ * - the updater writes to the closure's other fields, but it's too late.
*
* Because of these effects, reads and writes to a closure's info table must be
* ordered carefully with respect to reads and writes to the closure's other
* fields, and memory barriers must be placed to ensure that reads and writes
- * occur in program order. Specifically, updates to a closure must follow the
- * following pattern:
+ * occur in program order. Specifically, updates to an already existing closure
+ * must use the following pattern:
*
* - Update the closure's (non-info table) fields.
* - Write barrier.
* - Update the closure's info table.
*
- * Observing a closure's fields must follow the following pattern:
+ * Observing the fields of an updateable closure (e.g. a THUNK) must use the
+ * following pattern:
*
* - Read the closure's info pointer.
* - Read barrier.
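
To make the two patterns concrete, here is a minimal C sketch, assuming the
usual RTS environment (Rts.h). The helper names are hypothetical; this
illustrates the Note, it is not code from the patch:

    #include "Rts.h"

    // Updater: write the (non-info) fields first, fence, then publish
    // the new info pointer.
    static void update_closure_sketch(StgClosure *c, StgClosure *val)
    {
        ((StgInd *)c)->indirectee = val;   // 1. update the closure's fields
        write_barrier();                   // 2. order fields before info
        SET_INFO(c, &stg_BLACKHOLE_info);  // 3. publish the new info pointer
    }

    // Observer: read the info pointer first, fence, then the fields.
    static const StgInfoTable *observe_closure_sketch(StgClosure *c)
    {
        const StgInfoTable *info = c->header.info; // 1. read info pointer
        load_load_barrier();                       // 2. order info before fields
        // 3. now safe to read c's fields, e.g. ((StgInd *)c)->indirectee
        return info;
    }
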
@@ -220,8 +221,31 @@ EXTERN_INLINE void load_load_barrier(void);
* the capability-local mut_list. Consequently this does not require any memory
* barrier.
*
- * During parallel GC cores are each scavenging disjoint sets of blocks and
- * consequently no barriers are needed.
+ * During parallel GC we need to be careful during evacuation: before replacing
+ * a closure with a forwarding pointer we must issue a write barrier to ensure
+ * that the copy we made in to-space is visible to other cores.
+ *
+ * However, we can be a bit lax when *reading* during GC. Specifically, the GC
+ * can only make a very limited set of changes to existing closures:
+ *
+ * - it can replace a closure's info table with stg_WHITEHOLE.
+ * - it can replace a previously-whitehole'd closure's info table with a
+ * forwarding pointer.
+ * - it can replace a previously-whitehole'd closure's info table with a
+ * valid info table pointer (done in eval_thunk_selector).
+ * - it can update the value of a pointer field after evacuating it.
+ *
+ * This is quite nice since we don't need to worry about an interleaving
+ * of writes producing an invalid state: a closure's fields remain valid after
+ * an update of its info table pointer and vice versa.
+ *
+ * After a round of parallel scavenging we must also ensure that any writes the
+ * GC worker threads made are visible to the main GC thread. This is guaranteed
+ * by the full barrier implied by the atomic decrement in
+ * GC.c:scavenge_until_all_done.
+ *
+ * The work-stealing queue (WSDeque) also requires barriers; these are
+ * documented in WSDeque.c.
*
*/
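
The evacuation ordering above can be illustrated with a simplified sketch,
loosely modelled on copy_tag in rts/sm/Evac.c (tag handling, allocation, and
the losing-race path are elided; this is not the actual RTS code):

    // Copy src into to-space, then atomically install a forwarding
    // pointer. The write barrier guarantees that the copy is complete
    // before any core that reads the forwarding pointer can follow it.
    static void evacuate_sketch(StgClosure *src, const StgInfoTable *info,
                                StgPtr to, uint32_t size)
    {
        for (uint32_t i = 0; i < size; i++) {
            to[i] = ((StgPtr)src)[i];      // copy header and payload words
        }
        write_barrier();                   // copy before forwarding pointer
        cas((StgPtr)&src->header.info, (StgWord)info, MK_FORWARDING_PTR(to));
    }
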
=====================================
rts/Apply.cmm
=====================================
@@ -66,7 +66,6 @@ again:
// Note [Heap memory barriers] in SMP.h.
untaggedfun = UNTAG(fun);
info = %INFO_PTR(untaggedfun);
- prim_read_barrier;
switch [INVALID_OBJECT .. N_CLOSURE_TYPES]
(TO_W_( %INFO_TYPE(%STD_INFO(info)) )) {
case
@@ -285,7 +284,6 @@ for:
info = %GET_FUN_INFO(UNTAG(R1));
W_ type;
type = TO_W_(StgFunInfoExtra_fun_type(info));
- prim_read_barrier;
if (type == ARG_GEN) {
jump StgFunInfoExtra_slow_apply(info) [R1];
}
@@ -364,7 +362,6 @@ for:
info = %GET_FUN_INFO(UNTAG(R1));
W_ type;
type = TO_W_(StgFunInfoExtra_fun_type(info));
- prim_read_barrier;
if (type == ARG_GEN) {
jump StgFunInfoExtra_slow_apply(info) [R1];
}
@@ -429,14 +426,12 @@ for:
TICK_ENT_VIA_NODE();
#if defined(NO_ARG_REGS)
- prim_read_barrier;
jump %GET_ENTRY(UNTAG(R1)) [R1];
#else
W_ info;
info = %GET_FUN_INFO(UNTAG(R1));
W_ type;
type = TO_W_(StgFunInfoExtra_fun_type(info));
- prim_read_barrier;
if (type == ARG_GEN) {
jump StgFunInfoExtra_slow_apply(info) [R1];
}
=====================================
rts/Compact.cmm
=====================================
@@ -72,7 +72,6 @@ eval:
tag = GETTAG(p);
p = UNTAG(p);
info = %INFO_PTR(p);
- prim_read_barrier;
type = TO_W_(%INFO_TYPE(%STD_INFO(info)));
switch [0 .. N_CLOSURE_TYPES] type {
=====================================
rts/Interpreter.c
=====================================
@@ -1529,7 +1529,6 @@ run_BCO:
int o_itbl = BCO_GET_LARGE_ARG;
int n_words = BCO_NEXT;
StgInfoTable* itbl = INFO_PTR_TO_STRUCT((StgInfoTable *)BCO_LIT(o_itbl));
- load_load_barrier();
int request = CONSTR_sizeW( itbl->layout.payload.ptrs,
itbl->layout.payload.nptrs );
StgClosure* con = (StgClosure*)allocate_NONUPD(cap,request);
=====================================
rts/Messages.c
=====================================
@@ -28,7 +28,6 @@ void sendMessage(Capability *from_cap, Capability *to_cap, Message *msg)
#if defined(DEBUG)
{
const StgInfoTable *i = msg->header.info;
- load_load_barrier(); // See Note [Heap memory barriers] in SMP.h
if (i != &stg_MSG_THROWTO_info &&
i != &stg_MSG_BLACKHOLE_info &&
i != &stg_MSG_TRY_WAKEUP_info &&
@@ -71,7 +70,6 @@ executeMessage (Capability *cap, Message *m)
loop:
write_barrier(); // allow m->header to be modified by another thread
i = m->header.info;
- load_load_barrier(); // See Note [Heap memory barriers] in SMP.h
if (i == &stg_MSG_TRY_WAKEUP_info)
{
StgTSO *tso = ((MessageWakeup *)m)->tso;
@@ -331,7 +329,6 @@ StgTSO * blackHoleOwner (StgClosure *bh)
StgClosure *p;
info = bh->header.info;
- load_load_barrier(); // XXX
if (info != &stg_BLACKHOLE_info &&
info != &stg_CAF_BLACKHOLE_info &&
@@ -347,7 +344,6 @@ loop:
// and turns this into an infinite loop.
p = UNTAG_CLOSURE((StgClosure*)VOLATILE_LOAD(&((StgInd*)bh)->indirectee));
info = p->header.info;
- load_load_barrier(); // XXX
if (info == &stg_IND_info) goto loop;
=====================================
rts/PrimOps.cmm
=====================================
@@ -1683,8 +1683,8 @@ stg_putMVarzh ( P_ mvar, /* :: MVar a */
StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
StgMVarTSOQueue_tso(q) = CurrentTSO;
- prim_write_barrier;
SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+ prim_write_barrier;
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_head(mvar) = q;
@@ -1876,8 +1876,8 @@ stg_readMVarzh ( P_ mvar, /* :: MVar a */ )
StgMVarTSOQueue_link(q) = StgMVar_head(mvar);
StgMVarTSOQueue_tso(q) = CurrentTSO;
- prim_write_barrier;
SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+ prim_write_barrier;
StgTSO__link(CurrentTSO) = q;
StgTSO_block_info(CurrentTSO) = mvar;
@@ -1943,6 +1943,9 @@ stg_makeStableNamezh ( P_ obj )
BYTES_TO_WDS(SIZEOF_StgStableName));
SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS);
StgStableName_sn(sn_obj) = index;
+ // The write below makes the StableName# object visible to other threads;
+ // ensure that it's completely initialised and visible to other cores first.
+ // See Note [Heap memory barriers] in SMP.h.
prim_write_barrier;
snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry) = sn_obj;
} else {
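
The PrimOps.cmm hunks above all follow the same discipline: fully initialise
a new object, including its header, then fence, and only then link it into a
structure other cores can reach. Roughly the same thing in C, as a
hypothetical helper mirroring the stg_putMVarzh case (not actual RTS code):

    // Initialise q completely, fence, then publish it on the MVar's
    // blocked-thread queue where other capabilities can find it.
    static void push_queue_node(StgMVar *mvar, StgMVarTSOQueue *q, StgTSO *tso)
    {
        q->link = (StgMVarTSOQueue *)END_TSO_QUEUE;   // initialise fields
        q->tso  = tso;
        SET_HDR((StgClosure *)q, &stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
        write_barrier();     // all of the above before the publish below
        mvar->head = q;      // publish: q is now visible to other cores
    }
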
=====================================
rts/RaiseAsync.c
=====================================
@@ -922,7 +922,6 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
ap->payload[i] = (StgClosure *)*sp++;
}
- write_barrier(); // XXX: Necessary?
SET_HDR(ap,&stg_AP_STACK_NOUPD_info,stack->header.prof.ccs);
TICK_ALLOC_SE_THK(WDS(words+1),0);
@@ -961,7 +960,6 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
//
raise = (StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
TICK_ALLOC_SE_THK(WDS(1),0);
- write_barrier(); // XXX: Necessary?
SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs);
raise->payload[0] = exception;
=====================================
rts/StgMiscClosures.cmm
=====================================
@@ -379,7 +379,6 @@ INFO_TABLE(stg_WHITEHOLE, 0,0, WHITEHOLE, "WHITEHOLE", "WHITEHOLE")
loop:
// spin until the WHITEHOLE is updated
info = StgHeader_info(node);
- prim_read_barrier;
if (info == stg_WHITEHOLE_info) {
#if defined(PROF_SPIN)
W_[whitehole_lockClosure_spin] =
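
For comparison, the same spin loop rendered in C (a hypothetical helper; as
in the Cmm above there is no read barrier in the loop, and VOLATILE_LOAD
stands in for Cmm's ordinary memory access so the compiler cannot hoist the
load out of the loop):

    // Spin until another thread overwrites the WHITEHOLE with a real
    // info pointer; the updater's write barrier ensures the closure's
    // fields are valid by the time the new info pointer is visible.
    static StgWord spin_on_whitehole(StgClosure *node)
    {
        StgWord info = VOLATILE_LOAD(&node->header.info);
        while (info == (StgWord)&stg_WHITEHOLE_info) {
            busy_wait_nop();   // yield to the sibling hyperthread
            info = VOLATILE_LOAD(&node->header.info);
        }
        return info;
    }
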
=====================================
rts/ThreadPaused.c
=====================================
@@ -219,9 +219,10 @@ threadPaused(Capability *cap, StgTSO *tso)
frame = (StgClosure *)tso->stackobj->sp;
+ // N.B. We know that the TSO is owned by the current capability, so no
+ // memory barriers are needed here.
while ((P_)frame < stack_end) {
info = get_ret_itbl(frame);
- load_load_barrier();
switch (info->i.type) {
@@ -229,8 +230,6 @@ threadPaused(Capability *cap, StgTSO *tso)
// If we've already marked this frame, then stop here.
frame_info = frame->header.info;
- // Ensure that read from frame->updatee below sees any pending writes
- load_load_barrier();
if (frame_info == (StgInfoTable *)&stg_marked_upd_frame_info) {
if (prev_was_update_frame) {
words_to_squeeze += sizeofW(StgUpdateFrame);
@@ -244,7 +243,6 @@ threadPaused(Capability *cap, StgTSO *tso)
bh = ((StgUpdateFrame *)frame)->updatee;
bh_info = bh->header.info;
- load_load_barrier(); // XXX: Why is this needed?
#if defined(THREADED_RTS)
retry:
=====================================
rts/sm/Compact.c
=====================================
@@ -160,7 +160,6 @@ get_threaded_info( StgPtr p )
StgWord q;
q = (W_)GET_INFO(UNTAG_CLOSURE((StgClosure *)p));
- load_load_barrier();
loop:
switch (GET_CLOSURE_TAG((StgClosure *)q))
@@ -362,7 +361,6 @@ thread_stack(StgPtr p, StgPtr stack_end)
fun_info = FUN_INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_CLOSURE((StgClosure *)
get_threaded_info((StgPtr)ret_fun->fun)));
- load_load_barrier();
// *before* threading it!
thread(&ret_fun->fun);
p = thread_arg_block(fun_info, ret_fun->payload);
@@ -385,7 +383,6 @@ thread_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size)
fun_info = FUN_INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_CLOSURE((StgClosure *)
get_threaded_info((StgPtr)fun)));
- load_load_barrier();
ASSERT(fun_info->i.type != PAP);
p = (StgPtr)payload;
@@ -826,7 +823,6 @@ update_fwd_compact( bdescr *blocks )
// definitely have enough room. Also see bug #1147.
iptr = get_threaded_info(p);
info = INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_CLOSURE((StgClosure *)iptr));
- load_load_barrier();
q = p;
=====================================
rts/sm/Evac.c
=====================================
@@ -111,7 +111,6 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
{
const StgInfoTable *new_info;
new_info = (const StgInfoTable *)cas((StgPtr)&src->header.info, (W_)info, MK_FORWARDING_PTR(to));
- load_load_barrier();
if (new_info != info) {
#if defined(PROFILING)
// We copied this object at the same time as another
@@ -195,7 +194,7 @@ spin:
if (info == (W_)&stg_WHITEHOLE_info) {
#if defined(PROF_SPIN)
whitehole_gc_spin++;
-#endif
+#endif /* PROF_SPIN */
busy_wait_nop();
goto spin;
}
@@ -206,8 +205,7 @@ spin:
}
#else
info = (W_)src->header.info;
- load_load_barrier();
-#endif
+#endif /* PARALLEL_GC */
to = alloc_for_copy(size_to_reserve, gen_no);
@@ -612,7 +610,6 @@ loop:
gen_no = bd->dest_no;
info = q->header.info;
- load_load_barrier();
if (IS_FORWARDING_PTR(info))
{
/* Already evacuated, just return the forwarding address.
@@ -723,14 +720,11 @@ loop:
StgClosure *r;
const StgInfoTable *i;
r = ((StgInd*)q)->indirectee;
- load_load_barrier();
if (GET_CLOSURE_TAG(r) == 0) {
i = r->header.info;
- load_load_barrier();
if (IS_FORWARDING_PTR(i)) {
r = (StgClosure *)UN_FORWARDING_PTR(i);
i = r->header.info;
- load_load_barrier();
}
if (i == &stg_TSO_info
|| i == &stg_WHITEHOLE_info
@@ -923,7 +917,6 @@ evacuate_BLACKHOLE(StgClosure **p)
}
gen_no = bd->dest_no;
info = q->header.info;
- load_load_barrier();
if (IS_FORWARDING_PTR(info))
{
StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
@@ -1116,9 +1109,8 @@ selector_chain:
#else
// Save the real info pointer (NOTE: not the same as get_itbl()).
info_ptr = (StgWord)p->header.info;
- load_load_barrier();
SET_INFO((StgClosure *)p,&stg_WHITEHOLE_info);
-#endif
+#endif /* THREADED_RTS */
field = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr)->layout.selector_offset;
@@ -1135,7 +1127,6 @@ selector_loop:
// that evacuate() doesn't mind if it gets passed a to-space pointer.
info = (StgInfoTable*)selectee->header.info;
- load_load_barrier();
if (IS_FORWARDING_PTR(info)) {
// We don't follow pointers into to-space; the constructor
@@ -1145,7 +1136,6 @@ selector_loop:
}
info = INFO_PTR_TO_STRUCT(info);
- load_load_barrier();
switch (info->type) {
case WHITEHOLE:
goto bale_out; // about to be evacuated by another thread (or a loop).
@@ -1177,6 +1167,9 @@ selector_loop:
OVERWRITING_CLOSURE((StgClosure*)p);
SET_INFO((StgClosure*)p, &stg_WHITEHOLE_info);
write_barrier();
+#if defined(PARALLEL_GC)
+ abort(); // LDV is incompatible with parallel GC
+#endif
}
#endif
@@ -1190,7 +1183,6 @@ selector_loop:
if (!IS_FORWARDING_PTR(info_ptr))
{
info = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr);
- load_load_barrier();
switch (info->type) {
case IND:
case IND_STATIC:
@@ -1242,11 +1234,9 @@ selector_loop:
// indirection, as in evacuate().
if (GET_CLOSURE_TAG(r) == 0) {
i = r->header.info;
- load_load_barrier();
if (IS_FORWARDING_PTR(i)) {
r = (StgClosure *)UN_FORWARDING_PTR(i);
i = r->header.info;
- load_load_barrier();
}
if (i == &stg_TSO_info
|| i == &stg_WHITEHOLE_info
=====================================
rts/sm/GC.c
=====================================
@@ -1104,6 +1104,8 @@ loop:
// scavenge_loop() only exits when there's no work to do
+ // This atomic decrement also serves as a full barrier to ensure that any
+ // writes we made during scavenging are visible to other threads.
#if defined(DEBUG)
r = dec_running();
#else
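
For context, dec_running is roughly the following (simplified from
rts/sm/GC.c; the DEBUG-only bookkeeping around the call site is omitted):

    // Called by a GC worker when it runs out of work. atomic_dec is a
    // locked read-modify-write operation and therefore a full barrier:
    // every write this worker made while scavenging happens-before any
    // thread that observes the decremented count.
    static StgWord dec_running(void)
    {
        ASSERT(gc_running_threads != 0);
        return atomic_dec(&gc_running_threads);
    }
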
=====================================
rts/sm/GCAux.c
=====================================
@@ -76,7 +76,6 @@ isAlive(StgClosure *p)
}
info = q->header.info;
- load_load_barrier();
if (IS_FORWARDING_PTR(info)) {
// alive!
=====================================
rts/sm/Scav.c
=====================================
@@ -374,7 +374,6 @@ scavenge_thunk_srt(const StgInfoTable *info)
if (!major_gc) return;
thunk_info = itbl_to_thunk_itbl(info);
- load_load_barrier();
if (thunk_info->i.srt) {
StgClosure *srt = (StgClosure*)GET_SRT(thunk_info);
evacuate(&srt);
@@ -389,7 +388,6 @@ scavenge_fun_srt(const StgInfoTable *info)
if (!major_gc) return;
fun_info = itbl_to_fun_itbl(info);
- load_load_barrier();
if (fun_info->i.srt) {
StgClosure *srt = (StgClosure*)GET_FUN_SRT(fun_info);
evacuate(&srt);
@@ -1603,7 +1601,6 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
mutlist_TREC_CHUNK++; break;
case MUT_PRIM:
pinfo = ((StgClosure*)p)->header.info;
- load_load_barrier();
if (pinfo == &stg_TVAR_WATCH_QUEUE_info)
mutlist_TVAR_WATCH_QUEUE++;
else if (pinfo == &stg_TREC_HEADER_info)
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/compare/ee6ba25b1b407a9516b55b94ea90dad176eb49ca...1ce4268e60b46551ecb24753a1eb796c9f04e601