[Git][ghc/ghc][wip/T22038] compiler: Rework handling of mutator aborting
Ben Gamari (@bgamari)
gitlab at gitlab.haskell.org
Fri Aug 19 23:10:35 UTC 2022
Ben Gamari pushed to branch wip/T22038 at Glasgow Haskell Compiler / GHC
Commits:
c895c449 by Ben Gamari at 2022-08-19T19:10:26-04:00
compiler: Rework handling of mutator aborting
Previously `-dtag-inference-checks`, `-dcheck-prim-bounds`, and
`-falignment-sanitization` all aborted by calling `barf` from the
mutator. However, this can lead to deadlocks in the threaded RTS. For
instance, in the case of `-dcheck-prim-bounds` the following can happen
1. the mutator takes a capability and begins execution
2. the bounds check fails, calling `barf`
3. `barf` calls `rtsFatalInternalErrorFn`, which in turn calls `endEventLogging`
4. `endEventLogging` calls `flushEventLog`, which it turn initiates a
sync to request that all capabilities flush their local event logs
5. we deadlock as the the capability held by the crashing mutator can
never join the sync
To avoid this we now have a more principled means of aborting: we return
to the scheduler setting the thread's return value to ThreadAborting.
The scheduler will see this and call `barf`.
Fixes #22038.
- - - - -
9 changed files:
- compiler/GHC/Cmm/CLabel.hs
- compiler/GHC/StgToCmm/Prim.hs
- compiler/GHC/StgToCmm/TagCheck.hs
- compiler/GHC/StgToCmm/Utils.hs
- rts/PrimOps.cmm
- rts/RtsMessages.c
- rts/Schedule.c
- rts/StgMiscClosures.cmm
- rts/include/rts/Constants.h
Changes:
=====================================
compiler/GHC/Cmm/CLabel.hs
=====================================
@@ -64,6 +64,7 @@ module GHC.Cmm.CLabel (
mkSMAP_FROZEN_DIRTY_infoLabel,
mkSMAP_DIRTY_infoLabel,
mkBadAlignmentLabel,
+ mkTagInferenceCheckFailureLabel,
mkOutOfBoundsAccessLabel,
mkArrWords_infoLabel,
mkSRTInfoLabel,
@@ -637,8 +638,9 @@ mkDirty_MUT_VAR_Label,
mkTopTickyCtrLabel,
mkCAFBlackHoleInfoTableLabel,
mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel,
- mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel,
- mkOutOfBoundsAccessLabel, mkMUT_VAR_CLEAN_infoLabel :: CLabel
+ mkSMAP_DIRTY_infoLabel, mkMUT_VAR_CLEAN_infoLabel,
+ mkBadAlignmentLabel, mkTagInferenceCheckFailureLabel, mkOutOfBoundsAccessLabel
+ :: CLabel
mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction
mkNonmovingWriteBarrierEnabledLabel
= CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "nonmoving_write_barrier_enabled") CmmData
@@ -655,9 +657,10 @@ mkArrWords_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsL
mkSMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo
mkSMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo
mkSMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo
-mkBadAlignmentLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_badAlignment") CmmEntry
-mkOutOfBoundsAccessLabel = mkForeignLabel (fsLit "rtsOutOfBoundsAccess") Nothing ForeignLabelInExternalPackage IsFunction
-mkMUT_VAR_CLEAN_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_MUT_VAR_CLEAN") CmmInfo
+mkMUT_VAR_CLEAN_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_MUT_VAR_CLEAN") CmmInfo
+mkBadAlignmentLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_badAlignment") CmmPrimCall
+mkOutOfBoundsAccessLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_outOfBoundsAccess") CmmPrimCall
+mkTagInferenceCheckFailureLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_tagInferenceCheckFailure") CmmPrimCall
mkSRTInfoLabel :: Int -> CLabel
mkSRTInfoLabel n = CmmLabel rtsUnitId (NeedExternDecl False) lbl CmmInfo
=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -3211,7 +3211,7 @@ doBoundsCheck idx sz = do
when do_bounds_check (doCheck platform)
where
doCheck platform = do
- boundsCheckFailed <- getCode $ emitCCall [] (mkLblExpr mkOutOfBoundsAccessLabel) []
+ boundsCheckFailed <- getCode $ emitCall (NativeNodeCall, NativeReturn) (mkLblExpr mkOutOfBoundsAccessLabel) [idx, sz]
emit =<< mkCmmIfThen' isOutOfBounds boundsCheckFailed (Just False)
where
uGE = cmmUGeWord platform
=====================================
compiler/GHC/StgToCmm/TagCheck.hs
=====================================
@@ -19,9 +19,13 @@ import GHC.Prelude
import GHC.StgToCmm.Env
import GHC.StgToCmm.Monad
import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Layout (emitCall)
+import GHC.StgToCmm.Lit (newStringCLit)
import GHC.Cmm
import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel (mkTagInferenceCheckFailureLabel)
import GHC.Cmm.Graph as CmmGraph
+import GHC.Cmm.Utils
import GHC.Core.Type
import GHC.Types.Id
@@ -95,7 +99,8 @@ emitTagAssertion onWhat fun = do
; needsArgTag fun lbarf lret
; emitLabel lbarf
- ; emitBarf ("Tag inference failed on:" ++ onWhat)
+ ; onWhat_str <- newStringCLit onWhat
+ ; _ <- emitCall (NativeNodeCall, NativeReturn) (mkLblExpr mkTagInferenceCheckFailureLabel) [CmmLit onWhat_str]
; emitLabel lret
}
=====================================
compiler/GHC/StgToCmm/Utils.hs
=====================================
@@ -12,7 +12,6 @@ module GHC.StgToCmm.Utils (
emitDataLits, emitRODataLits,
emitDataCon,
emitRtsCall, emitRtsCallWithResult, emitRtsCallGen,
- emitBarf,
assignTemp, newTemp,
newUnboxedTupleRegs,
@@ -158,11 +157,6 @@ tagToClosure platform tycon tag
--
-------------------------------------------------------------------------
-emitBarf :: String -> FCode ()
-emitBarf msg = do
- strLbl <- newStringCLit msg
- emitRtsCall rtsUnitId (fsLit "barf") [(CmmLit strLbl,AddrHint)] False
-
emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
emitRtsCall pkg fun = emitRtsCallGen [] (mkCmmCodeLabel pkg fun)
=====================================
rts/PrimOps.cmm
=====================================
@@ -42,7 +42,7 @@ import CLOSURE CCS_MAIN;
#if defined(DEBUG)
#define ASSERT_IN_BOUNDS(ind, sz) \
- if (ind >= sz) { ccall rtsOutOfBoundsAccess(); }
+ if (ind >= sz) { ccall stg_outOfBoundsAccess(ind, sz); }
#else
#define ASSERT_IN_BOUNDS(ind, sz)
#endif
@@ -1150,7 +1150,7 @@ stg_threadStatuszh ( gcptr tso )
* TVar primitives
* -------------------------------------------------------------------------- */
-stg_abort /* no arg list: explicit stack layout */
+stg_abort_tx /* no arg list: explicit stack layout */
{
W_ frame_type;
W_ frame;
@@ -1159,7 +1159,7 @@ stg_abort /* no arg list: explicit stack layout */
W_ r;
// STM operations may allocate
- MAYBE_GC_ (stg_abort); // NB. not MAYBE_GC(), we cannot make a
+ MAYBE_GC_ (stg_abort_tx); // NB. not MAYBE_GC(), we cannot make a
// function call in an explicit-stack proc
// Find the enclosing ATOMICALLY_FRAME
@@ -1217,7 +1217,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
} else {
// Did not commit: abort and restart.
StgTSO_trec(CurrentTSO) = outer;
- jump stg_abort();
+ jump stg_abort_tx();
}
}
=====================================
rts/RtsMessages.c
=====================================
@@ -320,21 +320,3 @@ rtsDebugMsgFn(const char *s, va_list ap)
return r;
}
-
-// Used in stg_badAlignment_entry defined in StgStartup.cmm.
-void rtsBadAlignmentBarf(void) GNUC3_ATTRIBUTE(__noreturn__);
-
-void
-rtsBadAlignmentBarf()
-{
- barf("Encountered incorrectly aligned pointer. This can't be good.");
-}
-
-// Used by code generator
-void rtsOutOfBoundsAccess(void) GNUC3_ATTRIBUTE(__noreturn__);
-
-void
-rtsOutOfBoundsAccess()
-{
- barf("Encountered out of bounds array access.");
-}
=====================================
rts/Schedule.c
=====================================
@@ -571,8 +571,12 @@ run_thread:
ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
break;
+ case ThreadAborted:
+ interruptStgRts();
+ break;
+
default:
- barf("schedule: invalid thread return code %d", (int)ret);
+ barf("schedule: invalid thread return code %d", (int)ret);
}
if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
@@ -3090,7 +3094,7 @@ findRetryFrameHelper (Capability *cap, StgTSO *tso)
/* -----------------------------------------------------------------------------
findAtomicallyFrameHelper
- This function is called by stg_abort via catch_retry_frame primitive. It is
+ This function is called by stg_abort_tx via catch_retry_frame primitive. It is
like findRetryFrameHelper but it will only stop at ATOMICALLY_FRAME.
-------------------------------------------------------------------------- */
=====================================
rts/StgMiscClosures.cmm
=====================================
@@ -1486,3 +1486,49 @@ section "data" {
}
#endif
+
+/* Note [Aborting from the mutator]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * GHC supports a number of runtime checking modes (largely for debugging
+ * purposes) which may need to abort execution at runtime. This include
+ * -dtag-inference-check, -dcheck-prim-bounds, and -falignment-sanitisation.
+ * To abort execution one might think that we could just call `barf`; however
+ * this is not ideal since it doesn't allow the RTS to gracefully shutdown.
+ *
+ * In #22038 we saw this manifest as a deadlock when -dcheck-prim-bounds
+ * failed. In particular, we saw the following:
+ *
+ * 1. the mutator takes a capability and begins execution
+ * 2. the bounds check fails, calling `barf`
+ * 3. `barf` calls `rtsFatalInternalErrorFn`, which in turn calls `endEventLogging`
+ * 4. `endEventLogging` calls `flushEventLog`, which it turn initiates a
+ * sync to request that all capabilities flush their local event logs
+ * 5. we deadlock as the the capability held by the crashing mutator can
+ * never yields to the sync
+ *
+ * Consequently, we instead crash in a more principled manner by yielding back
+ * to the scheduler, indicating that we should abort by setting the thread's
+ * return value to ThreadAborted. This is done by stg_abort().
+ */
+
+stg_tagInferenceCheckFailure(W_ what) {
+ ccall debugBelch("Tag inference failed on: %s\n", what);
+ jump stg_abort();
+}
+
+stg_outOfBoundsAccess(W_ ind, W_ sz) {
+ ccall debugBelch("Encountered out of bounds array access (index=%d, size=%d)", ind, sz);
+ jump stg_abort();
+}
+
+stg_badAlignment() {
+ ccall debugBelch("Encountered incorrectly aligned pointer. This can't be good.");
+ jump stg_abort();
+}
+
+stg_abort() {
+ StgTSO_what_next(CurrentTSO) = ThreadKilled :: I16;
+ StgRegTable_rRet(BaseReg) = ThreadAborted :: W_;
+ R1 = BaseReg;
+ jump stg_returnToSched [R1];
+}
=====================================
rts/include/rts/Constants.h
=====================================
@@ -268,6 +268,7 @@
#define ThreadYielding 3
#define ThreadBlocked 4
#define ThreadFinished 5
+#define ThreadAborted 6 /* See Note [Aborting from the mutator] */
/*
* Flags for the tso->flags field.
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c895c44911ec44b9506cbe97555753ac402d6acf
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c895c44911ec44b9506cbe97555753ac402d6acf
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20220819/bd4374ec/attachment-0001.html>
More information about the ghc-commits
mailing list