[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 2 commits: StgToCmm: Detect some no-op case-continuations
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Sat Dec 30 19:44:02 UTC 2023
Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
203d0a7d by Matthew Craven at 2023-12-30T14:43:56-05:00
StgToCmm: Detect some no-op case-continuations
...and generate no code for them. Fixes #24264.
- - - - -
98af0798 by Krzysztof Gogolewski at 2023-12-30T14:43:57-05:00
Revert "testsuite: mark jspace as fragile on i386."
This reverts commit 0038d052c8c80b4b430bb2aa1c66d5280be1aa95.
The atomicity bug should be fixed by !11802.
- - - - -
7 changed files:
- compiler/GHC/StgToCmm/Expr.hs
- + testsuite/tests/codeGen/should_compile/T24264.hs
- + testsuite/tests/codeGen/should_compile/T24264.stderr
- testsuite/tests/codeGen/should_compile/all.T
- + testsuite/tests/codeGen/should_run/T24264run.hs
- testsuite/tests/codeGen/should_run/all.T
- testsuite/tests/driver/j-space/all.T
Changes:
=====================================
compiler/GHC/StgToCmm/Expr.hs
=====================================
@@ -570,6 +570,58 @@ cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
-- Use the same return convention as vanilla 'a'.
cgCase (StgApp a []) bndr alt_type alts
+{-
+Note [Eliminate trivial Solo# continuations]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have code like this:
+
+ case scrut of bndr {
+ alt -> Solo# bndr
+ }
+
+The RHS of the only branch does nothing except wrap the case-binder
+returned by 'scrut' in a unary unboxed tuple. But unboxed tuples
+don't exist at run-time, i.e. the branch is a no-op! So we can
+generate code as if we just had 'scrut' instead of a case-expression.
+
+This situation can easily arise for IO or ST code, where the last
+operation a function performs is commonly 'pure $! someExpr'.
+See also #24264 and !11778. More concretely, as of December 2023,
+when building a stage2 "perf+no_profiled_libs" ghc:
+
+ * The special case is reached 398 times.
+ * Of these, 158 have scrutinees that call a function or enter a
+ potential thunk, and would need to push a useless stack frame if
+ not for this optimisation.
+
+We might consider rewriting such case expressions in GHC.Stg.CSE as a
+slight extension of Note [All alternatives are the binder]. But the
+RuntimeReps of 'bndr' and 'Solo# bndr' are not exactly the same, and
+per Note [Typing the STG language] in GHC.Stg.Lint, we do expect Stg
+code to remain RuntimeRep-correct. So we just detect the situation in
+StgToCmm instead.
+
+Crucially, the return conventions for 'ty' and '(# ty #)' are compatible:
+The returned value is passed in the same register(s) or stack slot in
+both conventions, and the set of allowed return values for 'ty'
+is a subset of the allowed return values for '(# ty #)':
+
+ * For a lifted type 'ty', the return convention for 'ty' promises to
+ return an evaluated-properly-tagged heap pointer, while a return
+ type '(# ty #)' only promises to return a heap pointer to an object
+ that can be evaluated later if need be.
+
+ * If 'ty' is unlifted, the allowed return
+ values for 'ty' and '(# ty #)' are identical.
+-}
+
+cgCase scrut bndr _alt_type [GenStgAlt { alt_rhs = rhs}]
+ -- see Note [Eliminate trivial Solo# continuations]
+ | StgConApp dc _ [StgVarArg v] _ <- rhs
+ , isUnboxedTupleDataCon dc
+ , v == bndr
+ = cgExpr scrut
+
cgCase scrut bndr alt_type alts
= -- the general case
do { platform <- getPlatform
=====================================
testsuite/tests/codeGen/should_compile/T24264.hs
=====================================
@@ -0,0 +1,42 @@
+{-# LANGUAGE MagicHash #-}
+
+module T24264 where
+
+import Control.Exception (evaluate)
+import GHC.Exts (seq#, noinline)
+import GHC.IO (IO(..))
+
+fun1 :: a -> IO a
+{-# OPAQUE fun1 #-}
+fun1 x = do
+ pure ()
+ pure $! x
+ -- This should not push a continuation to the stack before entering 'x'
+
+fun2 :: a -> IO a
+{-# OPAQUE fun2 #-}
+fun2 x = do
+ pure ()
+ evaluate x
+ -- This should not push a continuation to the stack before entering 'x'
+
+fun3 :: a -> IO a
+{-# OPAQUE fun3 #-}
+fun3 x = do
+ pure ()
+ -- "evaluate $! x"
+ case x of !x' -> IO (noinline seq# x')
+ -- noinline to work around the bogus seqRule
+ -- This ideally also should not push a continuation to the stack
+ -- before entering 'x'. It currently does, but let's wait for
+ -- !11515 to land before worrying about that.
+
+funPair :: a -> IO (a, a)
+{-# OPAQUE funPair #-}
+funPair x = do
+ pure ()
+ x' <- pure $! x
+ -- This should push a continuation to the stack before entering 'x',
+ -- so the pair can be returned instead. (It's here to make sure
+ -- that the 'returns to' detection continues working correctly.)
+ pure (x', x')
=====================================
testsuite/tests/codeGen/should_compile/T24264.stderr
=====================================
@@ -0,0 +1,142 @@
+
+==================== Cmm produced by codegen ====================
+[T24264.fun1_entry() { // [R2]
+ { info_tbls: [(cQO,
+ label: T24264.fun1_info
+ rep: HeapRep static { Fun {arity: 2 fun_type: ArgSpec 5} }
+ srt: Nothing)]
+ stack_info: arg_space: 8
+ }
+ {offset
+ cQO: // global
+ _sQw::P64 = R2; // CmmAssign
+ goto cQM; // CmmBranch
+ cQM: // global
+ if ((old + 0) - <highSp> < SpLim) (likely: False) goto cQP; else goto cQQ; // CmmCondBranch
+ cQP: // global
+ R2 = _sQw::P64; // CmmAssign
+ R1 = T24264.fun1_closure; // CmmAssign
+ call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; // CmmCall
+ cQQ: // global
+ goto cQL; // CmmBranch
+ cQL: // global
+ // slowCall
+ R1 = _sQw::P64; // CmmAssign
+ call stg_ap_0_fast(R1) args: 8, res: 0, upd: 8; // CmmCall
+ }
+ }]
+
+
+
+==================== Cmm produced by codegen ====================
+[T24264.fun2_entry() { // [R2]
+ { info_tbls: [(cQY,
+ label: T24264.fun2_info
+ rep: HeapRep static { Fun {arity: 2 fun_type: ArgSpec 5} }
+ srt: Nothing)]
+ stack_info: arg_space: 8
+ }
+ {offset
+ cQY: // global
+ _sQz::P64 = R2; // CmmAssign
+ goto cQW; // CmmBranch
+ cQW: // global
+ if ((old + 0) - <highSp> < SpLim) (likely: False) goto cQZ; else goto cR0; // CmmCondBranch
+ cQZ: // global
+ R2 = _sQz::P64; // CmmAssign
+ R1 = T24264.fun2_closure; // CmmAssign
+ call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; // CmmCall
+ cR0: // global
+ goto cQV; // CmmBranch
+ cQV: // global
+ // slowCall
+ R1 = _sQz::P64; // CmmAssign
+ call stg_ap_0_fast(R1) args: 8, res: 0, upd: 8; // CmmCall
+ }
+ }]
+
+
+
+==================== Cmm produced by codegen ====================
+[T24264.fun3_entry() { // [R2]
+ { info_tbls: [(cRb,
+ label: T24264.fun3_info
+ rep: HeapRep static { Fun {arity: 2 fun_type: ArgSpec 5} }
+ srt: Nothing)]
+ stack_info: arg_space: 8
+ }
+ {offset
+ cRb: // global
+ _sQB::P64 = R2; // CmmAssign
+ goto cR6; // CmmBranch
+ cR6: // global
+ if ((old + 0) - <highSp> < SpLim) (likely: False) goto cRc; else goto cRd; // CmmCondBranch
+ cRc: // global
+ R2 = _sQB::P64; // CmmAssign
+ R1 = T24264.fun3_closure; // CmmAssign
+ call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; // CmmCall
+ cRd: // global
+ goto cR5; // CmmBranch
+ cR5: // global
+ // slowCall
+ I64[(young<cR8> + 8)] = cR8; // CmmStore
+ R1 = _sQB::P64; // CmmAssign
+ call stg_ap_0_fast(R1) returns to cR8, args: 8, res: 8, upd: 8; // CmmCall
+ cR8: // global
+ _sQD::P64 = R1; // CmmAssign
+ // slow_call for _sQB::P64 with pat stg_ap_0
+ R1 = _sQD::P64; // CmmAssign
+ call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8; // CmmCall
+ }
+ }]
+
+
+
+==================== Cmm produced by codegen ====================
+[T24264.funPair_entry() { // [R2]
+ { info_tbls: [(cRq,
+ label: T24264.funPair_info
+ rep: HeapRep static { Fun {arity: 2 fun_type: ArgSpec 5} }
+ srt: Nothing)]
+ stack_info: arg_space: 8
+ }
+ {offset
+ cRq: // global
+ _sQE::P64 = R2; // CmmAssign
+ goto cRl; // CmmBranch
+ cRl: // global
+ if ((old + 0) - <highSp> < SpLim) (likely: False) goto cRr; else goto cRs; // CmmCondBranch
+ cRr: // global
+ R2 = _sQE::P64; // CmmAssign
+ R1 = T24264.funPair_closure; // CmmAssign
+ call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; // CmmCall
+ cRs: // global
+ goto cRk; // CmmBranch
+ cRk: // global
+ // slowCall
+ I64[(young<cRn> + 8)] = cRn; // CmmStore
+ R1 = _sQE::P64; // CmmAssign
+ call stg_ap_0_fast(R1) returns to cRn, args: 8, res: 8, upd: 8; // CmmCall
+ cRn: // global
+ _sQG::P64 = R1; // CmmAssign
+ // slow_call for _sQE::P64 with pat stg_ap_0
+ Hp = Hp + 24; // CmmAssign
+ if (Hp > HpLim) (likely: False) goto cRv; else goto cRu; // CmmCondBranch
+ cRv: // global
+ HpAlloc = 24; // CmmAssign
+ goto cRt; // CmmBranch
+ cRt: // global
+ R1 = _sQG::P64; // CmmAssign
+ call stg_gc_unpt_r1(R1) returns to cRn, args: 8, res: 8, upd: 8; // CmmCall
+ cRu: // global
+ // allocHeapClosure
+ I64[Hp - 16] = (,)_con_info; // CmmStore
+ P64[Hp - 8] = _sQG::P64; // CmmStore
+ P64[Hp] = _sQG::P64; // CmmStore
+ _cRp::P64 = Hp - 15; // CmmAssign
+ R1 = _cRp::P64; // CmmAssign
+ call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8; // CmmCall
+ }
+ }]
+
+
=====================================
testsuite/tests/codeGen/should_compile/all.T
=====================================
@@ -128,3 +128,5 @@ test('T21710a', [ unless(tables_next_to_code(), skip) , when(wordsize(32), skip)
, grep_errmsg('(call)',[1]) ]
, compile, ['-ddump-cmm -dno-typeable-binds'])
test('T23002', normal, compile, ['-fregs-graph'])
+test('T24264', [req_cmm, grep_errmsg(r'(.*\().*(\) returns to)', [1,2])],
+ compile, ['-O -ddump-cmm-from-stg -dno-typeable-binds'])
=====================================
testsuite/tests/codeGen/should_run/T24264run.hs
=====================================
@@ -0,0 +1,32 @@
+module Main where
+
+import Control.Exception (evaluate)
+import GHC.Exts (lazy, noinline)
+
+data StrictPair a b = !a :*: !b
+
+tailEval1 :: a -> IO a
+{-# OPAQUE tailEval1 #-}
+tailEval1 = lazy $ \x -> do
+ pure ()
+ pure $! x
+
+tailEval2 :: a -> IO a
+{-# OPAQUE tailEval2 #-}
+tailEval2 x = evaluate x
+
+go :: [a] -> IO ()
+go = noinline mapM_ $ \x -> do
+ y1 <- tailEval1 x
+ y2 <- tailEval2 x
+ evaluate (y1 :*: y2)
+
+main :: IO ()
+main = do
+ let ints :: [Int]
+ ints = take 1000 $ noinline iterate (\x -> x * 35) 1
+ go ints
+ go [LT, EQ, GT]
+ go $ noinline map (toEnum @Ordering . flip mod 3) ints
+ go $ noinline map Left ints
+ go $ noinline map (+) ints
=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -240,3 +240,4 @@ test('MulMayOflo_full',
ignore_stdout],
multi_compile_and_run,
['MulMayOflo', [('MulMayOflo_full.cmm', '')], ''])
+test('T24264run', normal, compile_and_run, [''])
=====================================
testsuite/tests/driver/j-space/all.T
=====================================
@@ -1 +1 @@
-test('jspace', [extra_files(['genJspace']), req_target_smp, req_ghc_smp, js_broken(22573),when(arch('i386'), fragile(24261))], makefile_test, ['jspace'])
+test('jspace', [extra_files(['genJspace']), req_target_smp, req_ghc_smp, js_broken(22573)], makefile_test, ['jspace'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/68fe6e9bdf0c53abda64d4fabd910c534d8fd728...98af07984b8eac274bd71c5f3261d9dd982dbb2d
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/68fe6e9bdf0c53abda64d4fabd910c534d8fd728...98af07984b8eac274bd71c5f3261d9dd982dbb2d
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231230/c90e7119/attachment-0001.html>
More information about the ghc-commits
mailing list