[Git][ghc/ghc][wip/T24264] StgToCmm: Detect some no-op case-continuations

Matthew Craven (@clyring) gitlab at gitlab.haskell.org
Mon Dec 18 14:16:15 UTC 2023



Matthew Craven pushed to branch wip/T24264 at Glasgow Haskell Compiler / GHC


Commits:
6ebfa5f2 by Matthew Craven at 2023-12-18T09:15:35-05:00
StgToCmm: Detect some no-op case-continuations

...and generate no code for them. Fixes #24264.

- - - - -


6 changed files:

- compiler/GHC/StgToCmm/Expr.hs
- + testsuite/tests/codeGen/should_compile/T24264.hs
- + testsuite/tests/codeGen/should_compile/T24264.stderr
- testsuite/tests/codeGen/should_compile/all.T
- + testsuite/tests/codeGen/should_run/T24264run.hs
- testsuite/tests/codeGen/should_run/all.T


Changes:

=====================================
compiler/GHC/StgToCmm/Expr.hs
=====================================
@@ -570,6 +570,58 @@ cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
     -- Use the same return convention as vanilla 'a'.
     cgCase (StgApp a []) bndr alt_type alts
 
+{-
+Note [Eliminate trivial Solo# continuations]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have code like this:
+
+    case scrut of bndr {
+      alt -> Solo# bndr
+    }
+
+The RHS of the only branch does nothing except wrap the case-binder
+returned by 'scrut' in a unary unboxed tuple.  But unboxed tuples
+don't exist at run-time, i.e. the branch is a no-op!  So we can
+generate code as if we just had 'scrut' instead of a case-expression.
+
+This situation can easily arise for IO or ST code, where the last
+operation a function performs is commonly 'pure $! someExpr'.
+See also #24264 and !11778.  More concretely, as of December 2023,
+when building a stage2 "perf+no_profiled_libs" ghc:
+
+ * The special case is reached 398 times.
+ * Of these, 158 have scrutinees that call a function or enter a
+   potential thunk, and would need to push a useless stack frame if
+   not for this optimisation.
+
+We might consider rewriting such case expressions in GHC.Stg.CSE as a
+slight extension of Note [All alternatives are the binder].  But the
+RuntimeReps of 'bndr' and 'Solo# bndr' are not exactly the same, and
+per Note [Typing the STG language] in GHC.Stg.Lint, we do expect Stg
+code to remain RuntimeRep-correct.  So we just detect the situation in
+StgToCmm instead.
+
+Crucially, the return conventions for 'ty' and '(# ty #)' are compatible:
+The returned value is passed in the same register(s) or stack slot in
+both conventions, and the set of allowed return values for 'ty'
+is a subset of the allowed return values for '(# ty #)':
+
+ * For a lifted type 'ty', the return convention for 'ty' promises to
+   return an evaluated and properly tagged heap pointer, while the
+   convention for '(# ty #)' only promises to return a heap pointer to
+   an object that can be evaluated later if need be.
+
+ * If 'ty' is unlifted, the allowed return
+   values for 'ty' and '(# ty #)' are identical.
+-}
+
+cgCase scrut bndr _alt_type [GenStgAlt { alt_rhs = rhs}]
+  -- see Note [Eliminate trivial Solo# continuations]
+  | StgConApp dc _ [StgVarArg v] _ <- rhs
+  , isUnboxedTupleDataCon dc
+  , v == bndr
+  = cgExpr scrut
+
 cgCase scrut bndr alt_type alts
   = -- the general case
     do { platform <- getPlatform


=====================================
testsuite/tests/codeGen/should_compile/T24264.hs
=====================================
@@ -0,0 +1,18 @@
+module T24264 where
+
+fun :: a -> IO a
+{-# OPAQUE fun #-}
+fun x = do
+  pure ()
+  pure $! x
+  -- This should not push a continuation to the stack before entering 'x'
+
+funPair :: a -> IO (a, a)
+{-# OPAQUE funPair #-}
+funPair x = do
+  pure ()
+  x' <- pure $! x
+  -- This should push a continuation to the stack before entering 'x',
+  -- because the pair can only be built once 'x' has been evaluated.  (It's
+  -- here to make sure the 'returns to' detection continues working correctly.)
+  pure (x', x')


=====================================
testsuite/tests/codeGen/should_compile/T24264.stderr
=====================================
@@ -0,0 +1,70 @@
+
+==================== Output Cmm ====================
+[T24264.fun_entry() { //  [R2]
+         { info_tbls: [(cKd,
+                        label: T24264.fun_info
+                        rep: HeapRep static { Fun {arity: 2 fun_type: ArgSpec 5} }
+                        srt: Nothing)]
+           stack_info: arg_space: 8
+         }
+     {offset
+       cKd: // global
+           // slowCall
+           R1 = R2;   // CmmAssign
+           call stg_ap_0_fast(R1) args: 8, res: 0, upd: 8;   // CmmCall
+     }
+ },
+ section ""data" . T24264.fun_closure" {
+     T24264.fun_closure:
+         const T24264.fun_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[T24264.funPair_entry() { //  [R2]
+         { info_tbls: [(cKn,
+                        label: block_cKn_info
+                        rep: StackRep []
+                        srt: Nothing),
+                       (cKq,
+                        label: T24264.funPair_info
+                        rep: HeapRep static { Fun {arity: 2 fun_type: ArgSpec 5} }
+                        srt: Nothing)]
+           stack_info: arg_space: 8
+         }
+     {offset
+       cKq: // global
+           if ((Sp + -8) < SpLim) (likely: False) goto cKr; else goto cKs;   // CmmCondBranch
+       cKr: // global
+           R1 = T24264.funPair_closure;   // CmmAssign
+           call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8;   // CmmCall
+       cKs: // global
+           // slowCall
+           I64[Sp - 8] = cKn;   // CmmStore
+           R1 = R2;   // CmmAssign
+           Sp = Sp - 8;   // CmmAssign
+           call stg_ap_0_fast(R1) returns to cKn, args: 8, res: 8, upd: 8;   // CmmCall
+       cKn: // global
+           // slow_call for _sK3::P64 with pat stg_ap_0
+           Hp = Hp + 24;   // CmmAssign
+           if (Hp > HpLim) (likely: False) goto cKv; else goto cKu;   // CmmCondBranch
+       cKv: // global
+           HpAlloc = 24;   // CmmAssign
+           call stg_gc_unpt_r1(R1) returns to cKn, args: 8, res: 8, upd: 8;   // CmmCall
+       cKu: // global
+           // allocHeapClosure
+           I64[Hp - 16] = (,)_con_info;   // CmmStore
+           P64[Hp - 8] = R1;   // CmmStore
+           P64[Hp] = R1;   // CmmStore
+           R1 = Hp - 15;   // CmmAssign
+           Sp = Sp + 8;   // CmmAssign
+           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
+     }
+ },
+ section ""data" . T24264.funPair_closure" {
+     T24264.funPair_closure:
+         const T24264.funPair_info;
+ }]
+
+


=====================================
testsuite/tests/codeGen/should_compile/all.T
=====================================
@@ -128,3 +128,5 @@ test('T21710a', [ unless(tables_next_to_code(), skip) , when(wordsize(32), skip)
                 , grep_errmsg('(call)',[1]) ]
                 , compile, ['-ddump-cmm -dno-typeable-binds'])
 test('T23002', normal, compile, ['-fregs-graph'])
+test('T24264', grep_errmsg(r'(.*\().*(\) returns to)', [1,2]),
+     compile, ['-O -ddump-cmm -dno-typeable-binds'])


=====================================
testsuite/tests/codeGen/should_run/T24264run.hs
=====================================
@@ -0,0 +1,32 @@
+module Main where
+
+import Control.Exception (evaluate)
+import GHC.Exts (lazy, noinline)
+
+data StrictPair a b = !a :*: !b
+
+tailEval1 :: a -> IO a
+{-# OPAQUE tailEval1 #-}
+tailEval1 = lazy $ \x -> do
+  pure ()
+  pure $! x
+
+tailEval2 :: a -> IO a
+{-# OPAQUE tailEval2 #-}
+tailEval2 x = evaluate x
+
+go :: [a] -> IO ()
+go = noinline mapM_ $ \x -> do
+  y1 <- tailEval1 x
+  y2 <- tailEval2 x
+  evaluate (y1 :*: y2)
+
+main :: IO ()
+main = do
+  let ints :: [Int]
+      ints = take 1000 $ noinline iterate (\x -> x * 35) 1
+  go ints
+  go [LT, EQ, GT]
+  go $ noinline map (toEnum @Ordering . flip mod 3) ints
+  go $ noinline map Left ints
+  go $ noinline map (+)  ints


=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -240,3 +240,4 @@ test('MulMayOflo_full',
         ignore_stdout],
      multi_compile_and_run,
      ['MulMayOflo', [('MulMayOflo_full.cmm', '')], ''])
+test('T24264run', normal, compile_and_run, [''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6ebfa5f2d03c680e54e34b3d5df42b371b433e01

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6ebfa5f2d03c680e54e34b3d5df42b371b433e01
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231218/b8a6e748/attachment-0001.html>


More information about the ghc-commits mailing list