[Git][ghc/ghc][wip/T24295] Fix loopification in the presence of void arguments

Matthew Craven (@clyring) gitlab at gitlab.haskell.org
Mon Jan 8 19:57:13 UTC 2024



Matthew Craven pushed to branch wip/T24295 at Glasgow Haskell Compiler / GHC


Commits:
c9967052 by Matthew Craven at 2024-01-08T14:56:14-05:00
Fix loopification in the presence of void arguments

This also removes Note [Void arguments in self-recursive tail calls],
which was just misleading.  It's important to count void args both
in the function's arity and at the call site.

Fixes #24295.

- - - - -


10 changed files:

- compiler/GHC/StgToCmm/Bind.hs
- compiler/GHC/StgToCmm/Closure.hs
- compiler/GHC/StgToCmm/Expr.hs
- compiler/GHC/StgToCmm/Heap.hs
- compiler/GHC/StgToCmm/Monad.hs
- compiler/GHC/StgToCmm/Sequel.hs
- + testsuite/tests/codeGen/should_run/T24295a.hs
- + testsuite/tests/codeGen/should_run/T24295a.stdout
- + testsuite/tests/codeGen/should_run/T24295b.hs
- testsuite/tests/codeGen/should_run/all.T


Changes:

=====================================
compiler/GHC/StgToCmm/Bind.hs
=====================================
@@ -572,7 +572,13 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details
                 -- Extend reader monad with information that
                 -- self-recursive tail calls can be optimized into local
                 -- jumps. See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr.
-                ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do
+                ; let !self_loop_info = MkSelfLoopInfo
+                        { sli_id = bndr
+                        , sli_arity = arity
+                        , sli_header_block = loop_header_id
+                        , sli_registers = arg_regs
+                        }
+                ; withSelfLoop self_loop_info $ do
                 {
                 -- Main payload
                 ; entryHeapCheck cl_info node' arity arg_regs $ do


=====================================
compiler/GHC/StgToCmm/Closure.hs
=====================================
@@ -93,7 +93,6 @@ import GHC.Types.RepType
 import GHC.Types.Basic
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
-import GHC.Utils.Misc
 import GHC.Data.Maybe (isNothing)
 
 import Data.Coerce (coerce)
@@ -532,12 +531,12 @@ instance Outputable CallMethod where
 
 getCallMethod :: StgToCmmConfig
               -> Name           -- Function being applied
-              -> Id             -- Function Id used to chech if it can refer to
+              -> Id             -- Function Id used to check if it can refer to
                                 -- CAF's and whether the function is tail-calling
                                 -- itself
               -> LambdaFormInfo -- Its info
               -> RepArity       -- Number of available arguments
-              -> RepArity       -- Number of them being void arguments
+                                -- (including void args)
               -> CgLoc          -- Passed in from cgIdApp so that we can
                                 -- handle let-no-escape bindings and self-recursive
                                 -- tail calls using the same data constructor,
@@ -546,19 +545,22 @@ getCallMethod :: StgToCmmConfig
               -> Maybe SelfLoopInfo -- can we perform a self-recursive tail-call
               -> CallMethod
 
-getCallMethod cfg _ id _  n_args v_args _cg_loc (Just (self_loop_id, block_id, args))
+getCallMethod cfg _ id _  n_args _cg_loc (Just self_loop)
   | stgToCmmLoopification cfg
-  , id == self_loop_id
-  , args `lengthIs` (n_args - v_args)
+  , MkSelfLoopInfo
+    { sli_id = loop_id, sli_arity = arity
+    , sli_header_block = blk_id, sli_registers = arg_regs
+    } <- self_loop
+  , id == loop_id
+  , n_args == arity
   -- If these patterns match then we know that:
   --   * loopification optimisation is turned on
   --   * function is performing a self-recursive call in a tail position
-  --   * number of non-void parameters of the function matches functions arity.
-  -- See Note [Self-recursive tail calls] and Note [Void arguments in
-  -- self-recursive tail calls] in GHC.StgToCmm.Expr for more details
-  = JumpToIt block_id args
+  --   * number of parameters matches the function's arity.
+  -- See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr for more details
+  = JumpToIt blk_id arg_regs
 
-getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc _self_loop_info
+getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _cg_loc _self_loop_info
   | n_args == 0 -- No args at all
   && not (profileIsProfiling (stgToCmmProfile cfg))
      -- See Note [Evaluating functions with profiling] in rts/Apply.cmm
@@ -566,16 +568,16 @@ getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc _self
   | n_args < arity = SlowCall        -- Not enough args
   | otherwise      = DirectEntry (enterIdLabel (stgToCmmPlatform cfg) name (idCafInfo id)) arity
 
-getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc _self_loop_info
+getCallMethod _ _name _ LFUnlifted n_args _cg_loc _self_loop_info
   = assert (n_args == 0) ReturnIt
 
-getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info
+getCallMethod _ _name _ (LFCon _) n_args _cg_loc _self_loop_info
   = assert (n_args == 0) ReturnIt
     -- n_args=0 because it'd be ill-typed to apply a saturated
     --          constructor application to anything
 
 getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
-              n_args _v_args _cg_loc _self_loop_info
+              n_args _cg_loc _self_loop_info
 
   | Just sig <- idTagSig_maybe id
   , isTaggedSig sig -- Infered to be already evaluated by Tag Inference
@@ -613,7 +615,7 @@ getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
                 updatable) 0
 
 -- Imported(Unknown) Ids
-getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_locs _self_loop_info
+getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _cg_locs _self_loop_info
   | n_args == 0
   , Just sig <- idTagSig_maybe id
   , isTaggedSig sig -- Infered to be already evaluated by Tag Inference
@@ -630,14 +632,14 @@ getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_loc
       EnterIt   -- Not a function
 
 -- TODO: Redundant with above match?
--- getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
+-- getCallMethod _ name _ (LFUnknown False) n_args _cg_loc _self_loop_info
 --   = assertPpr (n_args == 0) (ppr name <+> ppr n_args)
 --     EnterIt -- Not a function
 
-getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs) _self_loop_info
+getCallMethod _ _name _ LFLetNoEscape _n_args (LneLoc blk_id lne_regs) _self_loop_info
   = JumpToIt blk_id lne_regs
 
-getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method"
+getCallMethod _ _ _ _ _ _ _ = panic "Unknown call method"
 
 -----------------------------------------------------------------------------
 --              Data types for closure information


=====================================
compiler/GHC/StgToCmm/Expr.hs
=====================================
@@ -1080,8 +1080,7 @@ cgIdApp fun_id args = do
         fun            = idInfoToAmode fun_info
         lf_info        = cg_lf         fun_info
         n_args         = length args
-        v_args         = length $ filter (null . stgArgRep) args
-    case getCallMethod cfg fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop of
+    case getCallMethod cfg fun_name fun_id lf_info n_args (cg_loc fun_info) self_loop of
             -- A value in WHNF, so we can just return it.
         ReturnIt
           | isZeroBitTy (idType fun_id) -> emitReturn []
@@ -1176,12 +1175,14 @@ cgIdApp fun_id args = do
 --
 -- Implementation is spread across a couple of places in the code:
 --
---   * FCode monad stores additional information in its reader environment
---     (stgToCmmSelfLoop field). This information tells us which function can
---     tail call itself in an optimized way (it is the function currently
---     being compiled), what is the label of a loop header (L1 in example above)
---     and information about local registers in which we should arguments
---     before making a call (this would be a and b in example above).
+--   * FCode monad stores additional information in its reader
+--     environment (stgToCmmSelfLoop field). This `SelfLoopInfo`
+--     record tells us which function can tail call itself in an
+--     optimized way (it is the function currently being compiled),
+--     its RepArity, what is the label of its loop header (L1 in
+--     example above) and information about which local registers
+--     should receive arguments when making a call (this would be a
+--     and b in the example above).
 --
 --   * Whenever we are compiling a function, we set that information to reflect
 --     the fact that function currently being compiled can be jumped to, instead
@@ -1205,36 +1206,13 @@ cgIdApp fun_id args = do
 --     of call will be generated. getCallMethod decides to generate a self
 --     recursive tail call when (a) environment stores information about
 --     possible self tail-call; (b) that tail call is to a function currently
---     being compiled; (c) number of passed non-void arguments is equal to
---     function's arity. (d) loopification is turned on via -floopification
---     command-line option.
+--     being compiled; (c) number of passed arguments is equal to
+--     function's unarised arity. (d) loopification is turned on via
+--     -floopification command-line option.
 --
 --   * Command line option to turn loopification on and off is implemented in
 --     DynFlags, then passed to StgToCmmConfig for this phase.
---
---
--- Note [Void arguments in self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- State# tokens can get in the way of the loopification optimization as seen in
--- #11372. Consider this:
---
--- foo :: [a]
---     -> (a -> State# s -> (# State s, Bool #))
---     -> State# s
---     -> (# State# s, Maybe a #)
--- foo [] f s = (# s, Nothing #)
--- foo (x:xs) f s = case f x s of
---      (# s', b #) -> case b of
---          True -> (# s', Just x #)
---          False -> foo xs f s'
---
--- We would like to compile the call to foo as a local jump instead of a call
--- (see Note [Self-recursive tail calls]). However, the generated function has
--- an arity of 2 while we apply it to 3 arguments, one of them being of void
--- type. Thus, we mustn't count arguments of void type when checking whether
--- we can turn a call into a self-recursive jump.
---
+
 
 emitEnter :: CmmExpr -> FCode ReturnKind
 emitEnter fun = do


=====================================
compiler/GHC/StgToCmm/Heap.hs
=====================================
@@ -635,7 +635,7 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
   -- See Note [Self-recursive loop header].
   self_loop_info <- getSelfLoop
   case self_loop_info of
-    Just (_, loop_header_id, _)
+    Just MkSelfLoopInfo { sli_header_block = loop_header_id }
         | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
     _otherwise -> return ()
 


=====================================
compiler/GHC/StgToCmm/Monad.hs
=====================================
@@ -42,6 +42,8 @@ module GHC.StgToCmm.Monad (
         Sequel(..), ReturnKind(..),
         withSequel, getSequel,
 
+        SelfLoopInfo(..),
+
         setTickyCtrLabel, getTickyCtrLabel,
         tickScope, getTickScope,
 
@@ -298,7 +300,7 @@ data FCodeState =
                                                          -- else the RTS will deadlock _and_ also experience a severe
                                                          -- performance degradation
               , fcs_sequel        :: !Sequel             -- ^ What to do at end of basic block
-              , fcs_selfloop      :: Maybe SelfLoopInfo  -- ^ Which tail calls can be compiled as local jumps?
+              , fcs_selfloop      :: !(Maybe SelfLoopInfo) -- ^ Which tail calls can be compiled as local jumps?
                                                          --   See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr
               , fcs_ticky         :: !CLabel             -- ^ Destination for ticky counts
               , fcs_tickscope     :: !CmmTickScope       -- ^ Tick scope for new blocks & ticks


=====================================
compiler/GHC/StgToCmm/Sequel.hs
=====================================
@@ -12,13 +12,14 @@
 
 module GHC.StgToCmm.Sequel
   ( Sequel(..)
-  , SelfLoopInfo
+  , SelfLoopInfo(..)
   ) where
 
 import GHC.Cmm.BlockId
 import GHC.Cmm
 
 import GHC.Types.Id
+import GHC.Types.Basic (RepArity)
 import GHC.Utils.Outputable
 
 import GHC.Prelude
@@ -41,5 +42,14 @@ instance Outputable Sequel where
     ppr Return = text "Return"
     ppr (AssignTo regs b) = text "AssignTo" <+> ppr regs <+> ppr b
 
-type SelfLoopInfo = (Id, BlockId, [LocalReg])
+data SelfLoopInfo = MkSelfLoopInfo
+  { sli_id :: !Id
+  , sli_arity :: !RepArity
+    -- ^ always equal to 'idFunRepArity' of sli_id,
+    -- i.e. unarised arity, including void arguments
+  , sli_registers :: ![LocalReg]
+    -- ^ Excludes void arguments (LocalReg is never void)
+  , sli_header_block :: !BlockId
+  }
+
 --------------------------------------------------------------------------------


=====================================
testsuite/tests/codeGen/should_run/T24295a.hs
=====================================
@@ -0,0 +1,20 @@
+module Main (main) where
+
+import Data.IORef (newIORef, readIORef, writeIORef)
+import Control.Exception (evaluate)
+import GHC.Exts (noinline)
+
+newtype Tricky = TrickyCon { unTrickyCon :: IO Tricky }
+
+main :: IO ()
+main = do
+  ref <- newIORef False
+  let
+    tricky :: Tricky
+    tricky = TrickyCon $ do
+      putStrLn "tricky call"
+      v <- readIORef ref
+      case v of
+        False -> writeIORef ref True >> evaluate (noinline tricky)
+        True  -> putStrLn "this shouldn't be printed" >> pure tricky
+  () <$ unTrickyCon tricky


=====================================
testsuite/tests/codeGen/should_run/T24295a.stdout
=====================================
@@ -0,0 +1 @@
+tricky call


=====================================
testsuite/tests/codeGen/should_run/T24295b.hs
=====================================
@@ -0,0 +1,28 @@
+{-# LANGUAGE GHC2021, UnboxedTuples #-}
+module Main (main) where
+
+import Control.Exception
+
+newtype Tricky = TrickyCon { unTrickyCon :: (# #) -> Tricky }
+
+data StrictBox a = SBox !a !a
+
+main :: IO ()
+main = do
+  let
+    tricky :: Tricky
+    {-# OPAQUE tricky #-}
+    tricky = TrickyCon $ \(# #) -> TrickyCon $ \(# #) ->
+      error "tricky called with at least two args"
+
+    applyToN :: Int -> Tricky -> Tricky
+    {-# OPAQUE applyToN #-}
+    applyToN n a | n == 0    = a
+                 | otherwise = applyToN (n - 1) a `unTrickyCon` (# #)
+
+    val = applyToN 12345 tricky
+
+  v <- try @ErrorCall $ evaluate (SBox val val)
+  case v of
+    Left _ -> pure ()
+    Right _ -> putStrLn "unreachable"


=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -241,3 +241,5 @@ test('MulMayOflo_full',
      multi_compile_and_run,
      ['MulMayOflo', [('MulMayOflo_full.cmm', '')], ''])
 test('T24264run', normal, compile_and_run, [''])
+test('T24295a', normal, compile_and_run, ['-O -floopification'])
+test('T24295b', normal, compile_and_run, ['-O -floopification -fpedantic-bottoms'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c9967052037fa66f9adb886c1c3ada4540f6a8d9

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c9967052037fa66f9adb886c1c3ada4540f6a8d9
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240108/0606a769/attachment-0001.html>


More information about the ghc-commits mailing list