[Git][ghc/ghc][wip/andreask/ghci-tag-nullary] Fix GHCis interaction with tag inference.

Andreas Klebinger (@AndreasK) gitlab at gitlab.haskell.org
Tue Aug 16 19:12:31 UTC 2022



Andreas Klebinger pushed to branch wip/andreask/ghci-tag-nullary at Glasgow Haskell Compiler / GHC


Commits:
0a6cade1 by Andreas Klebinger at 2022-08-16T21:09:09+02:00
Fix GHCis interaction with tag inference.

We had assumed that wrappers were not inlined into GHCi so we would
always execute the compiled wrapper inside GHCi. Turs turned out to be
a lie. So instead we now run tag inference even when we only generate
bytecode. In that case only for correctness reasons. Which is alright
as it's fairly cheap.

I further fixed a bug where GHCi didn't tag nullary constructor
arguments which caused segfaults when calling into compiled functions
which expected the strict field invariant to be upheld.

-------------------------
Metric Increase:
    T4801
-------------------------

- - - - -


14 changed files:

- compiler/GHC/Driver/Main.hs
- compiler/GHC/Stg/InferTags.hs
- compiler/GHC/Stg/InferTags/Rewrite.hs
- compiler/GHC/Stg/InferTags/TagSig.hs
- compiler/GHC/Stg/Pipeline.hs
- compiler/GHC/StgToByteCode.hs
- compiler/GHC/StgToCmm/Types.hs
- testsuite/tests/ghci.debugger/scripts/T12458.stdout
- testsuite/tests/ghci.debugger/scripts/print018.stdout
- testsuite/tests/simplStg/should_run/Makefile
- + testsuite/tests/simplStg/should_run/T22042.hs
- + testsuite/tests/simplStg/should_run/T22042.stdout
- + testsuite/tests/simplStg/should_run/T22042a.hs
- testsuite/tests/simplStg/should_run/all.T


Changes:

=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -186,7 +186,7 @@ import GHC.Tc.Utils.Monad
 import GHC.Tc.Utils.Zonk    ( ZonkFlexi (DefaultFlexi) )
 
 import GHC.Stg.Syntax
-import GHC.Stg.Pipeline ( stg2stg )
+import GHC.Stg.Pipeline ( stg2stg, StgCgInfos )
 import GHC.Stg.InferTags
 
 import GHC.Builtin.Utils
@@ -268,6 +268,8 @@ import Data.Functor
 import Control.DeepSeq (force)
 import Data.Bifunctor (first)
 import Data.List.NonEmpty (NonEmpty ((:|)))
+import GHC.Stg.InferTags.TagSig (seqTagSig)
+import GHC.Types.Unique.FM
 
 
 {- **********************************************************************
@@ -1719,11 +1721,16 @@ hscGenHardCode hsc_env cgguts location output_filename = do
             this_mod location late_cc_binds data_tycons
 
         -----------------  Convert to STG ------------------
-        (stg_binds, denv, (caf_ccs, caf_cc_stacks))
+        (stg_binds, denv, (caf_ccs, caf_cc_stacks), stg_cg_infos)
             <- {-# SCC "CoreToStg" #-}
                withTiming logger
                    (text "CoreToStg"<+>brackets (ppr this_mod))
-                   (\(a, b, (c,d)) -> a `seqList` b `seq` c `seqList` d `seqList` ())
+                   (\(a, b, (c,d), tag_env) ->
+                        a `seqList`
+                        b `seq`
+                        c `seqList`
+                        d `seqList`
+                        (seqEltsUFM (seqTagSig) tag_env))
                    (myCoreToStg logger dflags (hsc_IC hsc_env) False this_mod location prepd_binds)
 
         let cost_centre_info =
@@ -1766,7 +1773,8 @@ hscGenHardCode hsc_env cgguts location output_filename = do
                 <- {-# SCC "codeOutput" #-}
                   codeOutput logger tmpfs llvm_config dflags (hsc_units hsc_env) this_mod output_filename location
                   foreign_stubs foreign_files dependencies rawcmms1
-            return (output_filename, stub_c_exists, foreign_fps, Just cg_infos)
+            return  ( output_filename, stub_c_exists, foreign_fps
+                    , Just cg_infos{ cgTagSigs = stg_cg_infos})
 
 
 hscInteractive :: HscEnv
@@ -1801,7 +1809,9 @@ hscInteractive hsc_env cgguts location = do
         (initCorePrepPgmConfig (hsc_dflags hsc_env) (interactiveInScope $ hsc_IC hsc_env))
         this_mod location core_binds data_tycons
 
-    (stg_binds, _infotable_prov, _caf_ccs__caf_cc_stacks)
+    -- The stg cg info only provides a runtime benfit, but is not requires so we just
+    -- omit it here
+    (stg_binds, _infotable_prov, _caf_ccs__caf_cc_stacks, _ignore_stg_cg_infos)
       <- {-# SCC "CoreToStg" #-}
           myCoreToStg logger dflags (hsc_IC hsc_env) True this_mod location prepd_binds
     -----------------  Generate byte code ------------------
@@ -1906,13 +1916,13 @@ doCodeGen hsc_env this_mod denv data_tycons
         hooks      = hsc_hooks  hsc_env
         tmpfs      = hsc_tmpfs  hsc_env
         platform   = targetPlatform dflags
-
+        stg_ppr_opts = (initStgPprOpts dflags)
     -- Do tag inference on optimized STG
     (!stg_post_infer,export_tag_info) <-
-        {-# SCC "StgTagFields" #-} inferTags dflags logger this_mod stg_binds_w_fvs
+        {-# SCC "StgTagFields" #-} inferTags stg_ppr_opts logger this_mod stg_binds_w_fvs
 
     putDumpFileMaybe logger Opt_D_dump_stg_final "Final STG:" FormatSTG
-        (pprGenStgTopBindings (initStgPprOpts dflags) stg_post_infer)
+        (pprGenStgTopBindings stg_ppr_opts stg_post_infer)
 
     let stg_to_cmm dflags mod = case stgToCmmHook hooks of
                         Nothing -> StgToCmm.codeGen logger tmpfs (initStgToCmmConfig dflags mod)
@@ -1960,7 +1970,8 @@ myCoreToStgExpr :: Logger -> DynFlags -> InteractiveContext
                 -> IO ( Id
                       , [CgStgTopBinding]
                       , InfoTableProvMap
-                      , CollectedCCs )
+                      , CollectedCCs
+                      , StgCgInfos )
 myCoreToStgExpr logger dflags ictxt for_bytecode this_mod ml prepd_expr = do
     {- Create a temporary binding (just because myCoreToStg needs a
        binding for the stg2stg step) -}
@@ -1968,7 +1979,7 @@ myCoreToStgExpr logger dflags ictxt for_bytecode this_mod ml prepd_expr = do
                                 (mkPseudoUniqueE 0)
                                 Many
                                 (exprType prepd_expr)
-    (stg_binds, prov_map, collected_ccs) <-
+    (stg_binds, prov_map, collected_ccs, stg_cg_infos) <-
        myCoreToStg logger
                    dflags
                    ictxt
@@ -1976,20 +1987,21 @@ myCoreToStgExpr logger dflags ictxt for_bytecode this_mod ml prepd_expr = do
                    this_mod
                    ml
                    [NonRec bco_tmp_id prepd_expr]
-    return (bco_tmp_id, stg_binds, prov_map, collected_ccs)
+    return (bco_tmp_id, stg_binds, prov_map, collected_ccs, stg_cg_infos)
 
 myCoreToStg :: Logger -> DynFlags -> InteractiveContext
             -> Bool
             -> Module -> ModLocation -> CoreProgram
             -> IO ( [CgStgTopBinding] -- output program
                   , InfoTableProvMap
-                  , CollectedCCs )  -- CAF cost centre info (declared and used)
+                  , CollectedCCs -- CAF cost centre info (declared and used)
+                  , StgCgInfos )
 myCoreToStg logger dflags ictxt for_bytecode this_mod ml prepd_binds = do
     let (stg_binds, denv, cost_centre_info)
          = {-# SCC "Core2Stg" #-}
            coreToStg dflags this_mod ml prepd_binds
 
-    stg_binds_with_fvs
+    (stg_binds_with_fvs,stg_cg_info)
         <- {-# SCC "Stg2Stg" #-}
            stg2stg logger (interactiveInScope ictxt) (initStgPipelineOpts dflags for_bytecode)
                    this_mod stg_binds
@@ -1997,7 +2009,7 @@ myCoreToStg logger dflags ictxt for_bytecode this_mod ml prepd_binds = do
     putDumpFileMaybe logger Opt_D_dump_stg_cg "CodeGenInput STG:" FormatSTG
         (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_with_fvs)
 
-    return (stg_binds_with_fvs, denv, cost_centre_info)
+    return (stg_binds_with_fvs, denv, cost_centre_info, stg_cg_info)
 
 {- **********************************************************************
 %*                                                                      *
@@ -2148,7 +2160,7 @@ hscParsedDecls hsc_env decls = runInteractiveHsc hsc_env $ do
         (initCorePrepPgmConfig (hsc_dflags hsc_env) (interactiveInScope $ hsc_IC hsc_env))
         this_mod iNTERACTIVELoc core_binds data_tycons
 
-    (stg_binds, _infotable_prov, _caf_ccs__caf_cc_stacks)
+    (stg_binds, _infotable_prov, _caf_ccs__caf_cc_stacks, _stg_cg_info)
         <- {-# SCC "CoreToStg" #-}
            liftIO $ myCoreToStg (hsc_logger hsc_env)
                                 (hsc_dflags hsc_env)
@@ -2385,7 +2397,7 @@ hscCompileCoreExpr' hsc_env srcspan ds_expr
                                       ml_hie_file  = panic "hscCompileCoreExpr':ml_hie_file" }
 
          ; let ictxt = hsc_IC hsc_env
-         ; (binding_id, stg_expr, _, _) <-
+         ; (binding_id, stg_expr, _, _, _stg_cg_info) <-
              myCoreToStgExpr logger
                              dflags
                              ictxt


=====================================
compiler/GHC/Stg/InferTags.hs
=====================================
@@ -27,7 +27,6 @@ import GHC.Stg.InferTags.Types
 import GHC.Stg.InferTags.Rewrite (rewriteTopBinds)
 import Data.Maybe
 import GHC.Types.Name.Env (mkNameEnv, NameEnv)
-import GHC.Driver.Config.Stg.Ppr
 import GHC.Driver.Session
 import GHC.Utils.Logger
 import qualified GHC.Unit.Types
@@ -221,13 +220,13 @@ the output of itself.
 --          -- Note we produce a 'Stream' of CmmGroups, so that the
 --          -- backend can be run incrementally.  Otherwise it generates all
 --          -- the C-- up front, which has a significant space cost.
-inferTags :: DynFlags -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig)
-inferTags dflags logger this_mod stg_binds = do
+inferTags :: StgPprOpts -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig)
+inferTags ppr_opts logger this_mod stg_binds = do
 
     -- Annotate binders with tag information.
     let (!stg_binds_w_tags) = {-# SCC "StgTagFields" #-}
                                         inferTagsAnal stg_binds
-    putDumpFileMaybe logger Opt_D_dump_stg_tags "CodeGenAnal STG:" FormatSTG (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_w_tags)
+    putDumpFileMaybe logger Opt_D_dump_stg_tags "CodeGenAnal STG:" FormatSTG (pprGenStgTopBindings ppr_opts stg_binds_w_tags)
 
     let export_tag_info = collectExportInfo stg_binds_w_tags
 


=====================================
compiler/GHC/Stg/InferTags/Rewrite.hs
=====================================
@@ -26,7 +26,7 @@ import GHC.Types.Name
 import GHC.Types.Unique.Supply
 import GHC.Types.Unique.FM
 import GHC.Types.RepType
-import GHC.Unit.Types (Module)
+import GHC.Unit.Types (Module, isInteractiveModule)
 
 import GHC.Core.DataCon
 import GHC.Core (AltCon(..) )
@@ -212,16 +212,42 @@ withLcl fv act = do
     setFVs old_fvs
     return r
 
+{- Note [Tag inference for interactive contexts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When running code in GHCi we perform tag inference/rewrites
+for each individual STG expression entered on the prompt.
+
+This means in GHCi for a sequence of:
+    > let x = True
+    > let y = x
+We first run tagInference for `x = True`. While that computes a tag signature for `x` that information
+is currently not persistet.
+Then we process `y = x`, and to do so we check for the tag sig of `x` (which we don't have).
+This isn't a problem as we can always just default to TagDunno and nothing bad will happen.
+
+But in a non-interactive context this would indicate an error as every binding
+should be processed in dependency order for the whole module at once.
+Therefore taggedness information should be available for every id mentioned in any RHS.
+
+So if a lookup fails we check if we are in an interactive context. If so we just default
+to TagDunno. If we aren't in an interactive context this is an error and we have an assert
+to check that.
+
+-}
 isTagged :: Id -> RM Bool
 isTagged v = do
     this_mod <- getMod
+    -- See Note [Tag inference for interactive contexts]
+    let lookupDefault v = assertPpr (isInteractiveModule this_mod)
+                                    (text "unknown Id:" <> ppr this_mod <+> ppr v)
+                                    (TagSig TagDunno)
     case nameIsLocalOrFrom this_mod (idName v) of
         True
             | isUnliftedType (idType v)
             -> return True
             | otherwise -> do -- Local binding
                 !s <- getMap
-                let !sig = lookupWithDefaultUFM s (pprPanic "unknown Id:" (ppr v)) v
+                let !sig = lookupWithDefaultUFM s (lookupDefault v) v
                 return $ case sig of
                     TagSig info ->
                         case info of


=====================================
compiler/GHC/Stg/InferTags/TagSig.hs
=====================================
@@ -1,4 +1,5 @@
 {-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-}
+{-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE CPP #-}
 {-# LANGUAGE ConstraintKinds #-}
 
@@ -16,6 +17,7 @@ import GHC.Types.Var
 import GHC.Utils.Outputable
 import GHC.Utils.Binary
 import GHC.Utils.Panic.Plain
+import Data.Coerce
 
 data TagInfo
   = TagDunno            -- We don't know anything about the tag.
@@ -64,3 +66,12 @@ isTaggedSig :: TagSig -> Bool
 isTaggedSig (TagSig TagProper) = True
 isTaggedSig (TagSig TagTagged) = True
 isTaggedSig _ = False
+
+seqTagSig :: TagSig -> ()
+seqTagSig = coerce seqTagInfo
+
+seqTagInfo :: TagInfo -> ()
+seqTagInfo TagTagged      = ()
+seqTagInfo TagDunno       = ()
+seqTagInfo TagProper      = ()
+seqTagInfo (TagTuple tis) = foldl' (\_unit sig -> seqTagSig (coerce sig)) () tis
\ No newline at end of file


=====================================
compiler/GHC/Stg/Pipeline.hs
=====================================
@@ -13,6 +13,7 @@ module GHC.Stg.Pipeline
   ( StgPipelineOpts (..)
   , StgToDo (..)
   , stg2stg
+  , StgCgInfos
   ) where
 
 import GHC.Prelude
@@ -39,6 +40,9 @@ import Control.Monad
 import Control.Monad.IO.Class
 import Control.Monad.Trans.Reader
 import GHC.Settings (Platform)
+import GHC.Stg.InferTags (inferTags)
+import GHC.Types.Name.Env (NameEnv)
+import GHC.Stg.InferTags.TagSig (TagSig)
 
 data StgPipelineOpts = StgPipelineOpts
   { stgPipeline_phases      :: ![StgToDo]
@@ -52,6 +56,10 @@ data StgPipelineOpts = StgPipelineOpts
 newtype StgM a = StgM { _unStgM :: ReaderT Char IO a }
   deriving (Functor, Applicative, Monad, MonadIO)
 
+-- | Information to be exposed in interface files which is produced
+-- by the stg2stg pass.
+type StgCgInfos = NameEnv TagSig
+
 instance MonadUnique StgM where
   getUniqueSupplyM = StgM $ do { mask <- ask
                                ; liftIO $! mkSplitUniqSupply mask}
@@ -66,7 +74,7 @@ stg2stg :: Logger
         -> StgPipelineOpts
         -> Module                    -- ^ module being compiled
         -> [StgTopBinding]           -- ^ input program
-        -> IO [CgStgTopBinding]        -- output program
+        -> IO ([CgStgTopBinding], StgCgInfos) -- output program
 stg2stg logger extra_vars opts this_mod binds
   = do  { dump_when Opt_D_dump_stg_from_core "Initial STG:" binds
         ; showPass logger "Stg2Stg"
@@ -85,7 +93,7 @@ stg2stg logger extra_vars opts this_mod binds
           -- This pass will also augment each closure with non-global free variables
           -- annotations (which is used by code generator to compute offsets into closures)
         ; let binds_sorted_with_fvs = depSortWithAnnotStgPgm this_mod binds'
-        ; return binds_sorted_with_fvs
+        ; inferTags (stgPipeline_pprOpts opts) logger this_mod binds_sorted_with_fvs
    }
 
   where


=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -1669,10 +1669,21 @@ pushAtom d p (StgVarArg var)
         case lookupVarEnv topStrings var of
             Just ptr -> pushAtom d p $ StgLitArg $ mkLitWord platform $
               fromIntegral $ ptrToWordPtr $ fromRemotePtr ptr
-            Nothing -> do
-                let sz = idSizeCon platform var
-                massert (sz == wordSize platform)
-                return (unitOL (PUSH_G (getName var)), sz)
+            Nothing
+              -- PUSH_G doesn't tag constructors. So we use PACK here
+              -- if we are dealing with nullary constructor.
+              | Just con <- isDataConWorkId_maybe var
+              -> do
+                  massert (sz == wordSize platform)
+                  massert (isNullaryRepDataCon con)
+                  return (unitOL (PACK con 0), sz)
+              | otherwise
+              -> do
+                  let
+                  massert (sz == wordSize platform)
+                  return (unitOL (PUSH_G (getName var)), sz)
+              where
+                !sz = idSizeCon platform var
 
 
 pushAtom _ _ (StgLitArg lit) = pushLiteral True lit


=====================================
compiler/GHC/StgToCmm/Types.hs
=====================================
@@ -94,6 +94,7 @@ data CgInfos = CgInfos
   , cgIPEStub :: !CStub
       -- ^ The C stub which is used for IPE information
   , cgTagSigs :: !(NameEnv TagSig)
+      -- ^ Tag sigs. These are produced by stg2stg hence why they end up in CgInfos.
   }
 
 --------------------------------------------------------------------------------


=====================================
testsuite/tests/ghci.debugger/scripts/T12458.stdout
=====================================
@@ -1,2 +1,2 @@
-d = (_t1::forall {k} {a :: k}. D a)
+d = <D>
 ()


=====================================
testsuite/tests/ghci.debugger/scripts/print018.stdout
=====================================
@@ -1,9 +1,9 @@
 Breakpoint 0 activated at Test.hs:40:10-17
 Stopped in Test.Test2.poly, Test.hs:40:10-17
 _result :: () = _
-x :: a = _
-x = (_t1::a)
-x :: a
+x :: Unary = Unary
+x = Unary
+x :: Unary
 ()
 x = Unary
 x :: Unary


=====================================
testsuite/tests/simplStg/should_run/Makefile
=====================================
@@ -1,3 +1,12 @@
 TOP=../../..
 include $(TOP)/mk/boilerplate.mk
 include $(TOP)/mk/test.mk
+
+T22042: T22042_clean
+	"$(TEST_HC)" $(TEST_HC_OPTS) -O T22042a.hs -dynamic -dtag-inference-checks -c
+	"$(TEST_HC)" $(TEST_HC_OPTS) -e ":main" T22042.hs T22042a.o
+
+T22042_clean:
+	rm -f T22042a.o T22042a.hi
+
+.PHONY: T22042 T22042_clean


=====================================
testsuite/tests/simplStg/should_run/T22042.hs
=====================================
@@ -0,0 +1,6 @@
+module Main where
+
+import T22042a
+
+main = do
+  putStrLn (foo $ SC A B C)


=====================================
testsuite/tests/simplStg/should_run/T22042.stdout
=====================================
@@ -0,0 +1 @@
+ABC


=====================================
testsuite/tests/simplStg/should_run/T22042a.hs
=====================================
@@ -0,0 +1,10 @@
+module T22042a where
+
+data A = A | AA deriving Show
+data B = B | AB deriving Show
+data C = C | AC deriving Show
+
+data SC = SC !A !B !C
+
+foo :: SC -> String
+foo (SC a b c) = show a ++ show b ++ show c


=====================================
testsuite/tests/simplStg/should_run/all.T
=====================================
@@ -19,3 +19,4 @@ test('T13536a',
     [''])
 
 test('inferTags001', normal, multimod_compile_and_run, ['inferTags001', 'inferTags001_a'])
+test('T22042', [extra_files(['T22042a.hs']),only_ways('normal')], makefile_test, ['T22042'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0a6cade119bff19f82573f2975f936af4db3c247

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0a6cade119bff19f82573f2975f936af4db3c247
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20220816/d7a973dd/attachment-0001.html>


More information about the ghc-commits mailing list