[Git][ghc/ghc][wip/andreask/infer-bytecode] Fix some correctness issues around tag inference when targeting the bytecode generator.
Andreas Klebinger (@AndreasK)
gitlab at gitlab.haskell.org
Thu Feb 2 18:43:48 UTC 2023
Andreas Klebinger pushed to branch wip/andreask/infer-bytecode at Glasgow Haskell Compiler / GHC
Commits:
60241bfe by Andreas Klebinger at 2023-02-02T19:43:03+01:00
Fix some correctness issues around tag inference when targeting the bytecode generator.
* Let binders are now always assumed untagged for bytecode.
* Imported referenced are now always assumed to be untagged for bytecode.
Fixes #22840
- - - - -
4 changed files:
- compiler/GHC/Driver/Config/Stg/Pipeline.hs
- compiler/GHC/Stg/InferTags.hs
- compiler/GHC/Stg/InferTags/Types.hs
- compiler/GHC/Stg/Pipeline.hs
Changes:
=====================================
compiler/GHC/Driver/Config/Stg/Pipeline.hs
=====================================
@@ -22,6 +22,7 @@ initStgPipelineOpts dflags for_bytecode = StgPipelineOpts
, stgPipeline_pprOpts = initStgPprOpts dflags
, stgPipeline_phases = getStgToDo for_bytecode dflags
, stgPlatform = targetPlatform dflags
+ , stgPipeline_forBytecode = for_bytecode
}
-- | Which Stg-to-Stg passes to run. Depends on flags, ways etc.
=====================================
compiler/GHC/Stg/InferTags.hs
=====================================
@@ -204,6 +204,33 @@ a different StgPass! To handle this a large part of the analysis is polymorphic
over the exact StgPass we are using. Which allows us to run the analysis on
the output of itself.
+Note [Tag inference for interpreted code]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The bytecode interpreter has a different behaviour when it comes
+to the tagging of binders in certain situations than the StgToCmm code generator.
+
+a) Tags for let-bindings:
+
+ When compiling a binding for a constructor like `let x = Just True`
+ Weither or not `x` results in x pointing depends on the backend.
+ For the interpreter x points to a BCO which once
+ evaluated returns a properly tagged pointer to the heap object.
+ In the Cmm backend for the same binding we would allocate the constructor right
+ away and x will immediately be represented by a tagged pointer.
+ This means for interpreted code we can not assume let bound constructors are
+ properly tagged. Hence we distinguish between targeting bytecode and native in
+ the analysis.
+ We make this differentiation in `mkLetSig` where we simply never assume
+ lets are tagged when targeting bytecode.
+
+b) When referencing ids from other modules the Cmm backend will try to put a
+ proper tag on these references through various means. When doing analysis we
+ usually predict these cases to improve precision of the analysis.
+ But to my knowledge the bytecode generator makes no such attempts so we must
+ not infer imported bindings as tagged.
+ This is handled in GHC.Stg.InferTags.Types.lookupInfo
+
+
-}
{- *********************************************************************
@@ -212,20 +239,12 @@ the output of itself.
* *
********************************************************************* -}
--- doCodeGen :: HscEnv -> Module -> InfoTableProvMap -> [TyCon]
--- -> CollectedCCs
--- -> [CgStgTopBinding] -- ^ Bindings come already annotated with fvs
--- -> HpcInfo
--- -> IO (Stream IO CmmGroupSRTs CmmCgInfos)
--- -- Note we produce a 'Stream' of CmmGroups, so that the
--- -- backend can be run incrementally. Otherwise it generates all
--- -- the C-- up front, which has a significant space cost.
-inferTags :: StgPprOpts -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig)
-inferTags ppr_opts logger this_mod stg_binds = do
-
+inferTags :: StgPprOpts -> Bool -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig)
+inferTags ppr_opts !for_bytecode logger this_mod stg_binds = do
+ -- pprTraceM "inferTags for " (ppr this_mod <> text " bytecode:" <> ppr for_bytecode)
-- Annotate binders with tag information.
let (!stg_binds_w_tags) = {-# SCC "StgTagFields" #-}
- inferTagsAnal stg_binds
+ inferTagsAnal for_bytecode stg_binds
putDumpFileMaybe logger Opt_D_dump_stg_tags "CodeGenAnal STG:" FormatSTG (pprGenStgTopBindings ppr_opts stg_binds_w_tags)
let export_tag_info = collectExportInfo stg_binds_w_tags
@@ -254,10 +273,10 @@ type InferExtEq i = ( XLet i ~ XLet 'InferTaggedBinders
, XLetNoEscape i ~ XLetNoEscape 'InferTaggedBinders
, XRhsClosure i ~ XRhsClosure 'InferTaggedBinders)
-inferTagsAnal :: [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders]
-inferTagsAnal binds =
+inferTagsAnal :: Bool -> [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders]
+inferTagsAnal for_bytecode binds =
-- pprTrace "Binds" (pprGenStgTopBindings shortStgPprOpts $ binds) $
- snd (mapAccumL inferTagTopBind initEnv binds)
+ snd (mapAccumL inferTagTopBind (initEnv for_bytecode) binds)
-----------------------
inferTagTopBind :: TagEnv 'CodeGen -> GenStgTopBinding 'CodeGen
@@ -420,11 +439,12 @@ inferTagBind in_env (StgNonRec bndr rhs)
-- ppr bndr $$
-- ppr (isDeadEndId id) $$
-- ppr sig)
- (env', StgNonRec (id, sig) rhs')
+ (env', StgNonRec (id, out_sig) rhs')
where
id = getBinderId in_env bndr
- env' = extendSigEnv in_env [(id, sig)]
- (sig,rhs') = inferTagRhs id in_env rhs
+ (in_sig,rhs') = inferTagRhs id in_env rhs
+ out_sig = mkLetSig in_env in_sig
+ env' = extendSigEnv in_env [(id, out_sig)]
inferTagBind in_env (StgRec pairs)
= -- pprTrace "rec" (ppr (map fst pairs) $$ ppr (in_env { te_env = out_env }, StgRec pairs')) $
@@ -443,14 +463,17 @@ inferTagBind in_env (StgRec pairs)
| in_sigs == out_sigs = (te_env rhs_env, out_bndrs `zip` rhss')
| otherwise = go env' out_sigs rhss'
where
- out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive
in_bndrs = in_ids `zip` in_sigs
+ out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive
rhs_env = extendSigEnv go_env in_bndrs
(out_sigs, rhss') = unzip (zipWithEqual "inferTagBind" anaRhs in_ids go_rhss)
env' = makeTagged go_env
anaRhs :: Id -> GenStgRhs q -> (TagSig, GenStgRhs 'InferTaggedBinders)
- anaRhs bnd rhs = inferTagRhs bnd rhs_env rhs
+ anaRhs bnd rhs =
+ let (sig_rhs,rhs') = inferTagRhs bnd rhs_env rhs
+ in (mkLetSig go_env sig_rhs, rhs')
+
updateBndr :: (Id,TagSig) -> (Id,TagSig)
updateBndr (v,sig) = (setIdTagSig v sig, sig)
@@ -536,6 +559,15 @@ inferTagRhs _ env _rhs@(StgRhsCon cc con cn ticks args)
= --pprTrace "inferTagRhsCon" (ppr grp_ids) $
(TagSig (inferConTag env con args), StgRhsCon cc con cn ticks args)
+-- Adjust let semantics to the targeted backend.
+-- See Note [Tag inference for interpreted code]
+mkLetSig :: TagEnv p -> TagSig -> TagSig
+mkLetSig env in_sig
+ | for_bytecode = TagSig TagDunno
+ | otherwise = in_sig
+ where
+ for_bytecode = te_bytecode env
+
{- Note [Constructor TagSigs]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@inferConTag@ will infer the proper tag signature for a binding who's RHS is a constructor
=====================================
compiler/GHC/Stg/InferTags/Types.hs
=====================================
@@ -49,24 +49,30 @@ combineAltInfo ti TagTagged = ti
type TagSigEnv = IdEnv TagSig
data TagEnv p = TE { te_env :: TagSigEnv
, te_get :: BinderP p -> Id
+ , te_bytecode :: !Bool
}
instance Outputable (TagEnv p) where
- ppr te = ppr (te_env te)
-
+ ppr te = for_txt <+> ppr (te_env te)
+ where
+ for_txt = if te_bytecode te
+ then text "for_bytecode"
+ else text "for_native"
getBinderId :: TagEnv p -> BinderP p -> Id
getBinderId = te_get
-initEnv :: TagEnv 'CodeGen
-initEnv = TE { te_env = emptyVarEnv
- , te_get = \x -> x}
+initEnv :: Bool -> TagEnv 'CodeGen
+initEnv for_bytecode = TE { te_env = emptyVarEnv
+ , te_get = \x -> x
+ , te_bytecode = for_bytecode }
-- | Simple convert env to a env of the 'InferTaggedBinders pass
-- with no other changes.
makeTagged :: TagEnv p -> TagEnv 'InferTaggedBinders
makeTagged env = TE { te_env = te_env env
- , te_get = fst }
+ , te_get = fst
+ , te_bytecode = te_bytecode env }
noSig :: TagEnv p -> BinderP p -> (Id, TagSig)
noSig env bndr
@@ -75,14 +81,18 @@ noSig env bndr
where
var = getBinderId env bndr
+-- | Look up a sig in the given env
lookupSig :: TagEnv p -> Id -> Maybe TagSig
lookupSig env fun = lookupVarEnv (te_env env) fun
+-- | Look up a sig in the env or derive it from information
+-- in the arg itself.
lookupInfo :: TagEnv p -> StgArg -> TagInfo
lookupInfo env (StgVarArg var)
-- Nullary data constructors like True, False
| Just dc <- isDataConWorkId_maybe var
, isNullaryRepDataCon dc
+ , not for_bytecode
= TagProper
| isUnliftedType (idType var)
@@ -93,6 +103,7 @@ lookupInfo env (StgVarArg var)
= info
| Just lf_info <- idLFInfo_maybe var
+ , not for_bytecode
= case lf_info of
-- Function, tagged (with arity)
LFReEntrant {}
@@ -112,6 +123,8 @@ lookupInfo env (StgVarArg var)
| otherwise
= TagDunno
+ where
+ for_bytecode = te_bytecode env
lookupInfo _ (StgLitArg {})
= TagProper
=====================================
compiler/GHC/Stg/Pipeline.hs
=====================================
@@ -50,6 +50,7 @@ data StgPipelineOpts = StgPipelineOpts
-- ^ Should we lint the STG at various stages of the pipeline?
, stgPipeline_pprOpts :: !StgPprOpts
, stgPlatform :: !Platform
+ , stgPipeline_forBytecode :: !Bool
}
newtype StgM a = StgM { _unStgM :: ReaderT Char IO a }
@@ -89,7 +90,7 @@ stg2stg logger extra_vars opts this_mod binds
-- annotations (which is used by code generator to compute offsets into closures)
; let binds_sorted_with_fvs = depSortWithAnnotStgPgm this_mod binds'
-- See Note [Tag inference for interactive contexts]
- ; inferTags (stgPipeline_pprOpts opts) logger this_mod binds_sorted_with_fvs
+ ; inferTags (stgPipeline_pprOpts opts) (stgPipeline_forBytecode opts) logger this_mod binds_sorted_with_fvs
}
where
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/60241bfeb56c85d816a28a22ad6e0b8322e3573c
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/60241bfeb56c85d816a28a22ad6e0b8322e3573c
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230202/5094ff8a/attachment-0001.html>
More information about the ghc-commits
mailing list