[Git][ghc/ghc][wip/andreask/infer-bytecode] Fix some correctness issues around tag inference when targeting the bytecode generator.

Andreas Klebinger (@AndreasK) gitlab at gitlab.haskell.org
Thu Feb 2 18:43:48 UTC 2023



Andreas Klebinger pushed to branch wip/andreask/infer-bytecode at Glasgow Haskell Compiler / GHC


Commits:
60241bfe by Andreas Klebinger at 2023-02-02T19:43:03+01:00
Fix some correctness issues around tag inference when targeting the bytecode generator.

* Let binders are now always assumed untagged for bytecode.
* Imported references are now always assumed to be untagged for bytecode.

Fixes #22840

- - - - -


4 changed files:

- compiler/GHC/Driver/Config/Stg/Pipeline.hs
- compiler/GHC/Stg/InferTags.hs
- compiler/GHC/Stg/InferTags/Types.hs
- compiler/GHC/Stg/Pipeline.hs


Changes:

=====================================
compiler/GHC/Driver/Config/Stg/Pipeline.hs
=====================================
@@ -22,6 +22,7 @@ initStgPipelineOpts dflags for_bytecode = StgPipelineOpts
   , stgPipeline_pprOpts = initStgPprOpts dflags
   , stgPipeline_phases = getStgToDo for_bytecode dflags
   , stgPlatform = targetPlatform dflags
+  , stgPipeline_forBytecode = for_bytecode
   }
 
 -- | Which Stg-to-Stg passes to run. Depends on flags, ways etc.


=====================================
compiler/GHC/Stg/InferTags.hs
=====================================
@@ -204,6 +204,33 @@ a different StgPass! To handle this a large part of the analysis is polymorphic
 over the exact StgPass we are using. Which allows us to run the analysis on
 the output of itself.
 
+Note [Tag inference for interpreted code]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The bytecode interpreter has a different behaviour when it comes
+to the tagging of binders in certain situations than the StgToCmm code generator.
+
+a) Tags for let-bindings:
+
+  When compiling a binding for a constructor like `let x = Just True`,
+  whether or not `x` is immediately a properly tagged pointer depends on the backend.
+  For the interpreter x points to a BCO which once
+  evaluated returns a properly tagged pointer to the heap object.
+  In the Cmm backend for the same binding we would allocate the constructor right
+  away and x will immediately be represented by a tagged pointer.
+  This means for interpreted code we cannot assume let-bound constructors are
+  properly tagged. Hence we distinguish between targeting bytecode and native in
+  the analysis.
+  We make this differentiation in `mkLetSig` where we simply never assume
+  lets are tagged when targeting bytecode.
+
+b) When referencing ids from other modules the Cmm backend will try to put a
+   proper tag on these references through various means. When doing analysis we
+   usually predict these cases to improve precision of the analysis.
+   But to my knowledge the bytecode generator makes no such attempts so we must
+   not infer imported bindings as tagged.
+   This is handled in GHC.Stg.InferTags.Types.lookupInfo
+
+
 -}
 
 {- *********************************************************************
@@ -212,20 +239,12 @@ the output of itself.
 *                                                                      *
 ********************************************************************* -}
 
--- doCodeGen :: HscEnv -> Module -> InfoTableProvMap -> [TyCon]
---           -> CollectedCCs
---           -> [CgStgTopBinding] -- ^ Bindings come already annotated with fvs
---           -> HpcInfo
---           -> IO (Stream IO CmmGroupSRTs CmmCgInfos)
---          -- Note we produce a 'Stream' of CmmGroups, so that the
---          -- backend can be run incrementally.  Otherwise it generates all
---          -- the C-- up front, which has a significant space cost.
-inferTags :: StgPprOpts -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig)
-inferTags ppr_opts logger this_mod stg_binds = do
-
+inferTags :: StgPprOpts -> Bool -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig)
+inferTags ppr_opts !for_bytecode logger this_mod stg_binds = do
+    -- pprTraceM "inferTags for " (ppr this_mod <> text " bytecode:" <> ppr for_bytecode)
     -- Annotate binders with tag information.
     let (!stg_binds_w_tags) = {-# SCC "StgTagFields" #-}
-                                        inferTagsAnal stg_binds
+                                        inferTagsAnal for_bytecode stg_binds
     putDumpFileMaybe logger Opt_D_dump_stg_tags "CodeGenAnal STG:" FormatSTG (pprGenStgTopBindings ppr_opts stg_binds_w_tags)
 
     let export_tag_info = collectExportInfo stg_binds_w_tags
@@ -254,10 +273,10 @@ type InferExtEq i = ( XLet i ~ XLet 'InferTaggedBinders
                     , XLetNoEscape i ~ XLetNoEscape 'InferTaggedBinders
                     , XRhsClosure i ~ XRhsClosure 'InferTaggedBinders)
 
-inferTagsAnal :: [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders]
-inferTagsAnal binds =
+inferTagsAnal :: Bool -> [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders]
+inferTagsAnal for_bytecode binds =
   -- pprTrace "Binds" (pprGenStgTopBindings shortStgPprOpts $ binds) $
-  snd (mapAccumL inferTagTopBind initEnv binds)
+  snd (mapAccumL inferTagTopBind (initEnv for_bytecode) binds)
 
 -----------------------
 inferTagTopBind :: TagEnv 'CodeGen -> GenStgTopBinding 'CodeGen
@@ -420,11 +439,12 @@ inferTagBind in_env (StgNonRec bndr rhs)
     --   ppr bndr $$
     --   ppr (isDeadEndId id) $$
     --   ppr sig)
-    (env', StgNonRec (id, sig) rhs')
+    (env', StgNonRec (id, out_sig) rhs')
   where
     id   = getBinderId in_env bndr
-    env' = extendSigEnv in_env [(id, sig)]
-    (sig,rhs') = inferTagRhs id in_env rhs
+    (in_sig,rhs') = inferTagRhs id in_env rhs
+    out_sig = mkLetSig in_env in_sig
+    env' = extendSigEnv in_env [(id, out_sig)]
 
 inferTagBind in_env (StgRec pairs)
   = -- pprTrace "rec" (ppr (map fst pairs) $$ ppr (in_env { te_env = out_env }, StgRec pairs')) $
@@ -443,14 +463,17 @@ inferTagBind in_env (StgRec pairs)
        | in_sigs == out_sigs = (te_env rhs_env, out_bndrs `zip` rhss')
        | otherwise     = go env' out_sigs rhss'
        where
-         out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive
          in_bndrs = in_ids `zip` in_sigs
+         out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive
          rhs_env = extendSigEnv go_env in_bndrs
          (out_sigs, rhss') = unzip (zipWithEqual "inferTagBind" anaRhs in_ids go_rhss)
          env' = makeTagged go_env
 
          anaRhs :: Id -> GenStgRhs q -> (TagSig, GenStgRhs 'InferTaggedBinders)
-         anaRhs bnd rhs = inferTagRhs bnd rhs_env rhs
+         anaRhs bnd rhs =
+            let (sig_rhs,rhs') = inferTagRhs bnd rhs_env rhs
+            in (mkLetSig go_env sig_rhs, rhs')
+
 
          updateBndr :: (Id,TagSig) -> (Id,TagSig)
          updateBndr (v,sig) = (setIdTagSig v sig, sig)
@@ -536,6 +559,15 @@ inferTagRhs _ env _rhs@(StgRhsCon cc con cn ticks args)
   = --pprTrace "inferTagRhsCon" (ppr grp_ids) $
     (TagSig (inferConTag env con args), StgRhsCon cc con cn ticks args)
 
+-- Adjust let semantics to the targeted backend.
+-- See Note [Tag inference for interpreted code]
+mkLetSig :: TagEnv p -> TagSig -> TagSig
+mkLetSig env in_sig
+  | for_bytecode = TagSig TagDunno
+  | otherwise = in_sig
+  where
+    for_bytecode = te_bytecode env
+
 {- Note [Constructor TagSigs]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 @inferConTag@ will infer the proper tag signature for a binding whose RHS is a constructor


=====================================
compiler/GHC/Stg/InferTags/Types.hs
=====================================
@@ -49,24 +49,30 @@ combineAltInfo ti               TagTagged      = ti
 type TagSigEnv = IdEnv TagSig
 data TagEnv p = TE { te_env :: TagSigEnv
                    , te_get :: BinderP p -> Id
+                   , te_bytecode :: !Bool
                    }
 
 instance Outputable (TagEnv p) where
-    ppr te = ppr (te_env te)
-
+    ppr te = for_txt <+> ppr (te_env te)
+        where
+            for_txt = if te_bytecode te
+                then text "for_bytecode"
+                else text "for_native"
 
 getBinderId :: TagEnv p -> BinderP p -> Id
 getBinderId = te_get
 
-initEnv :: TagEnv 'CodeGen
-initEnv = TE { te_env = emptyVarEnv
-             , te_get = \x -> x}
+initEnv :: Bool -> TagEnv 'CodeGen
+initEnv for_bytecode = TE { te_env = emptyVarEnv
+             , te_get = \x -> x
+             , te_bytecode = for_bytecode }
 
 -- | Simply convert an env to an env of the 'InferTaggedBinders pass
 -- with no other changes.
 makeTagged :: TagEnv p -> TagEnv 'InferTaggedBinders
 makeTagged env = TE { te_env = te_env env
-                    , te_get = fst }
+                    , te_get = fst
+                    , te_bytecode = te_bytecode env }
 
 noSig :: TagEnv p -> BinderP p -> (Id, TagSig)
 noSig env bndr
@@ -75,14 +81,18 @@ noSig env bndr
   where
     var = getBinderId env bndr
 
+-- | Look up a sig in the given env
 lookupSig :: TagEnv p -> Id -> Maybe TagSig
 lookupSig env fun = lookupVarEnv (te_env env) fun
 
+-- | Look up a sig in the env or derive it from information
+-- in the arg itself.
 lookupInfo :: TagEnv p -> StgArg -> TagInfo
 lookupInfo env (StgVarArg var)
   -- Nullary data constructors like True, False
   | Just dc <- isDataConWorkId_maybe var
   , isNullaryRepDataCon dc
+  , not for_bytecode
   = TagProper
 
   | isUnliftedType (idType var)
@@ -93,6 +103,7 @@ lookupInfo env (StgVarArg var)
   = info
 
   | Just lf_info <- idLFInfo_maybe var
+  , not for_bytecode
   =   case lf_info of
           -- Function, tagged (with arity)
           LFReEntrant {}
@@ -112,6 +123,8 @@ lookupInfo env (StgVarArg var)
 
   | otherwise
   = TagDunno
+  where
+    for_bytecode = te_bytecode env
 
 lookupInfo _ (StgLitArg {})
   = TagProper


=====================================
compiler/GHC/Stg/Pipeline.hs
=====================================
@@ -50,6 +50,7 @@ data StgPipelineOpts = StgPipelineOpts
   -- ^ Should we lint the STG at various stages of the pipeline?
   , stgPipeline_pprOpts     :: !StgPprOpts
   , stgPlatform             :: !Platform
+  , stgPipeline_forBytecode :: !Bool
   }
 
 newtype StgM a = StgM { _unStgM :: ReaderT Char IO a }
@@ -89,7 +90,7 @@ stg2stg logger extra_vars opts this_mod binds
           -- annotations (which is used by code generator to compute offsets into closures)
         ; let binds_sorted_with_fvs = depSortWithAnnotStgPgm this_mod binds'
         -- See Note [Tag inference for interactive contexts]
-        ; inferTags (stgPipeline_pprOpts opts) logger this_mod binds_sorted_with_fvs
+        ; inferTags (stgPipeline_pprOpts opts) (stgPipeline_forBytecode opts) logger this_mod binds_sorted_with_fvs
    }
 
   where



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/60241bfeb56c85d816a28a22ad6e0b8322e3573c

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/60241bfeb56c85d816a28a22ad6e0b8322e3573c
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230202/5094ff8a/attachment-0001.html>


More information about the ghc-commits mailing list