[Git][ghc/ghc][wip/andreask/infer-bytecode] Adjust let tag inference for bytecode.

Andreas Klebinger (@AndreasK) gitlab at gitlab.haskell.org
Tue Jan 31 11:26:48 UTC 2023



Andreas Klebinger pushed to branch wip/andreask/infer-bytecode at Glasgow Haskell Compiler / GHC


Commits:
d036785f by Andreas Klebinger at 2023-01-31T12:23:32+01:00
Adjust let tag inference for bytecode.

Infer untagged for x in `let x = <Con>` when targeting bytecode.

Tag inference assumed let bound counstructors as in `let x = True in
...` would result in `x` being a tagged pointer unconditionally.

Sadly this is not true for bytecode where x will point to a BCO object.

Fixes #22840

- - - - -


4 changed files:

- compiler/GHC/Driver/Config/Stg/Pipeline.hs
- compiler/GHC/Stg/InferTags.hs
- compiler/GHC/Stg/InferTags/Types.hs
- compiler/GHC/Stg/Pipeline.hs


Changes:

=====================================
compiler/GHC/Driver/Config/Stg/Pipeline.hs
=====================================
@@ -22,6 +22,7 @@ initStgPipelineOpts dflags for_bytecode = StgPipelineOpts
   , stgPipeline_pprOpts = initStgPprOpts dflags
   , stgPipeline_phases = getStgToDo for_bytecode dflags
   , stgPlatform = targetPlatform dflags
+  , stgPipeline_forBytecode = for_bytecode
   }
 
 -- | Which Stg-to-Stg passes to run. Depends on flags, ways etc.


=====================================
compiler/GHC/Stg/InferTags.hs
=====================================
@@ -204,6 +204,26 @@ a different StgPass! To handle this a large part of the analysis is polymorphic
 over the exact StgPass we are using. Which allows us to run the analysis on
 the output of itself.
 
+Note [Tag inference for interpreted code]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The bytecode interpreter has a different behaviour when it comes
+to the tagging of let binders than the StgToCmm code generator.
+
+When we compile a let binder like
+
+  let x = Just True
+
+Weither or not `x` will be represented by a properly tagged pointer depends on
+the backend in use.
+For the bytecode backend it will result in x pointing to a BCO which once
+evaluated returns a properly tagged pointer to the heap object.
+In the Cmm backend for the same binding we would allocate the constructor right
+away and x will immediately be represented by a tagged pointer.
+This means for interpreted code we can not assume let bound constructors are
+properly tagged. Hence we distinguish between targeting bytecode and native in
+the analysis.
+
+We make this differentiation in `mkLetSig`.
 -}
 
 {- *********************************************************************
@@ -212,20 +232,12 @@ the output of itself.
 *                                                                      *
 ********************************************************************* -}
 
--- doCodeGen :: HscEnv -> Module -> InfoTableProvMap -> [TyCon]
---           -> CollectedCCs
---           -> [CgStgTopBinding] -- ^ Bindings come already annotated with fvs
---           -> HpcInfo
---           -> IO (Stream IO CmmGroupSRTs CmmCgInfos)
---          -- Note we produce a 'Stream' of CmmGroups, so that the
---          -- backend can be run incrementally.  Otherwise it generates all
---          -- the C-- up front, which has a significant space cost.
-inferTags :: StgPprOpts -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig)
-inferTags ppr_opts logger this_mod stg_binds = do
+inferTags :: StgPprOpts -> Bool -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig)
+inferTags ppr_opts !for_bytecode logger this_mod stg_binds = do
 
     -- Annotate binders with tag information.
     let (!stg_binds_w_tags) = {-# SCC "StgTagFields" #-}
-                                        inferTagsAnal stg_binds
+                                        inferTagsAnal for_bytecode stg_binds
     putDumpFileMaybe logger Opt_D_dump_stg_tags "CodeGenAnal STG:" FormatSTG (pprGenStgTopBindings ppr_opts stg_binds_w_tags)
 
     let export_tag_info = collectExportInfo stg_binds_w_tags
@@ -254,10 +266,10 @@ type InferExtEq i = ( XLet i ~ XLet 'InferTaggedBinders
                     , XLetNoEscape i ~ XLetNoEscape 'InferTaggedBinders
                     , XRhsClosure i ~ XRhsClosure 'InferTaggedBinders)
 
-inferTagsAnal :: [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders]
-inferTagsAnal binds =
+inferTagsAnal :: Bool -> [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders]
+inferTagsAnal for_bytecode binds =
   -- pprTrace "Binds" (pprGenStgTopBindings shortStgPprOpts $ binds) $
-  snd (mapAccumL inferTagTopBind initEnv binds)
+  snd (mapAccumL inferTagTopBind (initEnv for_bytecode) binds)
 
 -----------------------
 inferTagTopBind :: TagEnv 'CodeGen -> GenStgTopBinding 'CodeGen
@@ -423,8 +435,9 @@ inferTagBind in_env (StgNonRec bndr rhs)
     (env', StgNonRec (id, sig) rhs')
   where
     id   = getBinderId in_env bndr
-    env' = extendSigEnv in_env [(id, sig)]
     (sig,rhs') = inferTagRhs id in_env rhs
+    sig' = mkLetSig in_env sig
+    env' = extendSigEnv in_env [(id, sig')]
 
 inferTagBind in_env (StgRec pairs)
   = -- pprTrace "rec" (ppr (map fst pairs) $$ ppr (in_env { te_env = out_env }, StgRec pairs')) $
@@ -443,14 +456,17 @@ inferTagBind in_env (StgRec pairs)
        | in_sigs == out_sigs = (te_env rhs_env, out_bndrs `zip` rhss')
        | otherwise     = go env' out_sigs rhss'
        where
-         out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive
          in_bndrs = in_ids `zip` in_sigs
+         out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive
          rhs_env = extendSigEnv go_env in_bndrs
          (out_sigs, rhss') = unzip (zipWithEqual "inferTagBind" anaRhs in_ids go_rhss)
          env' = makeTagged go_env
 
          anaRhs :: Id -> GenStgRhs q -> (TagSig, GenStgRhs 'InferTaggedBinders)
-         anaRhs bnd rhs = inferTagRhs bnd rhs_env rhs
+         anaRhs bnd rhs =
+            let (sig',rhs') = inferTagRhs bnd rhs_env rhs
+            in (mkLetSig go_env sig', rhs')
+
 
          updateBndr :: (Id,TagSig) -> (Id,TagSig)
          updateBndr (v,sig) = (setIdTagSig v sig, sig)
@@ -536,6 +552,17 @@ inferTagRhs _ env _rhs@(StgRhsCon cc con cn ticks args)
   = --pprTrace "inferTagRhsCon" (ppr grp_ids) $
     (TagSig (inferConTag env con args), StgRhsCon cc con cn ticks args)
 
+-- Adjust let semantics to the targeted backend.
+-- See Note [Tag inference for interpreted code]
+mkLetSig :: TagEnv p -> TagSig -> TagSig
+mkLetSig env in_sig@(TagSig tag)
+  | for_bytecode = case tag of
+      tuple_tag at TagTuple{} -> TagSig tuple_tag
+      _                    -> TagSig TagDunno
+  | otherwise = in_sig
+  where
+    for_bytecode = te_bytecode env
+
 {- Note [Constructor TagSigs]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 @inferConTag@ will infer the proper tag signature for a binding who's RHS is a constructor


=====================================
compiler/GHC/Stg/InferTags/Types.hs
=====================================
@@ -49,24 +49,33 @@ combineAltInfo ti               TagTagged      = ti
 type TagSigEnv = IdEnv TagSig
 data TagEnv p = TE { te_env :: TagSigEnv
                    , te_get :: BinderP p -> Id
+                   , te_bytecode :: !Bool
                    }
 
 instance Outputable (TagEnv p) where
-    ppr te = ppr (te_env te)
+    ppr te = for_bytecode <+> ppr (te_env te)
+        where
+            for_bytecode = if te_bytecode te
+                then
+                    text "for_bytecode"
+                else
+                    text "for_native"
 
 
 getBinderId :: TagEnv p -> BinderP p -> Id
 getBinderId = te_get
 
-initEnv :: TagEnv 'CodeGen
-initEnv = TE { te_env = emptyVarEnv
-             , te_get = \x -> x}
+initEnv :: Bool -> TagEnv 'CodeGen
+initEnv for_bytecode = TE { te_env = emptyVarEnv
+             , te_get = \x -> x
+             , te_bytecode = for_bytecode }
 
 -- | Simple convert env to a env of the 'InferTaggedBinders pass
 -- with no other changes.
 makeTagged :: TagEnv p -> TagEnv 'InferTaggedBinders
 makeTagged env = TE { te_env = te_env env
-                    , te_get = fst }
+                    , te_get = fst
+                    , te_bytecode = te_bytecode env }
 
 noSig :: TagEnv p -> BinderP p -> (Id, TagSig)
 noSig env bndr
@@ -83,6 +92,7 @@ lookupInfo env (StgVarArg var)
   -- Nullary data constructors like True, False
   | Just dc <- isDataConWorkId_maybe var
   , isNullaryRepDataCon dc
+  , not for_bytecode
   = TagProper
 
   | isUnliftedType (idType var)
@@ -93,6 +103,7 @@ lookupInfo env (StgVarArg var)
   = info
 
   | Just lf_info <- idLFInfo_maybe var
+  , not for_bytecode
   =   case lf_info of
           -- Function, tagged (with arity)
           LFReEntrant {}
@@ -112,6 +123,8 @@ lookupInfo env (StgVarArg var)
 
   | otherwise
   = TagDunno
+  where
+    for_bytecode = te_bytecode env
 
 lookupInfo _ (StgLitArg {})
   = TagProper


=====================================
compiler/GHC/Stg/Pipeline.hs
=====================================
@@ -50,6 +50,7 @@ data StgPipelineOpts = StgPipelineOpts
   -- ^ Should we lint the STG at various stages of the pipeline?
   , stgPipeline_pprOpts     :: !StgPprOpts
   , stgPlatform             :: !Platform
+  , stgPipeline_forBytecode :: !Bool
   }
 
 newtype StgM a = StgM { _unStgM :: ReaderT Char IO a }
@@ -89,7 +90,7 @@ stg2stg logger extra_vars opts this_mod binds
           -- annotations (which is used by code generator to compute offsets into closures)
         ; let binds_sorted_with_fvs = depSortWithAnnotStgPgm this_mod binds'
         -- See Note [Tag inference for interactive contexts]
-        ; inferTags (stgPipeline_pprOpts opts) logger this_mod binds_sorted_with_fvs
+        ; inferTags (stgPipeline_pprOpts opts) (stgPipeline_forBytecode opts) logger this_mod binds_sorted_with_fvs
    }
 
   where



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d036785f8e657f48387e054de53344eba9c0c24c

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d036785f8e657f48387e054de53344eba9c0c24c
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230131/c1f7843c/attachment-0001.html>


More information about the ghc-commits mailing list