[Git][ghc/ghc][master] Add entity information to HieFile #24544

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Fri Sep 27 10:10:48 UTC 2024



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
1b39363b by Patrick at 2024-09-27T06:10:19-04:00
Add entity information to HieFile #24544

Enhanced HieFile to capture entity information for identifiers, enabling better support for language tools and protocols. See issue #24544 for more details.

Work have been done:
* Introduction of new data type `EntityInfo` in `GHC.Iface.Ext.Types`.
* Add extra field `hie_entity_infos :: NameEntityInfo` to `HieFile`
  to store the mapping from entity name to corresponding entity infos
  in `GHC.Iface.Ext.Types`.
* Compute `EntityInfo` for each entity name in the HieAst from `TyThing,
  Id, OccName` when generating the `HieFile` in `GHC.Iface.Ext.Ast`.
* Add test T24544 to test the generation of `EntityInfo`.

- - - - -


5 changed files:

- compiler/GHC/Iface/Ext/Ast.hs
- compiler/GHC/Iface/Ext/Types.hs
- + testsuite/tests/hiefile/should_run/T24544.hs
- + testsuite/tests/hiefile/should_run/T24544.stdout
- testsuite/tests/hiefile/should_run/all.T


Changes:

=====================================
compiler/GHC/Iface/Ext/Ast.hs
=====================================
@@ -33,7 +33,6 @@ import GHC.Types.Basic
 import GHC.Data.BooleanFormula
 import GHC.Core.Class             ( className, classSCSelIds )
 import GHC.Core.ConLike           ( conLikeName )
-import GHC.Core.TyCon             ( TyCon, tyConClass_maybe )
 import GHC.Core.FVs
 import GHC.Core.DataCon           ( dataConNonlinearType )
 import GHC.Types.FieldLabel
@@ -41,11 +40,12 @@ import GHC.Hs
 import GHC.Hs.Syn.Type
 import GHC.Utils.Monad            ( concatMapM, MonadIO(liftIO) )
 import GHC.Types.Id               ( isDataConId_maybe )
-import GHC.Types.Name             ( Name, nameSrcSpan, nameUnique )
+import GHC.Types.Name             ( Name, nameSrcSpan, nameUnique, wiredInNameTyThing_maybe )
 import GHC.Types.Name.Env         ( NameEnv, emptyNameEnv, extendNameEnv, lookupNameEnv )
 import GHC.Types.Name.Reader      ( RecFieldInfo(..) )
 import GHC.Types.SrcLoc
 import GHC.Core.Type              ( Type )
+import GHC.Core.TyCon             ( TyCon, tyConClass_maybe )
 import GHC.Core.Predicate
 import GHC.Core.InstEnv
 import GHC.Tc.Types
@@ -81,6 +81,8 @@ import Control.Monad.Trans.State.Strict
 import Control.Monad.Trans.Reader
 import Control.Monad.Trans.Class  ( lift )
 import Control.Applicative        ( (<|>) )
+import GHC.Types.TypeEnv          ( TypeEnv )
+import Control.Arrow              ( second )
 
 {- Note [Updating HieAst for changes in the GHC AST]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -229,6 +231,10 @@ data HieState = HieState
   -- These are placed at the top level Node in the HieAST after everything
   -- else has been generated
   -- This includes things like top level evidence bindings.
+  , type_env :: TypeEnv
+  -- tcg_type_env from TcGblEnv contains the type environment for the module
+  , entity_infos :: NameEntityInfo
+  -- ^ Information about entities in the module
   }
 
 addUnlocatedEvBind :: Var -> ContextInfo -> HieM ()
@@ -260,8 +266,20 @@ getUnlocatedEvBinds file = do
 
   pure $ (M.fromList nis, asts)
 
+lookupAndInsertEntityName :: Name -> HieM ()
+lookupAndInsertEntityName name = do
+  m <- lift $ gets type_env
+  let tyThing = lookupNameEnv m name <|> wiredInNameTyThing_maybe name
+  insertEntityInfo name $ maybe (nameEntityInfo name) tyThingEntityInfo tyThing
+
+-- | Insert entity information for an identifier
+insertEntityInfo :: Name -> S.Set EntityInfo -> HieM ()
+insertEntityInfo ident info = do
+  lift $ modify' $ \s ->
+    s { entity_infos = M.insertWith S.union ident info (entity_infos s) }
+
 initState :: HieState
-initState = HieState emptyNameEnv emptyDVarEnv
+initState = HieState emptyNameEnv emptyDVarEnv mempty mempty
 
 class ModifyState a where -- See Note [Name Remapping]
   addSubstitution :: a -> a -> HieState -> HieState
@@ -302,8 +320,9 @@ mkHieFileWithSource src_file src ms ts rs =
   let tc_binds = tcg_binds ts
       top_ev_binds = tcg_ev_binds ts
       insts = tcg_insts ts
+      tte = tcg_type_env ts
       tcs = tcg_tcs ts
-      (asts',arr) = getCompressedAsts tc_binds rs top_ev_binds insts tcs in
+      (asts',arr,entityInfos) = getCompressedAsts tc_binds rs top_ev_binds insts tcs tte in
   HieFile
       { hie_hs_file = src_file
       , hie_module = ms_mod ms
@@ -312,18 +331,20 @@ mkHieFileWithSource src_file src ms ts rs =
       -- mkIfaceExports sorts the AvailInfos for stability
       , hie_exports = mkIfaceExports (tcg_exports ts)
       , hie_hs_src = src
+      , hie_entity_infos = entityInfos
       }
 
-getCompressedAsts :: TypecheckedSource -> RenamedSource -> Bag EvBind -> [ClsInst] -> [TyCon]
-  -> (HieASTs TypeIndex, A.Array TypeIndex HieTypeFlat)
-getCompressedAsts ts rs top_ev_binds insts tcs =
-  let asts = enrichHie ts rs top_ev_binds insts tcs in
-  compressTypes asts
-
-enrichHie :: TypecheckedSource -> RenamedSource -> Bag EvBind -> [ClsInst] -> [TyCon]
-  -> HieASTs Type
-enrichHie ts (hsGrp, imports, exports, docs, modName) ev_bs insts tcs =
-  runIdentity $ flip evalStateT initState $ flip runReaderT SourceInfo $ do
+getCompressedAsts :: TypecheckedSource -> RenamedSource -> Bag EvBind -> [ClsInst] -> [TyCon] -> TypeEnv
+  -> (HieASTs TypeIndex, A.Array TypeIndex HieTypeFlat, NameEntityInfo)
+getCompressedAsts ts rs top_ev_binds insts tcs tte =
+  let (asts, infos) = enrichHie ts rs top_ev_binds insts tcs tte
+      add c (a, b) = (a,b,c)
+  in add infos $ compressTypes asts
+
+enrichHie :: TypecheckedSource -> RenamedSource -> Bag EvBind -> [ClsInst] -> [TyCon] -> TypeEnv
+  -> (HieASTs Type, NameEntityInfo)
+enrichHie ts (hsGrp, imports, exports, docs, modName) ev_bs insts tcs tte =
+  second entity_infos $ runIdentity $ flip runStateT initState{type_env=tte} $ flip runReaderT SourceInfo $ do
     modName <- toHie (IEC Export <$> modName)
     tasts <- toHie $ fmap (BC RegularBind ModuleScope) ts
     rasts <- processGrp hsGrp
@@ -418,6 +439,7 @@ bindingsOnly [] = pure []
 bindingsOnly (C c n : xs) = do
   org <- ask
   rest <- bindingsOnly xs
+  lookupAndInsertEntityName n
   pure $ case nameSrcSpan n of
     RealSrcSpan span _ -> Node (mkSourcedNodeInfo org nodeinfo) span [] : rest
       where nodeinfo = NodeInfo S.empty [] (M.singleton (Right n) info)
@@ -599,7 +621,7 @@ instance (ToHie a) => ToHie (Maybe a) where
 instance ToHie (IEContext (LocatedA ModuleName)) where
   toHie (IEC c (L (EpAnn (EpaSpan (RealSrcSpan span _)) _ _) mname)) = do
       org <- ask
-      pure $ [Node (mkSourcedNodeInfo org $ NodeInfo S.empty [] idents) span []]
+      pure [Node (mkSourcedNodeInfo org $ NodeInfo S.empty [] idents) span []]
     where details = mempty{identInfo = S.singleton (IEThing c)}
           idents = M.singleton (Left mname) details
   toHie _ = pure []
@@ -624,6 +646,9 @@ instance ToHie (Context (Located Var)) where
               ty = case isDataConId_maybe name' of
                       Nothing -> varType name'
                       Just dc -> dataConNonlinearType dc
+          -- insert the entity info for the name into the entity_infos map
+          insertEntityInfo (varName name) $ idEntityInfo name
+          insertEntityInfo (varName name') $ idEntityInfo name'
           pure
             [Node
               (mkSourcedNodeInfo org $ NodeInfo S.empty [] $
@@ -648,6 +673,9 @@ instance ToHie (Context (Located Name)) where
           let name = case lookupNameEnv m name' of
                 Just var -> varName var
                 Nothing -> name'
+          -- insert the entity info for the name into the entity_infos map
+          lookupAndInsertEntityName name
+          lookupAndInsertEntityName name'
           pure
             [Node
               (mkSourcedNodeInfo org $ NodeInfo S.empty [] $


=====================================
compiler/GHC/Iface/Ext/Types.hs
=====================================
@@ -29,6 +29,13 @@ import GHC.Types.Avail
 import GHC.Types.Unique
 import qualified GHC.Utils.Outputable as O ( (<>) )
 import GHC.Utils.Panic
+import GHC.Core.ConLike           ( ConLike(..) )
+import GHC.Core.TyCo.Rep          ( Type(..) )
+import GHC.Core.Type              ( coreFullView, isFunTy, Var (..) )
+import GHC.Core.TyCon             ( isTypeSynonymTyCon, isClassTyCon, isFamilyTyCon )
+import GHC.Types.Id               ( Id, isRecordSelector, isClassOpId )
+import GHC.Types.TyThing          ( TyThing (..) )
+import GHC.Types.Var              ( isTyVar, isFUNArg )
 
 import qualified Data.Array as A
 import qualified Data.Map as M
@@ -84,7 +91,17 @@ data HieFile = HieFile
 
     , hie_hs_src :: ByteString
     -- ^ Raw bytes of the initial Haskell source
+
+    , hie_entity_infos :: NameEntityInfo
+    -- ^ Entity information for each `Name` in the `hie_asts`
     }
+
+type NameEntityInfo = M.Map Name (S.Set EntityInfo)
+
+instance Binary NameEntityInfo where
+  put_ bh m = put_ bh $ M.toList m
+  get bh = fmap M.fromList (get bh)
+
 instance Binary HieFile where
   put_ bh hf = do
     put_ bh $ hie_hs_file hf
@@ -93,6 +110,7 @@ instance Binary HieFile where
     put_ bh $ hie_asts hf
     put_ bh $ hie_exports hf
     put_ bh $ hie_hs_src hf
+    put_ bh $ hie_entity_infos hf
 
   get bh = HieFile
     <$> get bh
@@ -101,6 +119,7 @@ instance Binary HieFile where
     <*> get bh
     <*> get bh
     <*> get bh
+    <*> get bh
 
 
 {-
@@ -783,3 +802,84 @@ toHieName name
                                        (nameOccName name)
                                        (removeBufSpan $ nameSrcSpan name)
   | otherwise = LocalName (nameOccName name) (removeBufSpan $ nameSrcSpan name)
+
+
+{- Note [Capture Entity Information]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We need to capture the entity information for the identifier in HieAst, so that
+language tools and protocols can take advantage making use of it.
+
+Capture `EntityInfo` for a `Name` or `Id` in `renamedSource` or `typecheckedSource`
+if it is a name, we ask the env for the `TyThing` then compute the `EntityInfo` from tyThing
+if it is an Id, we compute the `EntityInfo` directly from Id
+
+see issue #24544 for more details
+-}
+
+
+-- | Entity information
+-- `EntityInfo` is a simplified version of `TyThing` and richer version than `Namespace` in `OccName`.
+-- It state the kind of the entity, such as `Variable`, `TypeVariable`, `DataConstructor`, etc..
+data EntityInfo
+  = EntityVariable
+  | EntityFunction
+  | EntityDataConstructor
+  | EntityTypeVariable
+  | EntityClassMethod
+  | EntityPatternSynonym
+  | EntityTypeConstructor
+  | EntityTypeClass
+  | EntityTypeSynonym
+  | EntityTypeFamily
+  | EntityRecordField
+  deriving (Eq, Ord, Enum, Show)
+
+
+instance Outputable EntityInfo where
+  ppr EntityVariable = text "variable"
+  ppr EntityFunction = text "function"
+  ppr EntityDataConstructor = text "data constructor"
+  ppr EntityTypeVariable = text "type variable"
+  ppr EntityClassMethod = text "class method"
+  ppr EntityPatternSynonym = text "pattern synonym"
+  ppr EntityTypeConstructor = text "type constructor"
+  ppr EntityTypeClass = text "type class"
+  ppr EntityTypeSynonym = text "type synonym"
+  ppr EntityTypeFamily = text "type family"
+  ppr EntityRecordField = text "record field"
+
+
+instance Binary EntityInfo where
+  put_ bh b = putByte bh (fromIntegral (fromEnum b))
+  get bh = do x <- getByte bh; pure $! toEnum (fromIntegral x)
+
+
+-- | Get the `EntityInfo` for an `Id`
+idEntityInfo :: Id -> S.Set EntityInfo
+idEntityInfo vid = S.fromList $ [EntityTypeVariable | isTyVar vid] <> [EntityFunction | isFunType $ varType vid]
+  <> [EntityRecordField | isRecordSelector vid] <> [EntityClassMethod | isClassOpId vid] <> [EntityVariable]
+  where
+    isFunType a = case coreFullView a of
+      ForAllTy _ t    -> isFunType t
+      FunTy { ft_af = flg, ft_res = rhs } -> isFUNArg flg || isFunType rhs
+      _x              -> isFunTy a
+
+-- | Get the `EntityInfo` for a `TyThing`
+tyThingEntityInfo :: TyThing -> S.Set EntityInfo
+tyThingEntityInfo ty = case ty of
+  AnId vid -> idEntityInfo vid
+  AConLike con -> case con of
+    RealDataCon _ -> S.singleton EntityDataConstructor
+    PatSynCon _   -> S.singleton EntityPatternSynonym
+  ATyCon tyCon -> S.fromList $ [EntityTypeSynonym | isTypeSynonymTyCon tyCon] <> [EntityTypeFamily | isFamilyTyCon tyCon]
+                  <> [EntityTypeClass | isClassTyCon tyCon] <> [EntityTypeConstructor]
+  ACoAxiom _ -> S.empty
+
+nameEntityInfo :: Name -> S.Set EntityInfo
+nameEntityInfo name
+  | isTyVarName name = S.fromList [EntityVariable, EntityTypeVariable]
+  | isDataConName name = S.singleton EntityDataConstructor
+  | isTcClsNameSpace (occNameSpace $ occName name) = S.singleton EntityTypeConstructor
+  | isFieldName name = S.fromList [EntityVariable, EntityRecordField]
+  | isVarName name = S.fromList [EntityVariable]
+  | otherwise = S.empty


=====================================
testsuite/tests/hiefile/should_run/T24544.hs
=====================================
@@ -0,0 +1,54 @@
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE TypeFamilies #-}
+module Main where
+
+import TestUtils
+import qualified Data.Map.Strict as M
+import qualified Data.Set as S
+import Data.Either
+import Data.Maybe
+import Data.Bifunctor (first)
+import GHC.Plugins (moduleNameString, nameStableString, nameOccName, occNameString, isDerivedOccName)
+import GHC.Iface.Ext.Types
+-- data EntityInfo
+--   =
+--   EntityVariable
+boo = 1
+--   | EntityFunction
+foo :: a -> a
+--   | EntityTypeVariable
+foo a = a
+--   | EntityTypeConstructor
+data DataFoo
+--   | EntityDataConstructor
+   = DFoo
+--   | EntityTypeClass
+class ClassBoo a where
+--   | EntityClassMethod
+  cboo :: a
+--   | EntityPatternSynonym
+pattern PatternFoo = 1
+--   | EntityTypeFamily
+type family FamilyFoo
+data family DataFamilyBar
+--   | EntityTypeSynonym
+type TypeSynonymFoo = Int
+--   | EntityRecordField
+data RecordFoo = RecordFoo { recordFoo :: Int }
+
+points :: [(Int,Int)]
+points = [(16,1), (18,9), (20,1), (22,6), (24,6), (26,7), (28,2), (30,9), (32,13), (33,13), (35,6), (37,30)]
+
+getIdentifierEntityInfo :: HieFile -> Identifier -> S.Set EntityInfo
+getIdentifierEntityInfo hf (Right ident) = M.findWithDefault S.empty ident (hie_entity_infos hf)
+getIdentifierEntityInfo hf (Left _) = S.empty
+
+isNotDerived :: (Identifier, a) -> Bool
+isNotDerived ((Right name), _) = not $ isDerivedOccName (nameOccName name)
+isNotDerived ((Left _), _) = True
+main = do
+  (df, hf) <- readTestHie "T24544.hie"
+  let asts = fmap (fromMaybe (error "nothing") . selectPoint hf) points
+      idents = concatMap (M.toList . sourcedNodeIdents . sourcedNodeInfo) asts
+      names = map (\(x, _) -> (either moduleNameString  (occNameString . nameOccName) x, getIdentifierEntityInfo hf x)) $ filter isNotDerived idents
+  mapM_ (print) names


=====================================
testsuite/tests/hiefile/should_run/T24544.stdout
=====================================
@@ -0,0 +1,12 @@
+("boo",fromList [EntityVariable])
+("a",fromList [EntityVariable,EntityTypeVariable])
+("foo",fromList [EntityVariable,EntityFunction])
+("DataFoo",fromList [EntityTypeConstructor])
+("DFoo",fromList [EntityDataConstructor])
+("ClassBoo",fromList [EntityTypeConstructor,EntityTypeClass])
+("PatternFoo",fromList [EntityPatternSynonym])
+("FamilyFoo",fromList [EntityTypeConstructor,EntityTypeFamily])
+("DataFamilyBar",fromList [EntityTypeConstructor,EntityTypeFamily])
+("TypeSynonymFoo",fromList [EntityTypeConstructor,EntityTypeSynonym])
+("RecordFoo",fromList [EntityDataConstructor])
+("recordFoo",fromList [EntityVariable,EntityFunction,EntityRecordField])
\ No newline at end of file


=====================================
testsuite/tests/hiefile/should_run/all.T
=====================================
@@ -7,3 +7,4 @@ test('SpliceTypes', [req_th, extra_run_opts('"' + config.libdir + '"'), extra_fi
 test('HieVdq', [extra_run_opts('"' + config.libdir + '"'), extra_files(['TestUtils.hs'])], compile_and_run, ['-package ghc -fwrite-ide-info'])
 test('T23540', [extra_run_opts('"' + config.libdir + '"'), extra_files(['TestUtils.hs'])], compile_and_run, ['-package ghc -fwrite-ide-info'])
 test('T23120', [extra_run_opts('"' + config.libdir + '"'), extra_files(['TestUtils.hs'])], compile_and_run, ['-package ghc -fwrite-ide-info'])
+test('T24544', [extra_run_opts('"' + config.libdir + '"'), extra_files(['TestUtils.hs'])], compile_and_run, ['-package ghc -fwrite-ide-info'])
\ No newline at end of file



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/1b39363bd673314df76b74f5f9c65af6fe84f1c8

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/1b39363bd673314df76b74f5f9c65af6fe84f1c8
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240927/9ebb518e/attachment-0001.html>


More information about the ghc-commits mailing list