[Git][ghc/ghc][wip/mpickering/get-link-deps] 4 commits: Doc and clean up ExternalModuleGraph

Rodrigo Mesquita (@alt-romes) gitlab at gitlab.haskell.org
Fri Dec 20 16:48:12 UTC 2024



Rodrigo Mesquita pushed to branch wip/mpickering/get-link-deps at Glasgow Haskell Compiler / GHC


Commits:
c7d794e9 by Rodrigo Mesquita at 2024-12-19T17:29:59+00:00
Doc and clean up ExternalModuleGraph

- - - - -
7f2a5f25 by Rodrigo Mesquita at 2024-12-19T18:12:05+00:00
Add package nodes and reachability to EPS module graph

- - - - -
5211b644 by Rodrigo Mesquita at 2024-12-20T15:28:12+00:00
Improve code driving getLinkDeps

- - - - -
2ddafd26 by Rodrigo Mesquita at 2024-12-20T16:47:59+00:00
Finish loadHomePackageInterfacesBelow, delete follow_deps

- - - - -


5 changed files:

- compiler/GHC/Iface/Load.hs
- compiler/GHC/Linker/Deps.hs
- compiler/GHC/Linker/Loader.hs
- compiler/GHC/Unit/Module/External/Graph.hs
- compiler/GHC/Unit/Module/Graph.hs


Changes:

=====================================
compiler/GHC/Iface/Load.hs
=====================================
@@ -25,6 +25,7 @@ module GHC.Iface.Load (
         -- IfM functions
         loadInterface,
         loadSysInterface, loadUserInterface, loadPluginInterface,
+        loadHomePackageInterfacesBelow,
         findAndReadIface, readIface, writeIface,
         flagsToIfCompression,
         moduleFreeHolesPrecise,
@@ -410,20 +411,65 @@ loadInterfaceWithException doc mod_name where_from
     let ctx = initSDocContext dflags defaultUserStyle
     withIfaceErr ctx (loadInterface doc mod_name where_from)
 
--- | Load all interfaces from the home package, presumes that this operation
--- can be completed by traversing from the already loaded home packages.
+-- | Load all interfaces from the home package that are transitively reachable
+-- from the given modules -- presumes that this operation can be completed by
+-- traversing from the already loaded home packages.
+--
+-- This operation is used just before TH splices are run (in 'getLinkDeps').
+--
+-- TODO: SHOULD WE ASSERT THIS IS ONLY CALLED ON ONESHOT MODE? WE SHOULD NEVER
+-- WANT TO LOAD HOME MODULE PACKAGES INTO THE EPS ANY OTHER WAY.
+--
+-- The first time this is run...??
+--
+-- A field in the EPS tracks which home modules are already fully loaded, which we use
+-- here to avoid trying to load them a second time.
+--
+-- For convenience, reads the module graph out of the EPS after having loaded
+-- all the modules and returns it. It would be harder to get the updated module
+-- graph in 'getLinkDeps' another way.
+loadHomePackageInterfacesBelow :: (Module -> SDoc) -> Maybe HomeUnit {-^ The current home unit -}
+                               -> [Module] -> IfM lcl ()
+loadHomePackageInterfacesBelow _ Nothing _ = error "No home unit, what to do?"
+loadHomePackageInterfacesBelow msg (Just home_unit) mods = do
+  dflags <- getDynFlags
+  let ctx = initSDocContext dflags defaultUserStyle
+
+  forM_ mods $ \mod -> do
+
+    graph <- eps_module_graph <$> getEps
+    let key = ExternalModuleKey $ ModNodeKeyWithUid (GWIB (moduleName mod) NotBoot) (moduleUnitId mod)
+
+    if isFullyLoadedModule key graph
+      then return ()
+      else do
+        iface <- withIfaceErr ctx $
+          loadInterface (msg mod) mod (ImportByUser NotBoot)
+
+        -- RM:TODO: THINGS WE ARE NOT DOING
+        --
+        -- The ModIface contains the transitive closure of the module dependencies
+        -- within the current package, *except* for boot modules: if we encounter
+        -- a boot module, we have to find its real interface and discover the
+        -- dependencies of that.  Hence we need to traverse the dependency
+        -- tree recursively.  See bug #936, testcase ghci/prog007.
+
+        -- RM:TODO: WHAT WAS THIS DOING BEFORE IN FOLLOW_DEPS?
+        -- (was in follow_deps)
+        -- when (mi_boot iface == IsBoot) $ link_boot_mod_error mod
+
+        let deps = mi_deps iface
+            mod_deps = dep_direct_mods deps
 
--- This operation is used just before TH splices are run.
+        -- Load all direct dependencies that are in the home package
+        loadHomePackageInterfacesBelow msg (Just home_unit)
+          $ map (\(uid, GWIB mn _) -> mkModule (RealUnit (Definite uid)) mn)
+          $ filter ((==) (homeUnitId home_unit) . fst)
+          $ Set.toList mod_deps
 
--- The first time this is run
--- A field in the EPS tracks which home modules are fully loaded
-_loadHomePackageInterfacesBelow :: ModNodeKeyWithUid -> IfM lcl ()
-_loadHomePackageInterfacesBelow mn = do
-  graph <- eps_module_graph <$> getEps
-  let key = ExternalModuleKey mn
-  if isFullyLoadedModule key graph
-    then return ()
-    else return ()
+        -- Update the external graph with this module being fully loaded.
+        updateEps_ $ \eps ->
+          eps{eps_module_graph = setFullyLoadedModule key (eps_module_graph eps)}
 
 ------------------
 loadInterface :: SDoc -> Module -> WhereFrom
@@ -534,10 +580,12 @@ loadInterface doc_str mod from
         ; purged_hsc_env <- getTopEnv
 
         ; let direct_deps = map (uncurry (flip ModNodeKeyWithUid)) $ (Set.toList (dep_direct_mods $ mi_deps iface))
-        ; let !module_graph_key =
-                if moduleUnitId mod `elem` hsc_all_home_unit_ids hsc_env
+        ; let !module_graph_key = pprTrace "module_graph_on_load" (ppr (eps_module_graph eps)) $
+                if moduleUnitId mod `elem` hsc_all_home_unit_ids hsc_env -- can only happen in oneshot mode
                   then Just $ NodeHomePackage (miKey iface) (map ExternalModuleKey direct_deps)
                   else Nothing
+        -- ; let !module_graph_external_pkgs_nods = _
+                -- ROMES:TODO: Fairly sure we need to insert package nodes somewhere here.
 
         ; let final_iface = iface
                                & set_mi_decls     (panic "No mi_decls in PIT")


=====================================
compiler/GHC/Linker/Deps.hs
=====================================
@@ -36,25 +36,25 @@ import GHC.Unit.Finder
 import GHC.Unit.Module
 import GHC.Unit.Module.ModIface
 import GHC.Unit.Module.WholeCoreBindings
-import GHC.Unit.Module.Deps
 import GHC.Unit.Module.Graph
+import GHC.Unit.Module.External.Graph
 import GHC.Unit.Home.ModInfo
 
 import GHC.Iface.Errors.Types
-import GHC.Iface.Errors.Ppr
 
 import GHC.Utils.Misc
 import GHC.Unit.Home
 import GHC.Data.Maybe
 
-import Control.Monad
 import Control.Applicative
 
-import qualified Data.Set as Set
 import Data.List (isSuffixOf)
 
 import System.FilePath
 import System.Directory
+import GHC.Utils.Monad (mapMaybeM)
+import Data.Either (partitionEithers)
+import Data.Bifunctor (Bifunctor(..))
 
 data LinkDepsOpts = LinkDepsOpts
   { ldObjSuffix   :: !String                        -- ^ Suffix of .o files
@@ -68,8 +68,9 @@ data LinkDepsOpts = LinkDepsOpts
   , ldWays        :: !Ways                          -- ^ Enabled ways
   , ldFinderCache :: !FinderCache
   , ldFinderOpts  :: !FinderOpts
-  , ldLoadIface   :: !(SDoc -> Module -> IO (MaybeErr MissingInterfaceError ModIface))
   , ldLoadByteCode :: !(Module -> IO (Maybe Linkable))
+  , ldLoadHomeIfacesBelow :: !((Module -> SDoc) -> Maybe HomeUnit {- current home unit -}
+                                -> [Module] -> IO ExternalModuleGraph)
   }
 
 data LinkDeps = LinkDeps
@@ -102,7 +103,6 @@ getLinkDeps opts interp pls span mods = do
 
       get_link_deps opts pls maybe_normal_osuf span mods
 
-
 get_link_deps
   :: LinkDepsOpts
   -> LoaderState
@@ -111,31 +111,26 @@ get_link_deps
   -> [Module]
   -> IO LinkDeps
 get_link_deps opts pls maybe_normal_osuf span mods = do
-        -- 1.  Find the dependent home-pkg-modules/packages from each iface
+
+      -- Three step process:
+
+        -- 1. Find the dependent home-pkg-modules/packages from each iface
         -- (omitting modules from the interactive package, which is already linked)
-      (mods_s, pkgs_s) <-
-          -- Why two code paths here? There is a significant amount of repeated work
-          -- performed calculating transitive dependencies
-          -- if --make uses the oneShot code path (see MultiLayerModulesTH_* tests)
-          if ldOneShotMode opts
-            then follow_deps (filterOut isInteractiveModule mods)
-                              emptyUniqDSet emptyUniqDSet;
-            else do
-              mmods <- mapM get_mod_info all_home_mods
-              return (catMaybes mmods, mkUniqDSet all_dep_pkgs)
+      (all_home_mods, pkgs_s) <- get_reachable_nodes opts relevant_mods
+      mods_s <- mapMaybeM get_mod_info all_home_mods
 
       let
         -- 2.  Exclude ones already linked
         --      Main reason: avoid findModule calls in get_linkable
-            (mods_needed, links_got) = partitionWith split_mods mods_s
-            pkgs_needed = eltsUDFM $ getUniqDSet pkgs_s `minusUDFM` pkgs_loaded pls
+        (mods_needed, links_got) = partitionWith split_mods mods_s
+        pkgs_needed = eltsUDFM $ getUniqDSet pkgs_s `minusUDFM` pkgs_loaded pls
 
-            split_mods mod =
-                let is_linked = lookupModuleEnv (objs_loaded pls) mod
-                                <|> lookupModuleEnv (bcos_loaded pls) mod
-                in case is_linked of
-                     Just linkable -> Right linkable
-                     Nothing -> Left mod
+        split_mods mod =
+            let is_linked = lookupModuleEnv (objs_loaded pls) mod
+                            <|> lookupModuleEnv (bcos_loaded pls) mod
+            in case is_linked of
+                 Just linkable -> Right linkable
+                 Nothing -> Left mod
 
         -- 3.  For each dependent module, find its linkable
         --     This will either be in the HPT or (in the case of one-shot
@@ -149,20 +144,9 @@ get_link_deps opts pls maybe_normal_osuf span mods = do
         , ldNeededUnits     = pkgs_s
         }
   where
-    mod_graph = ldModuleGraph opts
     unit_env  = ldUnitEnv     opts
 
-    mkNk m
-       = let k = NodeKey_Module (ModNodeKeyWithUid (GWIB (moduleName m) NotBoot) (moduleUnitId m))
-         in if mgMember mod_graph k
-              then k
-              else NodeKey_ExternalUnit (moduleUnitId m)
-
-    initial_keys = map mkNk (filterOut isInteractiveModule mods)
-    all_deps = initial_keys ++ map mkNodeKey (mgReachableLoop mod_graph initial_keys)
-
-    all_home_mods = [with_uid | NodeKey_Module with_uid <- all_deps]
-    all_dep_pkgs = [uid | NodeKey_ExternalUnit uid <- all_deps]
+    relevant_mods = filterOut isInteractiveModule mods
 
     get_mod_info (ModNodeKeyWithUid gwib uid) =
       case lookupHug (ue_home_unit_graph unit_env) uid (gwib_mod gwib) of
@@ -174,59 +158,6 @@ get_link_deps opts pls maybe_normal_osuf span mods = do
         Nothing -> throwProgramError opts $
           text "getLinkDeps: Home module not loaded" <+> ppr (gwib_mod gwib) <+> ppr uid
 
-
-       -- This code is used in one-shot mode to traverse downwards through the HPT
-       -- to find all link dependencies.
-       -- The ModIface contains the transitive closure of the module dependencies
-       -- within the current package, *except* for boot modules: if we encounter
-       -- a boot module, we have to find its real interface and discover the
-       -- dependencies of that.  Hence we need to traverse the dependency
-       -- tree recursively.  See bug #936, testcase ghci/prog007.
-    follow_deps :: [Module]             -- modules to follow
-                -> UniqDSet Module         -- accum. module dependencies
-                -> UniqDSet UnitId          -- accum. package dependencies
-                -> IO ([Module], UniqDSet UnitId) -- result
-    follow_deps []     acc_mods acc_pkgs
-        = return (uniqDSetToList acc_mods, acc_pkgs)
-    follow_deps (mod:mods) acc_mods acc_pkgs
-        = do
-          mb_iface <- ldLoadIface opts msg mod
-          iface <- case mb_iface of
-                    Failed err      -> throwProgramError opts $
-                      missingInterfaceErrorDiagnostic (ldMsgOpts opts) err
-                    Succeeded iface -> return iface
-
-          when (mi_boot iface == IsBoot) $ link_boot_mod_error mod
-
-          let
-            pkg = moduleUnit mod
-            deps  = mi_deps iface
-
-            pkg_deps = dep_direct_pkgs deps
-            (boot_deps, mod_deps) = flip partitionWith (Set.toList (dep_direct_mods deps)) $
-              \case
-                (_, GWIB m IsBoot)  -> Left m
-                (_, GWIB m NotBoot) -> Right m
-
-            mod_deps' = case ue_homeUnit unit_env of
-                          Nothing -> []
-                          Just home_unit -> filter (not . (`elementOfUniqDSet` acc_mods)) (map (mkHomeModule home_unit) $ (boot_deps ++ mod_deps))
-            acc_mods'  = case ue_homeUnit unit_env of
-                          Nothing -> acc_mods
-                          Just home_unit -> addListToUniqDSet acc_mods (mod : map (mkHomeModule home_unit) mod_deps)
-            acc_pkgs'  = addListToUniqDSet acc_pkgs (Set.toList pkg_deps)
-
-          case ue_homeUnit unit_env of
-            Just home_unit | isHomeUnit home_unit pkg ->  follow_deps (mod_deps' ++ mods)
-                                                                      acc_mods' acc_pkgs'
-            _ ->  follow_deps mods acc_mods (addOneToUniqDSet acc_pkgs' (toUnitId pkg))
-        where
-           msg = text "need to link module" <+> ppr mod <+>
-                  text "due to use of Template Haskell"
-
-
-
-    link_boot_mod_error :: Module -> IO a
     link_boot_mod_error mod = throwProgramError opts $
             text "module" <+> ppr mod <+>
             text "cannot be linked; it is only available as a boot module"
@@ -299,6 +230,56 @@ get_link_deps opts pls maybe_normal_osuf span mods = do
               CoreBindings WholeCoreBindings {wcb_module} ->
                 pprPanic "Unhydrated core bindings" (ppr wcb_module)
 
+-- See Note [Reachability in One-shot mode vs Make mode]
+get_reachable_nodes :: LinkDepsOpts -> [Module] -> IO ([ModNodeKeyWithUid], UniqDSet UnitId)
+get_reachable_nodes opts mods
+
+  -- Reachability on 'ExternalModuleGraph' (for one shot mode)
+  | ldOneShotMode opts
+  = do
+    emg <- ldLoadHomeIfacesBelow opts msg (ue_homeUnit (ldUnitEnv opts)) mods
+    go (ExternalModuleKey . mkModuleNk) emgNodeKey (emgReachableMany emg) (map emgProject)
+    --romes:todo:^ make sure we only get non-boot files out of this. perhaps as
+    --easy as filtering them out by ModNodeKeyWithUid with is boot information.
+
+  -- Reachability on 'ModuleGraph' (for --make mode)
+  | otherwise
+  = go hmgModKey mkNodeKey (mgReachableLoop hmGraph) (catMaybes . map hmgProject)
+
+  where
+    mkModuleNk m = ModNodeKeyWithUid (GWIB (moduleName m) NotBoot) (moduleUnitId m)
+    msg mod =
+      text "need to link module" <+> ppr mod <+>
+        text "and the modules below it, due to use of Template Haskell"
+
+    hmGraph = ldModuleGraph opts
+
+    hmgModKey m
+      | let k = NodeKey_Module (mkModuleNk m)
+      , mgMember hmGraph k = k
+      | otherwise = NodeKey_ExternalUnit (moduleUnitId m)
+
+    hmgProject = \case
+      NodeKey_Module with_uid  -> Just $ Left  with_uid
+      NodeKey_ExternalUnit uid -> Just $ Right uid
+      _                        -> Nothing
+
+    emgProject = \case
+      ExternalModuleKey with_uid -> Left  with_uid
+      ExternalPackageKey uid     -> Right uid
+
+    -- The main driver for getting dependencies, which calls the given
+    -- functions to compute the reachable nodes.
+    go :: (Module -> key)
+       -> (node -> key)
+       -> ([key] -> [node])
+       -> ([key] -> [Either ModNodeKeyWithUid UnitId])
+       -> IO ([ModNodeKeyWithUid], UniqDSet UnitId)
+    go modKey nodeKey manyReachable project
+      | let mod_keys = map modKey mods
+      = pure $ second mkUniqDSet $ partitionEithers $ project $
+          mod_keys ++ map nodeKey (manyReachable mod_keys)
+
 {-
 Note [Using Byte Code rather than Object Code for Template Haskell]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -322,6 +303,30 @@ The only other place where the flag is consulted is when enabling code generatio
 with `-fno-code`, which does so to anticipate what decision we will make at the
 splice point about what we would prefer.
 
+Note [Reachability in One-shot mode vs Make mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Why are there two code paths in `get_reachable_nodes`? (ldOneShotMode vs otherwise)
+
+In one-shot mode, the home package modules are loaded into the EPS,
+whereas for --make mode, the home package modules are in the HUG/HPT.
+
+For both of these cases, we cache the calculation of transitive
+dependencies in a 'ModuleGraph'. For the --make case, the relevant
+'ModuleGraph' is in the EPS, the other case uses the 'ModuleGraph'
+for the home modules.
+
+The home modules graph is known statically after downsweep.
+On the contrary, the EPS module graph is only extended when a
+module is loaded into the EPS -- which is done lazily as needed.
+Therefore, for get_link_deps, we need to force the transitive
+closure to be loaded before querying the graph for the reachable
+link dependencies -- done in the call to 'ldLoadHomeIfacesBelow'.
+Because we cache the transitive closure, this work is only done once.
+
+After forcing the modules with the call to 'ldLoadHomeIfacesBelow' in
+'get_reachable_nodes', the external module graph has all edges needed to
+compute the full transitive closure so we can proceed just like we do in the
+second path with a normal module graph.
 -}
 
 dieWith :: LinkDepsOpts -> SrcSpan -> SDoc -> IO a


=====================================
compiler/GHC/Linker/Loader.hs
=====================================
@@ -76,7 +76,7 @@ import GHC.Utils.Logger
 import GHC.Utils.TmpFs
 
 import GHC.Unit.Env
-import GHC.Unit.External (ExternalPackageState (EPS, eps_iface_bytecode))
+import GHC.Unit.External (ExternalPackageState (EPS, eps_iface_bytecode, eps_module_graph))
 import GHC.Unit.Module
 import GHC.Unit.State as Packages
 
@@ -603,12 +603,16 @@ initLinkDepsOpts hsc_env = opts
             , ldUseByteCode = gopt Opt_UseBytecodeRatherThanObjects dflags
             , ldMsgOpts     = initIfaceMessageOpts dflags
             , ldWays        = ways dflags
-            , ldLoadIface
+            , ldLoadHomeIfacesBelow
             , ldLoadByteCode
             }
     dflags = hsc_dflags hsc_env
-    ldLoadIface msg mod = initIfaceCheck (text "loader") hsc_env
-                          $ loadInterface msg mod (ImportByUser NotBoot)
+    ldLoadHomeIfacesBelow msg hu mods
+      = do
+        initIfaceCheck (text "loader") hsc_env
+          $ loadHomePackageInterfacesBelow msg hu mods
+        -- Read the module graph only after `loadHomePackageInterfacesBelow`
+        eps_module_graph <$> hscEPS hsc_env
 
     ldLoadByteCode mod = do
       EPS {eps_iface_bytecode} <- hscEPS hsc_env


=====================================
compiler/GHC/Unit/Module/External/Graph.hs
=====================================
@@ -1,6 +1,57 @@
--- | Like GHC.Unit.Module.Graph but for the ExternalModuleGraph which
---   is stored in the EPS.
-module GHC.Unit.Module.External.Graph where
+{-# LANGUAGE LambdaCase #-}
+
+-- | Like @'GHC.Unit.Module.Graph'@ but for the @'ExternalModuleGraph'@ which is
+-- stored in the EPS.
+module GHC.Unit.Module.External.Graph
+  ( -- * External Module Graph
+    --
+    -- | A module graph for the EPS.
+    ExternalModuleGraph, ExternalGraphNode(..)
+  , ExternalKey(..), emptyExternalModuleGraph
+  , emgNodeKey, emgNodeDeps
+
+    -- * Extending
+    --
+    -- | The @'ExternalModuleGraph'@ is a structure which is incrementally
+    -- updated as the 'ExternalPackageState' (EPS) is updated (when an iface is
+    -- loaded, in 'loadInterface').
+    --
+    -- Therefore, there is an operation for extending the 'ExternalModuleGraph',
+    -- unlike @'GHC.Unit.Module.Graph.ModuleGraph'@, which is constructed once
+    -- during downsweep and never altered (since all of the home units
+    -- dependencies are fully known then).
+  , extendExternalModuleGraph
+
+    -- * Loading
+    --
+    -- | As mentioned in the top-level haddocks for the
+    -- 'extendExternalModuleGraph', the external module graph is incrementally
+    -- updated as interfaces are loaded. This module graph keeps an additional
+    -- cache registering which modules have already been fully loaded.
+    --
+    -- This cache is necessary to quickly check when a full-transitive-closure
+    -- reachability query would be valid for some module.
+    --
+    -- Such a query may be invalid if ran on a module in the
+    -- 'ExternalModuleGraph' whose dependencies have /not yet/ been fully loaded
+    -- into the EPS.
+    -- (Recall that interfaces are lazily loaded, and the 'ExternalModuleGraph'
+    -- is only incrementally updated).
+    --
+    -- To guarantee the full transitive closure of a given module is completely
+    -- loaded into the EPS (i.e. all interfaces of the modules below this one
+    -- are also loaded), see @'loadHomePackageInterfacesBelow'@ in
+    -- 'GHC.Iface.Load'.
+  , isFullyLoadedModule
+  , setFullyLoadedModule
+
+    -- * Reachability
+    --
+    -- | Fast reachability queries on the external module graph. Similar to
+    -- reachability queries on 'GHC.Unit.Module.Graph'.
+  , emgReachable
+  , emgReachableMany
+  ) where
 
 import GHC.Prelude
 import GHC.Unit.Module.Graph
@@ -9,36 +60,48 @@ import GHC.Data.Graph.Directed
 import qualified Data.Map as M
 import qualified Data.Set as S
 import Data.Maybe
+import GHC.Utils.Outputable
+import GHC.Unit.Types (UnitId)
 
-data ExternalKey = ExternalModuleKey ModNodeKeyWithUid deriving (Eq, Ord)
+--------------------------------------------------------------------------------
+-- * Main
+--------------------------------------------------------------------------------
 
-data ExternalGraphNode = NodeHomePackage {
-                              externalNodeKey :: ModNodeKeyWithUid
-                             , externalNodeDeps :: [ExternalKey] }
-
-externalKey :: ExternalGraphNode -> ExternalKey
-externalKey (NodeHomePackage k _) = ExternalModuleKey k
+data ExternalModuleGraph = ExternalModuleGraph
+  { external_nodes :: [ExternalGraphNode]
+  , external_trans :: (ReachabilityIndex ExternalNode, ExternalKey -> Maybe ExternalNode)
+  , external_fully_loaded :: !(S.Set ExternalKey) }
 
 type ExternalNode = Node Int ExternalGraphNode
 
-data ExternalModuleGraph = ExternalModuleGraph
-                              { external_nodes :: [ExternalGraphNode]
-                              , external_trans :: (ReachabilityIndex ExternalNode, ExternalKey -> Maybe ExternalNode)
-                              , external_fully_loaded :: !(S.Set ExternalKey) }
+data ExternalGraphNode
+  -- | A node for a home package module that is inserted in the EPS.
+  --
+  -- INVARIANT: This type of node can only ever exist if compiling in one-shot
+  -- mode. In --make mode, it is imperative that the EPS doesn't have any home
+  -- package modules ever.
+  = NodeHomePackage
+      { externalNodeKey :: ModNodeKeyWithUid
+      , externalNodeDeps :: [ExternalKey] }
+  -- | A node for packages with at least one module loaded in the EPS.
+  --
+  -- Edge from A to NodeExternalPackage p when A has p as a direct package
+  -- dependency.
+  | NodeExternalPackage
+      { externalPkgKey :: UnitId
+      , externalPkgDeps :: [UnitId]
+      }
+
+data ExternalKey
+  = ExternalModuleKey ModNodeKeyWithUid
+  | ExternalPackageKey UnitId
+  deriving (Eq, Ord)
 
 emptyExternalModuleGraph :: ExternalModuleGraph
 emptyExternalModuleGraph = ExternalModuleGraph [] (graphReachability emptyGraph, const Nothing) S.empty
 
-extendExternalModuleGraph :: ExternalGraphNode -> ExternalModuleGraph -> ExternalModuleGraph
-extendExternalModuleGraph node graph = mkExternalModuleGraph (node : external_nodes graph) (external_fully_loaded graph)
-
-setFullyLoadedModule :: ExternalKey -> ExternalModuleGraph -> ExternalModuleGraph
-setFullyLoadedModule key graph = graph { external_fully_loaded = S.insert key (external_fully_loaded graph)}
-
-isFullyLoadedModule :: ExternalKey -> ExternalModuleGraph -> Bool
-isFullyLoadedModule key graph = S.member key (external_fully_loaded graph)
-
 mkExternalModuleGraph :: [ExternalGraphNode] -> S.Set ExternalKey -> ExternalModuleGraph
+-- romes:todo: does this also need to be defined in terms of extend (like for `ModuleGraph`?)
 mkExternalModuleGraph nodes loaded =
   ExternalModuleGraph {
       external_nodes = nodes
@@ -46,9 +109,60 @@ mkExternalModuleGraph nodes loaded =
                        in (graphReachability g, f)
     , external_fully_loaded = loaded  }
 
+-- | Get the dependencies of an 'ExternalNode'
+emgNodeDeps :: ExternalGraphNode -> [ExternalKey]
+emgNodeDeps = \case
+  NodeHomePackage _ dps -> dps
+  NodeExternalPackage _ dps -> map ExternalPackageKey dps
+
+-- | The graph key for a given node
+emgNodeKey :: ExternalGraphNode -> ExternalKey
+emgNodeKey (NodeHomePackage k _) = ExternalModuleKey k
+emgNodeKey (NodeExternalPackage k _) = ExternalPackageKey k
+
+--------------------------------------------------------------------------------
+-- * Extending
+--------------------------------------------------------------------------------
+
+extendExternalModuleGraph :: ExternalGraphNode -> ExternalModuleGraph -> ExternalModuleGraph
+extendExternalModuleGraph node graph = mkExternalModuleGraph (node : external_nodes graph) (external_fully_loaded graph)
+
+--------------------------------------------------------------------------------
+-- * Loading
+--------------------------------------------------------------------------------
+
+isFullyLoadedModule :: ExternalKey -> ExternalModuleGraph -> Bool
+isFullyLoadedModule key graph = S.member key (external_fully_loaded graph)
+
+setFullyLoadedModule :: ExternalKey -> ExternalModuleGraph -> ExternalModuleGraph
+setFullyLoadedModule key graph = graph { external_fully_loaded = S.insert key (external_fully_loaded graph)}
+
+--------------------------------------------------------------------------------
+-- * Reachability
+--------------------------------------------------------------------------------
+
+-- | Return all nodes reachable from the given key, also known as its full
+-- transitive closure.
+--
+-- @Nothing@ if the key couldn't be found in the graph.
+emgReachable :: ExternalModuleGraph -> ExternalKey -> Maybe [ExternalGraphNode]
+emgReachable mg nk = map node_payload <$> modules_below where
+  (td_map, lookup_node) = external_trans mg
+  modules_below =
+    allReachable td_map <$> lookup_node nk
+
+-- | Return all nodes reachable from all of the given keys.
+emgReachableMany :: ExternalModuleGraph -> [ExternalKey] -> [ExternalGraphNode]
+emgReachableMany mg nk = map node_payload modules_below where
+  (td_map, lookup_node) = external_trans mg
+  modules_below =
+    allReachableMany td_map (mapMaybe lookup_node nk)
+
+--------------------------------------------------------------------------------
+-- * Internals
+--------------------------------------------------------------------------------
+
 -- | Turn a list of graph nodes into an efficient queriable graph.
--- The first boolean parameter indicates whether nodes corresponding to hs-boot files
--- should be collapsed into their relevant hs nodes.
 externalGraphNodes ::
      [ExternalGraphNode]
   -> (Graph ExternalNode, ExternalKey -> Maybe ExternalNode)
@@ -60,7 +174,7 @@ externalGraphNodes summaries =
 
       where
         go (s, key) = DigraphNode s key $ out_edge_keys $
-                          (externalNodeDeps s)
+                          (emgNodeDeps s)
 
     numbered_summaries = zip summaries [1..]
 
@@ -72,10 +186,24 @@ externalGraphNodes summaries =
 
     node_map :: M.Map ExternalKey ExternalNode
     node_map =
-      M.fromList [ (externalKey s, node)
+      M.fromList [ (emgNodeKey s, node)
                    | node <- nodes
                    , let s = node_payload node
                    ]
 
     out_edge_keys :: [ExternalKey] -> [Int]
     out_edge_keys = mapMaybe lookup_key
+
+instance Outputable ExternalGraphNode where
+  ppr = \case
+    NodeHomePackage mk ds -> text "NodeHomePackage" <+> ppr mk <+> ppr ds
+    NodeExternalPackage mk ds -> text "NodeExternalPackage" <+> ppr mk <+> ppr ds
+
+instance Outputable ExternalKey where
+  ppr = \case
+    ExternalModuleKey mk -> text "ExternalModuleKey" <+> ppr mk
+    ExternalPackageKey uid -> text "ExternalPackageKey" <+> ppr uid
+
+instance Outputable ExternalModuleGraph where
+  ppr ExternalModuleGraph{external_nodes, external_fully_loaded}
+    = text "ExternalModuleGraph" <+> ppr external_nodes <+> ppr external_fully_loaded


=====================================
compiler/GHC/Unit/Module/Graph.hs
=====================================
@@ -301,6 +301,8 @@ mgMember graph k = isJust $ snd (mg_graph graph) k
 --------------------------------------------------------------------------------
 
 -- | Return all nodes reachable from the given 'NodeKey'.
+--
+-- @Nothing@ if the key couldn't be found in the graph.
 mgReachable :: ModuleGraph -> NodeKey -> Maybe [ModuleGraphNode]
 mgReachable mg nk = map summaryNodeSummary <$> modules_below where
   (td_map, lookup_node) = mg_graph mg



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a4db8f7223054067d1c3aa25b2eb637cd3605675...2ddafd2675385742880718683be57df7aa39d587

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a4db8f7223054067d1c3aa25b2eb637cd3605675...2ddafd2675385742880718683be57df7aa39d587
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241220/9c55f0c8/attachment-0001.html>


More information about the ghc-commits mailing list