[Git][ghc/ghc][wip/romes/graph-compact-easy] Improve reachability queries on ModuleGraph

Rodrigo Mesquita (@alt-romes) gitlab at gitlab.haskell.org
Wed Nov 20 15:23:55 UTC 2024



Rodrigo Mesquita pushed to branch wip/romes/graph-compact-easy at Glasgow Haskell Compiler / GHC


Commits:
48f02c05 by Rodrigo Mesquita at 2024-11-20T15:23:39+00:00
Improve reachability queries on ModuleGraph

Introduces `ReachabilityIndex`, an index constructed from a
`GHC.Data.Graph.Directed` `Graph` that supports fast reachability
queries (in $O(1)$). This abstract data structure is exposed from
`GHC.Data.Graph.Directed.Reachability`.

This index is constructed from the module graph nodes and cached in
`ModuleGraph`, enabling efficient reachability queries on the module
graph. Previously, we'd construct a Map of Set of ModuleGraph nodes
which used a lot of memory (`O(n^2)` in the number of nodes) and cache
that in the `ModuleGraph`. By using the reachability index we get rid of
this space leak in the module graph -- even though the index is still
quadratic in the number of modules, it is much, much more space
efficient due to its representation using an IntMap of IntSet as opposed
to the transitive closure we previously cached.

In a memory profile of MultiLayerModules with 100x100 modules, memory
usage improved from 6GB residency to 2.8GB, out of which roughly 1.8GB
are caused by a second space leak related to ModuleGraph. On the same
program, it brings compile time from 7.5s to 5.5s.

Note how we simplify `checkHomeUnitsClosed` in terms of
`isReachableMany` and by avoiding constructing a second graph with the
full transitive closure -- it suffices to answer the reachability query
on the full graph without collapsing the transitive closure completely
into nodes.

-------------------------
Metric Decrease:
    mhu-perf
-------------------------

- - - - -


9 changed files:

- compiler/GHC/Data/Graph/Directed.hs
- + compiler/GHC/Data/Graph/Directed/Internal.hs
- + compiler/GHC/Data/Graph/Directed/Reachability.hs
- compiler/GHC/Driver/Make.hs
- compiler/GHC/Linker/Deps.hs
- compiler/GHC/Unit/Module/Graph.hs
- compiler/ghc.cabal.in
- testsuite/tests/count-deps/CountDepsAst.stdout
- testsuite/tests/count-deps/CountDepsParser.stdout


Changes:

=====================================
compiler/GHC/Data/Graph/Directed.hs
=====================================
@@ -8,13 +8,14 @@
 
 module GHC.Data.Graph.Directed (
         Graph, graphFromEdgedVerticesOrd, graphFromEdgedVerticesUniq,
-        graphFromVerticesAndAdjacency,
+        graphFromVerticesAndAdjacency, emptyGraph,
 
         SCC(..), Node(..), G.flattenSCC, G.flattenSCCs,
         stronglyConnCompG,
         topologicalSortG,
         verticesG, edgesG, hasVertexG,
-        reachableG, reachablesG, transposeG, allReachable, allReachableCyclic, outgoingG,
+        reachablesG,
+        transposeG, outgoingG,
         emptyG,
 
         findCycle,
@@ -43,7 +44,6 @@ module GHC.Data.Graph.Directed (
 -- removed them since they were not used anywhere in GHC.
 ------------------------------------------------------------------------------
 
-
 import GHC.Prelude
 
 import GHC.Utils.Misc ( sortWith, count )
@@ -60,13 +60,13 @@ import qualified Data.Set as Set
 
 import qualified Data.Graph as G
 import Data.Graph ( Vertex, Bounds, SCC(..) ) -- Used in the underlying representation
-import Data.Tree
 import GHC.Types.Unique
 import GHC.Types.Unique.FM
-import qualified Data.IntMap as IM
-import qualified Data.IntSet as IS
-import qualified Data.Map as M
-import qualified Data.Set as S
+
+-- The graph internals are defined in the .Internal module so they can be
+-- imported by GHC.Data.Graph.Directed.Reachability while still allowing this
+-- module to export it abstractly.
+import GHC.Data.Graph.Directed.Internal
 
 {-
 ************************************************************************
@@ -86,14 +86,6 @@ Note [Nodes, keys, vertices]
         arranged densely in 0.n
 -}
 
-data Graph node = Graph {
-    gr_int_graph      :: IntGraph,
-    gr_vertex_to_node :: Vertex -> node,
-    gr_node_to_vertex :: node -> Maybe Vertex
-  }
-
-data Edge node = Edge node node
-
 {-| Representation for nodes of the Graph.
 
  * The @payload@ is user data, just carried around in this module
@@ -357,51 +349,22 @@ topologicalSortG :: Graph node -> [node]
 topologicalSortG graph = map (gr_vertex_to_node graph) result
   where result = {-# SCC "Digraph.topSort" #-} G.topSort (gr_int_graph graph)
 
-reachableG :: Graph node -> node -> [node]
-reachableG graph from = map (gr_vertex_to_node graph) result
-  where from_vertex = expectJust "reachableG" (gr_node_to_vertex graph from)
-        result = {-# SCC "Digraph.reachable" #-} reachable (gr_int_graph graph) [from_vertex]
-
 outgoingG :: Graph node -> node -> [node]
 outgoingG graph from = map (gr_vertex_to_node graph) result
-  where from_vertex = expectJust "reachableG" (gr_node_to_vertex graph from)
+  where from_vertex = expectJust "outgoingG" (gr_node_to_vertex graph from)
         result = gr_int_graph graph ! from_vertex
 
--- | Given a list of roots return all reachable nodes.
+-- | Given a list of roots, return all reachable nodes in topological order.
+-- Implemented using a depth-first traversal.
 reachablesG :: Graph node -> [node] -> [node]
 reachablesG graph froms = map (gr_vertex_to_node graph) result
   where result = {-# SCC "Digraph.reachable" #-}
                  reachable (gr_int_graph graph) vs
         vs = [ v | Just v <- map (gr_node_to_vertex graph) froms ]
 
--- | Efficiently construct a map which maps each key to it's set of transitive
--- dependencies. Only works on acyclic input.
-allReachable :: Ord key => Graph node -> (node -> key) -> M.Map key (S.Set key)
-allReachable = all_reachable reachableGraph
-
--- | Efficiently construct a map which maps each key to it's set of transitive
--- dependencies. Less efficient than @allReachable@, but works on cyclic input as well.
-allReachableCyclic :: Ord key => Graph node -> (node -> key) -> M.Map key (S.Set key)
-allReachableCyclic = all_reachable reachableGraphCyclic
-
-all_reachable :: Ord key => (IntGraph -> IM.IntMap IS.IntSet) -> Graph node -> (node -> key) -> M.Map key (S.Set key)
-all_reachable int_reachables (Graph g from _) keyOf =
-  M.fromList [(k, IS.foldr (\v' vs -> keyOf (from v') `S.insert` vs) S.empty vs)
-             | (v, vs) <- IM.toList int_graph
-             , let k = keyOf (from v)]
-  where
-    int_graph = int_reachables g
-
 hasVertexG :: Graph node -> node -> Bool
 hasVertexG graph node = isJust $ gr_node_to_vertex graph node
 
-verticesG :: Graph node -> [node]
-verticesG graph = map (gr_vertex_to_node graph) $ G.vertices (gr_int_graph graph)
-
-edgesG :: Graph node -> [Edge node]
-edgesG graph = map (\(v1, v2) -> Edge (v2n v1) (v2n v2)) $ G.edges (gr_int_graph graph)
-  where v2n = gr_vertex_to_node graph
-
 transposeG :: Graph node -> Graph node
 transposeG graph = Graph (G.transposeG (gr_int_graph graph))
                          (gr_vertex_to_node graph)
@@ -410,112 +373,10 @@ transposeG graph = Graph (G.transposeG (gr_int_graph graph))
 emptyG :: Graph node -> Bool
 emptyG g = graphEmpty (gr_int_graph g)
 
-{-
-************************************************************************
-*                                                                      *
-*      Showing Graphs
-*                                                                      *
-************************************************************************
--}
-
-instance Outputable node => Outputable (Graph node) where
-    ppr graph = vcat [
-                  hang (text "Vertices:") 2 (vcat (map ppr $ verticesG graph)),
-                  hang (text "Edges:") 2 (vcat (map ppr $ edgesG graph))
-                ]
-
-instance Outputable node => Outputable (Edge node) where
-    ppr (Edge from to) = ppr from <+> text "->" <+> ppr to
-
 graphEmpty :: G.Graph -> Bool
 graphEmpty g = lo > hi
   where (lo, hi) = bounds g
 
-{-
-************************************************************************
-*                                                                      *
-*      IntGraphs
-*                                                                      *
-************************************************************************
--}
-
-type IntGraph = G.Graph
-
-{-
-------------------------------------------------------------
--- Depth first search numbering
-------------------------------------------------------------
--}
-
--- Data.Tree has flatten for Tree, but nothing for Forest
-preorderF           :: Forest a -> [a]
-preorderF ts         = concatMap flatten ts
-
-{-
-------------------------------------------------------------
--- Finding reachable vertices
-------------------------------------------------------------
--}
-
--- This generalizes reachable which was found in Data.Graph
-reachable    :: IntGraph -> [Vertex] -> [Vertex]
-reachable g vs = preorderF (G.dfs g vs)
-
-reachableGraph :: IntGraph -> IM.IntMap IS.IntSet
-reachableGraph g = res
-  where
-    do_one v = IS.unions (IS.fromList (g ! v) : mapMaybe (flip IM.lookup res) (g ! v))
-    res = IM.fromList [(v, do_one v) | v <- G.vertices g]
-
-scc :: IntGraph -> [SCC Vertex]
-scc graph = map decode forest
-  where
-    forest = {-# SCC "Digraph.scc" #-} G.scc graph
-
-    decode (Node v []) | mentions_itself v = CyclicSCC [v]
-                       | otherwise         = AcyclicSCC v
-    decode other = CyclicSCC (dec other [])
-      where dec (Node v ts) vs = v : foldr dec vs ts
-    mentions_itself v = v `elem` (graph ! v)
-
-reachableGraphCyclic :: IntGraph -> IM.IntMap IS.IntSet
-reachableGraphCyclic g = foldl' add_one_comp mempty comps
-  where
-    neighboursOf v = g!v
-
-    comps = scc g
-
-    -- To avoid divergence on cyclic input, we build the result
-    -- strongly connected component by component, in topological
-    -- order. For each SCC, we know that:
-    --
-    --   * All vertices in the component can reach all other vertices
-    --     in the component ("local" reachables)
-    --
-    --   * Other reachable vertices ("remote" reachables) must come
-    --     from earlier components, either via direct neighbourhood, or
-    --     transitively from earlier reachability map
-    --
-    -- This allows us to build the extension of the reachability map
-    -- directly, without any self-reference, thereby avoiding a loop.
-    add_one_comp :: IM.IntMap IS.IntSet -> SCC Vertex -> IM.IntMap IS.IntSet
-    add_one_comp earlier (AcyclicSCC v) = IM.insert v all_remotes earlier
-      where
-        earlier_neighbours = neighboursOf v
-        earlier_further = mapMaybe (flip IM.lookup earlier) earlier_neighbours
-        all_remotes = IS.unions (IS.fromList earlier_neighbours : earlier_further)
-    add_one_comp earlier (CyclicSCC vs) = IM.union (IM.fromList [(v, local v `IS.union` all_remotes) | v <- vs]) earlier
-      where
-        all_locals = IS.fromList vs
-        local v = IS.delete v all_locals
-            -- Arguably, for a cyclic SCC we should include each
-            -- vertex in its own reachable set. However, this could
-            -- lead to a lot of extra pain in client code to avoid
-            -- looping when traversing the reachability map.
-        all_neighbours = IS.fromList (concatMap neighboursOf vs)
-        earlier_neighbours = all_neighbours IS.\\ all_locals
-        earlier_further = mapMaybe (flip IM.lookup earlier) (IS.toList earlier_neighbours)
-        all_remotes = IS.unions (earlier_neighbours : earlier_further)
 
 {-
 ************************************************************************
@@ -623,3 +484,4 @@ graphFromVerticesAndAdjacency vertices edges = Graph graph vertex_node (key_vert
                                   expectJust "graphFromVerticesAndAdjacency" $ key_vertex b)
         reduced_edges = map key_vertex_pair edges
         graph = G.buildG bounds reduced_edges
+


=====================================
compiler/GHC/Data/Graph/Directed/Internal.hs
=====================================
@@ -0,0 +1,79 @@
+module GHC.Data.Graph.Directed.Internal where
+
+import GHC.Prelude
+import GHC.Utils.Outputable
+
+import Data.Array
+import qualified Data.Graph as G
+import Data.Graph ( Vertex, SCC(..) ) -- Used in the underlying representation
+import Data.Tree
+
+data Graph node = Graph {
+    gr_int_graph      :: IntGraph,
+    gr_vertex_to_node :: Vertex -> node,
+    gr_node_to_vertex :: node -> Maybe Vertex
+}
+
+data Edge node = Edge node node
+
+------------------------------------------------------------
+-- Nodes and Edges
+------------------------------------------------------------
+
+verticesG :: Graph node -> [node]
+verticesG graph = map (gr_vertex_to_node graph) $ G.vertices (gr_int_graph graph)
+
+edgesG :: Graph node -> [Edge node]
+edgesG graph = map (\(v1, v2) -> Edge (v2n v1) (v2n v2)) $ G.edges (gr_int_graph graph)
+  where v2n = gr_vertex_to_node graph
+
+------------------------------------------------------------
+-- Showing Graphs
+------------------------------------------------------------
+
+instance Outputable node => Outputable (Graph node) where
+    ppr graph = vcat [
+                  hang (text "Vertices:") 2 (vcat (map ppr $ verticesG graph)),
+                  hang (text "Edges:") 2 (vcat (map ppr $ edgesG graph))
+                ]
+
+instance Outputable node => Outputable (Edge node) where
+    ppr (Edge from to) = ppr from <+> text "->" <+> ppr to
+
+{-
+************************************************************************
+*                                                                      *
+*      IntGraphs
+*                                                                      *
+************************************************************************
+-}
+
+type IntGraph = G.Graph
+
+------------------------------------------------------------
+-- Depth first search numbering
+------------------------------------------------------------
+
+-- Data.Tree has flatten for Tree, but nothing for Forest
+preorderF           :: Forest a -> [a]
+preorderF ts         = concatMap flatten ts
+
+------------------------------------------------------------
+-- Finding reachable vertices
+------------------------------------------------------------
+
+-- This generalizes reachable which was found in Data.Graph
+reachable    :: IntGraph -> [Vertex] -> [Vertex]
+reachable g vs = preorderF (G.dfs g vs)
+
+scc :: IntGraph -> [SCC Vertex]
+scc graph = map decode forest
+  where
+    forest = {-# SCC "Digraph.scc" #-} G.scc graph
+
+    decode (Node v []) | mentions_itself v = CyclicSCC [v]
+                       | otherwise         = AcyclicSCC v
+    decode other = CyclicSCC (dec other [])
+      where dec (Node v ts) vs = v : foldr dec vs ts
+    mentions_itself v = v `elem` (graph ! v)
+


=====================================
compiler/GHC/Data/Graph/Directed/Reachability.hs
=====================================
@@ -0,0 +1,168 @@
+-- | An abstract interface for a fast reachability data structure constructed
+-- from a 'GHC.Data.Graph.Directed' graph.
+module GHC.Data.Graph.Directed.Reachability
+  ( ReachabilityIndex
+
+  -- * Constructing a reachability index
+  , graphReachability, cyclicGraphReachability
+
+  -- * Reachability queries
+  , allReachable, allReachableMany
+  , isReachable, isReachableMany
+  )
+  where
+
+import GHC.Prelude
+import GHC.Data.Maybe
+
+import qualified Data.Graph as G
+import Data.Graph ( Vertex, SCC(..) )
+
+import Data.Array ((!))
+import qualified Data.IntMap as IM
+import qualified Data.IntSet as IS
+
+import GHC.Data.Graph.Directed.Internal
+
+--------------------------------------------------------------------------------
+-- * Reachability index
+--------------------------------------------------------------------------------
+
+-- | The abstract data structure for fast reachability queries
+data ReachabilityIndex node = ReachabilityIndex {
+    index :: IM.IntMap IS.IntSet,
+    from_vertex :: Vertex -> node,
+    to_vertex :: node -> Maybe Vertex
+}
+
+--------------------------------------------------------------------------------
+-- * Construction
+--------------------------------------------------------------------------------
+
+-- | Construct a 'ReachabilityIndex' from an acyclic 'Graph'.
+-- If the graph can have cycles, use 'cyclicGraphReachability'
+graphReachability :: Graph node -> ReachabilityIndex node
+graphReachability (Graph g from to) =
+  ReachabilityIndex{index = reachableGraph, from_vertex = from, to_vertex = to}
+    where
+      reachableGraph :: IM.IntMap IS.IntSet
+      reachableGraph = IM.fromList [(v, do_one v) | v <- G.vertices g]
+
+      do_one v = IS.unions (IS.fromList (g ! v) : mapMaybe (flip IM.lookup reachableGraph) (g ! v))
+
+-- | Construct a 'ReachabilityIndex' from a 'Graph' which may have cycles.
+cyclicGraphReachability :: Graph node -> ReachabilityIndex node
+cyclicGraphReachability (Graph g from to) =
+  ReachabilityIndex{index = reachableGraphCyclic, from_vertex = from, to_vertex = to}
+    where
+      reachableGraphCyclic :: IM.IntMap IS.IntSet
+      reachableGraphCyclic = foldl' add_one_comp mempty comps
+
+      neighboursOf v = g!v
+
+      comps = scc g
+
+      -- To avoid divergence on cyclic input, we build the result
+      -- strongly connected component by component, in topological
+      -- order. For each SCC, we know that:
+      --
+      --   * All vertices in the component can reach all other vertices
+      --     in the component ("local" reachables)
+      --
+      --   * Other reachable vertices ("remote" reachables) must come
+      --     from earlier components, either via direct neighbourhood, or
+      --     transitively from earlier reachability map
+      --
+      -- This allows us to build the extension of the reachability map
+      -- directly, without any self-reference, thereby avoiding a loop.
+      add_one_comp :: IM.IntMap IS.IntSet -> SCC Vertex -> IM.IntMap IS.IntSet
+      add_one_comp earlier (AcyclicSCC v) = IM.insert v all_remotes earlier
+        where
+          earlier_neighbours = neighboursOf v
+          earlier_further = mapMaybe (flip IM.lookup earlier) earlier_neighbours
+          all_remotes = IS.unions (IS.fromList earlier_neighbours : earlier_further)
+      add_one_comp earlier (CyclicSCC vs) = IM.union (IM.fromList [(v, local v `IS.union` all_remotes) | v <- vs]) earlier
+        where
+          all_locals = IS.fromList vs
+          local v = IS.delete v all_locals
+              -- Arguably, for a cyclic SCC we should include each
+              -- vertex in its own reachable set. However, this could
+              -- lead to a lot of extra pain in client code to avoid
+              -- looping when traversing the reachability map.
+          all_neighbours = IS.fromList (concatMap neighboursOf vs)
+          earlier_neighbours = all_neighbours IS.\\ all_locals
+          earlier_further = mapMaybe (flip IM.lookup earlier) (IS.toList earlier_neighbours)
+          all_remotes = IS.unions (earlier_neighbours : earlier_further)
+
+--------------------------------------------------------------------------------
+-- * Reachability queries
+--------------------------------------------------------------------------------
+
+-- | 'allReachable' returns the nodes reachable from the given @root@ node.
+--
+-- Properties:
+--  * The list of nodes /does not/ include the @root@ node!
+--  * The list of nodes is deterministically ordered, but according to an
+--     internal order determined by the indices attributed to graph nodes.
+--  * This function has $O(1)$ complexity.
+--
+-- If you need a topologically sorted list, consider using the functions exposed from 'GHC.Data.Graph.Directed' on 'Graph' instead.
+allReachable :: ReachabilityIndex node -> node {-^ The @root@ node -} -> [node] {-^ All nodes reachable from @root@ -}
+allReachable (ReachabilityIndex index from to) root = map from result
+  where root_i = expectJust "reachableFrom" (to root)
+        hits = {-# SCC "allReachable" #-} IM.lookup root_i index
+        result = IS.toList $! expectJust "reachableFrom" hits
+
+-- | 'allReachableMany' returns all nodes reachable from the many given @roots@.
+--
+-- Properties:
+--  * The list of nodes /does not/ include a root, unless it is reachable from another of the @roots@!
+--  * The list of nodes is deterministically ordered, but according to an
+--     internal order determined by the indices attributed to graph nodes.
+--  * This function has $O(n)$ complexity where $n$ is the number of @roots@.
+--
+-- If you need a topologically sorted list, consider using the functions
+-- exposed from 'GHC.Data.Graph.Directed' on 'Graph' instead ('reachablesG').
+allReachableMany :: ReachabilityIndex node -> [node] {-^ The @roots@ -} -> [node] {-^ All nodes reachable from all @roots@ -}
+allReachableMany (ReachabilityIndex index from to) roots = map from (IS.toList hits)
+  where roots_i = [ v | Just v <- map to roots ]
+        hits = {-# SCC "allReachableMany" #-}
+               IS.unions $ map (expectJust "reachablesG" . flip IM.lookup index) roots_i
+
+-- | Fast reachability query.
+--
+-- On graph @g@ with nodes @a@ and @b@, @isReachable g a b@
+-- asks whether @b@ can be reached through @g@ starting from @a@.
+--
+-- Properties:
+--  * No self loops, i.e. @isReachable _ a a == False@
+--  * This function has $O(1)$ complexity.
+isReachable :: ReachabilityIndex node {-^ @g@ -}
+            -> node -- ^ @a@
+            -> node -- ^ @b@
+            -> Bool -- ^ @b@ is reachable from @a@
+isReachable (ReachabilityIndex index _ to) a b =
+    IS.member b_i $
+    expectJust "reachable" $ IM.lookup a_i index
+  where a_i = expectJust "reachable:node not in graph" $ to a
+        b_i = expectJust "reachable:node not in graph" $ to b
+
+-- | Fast reachability query with many roots.
+--
+-- On graph @g@ with many nodes @roots@ and node @b@, @isReachableMany g roots b@
+-- asks whether @b@ can be reached through @g@ from any of the @roots@.
+--
+-- Properties:
+--  * No self loops, i.e. @isReachableMany _ [a] a == False@
+--  * This function is $O(n)$ in the number of roots
+isReachableMany :: ReachabilityIndex node -- ^ @g@
+                -> [node] -- ^ @roots@
+                -> node -- ^ @b@
+                -> Bool -- ^ @b@ is reachable from any of the @roots@
+isReachableMany (ReachabilityIndex index _ to) roots b =
+    IS.member b_i $
+    IS.unions $
+    map (expectJust "reachablesQuery" . flip IM.lookup index) roots_i
+  where roots_i = [ v | Just v <- map to roots ]
+        b_i = expectJust "reachablesQuery:node not in graph" $ to b
+


=====================================
compiler/GHC/Driver/Make.hs
=====================================
@@ -7,6 +7,7 @@
 {-# LANGUAGE RecordWildCards #-}
 {-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE BlockArguments #-}
+{-# LANGUAGE ViewPatterns #-}
 
 -- -----------------------------------------------------------------------------
 --
@@ -149,6 +150,7 @@ import GHC.Types.Unique
 import GHC.Iface.Errors.Types
 
 import qualified GHC.Data.Word64Set as W
+import GHC.Data.Graph.Directed.Reachability
 
 -- -----------------------------------------------------------------------------
 -- Loading the program
@@ -610,20 +612,20 @@ createBuildPlan mod_graph maybe_top_mod =
               mresolved_cycle = collapseSCC (topSortWithBoot nodes)
           in acyclic ++ [either UnresolvedCycle ResolvedCycle mresolved_cycle] ++ toBuildPlan sccs []
 
-        (mg, lookup_node) = moduleGraphNodes False (mgModSummaries' mod_graph)
-        trans_deps_map = allReachable mg (mkNodeKey . node_payload)
         -- Compute the intermediate modules between a file and its hs-boot file.
         -- See Step 2a in Note [Upsweep]
         boot_path mn uid =
-          map (summaryNodeSummary . expectJust "toNode" . lookup_node) $ Set.toList $
+          Set.toList $
           -- Don't include the boot module itself
-          Set.delete (NodeKey_Module (key IsBoot))  $
+          Set.filter ((/= NodeKey_Module (key IsBoot)) . mkNodeKey)  $
           -- Keep intermediate dependencies: as per Step 2a in Note [Upsweep], these are
           -- the transitive dependencies of the non-boot file which transitively depend
           -- on the boot file.
-          Set.filter (\nk -> nodeKeyUnitId nk == uid  -- Cheap test
-                              && (NodeKey_Module (key IsBoot)) `Set.member` expectJust "dep_on_boot" (M.lookup nk trans_deps_map)) $
-          expectJust "not_boot_dep" (M.lookup (NodeKey_Module (key NotBoot)) trans_deps_map)
+          Set.filter (\(mkNodeKey -> nk) ->
+            nodeKeyUnitId nk == uid  -- Cheap test
+              && mgQuery mod_graph nk (NodeKey_Module (key IsBoot))) $
+          Set.fromList $
+          expectJust "not_boot_dep"  (mgReachable mod_graph (NodeKey_Module (key NotBoot)))
           where
             key ib = ModNodeKeyWithUid (GWIB mn ib) uid
 
@@ -1497,7 +1499,7 @@ topSortModules drop_hs_boot_nodes summaries mb_root_mod
                      = node
                      | otherwise
                      = throwGhcException (ProgramError "module does not exist")
-            in graphFromEdgedVerticesUniq (seq root (reachableG graph root))
+            in graphFromEdgedVerticesUniq (seq root (root:allReachable (graphReachability graph) root))
 
 newtype ModNodeMap a = ModNodeMap { unModNodeMap :: Map.Map ModNodeKey a }
   deriving (Functor, Traversable, Foldable)
@@ -1821,20 +1823,15 @@ checkHomeUnitsClosed ue
     | otherwise = [singleMessage $ mkPlainErrorMsgEnvelope rootLoc $ DriverHomePackagesNotClosed (Set.toList bad_unit_ids)]
   where
     home_id_set = unitEnv_keys $ ue_home_unit_graph ue
-    bad_unit_ids = upwards_closure Set.\\ home_id_set
+    bad_unit_ids = upwards_closure Set.\\ home_id_set {- Remove all home units reached, keep only bad nodes -}
     rootLoc = mkGeneralSrcSpan (fsLit "<command line>")
 
-    graph :: Graph (Node UnitId UnitId)
-    graph = graphFromEdgedVerticesUniq graphNodes
+    downwards_closure :: Graph (Node UnitId UnitId)
+    downwards_closure = graphFromEdgedVerticesUniq graphNodes
 
-    -- downwards closure of graph
-    downwards_closure
-      = graphFromEdgedVerticesUniq [ DigraphNode uid uid (Set.toList deps)
-                                   | (uid, deps) <- M.toList (allReachable graph node_key)]
+    inverse_closure = graphReachability $ transposeG downwards_closure
 
-    inverse_closure = transposeG downwards_closure
-
-    upwards_closure = Set.fromList $ map node_key $ reachablesG inverse_closure [DigraphNode uid uid [] | uid <- Set.toList home_id_set]
+    upwards_closure = Set.fromList $ map node_key $ allReachableMany inverse_closure [DigraphNode uid uid [] | uid <- Set.toList home_id_set]
 
     all_unit_direct_deps :: UniqMap UnitId (Set.Set UnitId)
     all_unit_direct_deps


=====================================
compiler/GHC/Linker/Deps.hs
=====================================
@@ -51,7 +51,6 @@ import Control.Monad
 import Control.Applicative
 
 import qualified Data.Set as Set
-import qualified Data.Map as M
 import Data.List (isSuffixOf)
 
 import System.FilePath
@@ -166,16 +165,16 @@ get_link_deps opts pls maybe_normal_osuf span mods = do
     make_deps_loop found@(found_units, found_mods) (nk:nexts)
       | NodeKey_Module nk `Set.member` found_mods = make_deps_loop found nexts
       | otherwise =
-        case M.lookup (NodeKey_Module nk) (mgTransDeps mod_graph) of
-            Just trans_deps ->
-              let deps = Set.insert (NodeKey_Module nk) trans_deps
-                  -- See #936 and the ghci.prog007 test for why we have to continue traversing through
-                  -- boot modules.
-                  todo_boot_mods = [ModNodeKeyWithUid (GWIB mn NotBoot) uid | NodeKey_Module (ModNodeKeyWithUid (GWIB mn IsBoot) uid) <- Set.toList trans_deps]
-              in make_deps_loop (found_units, deps `Set.union` found_mods) (todo_boot_mods ++ nexts)
-            Nothing ->
+        case fmap mkNodeKey <$> mgReachable mod_graph (NodeKey_Module nk) of
+          Nothing ->
               let (ModNodeKeyWithUid _ uid) = nk
               in make_deps_loop (addOneToUniqDSet found_units uid, found_mods) nexts
+          Just trans_deps ->
+            let deps = Set.insert (NodeKey_Module nk) (Set.fromList trans_deps)
+                -- See #936 and the ghci.prog007 test for why we have to continue traversing through
+                -- boot modules.
+                todo_boot_mods = [ModNodeKeyWithUid (GWIB mn NotBoot) uid | NodeKey_Module (ModNodeKeyWithUid (GWIB mn IsBoot) uid) <- trans_deps]
+            in make_deps_loop (found_units, deps `Set.union` found_mods) (todo_boot_mods ++ nexts)
 
     mkNk m = ModNodeKeyWithUid (GWIB (moduleName m) NotBoot) (moduleUnitId m)
     (init_pkg_set, all_deps) = make_deps_loop (emptyUniqDSet, Set.empty) $ map mkNk (filterOut isInteractiveModule mods)


=====================================
compiler/GHC/Unit/Module/Graph.hs
=====================================
@@ -18,11 +18,12 @@ module GHC.Unit.Module.Graph
    , mgModSummaries
    , mgModSummaries'
    , mgLookupModule
-   , mgTransDeps
    , showModMsg
    , moduleGraphNodeModule
    , moduleGraphNodeModSum
    , moduleGraphModulesBelow
+   , mgReachable
+   , mgQuery
 
    , moduleGraphNodes
    , SummaryNode
@@ -49,6 +50,7 @@ import qualified GHC.LanguageExtensions as LangExt
 
 import GHC.Data.Maybe
 import GHC.Data.Graph.Directed
+import GHC.Data.Graph.Directed.Reachability
 
 import GHC.Driver.Backend
 import GHC.Driver.DynFlags
@@ -72,6 +74,7 @@ import Data.Bifunctor
 import Data.Function
 import Data.List (sort)
 import GHC.Data.List.SetOps
+import GHC.Stack
 
 -- | A '@ModuleGraphNode@' is a node in the '@ModuleGraph@'.
 -- Edges between nodes mark dependencies arising from module imports
@@ -153,7 +156,7 @@ instance Outputable ModNodeKeyWithUid where
 -- 'GHC.topSortModuleGraph' and 'GHC.Data.Graph.Directed.flattenSCC' to achieve this.
 data ModuleGraph = ModuleGraph
   { mg_mss :: [ModuleGraphNode]
-  , mg_trans_deps :: Map.Map NodeKey (Set.Set NodeKey)
+  , mg_graph :: (ReachabilityIndex SummaryNode, NodeKey -> Maybe SummaryNode)
     -- A cached transitive dependency calculation so that a lot of work is not
     -- repeated whenever the transitive dependencies need to be calculated (for example, hptInstances)
   }
@@ -173,12 +176,11 @@ unionMG a b =
   let new_mss = nubOrdBy compare $ mg_mss a `mappend` mg_mss b
   in ModuleGraph {
         mg_mss = new_mss
-      , mg_trans_deps = mkTransDeps new_mss
+      , mg_graph = mkTransDeps new_mss
       }
 
-
-mgTransDeps :: ModuleGraph -> Map.Map NodeKey (Set.Set NodeKey)
-mgTransDeps = mg_trans_deps
+mkTransDeps :: [ModuleGraphNode] -> (ReachabilityIndex SummaryNode, NodeKey -> Maybe SummaryNode)
+mkTransDeps = first graphReachability {- module graph is acyclic -} . moduleGraphNodes False
 
 mgModSummaries :: ModuleGraph -> [ModSummary]
 mgModSummaries mg = [ m | ModuleNode _ m <- mgModSummaries' mg ]
@@ -199,7 +201,7 @@ mgLookupModule ModuleGraph{..} m = listToMaybe $ mapMaybe go mg_mss
     go _ = Nothing
 
 emptyMG :: ModuleGraph
-emptyMG = ModuleGraph [] Map.empty
+emptyMG = ModuleGraph [] (graphReachability emptyGraph, const Nothing)
 
 isTemplateHaskellOrQQNonBoot :: ModSummary -> Bool
 isTemplateHaskellOrQQNonBoot ms =
@@ -212,14 +214,9 @@ isTemplateHaskellOrQQNonBoot ms =
 extendMG :: ModuleGraph -> [NodeKey] -> ModSummary -> ModuleGraph
 extendMG ModuleGraph{..} deps ms = ModuleGraph
   { mg_mss = ModuleNode deps ms : mg_mss
-  , mg_trans_deps = mkTransDeps (ModuleNode deps ms : mg_mss)
+  , mg_graph = mkTransDeps (ModuleNode deps ms : mg_mss)
   }
 
-mkTransDeps :: [ModuleGraphNode] -> Map.Map NodeKey (Set.Set NodeKey)
-mkTransDeps mss =
-  let (gg, _lookup_node) = moduleGraphNodes False mss
-  in allReachable gg (mkNodeKey . node_payload)
-
 extendMGInst :: ModuleGraph -> UnitId -> InstantiatedUnit -> ModuleGraph
 extendMGInst mg uid depUnitId = mg
   { mg_mss = InstantiationNode uid depUnitId : mg_mss mg
@@ -394,12 +391,9 @@ type ModNodeKey = ModuleNameWithIsBoot
 -- boot module and the non-boot module can be reached, it only returns the
 -- non-boot one.
 moduleGraphModulesBelow :: ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> Set ModNodeKeyWithUid
-moduleGraphModulesBelow mg uid mn = filtered_mods $ [ mn |  NodeKey_Module mn <- modules_below]
+moduleGraphModulesBelow mg uid mn = filtered_mods [ mn | NodeKey_Module mn <- modules_below ]
   where
-    td_map = mgTransDeps mg
-
-    modules_below = maybe [] Set.toList $ Map.lookup (NodeKey_Module (ModNodeKeyWithUid mn uid)) td_map
-
+    modules_below = maybe [] (map mkNodeKey) (mgReachable mg (NodeKey_Module (ModNodeKeyWithUid mn uid)))
     filtered_mods = Set.fromDistinctAscList . filter_mods . sort
 
     -- IsBoot and NotBoot modules are necessarily consecutive in the sorted list
@@ -415,3 +409,22 @@ moduleGraphModulesBelow mg uid mn = filtered_mods $ [ mn |  NodeKey_Module mn <-
                        in r' : filter_mods rs
         | otherwise -> r1 : filter_mods (r2:rs)
       rs -> rs
+
+mgReachable :: HasCallStack => ModuleGraph -> NodeKey -> Maybe [ModuleGraphNode]
+mgReachable mg nk = map summaryNodeSummary <$> modules_below where
+  (td_map, lookup_node) = mg_graph mg
+  modules_below =
+    allReachable td_map <$> lookup_node nk
+
+-- | Reachability Query. @mgQuery(g, a, b)@ asks: Can we reach @b@ from @a@ in
+-- graph @g@?
+-- INVARIANT: Both @a@ and @b@ must be in @g@.
+mgQuery :: ModuleGraph -- ^ @g@
+        -> NodeKey -- ^ @a@
+        -> NodeKey -- ^ @b@
+        -> Bool -- ^ @b@ is reachable from @a@
+mgQuery mg nka nkb = isReachable td_map na nb where
+  (td_map, lookup_node) = mg_graph mg
+  na = expectJust "mgQuery:a" $ lookup_node nka
+  nb = expectJust "mgQuery:b" $ lookup_node nkb
+


=====================================
compiler/ghc.cabal.in
=====================================
@@ -444,6 +444,8 @@ Library
         GHC.Data.Graph.Color
         GHC.Data.Graph.Collapse
         GHC.Data.Graph.Directed
+        GHC.Data.Graph.Directed.Internal
+        GHC.Data.Graph.Directed.Reachability
         GHC.Data.Graph.Inductive.Graph
         GHC.Data.Graph.Inductive.PatriciaTree
         GHC.Data.Graph.Ops


=====================================
testsuite/tests/count-deps/CountDepsAst.stdout
=====================================
@@ -66,6 +66,7 @@ GHC.Data.FastString.Env
 GHC.Data.FiniteMap
 GHC.Data.FlatBag
 GHC.Data.Graph.Directed
+GHC.Data.Graph.Directed.Internal
 GHC.Data.Graph.UnVar
 GHC.Data.List.Infinite
 GHC.Data.List.SetOps


=====================================
testsuite/tests/count-deps/CountDepsParser.stdout
=====================================
@@ -67,6 +67,8 @@ GHC.Data.FastString.Env
 GHC.Data.FiniteMap
 GHC.Data.FlatBag
 GHC.Data.Graph.Directed
+GHC.Data.Graph.Directed.Internal
+GHC.Data.Graph.Directed.Reachability
 GHC.Data.Graph.UnVar
 GHC.Data.List.Infinite
 GHC.Data.List.SetOps



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/48f02c056e4534d6eb4c5eb5238dbad924072826

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/48f02c056e4534d6eb4c5eb5238dbad924072826
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241120/b485a2c5/attachment-0001.html>


More information about the ghc-commits mailing list