[Git][ghc/ghc][wip/romes/graph-compact-easy] 3 commits: A start on module graphs
Rodrigo Mesquita (@alt-romes)
gitlab at gitlab.haskell.org
Tue Nov 19 16:44:48 UTC 2024
Rodrigo Mesquita pushed to branch wip/romes/graph-compact-easy at Glasgow Haskell Compiler / GHC
Commits:
01bd0f92 by Rodrigo Mesquita at 2024-11-19T15:56:13+00:00
A start on module graphs
- - - - -
c71edace by Rodrigo Mesquita at 2024-11-19T15:56:38+00:00
Better getLinkDeps
- - - - -
0b67174d by Matthew Pickering at 2024-11-19T15:56:42+00:00
MP fixes
- - - - -
4 changed files:
- compiler/GHC/Data/Graph/Directed.hs
- compiler/GHC/Driver/Make.hs
- compiler/GHC/Linker/Deps.hs
- compiler/GHC/Unit/Module/Graph.hs
Changes:
=====================================
compiler/GHC/Data/Graph/Directed.hs
=====================================
@@ -14,7 +14,8 @@ module GHC.Data.Graph.Directed (
stronglyConnCompG,
topologicalSortG,
verticesG, edgesG, hasVertexG,
- reachableG, reachablesG, transposeG, allReachable, allReachableCyclic, outgoingG,
+ reachableFromG, reachablesG,
+ transposeG, outgoingG,
emptyG,
findCycle,
@@ -26,7 +27,10 @@ module GHC.Data.Graph.Directed (
stronglyConnCompFromEdgedVerticesUniqR,
-- Simple way to classify edges
- EdgeType(..), classifyEdges
+ EdgeType(..), classifyEdges,
+
+ ReachabilityIndex, reachabilityIndex, emptyGraph, nodeLookupByIx, ixLookupByNode,
+ reachableQuery
) where
------------------------------------------------------------------------------
@@ -65,8 +69,6 @@ import GHC.Types.Unique
import GHC.Types.Unique.FM
import qualified Data.IntMap as IM
import qualified Data.IntSet as IS
-import qualified Data.Map as M
-import qualified Data.Set as S
{-
************************************************************************
@@ -89,7 +91,10 @@ Note [Nodes, keys, vertices]
data Graph node = Graph {
gr_int_graph :: IntGraph,
gr_vertex_to_node :: Vertex -> node,
- gr_node_to_vertex :: node -> Maybe Vertex
+ gr_node_to_vertex :: node -> Maybe Vertex,
+ gr_reachability :: ReachabilityIndex
+ -- ^ We cache a reachability index to answer reachability queries faster.
+ -- romes:todo: Is it worth it to cache this for all graphs, or should it be ModuleGraph specific?
}
data Edge node = Edge node node
@@ -116,7 +121,7 @@ instance (Outputable a, Outputable b) => Outputable (Node a b) where
ppr (DigraphNode a b c) = ppr (a, b, c)
emptyGraph :: Graph a
-emptyGraph = Graph (array (1, 0) []) (error "emptyGraph") (const Nothing)
+emptyGraph = Graph (array (1, 0) []) (error "emptyGraph") (const Nothing) mempty
-- See Note [Deterministic SCC]
graphFromEdgedVertices
@@ -127,7 +132,7 @@ graphFromEdgedVertices
-> Graph (Node key payload)
graphFromEdgedVertices _reduceFn [] = emptyGraph
graphFromEdgedVertices reduceFn edged_vertices =
- Graph graph vertex_fn (key_vertex . key_extractor)
+ Graph graph vertex_fn (key_vertex . key_extractor) (reachableGraph graph)
where key_extractor = node_key
(bounds, vertex_fn, key_vertex, numbered_nodes) =
reduceFn edged_vertices key_extractor
@@ -357,41 +362,31 @@ topologicalSortG :: Graph node -> [node]
topologicalSortG graph = map (gr_vertex_to_node graph) result
where result = {-# SCC "Digraph.topSort" #-} G.topSort (gr_int_graph graph)
-reachableG :: Graph node -> node -> [node]
-reachableG graph from = map (gr_vertex_to_node graph) result
- where from_vertex = expectJust "reachableG" (gr_node_to_vertex graph from)
- result = {-# SCC "Digraph.reachable" #-} reachable (gr_int_graph graph) [from_vertex]
+-- | 'reachableFromG' returns in an /unordered/ list the nodes reachable from the given node.
+-- The list of nodes /does not/ include the root node!
+-- Because we cache a graph reachability index, this operation is very fast.
+reachableFromG :: Graph node -> node -> [node]
+reachableFromG graph from = map (gr_vertex_to_node graph) result
+ where from_vertex = expectJust "reachableFromG" (gr_node_to_vertex graph from)
+ hits = {-# SCC "Digraph.reachable" #-} IM.lookup from_vertex (gr_reachability graph)
+ result = IS.toList $! expectJust "reachableFromG" hits
outgoingG :: Graph node -> node -> [node]
outgoingG graph from = map (gr_vertex_to_node graph) result
- where from_vertex = expectJust "reachableG" (gr_node_to_vertex graph from)
+ where from_vertex = expectJust "outgoingG" (gr_node_to_vertex graph from)
result = gr_int_graph graph ! from_vertex
--- | Given a list of roots return all reachable nodes.
+-- | Given a list of roots, return all reachable nodes in topological order.
+-- Implemented using a depth-first traversal.
reachablesG :: Graph node -> [node] -> [node]
-reachablesG graph froms = map (gr_vertex_to_node graph) result
+reachablesG graph froms = map (gr_vertex_to_node graph) ({-IS.toList-} result)
where result = {-# SCC "Digraph.reachable" #-}
reachable (gr_int_graph graph) vs
+ -- The following would be more efficient,
+ -- IS.union (IS.fromList vs) $
+ -- IS.unions $ map (expectJust "reachablesG" . flip IM.lookup (gr_reachability graph)) vs
vs = [ v | Just v <- map (gr_node_to_vertex graph) froms ]
--- | Efficiently construct a map which maps each key to it's set of transitive
--- dependencies. Only works on acyclic input.
-allReachable :: Ord key => Graph node -> (node -> key) -> M.Map key (S.Set key)
-allReachable = all_reachable reachableGraph
-
--- | Efficiently construct a map which maps each key to it's set of transitive
--- dependencies. Less efficient than @allReachable@, but works on cyclic input as well.
-allReachableCyclic :: Ord key => Graph node -> (node -> key) -> M.Map key (S.Set key)
-allReachableCyclic = all_reachable reachableGraphCyclic
-
-all_reachable :: Ord key => (IntGraph -> IM.IntMap IS.IntSet) -> Graph node -> (node -> key) -> M.Map key (S.Set key)
-all_reachable int_reachables (Graph g from _) keyOf =
- M.fromList [(k, IS.foldr (\v' vs -> keyOf (from v') `S.insert` vs) S.empty vs)
- | (v, vs) <- IM.toList int_graph
- , let k = keyOf (from v)]
- where
- int_graph = int_reachables g
-
hasVertexG :: Graph node -> node -> Bool
hasVertexG graph node = isJust $ gr_node_to_vertex graph node
@@ -403,9 +398,12 @@ edgesG graph = map (\(v1, v2) -> Edge (v2n v1) (v2n v2)) $ G.edges (gr_int_graph
where v2n = gr_vertex_to_node graph
transposeG :: Graph node -> Graph node
-transposeG graph = Graph (G.transposeG (gr_int_graph graph))
+transposeG graph = Graph g'
(gr_vertex_to_node graph)
(gr_node_to_vertex graph)
+ (reachableGraph g')
+ where
+ g' = G.transposeG (gr_int_graph graph)
emptyG :: Graph node -> Bool
emptyG g = graphEmpty (gr_int_graph g)
@@ -466,6 +464,8 @@ reachableGraph g = res
where
do_one v = IS.unions (IS.fromList (g ! v) : mapMaybe (flip IM.lookup res) (g ! v))
res = IM.fromList [(v, do_one v) | v <- G.vertices g]
+-- NB: We also used to have `reachableGraphCyclic`, but it was unused in the source so it was removed.
+-- If you need something like that, search the git history for `reachableGraphCyclic`.
scc :: IntGraph -> [SCC Vertex]
scc graph = map decode forest
@@ -478,45 +478,6 @@ scc graph = map decode forest
where dec (Node v ts) vs = v : foldr dec vs ts
mentions_itself v = v `elem` (graph ! v)
-reachableGraphCyclic :: IntGraph -> IM.IntMap IS.IntSet
-reachableGraphCyclic g = foldl' add_one_comp mempty comps
- where
- neighboursOf v = g!v
-
- comps = scc g
-
- -- To avoid divergence on cyclic input, we build the result
- -- strongly connected component by component, in topological
- -- order. For each SCC, we know that:
- --
- -- * All vertices in the component can reach all other vertices
- -- in the component ("local" reachables)
- --
- -- * Other reachable vertices ("remote" reachables) must come
- -- from earlier components, either via direct neighbourhood, or
- -- transitively from earlier reachability map
- --
- -- This allows us to build the extension of the reachability map
- -- directly, without any self-reference, thereby avoiding a loop.
- add_one_comp :: IM.IntMap IS.IntSet -> SCC Vertex -> IM.IntMap IS.IntSet
- add_one_comp earlier (AcyclicSCC v) = IM.insert v all_remotes earlier
- where
- earlier_neighbours = neighboursOf v
- earlier_further = mapMaybe (flip IM.lookup earlier) earlier_neighbours
- all_remotes = IS.unions (IS.fromList earlier_neighbours : earlier_further)
- add_one_comp earlier (CyclicSCC vs) = IM.union (IM.fromList [(v, local v `IS.union` all_remotes) | v <- vs]) earlier
- where
- all_locals = IS.fromList vs
- local v = IS.delete v all_locals
- -- Arguably, for a cyclic SCC we should include each
- -- vertex in its own reachable set. However, this could
- -- lead to a lot of extra pain in client code to avoid
- -- looping when traversing the reachability map.
- all_neighbours = IS.fromList (concatMap neighboursOf vs)
- earlier_neighbours = all_neighbours IS.\\ all_locals
- earlier_further = mapMaybe (flip IM.lookup earlier) (IS.toList earlier_neighbours)
- all_remotes = IS.unions (earlier_neighbours : earlier_further)
-
{-
************************************************************************
* *
@@ -616,10 +577,36 @@ graphFromVerticesAndAdjacency
-- do the same thing for the other one for backcompat reasons.
-> Graph (Node key payload)
graphFromVerticesAndAdjacency [] _ = emptyGraph
-graphFromVerticesAndAdjacency vertices edges = Graph graph vertex_node (key_vertex . key_extractor)
+graphFromVerticesAndAdjacency vertices edges = Graph graph vertex_node (key_vertex . key_extractor) (reachableGraph graph)
where key_extractor = node_key
(bounds, vertex_node, key_vertex, _) = reduceNodesIntoVerticesOrd vertices key_extractor
key_vertex_pair (a, b) = (expectJust "graphFromVerticesAndAdjacency" $ key_vertex a,
expectJust "graphFromVerticesAndAdjacency" $ key_vertex b)
reduced_edges = map key_vertex_pair edges
graph = G.buildG bounds reduced_edges
+
+--------------------------------------------------------------------------------
+-- Reachability
+--------------------------------------------------------------------------------
+
+type ReachabilityIndex = IM.IntMap IS.IntSet
+
+reachabilityIndex :: Graph a -> ReachabilityIndex
+reachabilityIndex = gr_reachability
+
+nodeLookupByIx :: Graph node -> Vertex -> node
+nodeLookupByIx (Graph _ from _ _) v = from v
+
+ixLookupByNode :: Graph node -> node -> Maybe Vertex
+ixLookupByNode (Graph _ _ to _) v = to v
+
+-- | Reachability query. On graph @g@ and nodes @a@ and @b@, @reachable(g, a,
+-- b)@ asks whether @b@ can be reached starting from @a at .
+reachableQuery :: Graph node -- ^ @g@
+ -> node -- ^ @a@
+ -> node -- ^ @b@
+ -> Bool -- ^ @b@ is reachable from @a@
+reachableQuery (Graph _ _ to index) a b = IS.member b_i $ expectJust "reachable" $ IM.lookup a_i index
+ where a_i = expectJust "reachable:node not in graph" $ to a
+ b_i = expectJust "reachable:node not in graph" $ to b
+
=====================================
compiler/GHC/Driver/Make.hs
=====================================
@@ -7,6 +7,7 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE BlockArguments #-}
+{-# LANGUAGE ViewPatterns #-}
-- -----------------------------------------------------------------------------
--
@@ -149,6 +150,8 @@ import GHC.Types.Unique
import GHC.Iface.Errors.Types
import qualified GHC.Data.Word64Set as W
+import qualified Data.IntMap as IM
+import qualified Data.IntSet as IS
-- -----------------------------------------------------------------------------
-- Loading the program
@@ -610,20 +613,20 @@ createBuildPlan mod_graph maybe_top_mod =
mresolved_cycle = collapseSCC (topSortWithBoot nodes)
in acyclic ++ [either UnresolvedCycle ResolvedCycle mresolved_cycle] ++ toBuildPlan sccs []
- (mg, lookup_node) = moduleGraphNodes False (mgModSummaries' mod_graph)
- trans_deps_map = allReachable mg (mkNodeKey . node_payload)
-- Compute the intermediate modules between a file and its hs-boot file.
-- See Step 2a in Note [Upsweep]
boot_path mn uid =
- map (summaryNodeSummary . expectJust "toNode" . lookup_node) $ Set.toList $
+ Set.toList $
-- Don't include the boot module itself
- Set.delete (NodeKey_Module (key IsBoot)) $
+ Set.filter ((/= NodeKey_Module (key IsBoot)) . mkNodeKey) $
-- Keep intermediate dependencies: as per Step 2a in Note [Upsweep], these are
-- the transitive dependencies of the non-boot file which transitively depend
-- on the boot file.
- Set.filter (\nk -> nodeKeyUnitId nk == uid -- Cheap test
- && (NodeKey_Module (key IsBoot)) `Set.member` expectJust "dep_on_boot" (M.lookup nk trans_deps_map)) $
- expectJust "not_boot_dep" (M.lookup (NodeKey_Module (key NotBoot)) trans_deps_map)
+ Set.filter (\(mkNodeKey -> nk) ->
+ nodeKeyUnitId nk == uid -- Cheap test
+ && mgQuery mod_graph nk (NodeKey_Module (key IsBoot))) $
+ Set.fromList $
+ expectJust "not_boot_dep" (mgReachable mod_graph (NodeKey_Module (key NotBoot)))
where
key ib = ModNodeKeyWithUid (GWIB mn ib) uid
@@ -1497,7 +1500,7 @@ topSortModules drop_hs_boot_nodes summaries mb_root_mod
= node
| otherwise
= throwGhcException (ProgramError "module does not exist")
- in graphFromEdgedVerticesUniq (seq root (reachableG graph root))
+ in graphFromEdgedVerticesUniq (seq root (root:reachableFromG graph root))
newtype ModNodeMap a = ModNodeMap { unModNodeMap :: Map.Map ModNodeKey a }
deriving (Functor, Traversable, Foldable)
@@ -1829,8 +1832,11 @@ checkHomeUnitsClosed ue
-- downwards closure of graph
downwards_closure
- = graphFromEdgedVerticesUniq [ DigraphNode uid uid (Set.toList deps)
- | (uid, deps) <- M.toList (allReachable graph node_key)]
+ = graphFromEdgedVerticesUniq [ DigraphNode uid uid deps
+ | (uid_k, deps_k) <- IM.toList $ reachabilityIndex graph
+ , let uid = node_key $ nodeLookupByIx graph uid_k
+ , let deps = map (node_key . nodeLookupByIx graph) (IS.toList deps_k)
+ ]
inverse_closure = transposeG downwards_closure
=====================================
compiler/GHC/Linker/Deps.hs
=====================================
@@ -51,7 +51,6 @@ import Control.Monad
import Control.Applicative
import qualified Data.Set as Set
-import qualified Data.Map as M
import Data.List (isSuffixOf)
import System.FilePath
@@ -166,16 +165,16 @@ get_link_deps opts pls maybe_normal_osuf span mods = do
make_deps_loop found@(found_units, found_mods) (nk:nexts)
| NodeKey_Module nk `Set.member` found_mods = make_deps_loop found nexts
| otherwise =
- case M.lookup (NodeKey_Module nk) (mgTransDeps mod_graph) of
- Just trans_deps ->
- let deps = Set.insert (NodeKey_Module nk) trans_deps
- -- See #936 and the ghci.prog007 test for why we have to continue traversing through
- -- boot modules.
- todo_boot_mods = [ModNodeKeyWithUid (GWIB mn NotBoot) uid | NodeKey_Module (ModNodeKeyWithUid (GWIB mn IsBoot) uid) <- Set.toList trans_deps]
- in make_deps_loop (found_units, deps `Set.union` found_mods) (todo_boot_mods ++ nexts)
- Nothing ->
+ case fmap mkNodeKey <$> mgReachable mod_graph (NodeKey_Module nk) of
+ Nothing ->
let (ModNodeKeyWithUid _ uid) = nk
in make_deps_loop (addOneToUniqDSet found_units uid, found_mods) nexts
+ Just trans_deps ->
+ let deps = Set.insert (NodeKey_Module nk) (Set.fromList trans_deps)
+ -- See #936 and the ghci.prog007 test for why we have to continue traversing through
+ -- boot modules.
+ todo_boot_mods = [ModNodeKeyWithUid (GWIB mn NotBoot) uid | NodeKey_Module (ModNodeKeyWithUid (GWIB mn IsBoot) uid) <- trans_deps]
+ in make_deps_loop (found_units, deps `Set.union` found_mods) (todo_boot_mods ++ nexts)
mkNk m = ModNodeKeyWithUid (GWIB (moduleName m) NotBoot) (moduleUnitId m)
(init_pkg_set, all_deps) = make_deps_loop (emptyUniqDSet, Set.empty) $ map mkNk (filterOut isInteractiveModule mods)
=====================================
compiler/GHC/Unit/Module/Graph.hs
=====================================
@@ -18,11 +18,12 @@ module GHC.Unit.Module.Graph
, mgModSummaries
, mgModSummaries'
, mgLookupModule
- , mgTransDeps
, showModMsg
, moduleGraphNodeModule
, moduleGraphNodeModSum
, moduleGraphModulesBelow
+ , mgReachable
+ , mgQuery
, moduleGraphNodes
, SummaryNode
@@ -72,6 +73,7 @@ import Data.Bifunctor
import Data.Function
import Data.List (sort)
import GHC.Data.List.SetOps
+import GHC.Stack
-- | A '@ModuleGraphNode@' is a node in the '@ModuleGraph@'.
-- Edges between nodes mark dependencies arising from module imports
@@ -153,7 +155,7 @@ instance Outputable ModNodeKeyWithUid where
-- 'GHC.topSortModuleGraph' and 'GHC.Data.Graph.Directed.flattenSCC' to achieve this.
data ModuleGraph = ModuleGraph
{ mg_mss :: [ModuleGraphNode]
- , mg_trans_deps :: Map.Map NodeKey (Set.Set NodeKey)
+ , mg_graph :: (Graph SummaryNode, NodeKey -> Maybe SummaryNode)
-- A cached transitive dependency calculation so that a lot of work is not
-- repeated whenever the transitive dependencies need to be calculated (for example, hptInstances)
}
@@ -173,13 +175,9 @@ unionMG a b =
let new_mss = nubOrdBy compare $ mg_mss a `mappend` mg_mss b
in ModuleGraph {
mg_mss = new_mss
- , mg_trans_deps = mkTransDeps new_mss
+ , mg_graph = moduleGraphNodes False new_mss
}
-
-mgTransDeps :: ModuleGraph -> Map.Map NodeKey (Set.Set NodeKey)
-mgTransDeps = mg_trans_deps
-
mgModSummaries :: ModuleGraph -> [ModSummary]
mgModSummaries mg = [ m | ModuleNode _ m <- mgModSummaries' mg ]
@@ -199,7 +197,7 @@ mgLookupModule ModuleGraph{..} m = listToMaybe $ mapMaybe go mg_mss
go _ = Nothing
emptyMG :: ModuleGraph
-emptyMG = ModuleGraph [] Map.empty
+emptyMG = ModuleGraph [] (emptyGraph, const Nothing)
isTemplateHaskellOrQQNonBoot :: ModSummary -> Bool
isTemplateHaskellOrQQNonBoot ms =
@@ -212,14 +210,9 @@ isTemplateHaskellOrQQNonBoot ms =
extendMG :: ModuleGraph -> [NodeKey] -> ModSummary -> ModuleGraph
extendMG ModuleGraph{..} deps ms = ModuleGraph
{ mg_mss = ModuleNode deps ms : mg_mss
- , mg_trans_deps = mkTransDeps (ModuleNode deps ms : mg_mss)
+ , mg_graph = moduleGraphNodes False (ModuleNode deps ms : mg_mss)
}
-mkTransDeps :: [ModuleGraphNode] -> Map.Map NodeKey (Set.Set NodeKey)
-mkTransDeps mss =
- let (gg, _lookup_node) = moduleGraphNodes False mss
- in allReachable gg (mkNodeKey . node_payload)
-
extendMGInst :: ModuleGraph -> UnitId -> InstantiatedUnit -> ModuleGraph
extendMGInst mg uid depUnitId = mg
{ mg_mss = InstantiationNode uid depUnitId : mg_mss mg
@@ -394,12 +387,9 @@ type ModNodeKey = ModuleNameWithIsBoot
-- boot module and the non-boot module can be reached, it only returns the
-- non-boot one.
moduleGraphModulesBelow :: ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> Set ModNodeKeyWithUid
-moduleGraphModulesBelow mg uid mn = filtered_mods $ [ mn | NodeKey_Module mn <- modules_below]
+moduleGraphModulesBelow mg uid mn = filtered_mods [ mn | NodeKey_Module mn <- modules_below ]
where
- td_map = mgTransDeps mg
-
- modules_below = maybe [] Set.toList $ Map.lookup (NodeKey_Module (ModNodeKeyWithUid mn uid)) td_map
-
+ modules_below = maybe [] (map mkNodeKey) (mgReachable mg (NodeKey_Module (ModNodeKeyWithUid mn uid)))
filtered_mods = Set.fromDistinctAscList . filter_mods . sort
-- IsBoot and NotBoot modules are necessarily consecutive in the sorted list
@@ -415,3 +405,22 @@ moduleGraphModulesBelow mg uid mn = filtered_mods $ [ mn | NodeKey_Module mn <-
in r' : filter_mods rs
| otherwise -> r1 : filter_mods (r2:rs)
rs -> rs
+
+mgReachable :: HasCallStack => ModuleGraph -> NodeKey -> Maybe [ModuleGraphNode]
+mgReachable mg nk = map summaryNodeSummary <$> modules_below where
+ (td_map, lookup_node) = mg_graph mg
+ modules_below =
+ reachableFromG td_map <$> lookup_node nk
+
+-- | Reachability Query. @mgQuery(g, a, b)@ asks: Can we reach @b@ from @a@ in
+-- graph @g@?
+-- INVARIANT: Both @a@ and @b@ must be in @g at .
+mgQuery :: ModuleGraph -- ^ @g@
+ -> NodeKey -- ^ @a@
+ -> NodeKey -- ^ @b@
+ -> Bool -- ^ @b@ is reachable from @a@
+mgQuery mg nka nkb = reachableQuery td_map na nb where
+ (td_map, lookup_node) = mg_graph mg
+ na = expectJust "mgQuery:a" $ lookup_node nka
+ nb = expectJust "mgQuery:b" $ lookup_node nkb
+
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/6aed1559246007bb8c2049b790b2a8613e08e8c6...0b67174d08c3f1c19c02d43e50dec778c0709742
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/6aed1559246007bb8c2049b790b2a8613e08e8c6...0b67174d08c3f1c19c02d43e50dec778c0709742
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241119/815a28fe/attachment-0001.html>
More information about the ghc-commits
mailing list