[Git][ghc/ghc][wip/24275] driver: Really don't lose track of nodes when we fail to resolve cycles
Zubin (@wz1000)
gitlab at gitlab.haskell.org
Wed Dec 20 12:23:18 UTC 2023
Zubin pushed to branch wip/24275 at Glasgow Haskell Compiler / GHC
Commits:
0de44f90 by Zubin Duggal at 2023-12-20T17:52:35+05:30
driver: Really don't lose track of nodes when we fail to resolve cycles
This fixes a bug in 8db8d2fd1c881032b1b360c032b6d9d072c11723, where we could lose
track of acyclic components at the start of an unresolved cycle. We now ensure we
never lose track of any of these components.
As T24275 demonstrates, a "cyclic" SCC might not really be a true SCC:
When viewed without boot files, we have a single SCC
```
[REC main:T24275B [main:T24275B {-# SOURCE #-},
main:T24275A {-# SOURCE #-}]
main:T24275A [main:T24275A {-# SOURCE #-}]]
```
But with boot files this turns into
```
[NONREC main:T24275B {-# SOURCE #-} [],
REC main:T24275B [main:T24275B {-# SOURCE #-},
main:T24275A {-# SOURCE #-}]
main:T24275A {-# SOURCE #-} [main:T24275B],
NONREC main:T24275A [main:T24275A {-# SOURCE #-}]]
```
Note that this is truly not an SCC, as no nodes are reachable from T24275B.hs-boot.
However, we treat this entire group as a single "SCC" because it appears to be one when we
analyse the graph without taking boot files into account.
To handle this, I extended `findCycle` to not assume its input is an SCC, and to
try harder to find cycles in its input.
Fixes #24275
- - - - -
8 changed files:
- compiler/GHC/Data/Graph/Directed.hs
- compiler/GHC/Driver/Make.hs
- + testsuite/tests/driver/T24275/T24275.stderr
- + testsuite/tests/driver/T24275/T24275A.hs
- + testsuite/tests/driver/T24275/T24275A.hs-boot
- + testsuite/tests/driver/T24275/T24275B.hs
- + testsuite/tests/driver/T24275/T24275B.hs-boot
- + testsuite/tests/driver/T24275/all.T
Changes:
=====================================
compiler/GHC/Data/Graph/Directed.hs
=====================================
@@ -46,7 +46,7 @@ module GHC.Data.Graph.Directed (
import GHC.Prelude
-import GHC.Utils.Misc ( minWith, count )
+import GHC.Utils.Misc ( sortWith, count )
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Data.Maybe ( expectJust )
@@ -219,47 +219,52 @@ type WorkItem key payload
[payload]) -- Rest of the path;
-- [a,b,c] means c depends on b, b depends on a
--- | Find a reasonably short cycle a->b->c->a, in a strongly
--- connected component. The input nodes are presumed to be
--- a SCC, so you can start anywhere.
+-- | Find a reasonably short cycle a->b->c->a, in a graph
+-- The graph might not necessarily be strongly connected.
findCycle :: forall payload key. Ord key
=> [Node key payload] -- The nodes. The dependencies can
-- contain extra keys, which are ignored
-> Maybe [payload] -- A cycle, starting with node
-- so each depends on the next
findCycle graph
- = go Set.empty (new_work root_deps []) []
+ = goRoots plausible_roots
where
env :: Map.Map key (Node key payload)
env = Map.fromList [ (node_key node, node) | node <- graph ]
- -- Find the node with fewest dependencies among the SCC modules
+ goRoots [] = Nothing
+ goRoots (root:xs) =
+ case go Set.empty (new_work root_deps []) [] of
+ Nothing -> goRoots xs
+ Just res -> Just res
+ where
+ DigraphNode root_payload root_key root_deps = root
+ -- 'go' implements Dijkstra's algorithm, more or less
+ go :: Set.Set key -- Visited
+ -> [WorkItem key payload] -- Work list, items length n
+ -> [WorkItem key payload] -- Work list, items length n+1
+ -> Maybe [payload] -- Returned cycle
+ -- Invariant: in a call (go visited ps qs),
+ -- visited = union (map tail (ps ++ qs))
+
+ go _ [] [] = Nothing -- No cycles
+ go visited [] qs = go visited qs []
+ go visited (((DigraphNode payload key deps), path) : ps) qs
+ | key == root_key = Just (root_payload : reverse path)
+ | key `Set.member` visited = go visited ps qs
+ | key `Map.notMember` env = go visited ps qs
+ | otherwise = go (Set.insert key visited)
+ ps (new_qs ++ qs)
+ where
+ new_qs = new_work deps (payload : path)
+
+
+ -- Find the nodes with fewest dependencies among the SCC modules
-- This is just a heuristic to find some plausible root module
- root :: Node key payload
- root = fst (minWith snd [ (node, count (`Map.member` env)
- (node_dependencies node))
- | node <- graph ])
- DigraphNode root_payload root_key root_deps = root
-
-
- -- 'go' implements Dijkstra's algorithm, more or less
- go :: Set.Set key -- Visited
- -> [WorkItem key payload] -- Work list, items length n
- -> [WorkItem key payload] -- Work list, items length n+1
- -> Maybe [payload] -- Returned cycle
- -- Invariant: in a call (go visited ps qs),
- -- visited = union (map tail (ps ++ qs))
-
- go _ [] [] = Nothing -- No cycles
- go visited [] qs = go visited qs []
- go visited (((DigraphNode payload key deps), path) : ps) qs
- | key == root_key = Just (root_payload : reverse path)
- | key `Set.member` visited = go visited ps qs
- | key `Map.notMember` env = go visited ps qs
- | otherwise = go (Set.insert key visited)
- ps (new_qs ++ qs)
- where
- new_qs = new_work deps (payload : path)
+ plausible_roots :: [Node key payload]
+ plausible_roots = map fst (sortWith snd [ (node, count (`Map.member` env) (node_dependencies node))
+ | node <- graph ])
+
new_work :: [key] -> [payload] -> [WorkItem key payload]
new_work deps path = [ (n, path) | Just n <- map (`Map.lookup` env) deps ]
=====================================
compiler/GHC/Driver/Make.hs
=====================================
@@ -642,7 +642,7 @@ createBuildPlan mod_graph maybe_top_mod =
collapseSCC :: [SCC ModuleGraphNode] -> Either [ModuleGraphNode] [(Either ModuleGraphNode ModuleGraphNodeWithBootFile)]
-- Must be at least two nodes, as we were in a cycle
collapseSCC [AcyclicSCC node1, AcyclicSCC node2] = Right [toNodeWithBoot node1, toNodeWithBoot node2]
- collapseSCC (AcyclicSCC node : nodes) = (toNodeWithBoot node :) <$> collapseSCC nodes
+ collapseSCC (AcyclicSCC node : nodes) = either (Left . (node :)) (Right . (toNodeWithBoot node :)) (collapseSCC nodes)
-- Cyclic
collapseSCC nodes = Left (flattenSCCs nodes)
=====================================
testsuite/tests/driver/T24275/T24275.stderr
=====================================
@@ -0,0 +1,4 @@
+Module graph contains a cycle:
+ module ‘T24275A’ (./T24275A.hs-boot)
+ imports module ‘T24275B’ (T24275B.hs)
+ which imports module ‘T24275A’ (./T24275A.hs-boot)
=====================================
testsuite/tests/driver/T24275/T24275A.hs
=====================================
@@ -0,0 +1 @@
+module T24275A where
=====================================
testsuite/tests/driver/T24275/T24275A.hs-boot
=====================================
@@ -0,0 +1,3 @@
+module T24275A where
+
+import T24275B
=====================================
testsuite/tests/driver/T24275/T24275B.hs
=====================================
@@ -0,0 +1,3 @@
+module T24275B where
+
+import {-# SOURCE #-} T24275A
=====================================
testsuite/tests/driver/T24275/T24275B.hs-boot
=====================================
@@ -0,0 +1 @@
+module T24275B where
=====================================
testsuite/tests/driver/T24275/all.T
=====================================
@@ -0,0 +1 @@
+test('T24275', extra_files(['T24275A.hs','T24275A.hs-boot','T24275B.hs-boot','T24275B.hs']), multimod_compile_fail, ['T24275B',''])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0de44f902c6e86ddb79310fff8596820958c05e0
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0de44f902c6e86ddb79310fff8596820958c05e0
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231220/ab948bd3/attachment-0001.html>
More information about the ghc-commits
mailing list