[Git][ghc/ghc][wip/home-unit-closure-fast] driver: Make `checkHomeUnitsClosed` faster

Zubin (@wz1000) gitlab at gitlab.haskell.org
Wed Mar 20 12:29:01 UTC 2024



Zubin pushed to branch wip/home-unit-closure-fast at Glasgow Haskell Compiler / GHC


Commits:
37bc734d by Zubin Duggal at 2024-03-20T17:58:50+05:30
driver: Make `checkHomeUnitsClosed` faster

The implementation of `checkHomeUnitsClosed` was traversing every single path
in the unit dependency graph - this grows exponentially and quickly grows to be
infeasible on larger unit dependency graphs.

Instead we replace this with a faster implementation which follows from the
specificiation of the closure property - there is a closure error if there are
units which are both are both (transitively) depended upon by home units and
(transitively) depend on home units, but are not themselves home units.

To compute the set of units required for closure, we first compute the closure
of the unit dependency graph, then the transpose of this closure, and find all
units that are reachable from the home units in the transpose of the closure.

- - - - -


5 changed files:

- compiler/GHC/Driver/Make.hs
- + testsuite/tests/driver/multipleHomeUnits/mhu-perf/Makefile
- + testsuite/tests/driver/multipleHomeUnits/mhu-perf/Setup.hs
- + testsuite/tests/driver/multipleHomeUnits/mhu-perf/all.T
- + testsuite/tests/driver/multipleHomeUnits/mhu-perf/genLargeHMU


Changes:

=====================================
compiler/GHC/Driver/Make.hs
=====================================
@@ -1565,8 +1565,8 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots
        (root_errs, rootSummariesOk) <- partitionWithM getRootSummary roots -- #17549
        let root_map = mkRootMap rootSummariesOk
        checkDuplicates root_map
-       (deps, pkg_deps, map0) <- loopSummaries rootSummariesOk (M.empty, Set.empty, root_map)
-       let closure_errs = checkHomeUnitsClosed (hsc_unit_env hsc_env) (hsc_all_home_unit_ids hsc_env) (Set.toList pkg_deps)
+       (deps, map0) <- loopSummaries rootSummariesOk (M.empty, root_map)
+       let closure_errs = checkHomeUnitsClosed (hsc_unit_env hsc_env) (hsc_all_home_unit_ids hsc_env)
        let unit_env = hsc_unit_env hsc_env
        let tmpfs    = hsc_tmpfs    hsc_env
 
@@ -1660,19 +1660,19 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots
 
         -- This loops over all the mod summaries in the dependency graph, accumulates the actual dependencies for each module/unit
         loopSummaries :: [ModSummary]
-              -> (M.Map NodeKey ModuleGraphNode, Set.Set (UnitId, UnitId),
+              -> (M.Map NodeKey ModuleGraphNode,
                     DownsweepCache)
-              -> IO ((M.Map NodeKey ModuleGraphNode), Set.Set (UnitId, UnitId), DownsweepCache)
+              -> IO ((M.Map NodeKey ModuleGraphNode), DownsweepCache)
         loopSummaries [] done = return done
-        loopSummaries (ms:next) (done, pkgs, summarised)
+        loopSummaries (ms:next) (done, summarised)
           | Just {} <- M.lookup k done
-          = loopSummaries next (done, pkgs, summarised)
+          = loopSummaries next (done, summarised)
           -- Didn't work out what the imports mean yet, now do that.
           | otherwise = do
-             (final_deps, pkgs1, done', summarised') <- loopImports (calcDeps ms) done summarised
+             (final_deps, done', summarised') <- loopImports (calcDeps ms) done summarised
              -- This has the effect of finding a .hs file if we are looking at the .hs-boot file.
-             (_, _, done'', summarised'') <- loopImports (maybeToList hs_file_for_boot) done' summarised'
-             loopSummaries next (M.insert k (ModuleNode final_deps ms) done'', pkgs1 `Set.union` pkgs, summarised'')
+             (_, done'', summarised'') <- loopImports (maybeToList hs_file_for_boot) done' summarised'
+             loopSummaries next (M.insert k (ModuleNode final_deps ms) done'', summarised'')
           where
             k = NodeKey_Module (msKey ms)
 
@@ -1692,18 +1692,17 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots
                         -- Visited set; the range is a list because
                         -- the roots can have the same module names
                         -- if allow_dup_roots is True
-             -> IO ([NodeKey], Set.Set (UnitId, UnitId),
-
+             -> IO ([NodeKey],
                   M.Map NodeKey ModuleGraphNode, DownsweepCache)
                         -- The result is the completed NodeMap
-        loopImports [] done summarised = return ([], Set.empty, done, summarised)
+        loopImports [] done summarised = return ([], done, summarised)
         loopImports ((home_uid,mb_pkg, gwib) : ss) done summarised
           | Just summs <- M.lookup cache_key summarised
           = case summs of
               [Right ms] -> do
                 let nk = NodeKey_Module (msKey ms)
-                (rest, pkgs, summarised', done') <- loopImports ss done summarised
-                return (nk: rest, pkgs, summarised', done')
+                (rest, summarised', done') <- loopImports ss done summarised
+                return (nk: rest, summarised', done')
               [Left _err] ->
                 loopImports ss done summarised
               _errs ->  do
@@ -1715,20 +1714,20 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots
                                        Nothing excl_mods
                case mb_s of
                    NotThere -> loopImports ss done summarised
-                   External uid -> do
-                    (other_deps, pkgs, done', summarised') <- loopImports ss done summarised
-                    return (other_deps, Set.insert (homeUnitId home_unit, uid) pkgs, done', summarised')
+                   External _ -> do
+                    (other_deps, done', summarised') <- loopImports ss done summarised
+                    return (other_deps, done', summarised')
                    FoundInstantiation iud -> do
-                    (other_deps, pkgs, done', summarised') <- loopImports ss done summarised
-                    return (NodeKey_Unit iud : other_deps, pkgs, done', summarised')
+                    (other_deps, done', summarised') <- loopImports ss done summarised
+                    return (NodeKey_Unit iud : other_deps, done', summarised')
                    FoundHomeWithError (_uid, e) ->  loopImports ss done (Map.insert cache_key [(Left e)] summarised)
                    FoundHome s -> do
-                     (done', pkgs1, summarised') <-
-                       loopSummaries [s] (done, Set.empty, Map.insert cache_key [Right s] summarised)
-                     (other_deps, pkgs2, final_done, final_summarised) <- loopImports ss done' summarised'
+                     (done', summarised') <-
+                       loopSummaries [s] (done, Map.insert cache_key [Right s] summarised)
+                     (other_deps, final_done, final_summarised) <- loopImports ss done' summarised'
 
                      -- MP: This assumes that we can only instantiate non home units, which is probably fair enough for now.
-                     return (NodeKey_Module (msKey s) : other_deps, pkgs1 `Set.union` pkgs2, final_done, final_summarised)
+                     return (NodeKey_Module (msKey s) : other_deps, final_done, final_summarised)
           where
             cache_key = (home_uid, mb_pkg, unLoc <$> gwib)
             home_unit = ue_unitHomeUnit home_uid (hsc_unit_env hsc_env)
@@ -1737,47 +1736,52 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots
 
 -- This function checks then important property that if both p and q are home units
 -- then any dependency of p, which transitively depends on q is also a home unit.
-checkHomeUnitsClosed ::  UnitEnv -> Set.Set UnitId -> [(UnitId, UnitId)] -> [DriverMessages]
--- Fast path, trivially closed.
-checkHomeUnitsClosed ue home_id_set home_imp_ids
-  | Set.size home_id_set == 1 = []
-  | otherwise =
-  let res = foldMap loop home_imp_ids
-  -- Now check whether everything which transitively depends on a home_unit is actually a home_unit
-  -- These units are the ones which we need to load as home packages but failed to do for some reason,
-  -- it's a bug in the tool invoking GHC.
-      bad_unit_ids = Set.difference res home_id_set
-  in if Set.null bad_unit_ids
-        then []
-        else [singleMessage $ mkPlainErrorMsgEnvelope rootLoc $ DriverHomePackagesNotClosed (Set.toList bad_unit_ids)]
-
+checkHomeUnitsClosed ::  UnitEnv -> Set.Set UnitId -> [DriverMessages]
+checkHomeUnitsClosed ue home_id_set
+    | Set.null bad_unit_ids = []
+    | otherwise = [singleMessage $ mkPlainErrorMsgEnvelope rootLoc $ DriverHomePackagesNotClosed (Set.toList bad_unit_ids)]
   where
+    bad_unit_ids = upwards_closure Set.\\ home_id_set
     rootLoc = mkGeneralSrcSpan (fsLit "<command line>")
-    -- TODO: This could repeat quite a bit of work but I struggled to write this function.
-    -- Which units transitively depend on a home unit
-    loop :: (UnitId, UnitId) -> Set.Set UnitId -- The units which transitively depend on a home unit
-    loop (from_uid, uid) =
-      let us = ue_findHomeUnitEnv from_uid ue in
-      let um = unitInfoMap (homeUnitEnv_units us) in
-      case lookupUniqMap um uid of
-        Nothing -> pprPanic "uid not found" (ppr uid)
-        Just ui ->
-          let depends = unitDepends ui
-              home_depends = Set.fromList depends `Set.intersection` home_id_set
-              other_depends = Set.fromList depends `Set.difference` home_id_set
-          in
-            -- Case 1: The unit directly depends on a home_id
-            if not (null home_depends)
-              then
-                let res = foldMap (loop . (from_uid,)) other_depends
-                in Set.insert uid res
-             -- Case 2: Check the rest of the dependencies, and then see if any of them depended on
-              else
-                let res = foldMap (loop . (from_uid,)) other_depends
-                in
-                  if not (Set.null res)
-                    then Set.insert uid res
-                    else res
+
+    graph :: Graph (Node UnitId UnitId)
+    graph = graphFromEdgedVerticesUniq graphNodes
+
+    -- downwards closure of graph
+    downwards_closure
+      = graphFromEdgedVerticesUniq [ DigraphNode uid uid (Set.toList deps)
+                                   | (uid, deps) <- M.toList (allReachable graph node_key)]
+
+    inverse_closure = transposeG downwards_closure
+
+    upwards_closure = Set.fromList $ map node_key $ reachablesG inverse_closure [DigraphNode uid uid [] | uid <- Set.toList home_id_set]
+
+    all_unit_direct_deps :: UniqMap UnitId (Set.Set UnitId)
+    all_unit_direct_deps
+      = unitEnv_foldWithKey go emptyUniqMap $ ue_home_unit_graph ue
+      where
+        go rest this this_uis =
+           plusUniqMap_C Set.union
+             (addToUniqMap_C Set.union external_depends this (Set.fromList $ this_deps))
+             rest
+           where
+             external_depends = mapUniqMap (Set.fromList . unitDepends) (unitInfoMap this_units)
+             this_units = homeUnitEnv_units this_uis
+             this_deps = [ toUnitId unit | (unit,Just _) <- explicitUnits this_units]
+
+    graphNodes :: [Node UnitId UnitId]
+    graphNodes = go Set.empty home_id_set
+      where
+        go done todo
+          = case Set.minView todo of
+              Nothing -> []
+              Just (uid, todo')
+                | Set.member uid done -> go done todo'
+                | otherwise -> case lookupUniqMap all_unit_direct_deps uid of
+                    Nothing -> pprPanic "uid not found" (ppr (uid, all_unit_direct_deps))
+                    Just depends ->
+                      let todo'' = (depends Set.\\ done) `Set.union` todo'
+                      in DigraphNode uid uid (Set.toList depends) : go (Set.insert uid done) todo''
 
 -- | Update the every ModSummary that is depended on
 -- by a module that needs template haskell. We enable codegen to


=====================================
testsuite/tests/driver/multipleHomeUnits/mhu-perf/Makefile
=====================================
@@ -0,0 +1,23 @@
+TOP=../../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+SETUP=../Setup -v0
+
+mhu-perf: clean
+	$(MAKE) -s --no-print-directory clean
+	./genLargeHMU
+	'$(GHC_PKG)' init tmp.d
+	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 --make Setup
+	for dir in unit-p*; do \
+	  cd $$dir && $(SETUP) clean && $(SETUP) configure $(CABAL_MINIMAL_BUILD) --ipid=$$dir-0.1.0.0 --with-ghc='$(TEST_HC)' --with-hc-pkg='$(GHC_PKG)' --ghc-options='$(TEST_HC_OPTS)' --package-db=../tmp.d && $(SETUP) build && $(SETUP) register --inplace && cd ..; \
+  done;
+
+
+ifeq "$(CLEANUP)" "1"
+	$(MAKE) -s --no-print-directory clean
+endif
+
+clean :
+	 $(RM) -r unitTop* unit-p* top*/ tmp*.d inst-* *.o *.hi */*.o */*.hi */Setup$(exeext) */dist Setup$(exeext)
+


=====================================
testsuite/tests/driver/multipleHomeUnits/mhu-perf/Setup.hs
=====================================
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain


=====================================
testsuite/tests/driver/multipleHomeUnits/mhu-perf/all.T
=====================================
@@ -0,0 +1,8 @@
+test('mhu-perf',
+     [ collect_compiler_stats('bytes allocated',2),
+       pre_cmd('$MAKE -s --no-print-directory mhu-perf'),
+       extra_files(['genMultiComp','Setup.hs']),
+       compile_timeout_multiplier(5)
+     ],
+     multiunit_compile,
+     [['unitTop1', 'unitTop2'], '-fhide-source-paths'])


=====================================
testsuite/tests/driver/multipleHomeUnits/mhu-perf/genLargeHMU
=====================================
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Generate $DEPTH layers of packages with $WIDTH modules on each layer
+# Every package on layer N depends on all the packages on layer N-1
+# unitTop imports all the units from the last layer
+DEPTH=8
+WIDTH=8
+for i in $(seq -w 1 $WIDTH); do
+  mkdir unit-p0M$i
+  echo "module DummyLevel0M$i where" > unit-p0M$i/DummyLevel0M$i.hs;
+  cat > unit-p0M$i/unit-p0M$i.cabal <<EOF
+name: unit-p0M$i
+version: 0.1.0.0
+build-type: Simple
+cabal-version: >=1.10
+library
+  default-language: Haskell2010
+  exposed-modules: DummyLevel0M$i
+  build-depends: base
+EOF
+done
+for l in $(seq 1 $DEPTH); do
+  for i in $(seq -w 1 $WIDTH); do
+    mkdir unit-p${l}M$i
+    cat > unit-p${l}M$i/unit-p${l}M$i.cabal <<EOF
+name: unit-p${l}M$i
+version: 0.1.0.0
+build-type: Simple
+cabal-version: >=1.10
+library
+  default-language: Haskell2010
+  exposed-modules: DummyLevel${l}M$i
+  build-depends: base
+EOF
+    echo "module DummyLevel${l}M$i where" > unit-p${l}M$i/DummyLevel${l}M$i.hs;
+    for j in $(seq -w 1 $WIDTH); do
+      echo "    , unit-p$((l-1))M$j" >> unit-p${l}M$i/unit-p${l}M$i.cabal
+      echo "import DummyLevel$((l-1))M$j" >> unit-p${l}M$i/DummyLevel${l}M$i.hs;
+    done
+  done
+done
+mkdir top1
+echo "module Top1 where" > top1/Top1.hs
+echo "-package-db ./tmp.d -i -itop1 Top1 -this-unit-id unit-top1 -package base" > unitTop1;
+for j in $(seq -w 1 $WIDTH); do
+  echo "-package unit-p${DEPTH}M$j" >> unitTop1;
+  echo "import DummyLevel${DEPTH}M$j" >> top1/Top1.hs;
+done
+mkdir top2
+echo "module Top2 where" > top2/Top2.hs
+echo "-package-db ./tmp.d -i  -itop2 Top2 -this-unit-id unit-top2 -package base" > unitTop2;
+for j in $(seq -w 2 $WIDTH); do
+  echo "-package unit-p${DEPTH}M$j" >> unitTop2;
+  echo "import DummyLevel${DEPTH}M$j" >> top2/Top2.hs;
+done



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/37bc734d4d25d54b096a042b2c676620059599bc

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/37bc734d4d25d54b096a042b2c676620059599bc
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240320/77ea3de2/attachment-0001.html>


More information about the ghc-commits mailing list