[Git][ghc/ghc][wip/romes/refator-modulegraph] Refactor ModuleGraph interface

Rodrigo Mesquita (@alt-romes) gitlab at gitlab.haskell.org
Wed Nov 27 11:03:35 UTC 2024



Rodrigo Mesquita pushed to branch wip/romes/refator-modulegraph at Glasgow Haskell Compiler / GHC


Commits:
062076f1 by Rodrigo Mesquita at 2024-11-27T11:03:15+00:00
Refactor ModuleGraph interface

The 'ModuleGraph' abstraction represents the relationship and strucutre
of the modules being compiled. This structure is meant to be constructed
once at the start of compilation, and never changed again.

However, it's exposed interface was confusing and exposed too many
footguns which led to inneficient usages of the ModuleGraph. This commit
improves significantly the exported interface of ModuleGraph, taking
into consideration the recent improvements around reachability queries.

Since the ModuleGraph graphs and related structures (HPT, EPS) are
performance critical in the sense that somewhat simple mistakes can
cause bad leaks and non-linear memory usage, we want to have proper APIs
that guide efficient usage. This is a good step in that direction.

- - - - -


8 changed files:

- compiler/GHC.hs
- compiler/GHC/Driver/Env.hs
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Driver/Make.hs
- compiler/GHC/Unit/Module/Graph.hs
- compiler/GHC/Unit/Module/ModSummary.hs
- testsuite/tests/ghc-api/downsweep/OldModLocation.hs
- testsuite/tests/ghc-api/downsweep/PartialDownsweep.hs


Changes:

=====================================
compiler/GHC.hs
=====================================
@@ -74,7 +74,7 @@ module GHC (
         compileToCoreModule, compileToCoreSimplified,
 
         -- * Inspecting the module structure of the program
-        ModuleGraph, emptyMG, mapMG, mkModuleGraph, mgModSummaries,
+        ModuleGraph, emptyMG, mgMap, mkModuleGraph, mgModSummaries,
         mgLookupModule,
         ModSummary(..), ms_mod_name, ModLocation(..),
         pattern ModLocation,
@@ -874,7 +874,7 @@ setProgramDynFlags_ invalidate_needed dflags = do
 --
 invalidateModSummaryCache :: GhcMonad m => m ()
 invalidateModSummaryCache =
-  modifySession $ \h -> h { hsc_mod_graph = mapMG inval (hsc_mod_graph h) }
+  modifySession $ \h -> h { hsc_mod_graph = mgMap inval (hsc_mod_graph h) }
  where
   inval ms = ms { ms_hs_hash = fingerprint0 }
 


=====================================
compiler/GHC/Driver/Env.hs
=====================================
@@ -41,6 +41,8 @@ where
 
 import GHC.Prelude
 
+import GHC.Builtin.Names ( gHC_PRIM )
+
 import GHC.Driver.DynFlags
 import GHC.Driver.Errors ( printOrThrowDiagnostics )
 import GHC.Driver.Errors.Types ( GhcMessage )
@@ -70,8 +72,6 @@ import GHC.Types.Name
 import GHC.Types.Name.Env
 import GHC.Types.TyThing
 
-import GHC.Builtin.Names ( gHC_PRIM )
-
 import GHC.Data.Maybe
 
 import GHC.Utils.Exception as Ex
@@ -274,9 +274,11 @@ hptSomeThingsBelowUs extract include_hi_boot hsc_env uid mn
         mg  = hsc_mod_graph hsc_env
     in
     [ thing
-    |
-    -- Find each non-hi-boot module below me
-      (ModNodeKeyWithUid (GWIB { gwib_mod = mod, gwib_isBoot = is_boot }) mod_uid) <- Set.toList (moduleGraphModulesBelow mg uid mn)
+      -- "Finding each non-hi-boot module below me" maybe could be cached in the module
+      -- graph to avoid filtering the boots out of the transitive closure out
+      -- every time this is called
+    | (ModNodeKeyWithUid (GWIB { gwib_mod = mod, gwib_isBoot = is_boot }) mod_uid)
+          <- Set.toList (moduleGraphModulesBelow mg uid mn)
     , include_hi_boot || (is_boot == NotBoot)
 
         -- unsavoury: when compiling the base package with --make, we


=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -1453,7 +1453,7 @@ oneShotMsg logger recomp =
 batchMsg :: Messager
 batchMsg = batchMsgWith (\_ _ _ _ -> empty)
 batchMultiMsg :: Messager
-batchMultiMsg = batchMsgWith (\_ _ _ node -> brackets (ppr (moduleGraphNodeUnitId node)))
+batchMultiMsg = batchMsgWith (\_ _ _ node -> brackets (ppr (mgNodeUnitId node)))
 
 batchMsgWith :: (HscEnv -> (Int, Int) -> RecompileRequired -> ModuleGraphNode -> SDoc) -> Messager
 batchMsgWith extra hsc_env_start mod_index recomp node =
@@ -1469,7 +1469,7 @@ batchMsgWith extra hsc_env_start mod_index recomp node =
                     LinkNode {} -> "Linking"
                     InstantiationNode {} -> "Instantiating"
                     ModuleNode {} -> "Compiling"
-        hsc_env = hscSetActiveUnitId (moduleGraphNodeUnitId node) hsc_env_start
+        hsc_env = hscSetActiveUnitId (mgNodeUnitId node) hsc_env_start
         dflags = hsc_dflags hsc_env
         logger = hsc_logger hsc_env
         state  = hsc_units hsc_env


=====================================
compiler/GHC/Driver/Make.hs
=====================================
@@ -250,11 +250,9 @@ depanalPartial diag_wrapper msg excluded_mods allow_dup_roots = do
     -- cached finder data.
     liftIO $ flushFinderCaches (hsc_FC hsc_env) (hsc_unit_env hsc_env)
 
-    (errs, graph_nodes) <- liftIO $ downsweep
+    (errs, mod_graph) <- liftIO $ downsweep
       hsc_env diag_wrapper msg (mgModSummaries old_graph)
       excluded_mods allow_dup_roots
-    let
-      mod_graph = mkModuleGraph graph_nodes
     return (unionManyMessages errs, mod_graph)
 
 -- | Collect the instantiations of dependencies to create 'InstantiationNode' work graph nodes.
@@ -289,7 +287,7 @@ linkNodes summaries uid hue =
       ofile = outputFile_ dflags
 
       unit_nodes :: [NodeKey]
-      unit_nodes = map mkNodeKey (filter ((== uid) . moduleGraphNodeUnitId) summaries)
+      unit_nodes = map mkNodeKey (filter ((== uid) . mgNodeUnitId) summaries)
   -- Issue a warning for the confusing case where the user
   -- said '-o foo' but we're not going to do any linking.
   -- We attempt linking if either (a) one of the modules is
@@ -632,13 +630,18 @@ createBuildPlan mod_graph maybe_top_mod =
 
         -- An environment mapping a module to its hs-boot file and all nodes on the path between the two, if one exists
         boot_modules = mkModuleEnv
-          [ (ms_mod ms, (m, boot_path (ms_mod_name ms) (ms_unitid ms))) | m@(ModuleNode _ ms) <- (mgModSummaries' mod_graph), isBootSummary ms == IsBoot]
+          [ (ms_mod ms, (m, boot_path (ms_mod_name ms) (ms_unitid ms)))
+            | m@(ModuleNode _ ms) <- mgModSummaries' mod_graph
+            , isBootSummary ms == IsBoot]
 
         select_boot_modules :: [ModuleGraphNode] -> [ModuleGraphNode]
         select_boot_modules = mapMaybe (fmap fst . get_boot_module)
 
         get_boot_module :: ModuleGraphNode -> Maybe (ModuleGraphNode, [ModuleGraphNode])
-        get_boot_module m = case m of ModuleNode _ ms | HsSrcFile <- ms_hsc_src ms -> lookupModuleEnv boot_modules (ms_mod ms); _ -> Nothing
+        get_boot_module (ModuleNode _ ms)
+          | HsSrcFile <- ms_hsc_src ms
+          = lookupModuleEnv boot_modules (ms_mod ms)
+        get_boot_module _ = Nothing
 
         -- Any cycles should be resolved now
         collapseSCC :: [SCC ModuleGraphNode] -> Either [ModuleGraphNode] [(Either ModuleGraphNode ModuleGraphNodeWithBootFile)]
@@ -668,8 +671,8 @@ createBuildPlan mod_graph maybe_top_mod =
 
   in
 
-    assertPpr (sum (map countMods build_plan) == length (mgModSummaries' mod_graph))
-              (vcat [text "Build plan missing nodes:", (text "PLAN:" <+> ppr (sum (map countMods build_plan))), (text "GRAPH:" <+> ppr (length (mgModSummaries' mod_graph )))])
+    assertPpr (sum (map countMods build_plan) == lengthMG mod_graph)
+              (vcat [text "Build plan missing nodes:", (text "PLAN:" <+> ppr (sum (map countMods build_plan))), (text "GRAPH:" <+> ppr (lengthMG mod_graph))])
               build_plan
 
 mkWorkerLimit :: DynFlags -> IO WorkerLimit
@@ -1142,7 +1145,7 @@ interpretBuildPlan hug mhmi_cache old_hpt plan = do
       !build_map <- getBuildMap
       hug_var <- gets hug_var
       -- 1. Get the direct dependencies of this module
-      let direct_deps = nodeDependencies False mod
+      let direct_deps = mgNodeDependencies False mod
           -- It's really important to force build_deps, or the whole buildMap is retained,
           -- which would retain all the result variables, preventing us from collecting them
           -- after they are no longer used.
@@ -1150,14 +1153,14 @@ interpretBuildPlan hug mhmi_cache old_hpt plan = do
       let !build_action =
             case mod of
               InstantiationNode uid iu -> do
-                withCurrentUnit (moduleGraphNodeUnitId mod) $ do
+                withCurrentUnit (mgNodeUnitId mod) $ do
                   (hug, deps) <- wait_deps_hug hug_var build_deps
                   executeInstantiationNode mod_idx n_mods hug uid iu
                   return (Nothing, deps)
               ModuleNode _build_deps ms ->
                 let !old_hmi = M.lookup (msKey ms) old_hpt
                     rehydrate_mods = mapMaybe nodeKeyModName <$> rehydrate_nodes
-                in withCurrentUnit (moduleGraphNodeUnitId mod) $ do
+                in withCurrentUnit (mgNodeUnitId mod) $ do
                      (hug, deps) <- wait_deps_hug hug_var build_deps
                      hmi <- executeCompileNode mod_idx n_mods old_hmi hug rehydrate_mods ms
                      -- Write the HMI to an external cache (if one exists)
@@ -1166,9 +1169,9 @@ interpretBuildPlan hug mhmi_cache old_hpt plan = do
                      -- This global MVar is incrementally modified in order to avoid having to
                      -- recreate the HPT before compiling each module which leads to a quadratic amount of work.
                      liftIO $ modifyMVar_ hug_var (return . addHomeModInfoToHug hmi)
-                     return (Just hmi, addToModuleNameSet (moduleGraphNodeUnitId mod) (ms_mod_name ms) deps )
+                     return (Just hmi, addToModuleNameSet (mgNodeUnitId mod) (ms_mod_name ms) deps )
               LinkNode _nks uid -> do
-                  withCurrentUnit (moduleGraphNodeUnitId mod) $ do
+                  withCurrentUnit (mgNodeUnitId mod) $ do
                     (hug, deps) <- wait_deps_hug hug_var build_deps
                     executeLinkNode hug (mod_idx, n_mods) uid direct_deps
                     return (Nothing, deps)
@@ -1570,7 +1573,7 @@ downsweep :: HscEnv
           -> Bool               -- True <=> allow multiple targets to have
                                 --          the same module name; this is
                                 --          very useful for ghc -M
-          -> IO ([DriverMessages], [ModuleGraphNode])
+          -> IO ([DriverMessages], ModuleGraph)
                 -- The non-error elements of the returned list all have distinct
                 -- (Modules, IsBoot) identifiers, unless the Bool is true in
                 -- which case there can be repeats
@@ -1595,7 +1598,7 @@ downsweep_imports :: HscEnv
                   -> [ModuleName]
                   -> Bool
                   -> ([(UnitId, DriverMessages)], [ModSummary])
-                  -> IO ([DriverMessages], [ModuleGraphNode])
+                  -> IO ([DriverMessages], ModuleGraph)
 downsweep_imports hsc_env old_summaries excl_mods allow_dup_roots (root_errs, rootSummariesOk)
    = do
        let root_map = mkRootMap rootSummariesOk
@@ -1620,7 +1623,7 @@ downsweep_imports hsc_env old_summaries excl_mods allow_dup_roots (root_errs, ro
        th_enabled_nodes <- enableCodeGenForTH logger tmpfs unit_env all_nodes
        if null all_root_errs
          then return (all_errs, th_enabled_nodes)
-         else pure $ (all_root_errs, [])
+         else pure $ (all_root_errs, emptyMG)
      where
         -- Dependencies arising on a unit (backpack and module linking deps)
         unitModuleNodes :: [ModuleGraphNode] -> UnitId -> HomeUnitEnv -> [Either (Messages DriverMessage) ModuleGraphNode]
@@ -1870,7 +1873,7 @@ enableCodeGenForTH
   -> TmpFs
   -> UnitEnv
   -> [ModuleGraphNode]
-  -> IO [ModuleGraphNode]
+  -> IO ModuleGraph
 enableCodeGenForTH logger tmpfs unit_env =
   enableCodeGenWhen logger tmpfs TFL_CurrentModule TFL_GhcSession unit_env
 
@@ -1893,19 +1896,19 @@ enableCodeGenWhen
   -> TempFileLifetime
   -> UnitEnv
   -> [ModuleGraphNode]
-  -> IO [ModuleGraphNode]
-enableCodeGenWhen logger tmpfs staticLife dynLife unit_env mod_graph =
-  mapM enable_code_gen mod_graph
+  -> IO ModuleGraph
+enableCodeGenWhen logger tmpfs staticLife dynLife unit_env mod_graph = do
+  mgMapM enable_code_gen mg
   where
     defaultBackendOf ms = platformDefaultBackend (targetPlatform $ ue_unitFlags (ms_unitid ms) unit_env)
-    enable_code_gen :: ModuleGraphNode -> IO ModuleGraphNode
-    enable_code_gen n@(ModuleNode deps ms)
+    enable_code_gen :: ModSummary -> IO ModSummary
+    enable_code_gen ms
       | ModSummary
         { ms_location = ms_location
         , ms_hsc_src = HsSrcFile
         , ms_hspp_opts = dflags
         } <- ms
-      , Just enable_spec <- mkNodeKey n `Map.lookup` needs_codegen_map =
+      , Just enable_spec <- needs_codegen_map (NodeKey_Module (msKey ms)) =
       if | nocode_enable ms -> do
                let new_temp_file suf dynsuf = do
                      tn <- newTempName logger tmpfs (tmpDir dflags) staticLife suf
@@ -1937,7 +1940,7 @@ enableCodeGenWhen logger tmpfs staticLife dynLife unit_env mod_graph =
                      , ms_hspp_opts = updOptLevel 0 $ new_dflags
                      }
                -- Recursive call to catch the other cases
-               enable_code_gen (ModuleNode deps ms')
+               enable_code_gen ms'
 
          -- If -fprefer-byte-code then satisfy dependency by enabling bytecode (if normal object not enough)
          -- we only get to this case if the default backend is already generating object files, but we need dynamic
@@ -1947,21 +1950,21 @@ enableCodeGenWhen logger tmpfs staticLife dynLife unit_env mod_graph =
                      { ms_hspp_opts = gopt_set (ms_hspp_opts ms) Opt_ByteCodeAndObjectCode
                      }
                -- Recursive call to catch the other cases
-               enable_code_gen (ModuleNode deps ms')
+               enable_code_gen ms'
          | dynamic_too_enable enable_spec ms -> do
                let ms' = ms
                      { ms_hspp_opts = gopt_set (ms_hspp_opts ms) Opt_BuildDynamicToo
                      }
                -- Recursive call to catch the other cases
-               enable_code_gen (ModuleNode deps ms')
+               enable_code_gen ms'
          | ext_interp_enable ms -> do
                let ms' = ms
                      { ms_hspp_opts = gopt_set (ms_hspp_opts ms) Opt_ExternalInterpreter
                      }
                -- Recursive call to catch the other cases
-               enable_code_gen (ModuleNode deps ms')
+               enable_code_gen ms'
 
-         | otherwise -> return n
+         | otherwise -> return ms
 
     enable_code_gen ms = return ms
 
@@ -2018,23 +2021,23 @@ enableCodeGenWhen logger tmpfs staticLife dynLife unit_env mod_graph =
        lcl_dflags   = ms_hspp_opts ms
        internalInterpreter = not (gopt Opt_ExternalInterpreter lcl_dflags)
 
-    (mg, lookup_node) = moduleGraphNodes False mod_graph
-
-    mk_needed_set roots = Set.fromList $ map (mkNodeKey . node_payload) $ reachablesG mg (map (expectJust "needs_th" . lookup_node) roots)
+    mg = mkModuleGraph mod_graph
 
-    needs_obj_set, needs_bc_set :: Set.Set NodeKey
-    needs_obj_set = mk_needed_set need_obj_set
+    needs_obj_set, needs_bc_set :: NodeKey -> Bool
+    needs_obj_set k = k `elem` need_obj_set || mgQueryMany mg need_obj_set k
 
-    needs_bc_set = mk_needed_set need_bc_set
+    needs_bc_set k = k `elem` need_obj_set || mgQueryMany mg need_bc_set k
 
     -- A map which tells us how to enable code generation for a NodeKey
-    needs_codegen_map :: Map.Map NodeKey CodeGenEnable
-    needs_codegen_map =
+    needs_codegen_map :: NodeKey -> Maybe CodeGenEnable
+    needs_codegen_map nk =
       -- Another option here would be to just produce object code, rather than both object and
       -- byte code
-      Map.unionWith (\_ _ -> EnableByteCodeAndObject)
-        (Map.fromList $ [(m, EnableObject) | m <- Set.toList needs_obj_set])
-        (Map.fromList $ [(m, EnableByteCode) | m <- Set.toList needs_bc_set])
+      case (needs_obj_set nk, needs_bc_set nk) of
+        (True, True)   -> Just EnableByteCodeAndObject
+        (True, False)  -> Just EnableObject
+        (False, True)  -> Just EnableByteCode
+        (False, False) -> Nothing
 
     -- The direct dependencies of modules which require object code
     need_obj_set =
@@ -2470,14 +2473,14 @@ cyclicModuleErr mss
        Just path -> mkPlainErrorMsgEnvelope src_span $
                     GhcDriverMessage $ DriverModuleGraphCycle path
         where
-          src_span = maybe noSrcSpan (mkFileSrcSpan . ms_location) (moduleGraphNodeModSum (head path))
+          src_span = maybe noSrcSpan (mkFileSrcSpan . ms_location) (mgNodeModSum (head path))
   where
     graph :: [Node NodeKey ModuleGraphNode]
     graph =
       [ DigraphNode
         { node_payload = ms
         , node_key = mkNodeKey ms
-        , node_dependencies = nodeDependencies False ms
+        , node_dependencies = mgNodeDependencies False ms
         }
       | ms <- mss
       ]


=====================================
compiler/GHC/Unit/Module/Graph.hs
=====================================
@@ -2,52 +2,106 @@
 {-# LANGUAGE RecordWildCards #-}
 {-# LANGUAGE DeriveTraversable #-}
 
+-- | A module graph should be constructed once and never change from there onwards.
+--
+-- The only operations should be for building the 'ModuleGraph'
+-- (once and for all -- no update-like/insert-like functions)
+-- and querying the structure in various ways, e.g. to determine reachability.
+--
+-- We should avoid exposing fields like 'mg_mss' since it may be a footgun
+-- trying to use the nodes directly... We do still expose it, but it feels like
+-- all its use cases would be better served by a more proper ModuleGraph
+-- abstraction
 module GHC.Unit.Module.Graph
-   ( ModuleGraph
-   , ModuleGraphNode(..)
-   , nodeDependencies
+   (
+    -- * Construct a module graph
+    --
+    -- | A module graph should be constructed once by downsweep and never modified.
+     ModuleGraph
    , emptyMG
    , mkModuleGraph
-   , extendMG
-   , extendMGInst
-   , extendMG'
-   , unionMG
-   , isTemplateHaskellOrQQNonBoot
-   , filterToposortToModules
-   , mapMG
+
+    -- * Nodes in a module graph
+    --
+    -- | The user-facing nodes in a module graph are 'ModuleGraphNode's.
+    -- There are a few things which we can query out of each 'ModuleGraphNode':
+    --
+    -- - 'mgNodeDependencies' gets the immediate dependencies of this node
+    -- - 'mgNodeUnitId' returns the 'UnitId' of that node
+    -- - 'mgNodeModSum' extracts the 'ModSummary' of a node if exists
+   , ModuleGraphNode(..)
+   , mgNodeDependencies
+   , mgNodeModSum
+   , mgNodeUnitId
+
+    -- * Module graph operations
+   , lengthMG
+
+    -- ** 'ModSummary' operations
+    --
+    -- | A couple of operations on the module graph allow access to the
+    -- 'ModSummary's of the modules in it contained.
+    --
+    -- In particular, 'mapMG' and 'mapMGM' allow updating these 'ModSummary's
+    -- (without changing the 'ModuleGraph' structure itself!).
+    -- 'mgModSummaries' lists out all 'ModSummary's, and
+    -- 'mgLookupModule' looks up a 'ModSummary' for a given module.
+   , mgMap, mgMapM
    , mgModSummaries
-   , mgModSummaries'
    , mgLookupModule
-   , showModMsg
-   , moduleGraphNodeModule
-   , moduleGraphNodeModSum
-   , moduleGraphModulesBelow
+
+    -- ** Reachability queries
+    --
+    -- | A module graph explains the structure and relationship between the
+    -- modules being compiled. Often times, this structure is relevant to
+    -- answer reachability queries -- is X reachable from Y; or, what is the
+    -- transitive closure of Z?
    , mgReachable
    , mgQuery
-
+   , mgQueryMany
+
+    -- ** Other operations
+    --
+    -- | These operations allow more-internal-than-ideal access to the
+    -- ModuleGraph structure. Ideally, we could restructure the code using
+    -- these functions to avoid deconstructing/reconstructing the ModuleGraph
+    -- and instead extend the "proper interface" of the ModuleGraph to achieve
+    -- what is currently done but through a better abstraction.
+   , mgModSummaries'
    , moduleGraphNodes
-   , SummaryNode
-   , summaryNodeSummary
+   , moduleGraphModulesBelow -- needed for 'hptSomeThingsBelowUs',
+                             -- but I think we could be more clever and cache
+                             -- the graph-ixs of boot modules to efficiently
+                             -- filter them out of the returned list.
+                             -- hptInstancesBelow is re-doing that work every
+                             -- time it's called.
+   , filterToposortToModules
 
+    -- * Keys into the 'ModuleGraph'
    , NodeKey(..)
+   , mkNodeKey
    , nodeKeyUnitId
    , nodeKeyModName
    , ModNodeKey
-   , mkNodeKey
+   , ModNodeKeyWithUid(..)
    , msKey
 
+    -- ** Internal node representation
+    --
+    -- | 'SummaryNode' is the internal representation for each node stored in
+    -- the graph. It's not immediately clear to me why users do depend on them.
+   , SummaryNode
+   , summaryNodeSummary
+   , summaryNodeKey
 
-   , moduleGraphNodeUnitId
-
-   , ModNodeKeyWithUid(..)
+    -- * Utilities
+   , showModMsg
    )
 where
 
 import GHC.Prelude
 import GHC.Platform
 
-import qualified GHC.LanguageExtensions as LangExt
-
 import GHC.Data.Maybe
 import GHC.Data.Graph.Directed
 import GHC.Data.Graph.Directed.Reachability
@@ -73,7 +127,44 @@ import GHC.Linker.Static.Utils
 import Data.Bifunctor
 import Data.Function
 import Data.List (sort)
-import GHC.Data.List.SetOps
+import Control.Monad
+
+-- | A '@ModuleGraph@' contains all the nodes from the home package (only). See
+-- '@ModuleGraphNode@' for information about the nodes.
+--
+-- Modules need to be compiled. hs-boots need to be typechecked before
+-- the associated "real" module so modules with {-# SOURCE #-} imports can be
+-- built. Instantiations also need to be typechecked to ensure that the module
+-- fits the signature. Substantiation typechecking is roughly comparable to the
+-- check that the module and its hs-boot agree.
+--
+-- The graph is not necessarily stored in topologically-sorted order. Use
+-- 'GHC.topSortModuleGraph' and 'GHC.Data.Graph.Directed.flattenSCC' to achieve this.
+data ModuleGraph = ModuleGraph
+  { mg_mss :: [ModuleGraphNode]
+  , mg_graph :: (ReachabilityIndex SummaryNode, NodeKey -> Maybe SummaryNode)
+    -- A cached transitive dependency calculation so that a lot of work is not
+    -- repeated whenever the transitive dependencies need to be calculated (for example, hptInstances)
+  }
+
+-- | Why do we ever need to construct empty graphs? Is it because of one shot mode?
+emptyMG :: ModuleGraph
+emptyMG = ModuleGraph [] (graphReachability emptyGraph, const Nothing)
+
+-- | Construct a module graph. This function should be the only entry point for
+-- building a 'ModuleGraph', since it is supposed to be built once and never modified.
+--
+-- If you ever find the need to build a 'ModuleGraph' iteratively, don't
+-- add insert and update functions to the API since they become footguns.
+-- Instead, design an API that allows iterative construction without posterior
+-- modification, perhaps like what is done for building arrays from mutable
+-- arrays.
+mkModuleGraph :: [ModuleGraphNode] -> ModuleGraph
+mkModuleGraph = foldr (flip extendMG') emptyMG
+
+--------------------------------------------------------------------------------
+-- * Module Graph Nodes
+--------------------------------------------------------------------------------
 
 -- | A '@ModuleGraphNode@' is a node in the '@ModuleGraph@'.
 -- Edges between nodes mark dependencies arising from module imports
@@ -87,16 +178,36 @@ data ModuleGraphNode
   -- | Link nodes are whether are are creating a linked product (ie executable/shared object etc) for a unit.
   | LinkNode [NodeKey] UnitId
 
-moduleGraphNodeModule :: ModuleGraphNode -> Maybe ModuleName
-moduleGraphNodeModule mgn = ms_mod_name <$> (moduleGraphNodeModSum mgn)
+-- | Collect the immediate dependencies of a ModuleGraphNode,
+-- optionally avoiding hs-boot dependencies.
+-- If the drop_hs_boot_nodes flag is False, and if this is a .hs and there is
+-- an equivalent .hs-boot, add a link from the former to the latter.  This
+-- has the effect of detecting bogus cases where the .hs-boot depends on the
+-- .hs, by introducing a cycle.  Additionally, it ensures that we will always
+-- process the .hs-boot before the .hs, and so the HomePackageTable will always
+-- have the most up to date information.
+mgNodeDependencies :: Bool -> ModuleGraphNode -> [NodeKey]
+mgNodeDependencies drop_hs_boot_nodes = \case
+    LinkNode deps _uid -> deps
+    InstantiationNode uid iuid ->
+      NodeKey_Module . (\mod -> ModNodeKeyWithUid (GWIB mod NotBoot) uid)  <$> uniqDSetToList (instUnitHoles iuid)
+    ModuleNode deps _ms ->
+      map drop_hs_boot deps
+  where
+    -- Drop hs-boot nodes by using HsSrcFile as the key
+    hs_boot_key | drop_hs_boot_nodes = NotBoot -- is regular mod or signature
+                | otherwise          = IsBoot
+
+    drop_hs_boot (NodeKey_Module (ModNodeKeyWithUid (GWIB mn IsBoot) uid)) = (NodeKey_Module (ModNodeKeyWithUid (GWIB mn hs_boot_key) uid))
+    drop_hs_boot x = x
 
-moduleGraphNodeModSum :: ModuleGraphNode -> Maybe ModSummary
-moduleGraphNodeModSum (InstantiationNode {}) = Nothing
-moduleGraphNodeModSum (LinkNode {})          = Nothing
-moduleGraphNodeModSum (ModuleNode _ ms)      = Just ms
+mgNodeModSum :: ModuleGraphNode -> Maybe ModSummary
+mgNodeModSum (InstantiationNode {}) = Nothing
+mgNodeModSum (LinkNode {})          = Nothing
+mgNodeModSum (ModuleNode _ ms)      = Just ms
 
-moduleGraphNodeUnitId :: ModuleGraphNode -> UnitId
-moduleGraphNodeUnitId mgn =
+mgNodeUnitId :: ModuleGraphNode -> UnitId
+mgNodeUnitId mgn =
   case mgn of
     InstantiationNode uid _iud -> uid
     ModuleNode _ ms           -> toUnitId (moduleUnit (ms_mod ms))
@@ -114,79 +225,43 @@ instance Eq ModuleGraphNode where
 instance Ord ModuleGraphNode where
   compare = compare `on` mkNodeKey
 
-data NodeKey = NodeKey_Unit {-# UNPACK #-} !InstantiatedUnit
-             | NodeKey_Module {-# UNPACK #-} !ModNodeKeyWithUid
-             | NodeKey_Link !UnitId
-  deriving (Eq, Ord)
-
-instance Outputable NodeKey where
-  ppr nk = pprNodeKey nk
+--------------------------------------------------------------------------------
+-- * Module Graph operations
+--------------------------------------------------------------------------------
 
-pprNodeKey :: NodeKey -> SDoc
-pprNodeKey (NodeKey_Unit iu) = ppr iu
-pprNodeKey (NodeKey_Module mk) = ppr mk
-pprNodeKey (NodeKey_Link uid)  = ppr uid
+-- | Returns the number of nodes in a 'ModuleGraph'
+lengthMG :: ModuleGraph -> Int
+lengthMG = length . mg_mss
 
-nodeKeyUnitId :: NodeKey -> UnitId
-nodeKeyUnitId (NodeKey_Unit iu)   = instUnitInstanceOf iu
-nodeKeyUnitId (NodeKey_Module mk) = mnkUnitId mk
-nodeKeyUnitId (NodeKey_Link uid)  = uid
-
-nodeKeyModName :: NodeKey -> Maybe ModuleName
-nodeKeyModName (NodeKey_Module mk) = Just (gwib_mod $ mnkModuleName mk)
-nodeKeyModName _ = Nothing
-
-data ModNodeKeyWithUid = ModNodeKeyWithUid { mnkModuleName :: !ModuleNameWithIsBoot
-                                           , mnkUnitId     :: !UnitId } deriving (Eq, Ord)
-
-instance Outputable ModNodeKeyWithUid where
-  ppr (ModNodeKeyWithUid mnwib uid) = ppr uid <> colon <> ppr mnwib
-
--- | A '@ModuleGraph@' contains all the nodes from the home package (only). See
--- '@ModuleGraphNode@' for information about the nodes.
---
--- Modules need to be compiled. hs-boots need to be typechecked before
--- the associated "real" module so modules with {-# SOURCE #-} imports can be
--- built. Instantiations also need to be typechecked to ensure that the module
--- fits the signature. Substantiation typechecking is roughly comparable to the
--- check that the module and its hs-boot agree.
---
--- The graph is not necessarily stored in topologically-sorted order.  Use
--- 'GHC.topSortModuleGraph' and 'GHC.Data.Graph.Directed.flattenSCC' to achieve this.
-data ModuleGraph = ModuleGraph
-  { mg_mss :: [ModuleGraphNode]
-  , mg_graph :: (ReachabilityIndex SummaryNode, NodeKey -> Maybe SummaryNode)
-    -- A cached transitive dependency calculation so that a lot of work is not
-    -- repeated whenever the transitive dependencies need to be calculated (for example, hptInstances)
-  }
+--------------------------------------------------------------------------------
+-- ** ModSummaries
+--------------------------------------------------------------------------------
 
 -- | Map a function 'f' over all the 'ModSummaries'.
--- To preserve invariants 'f' can't change the isBoot status.
-mapMG :: (ModSummary -> ModSummary) -> ModuleGraph -> ModuleGraph
-mapMG f mg at ModuleGraph{..} = mg
+-- To preserve invariants, 'f' can't change the isBoot status.
+mgMap :: (ModSummary -> ModSummary) -> ModuleGraph -> ModuleGraph
+mgMap f mg at ModuleGraph{..} = mg
   { mg_mss = flip fmap mg_mss $ \case
       InstantiationNode uid iuid -> InstantiationNode uid iuid
       LinkNode uid nks -> LinkNode uid nks
       ModuleNode deps ms  -> ModuleNode deps (f ms)
   }
 
-unionMG :: ModuleGraph -> ModuleGraph -> ModuleGraph
-unionMG a b =
-  let new_mss = nubOrdBy compare $ mg_mss a `mappend` mg_mss b
-  in ModuleGraph {
-        mg_mss = new_mss
-      , mg_graph = mkTransDeps new_mss
-      }
-
-mkTransDeps :: [ModuleGraphNode] -> (ReachabilityIndex SummaryNode, NodeKey -> Maybe SummaryNode)
-mkTransDeps = first graphReachability {- module graph is acyclic -} . moduleGraphNodes False
+-- | Map a function 'f' over all the 'ModSummaries', in 'IO'.
+-- To preserve invariants, 'f' can't change the isBoot status.
+mgMapM :: (ModSummary -> IO ModSummary) -> ModuleGraph -> IO ModuleGraph
+mgMapM f mg at ModuleGraph{..} = do
+  mss' <- forM mg_mss $ \case
+    InstantiationNode uid iuid -> pure $ InstantiationNode uid iuid
+    LinkNode uid nks -> pure $ LinkNode uid nks
+    ModuleNode deps ms  -> ModuleNode deps <$> (f ms)
+  return mg
+    { mg_mss = mss'
+    }
 
 mgModSummaries :: ModuleGraph -> [ModSummary]
 mgModSummaries mg = [ m | ModuleNode _ m <- mgModSummaries' mg ]
 
-mgModSummaries' :: ModuleGraph -> [ModuleGraphNode]
-mgModSummaries' = mg_mss
-
 -- | Look up a ModSummary in the ModuleGraph
 -- Looks up the non-boot ModSummary
 -- Linear in the size of the module graph
@@ -199,128 +274,53 @@ mgLookupModule ModuleGraph{..} m = listToMaybe $ mapMaybe go mg_mss
       = Just ms
     go _ = Nothing
 
-emptyMG :: ModuleGraph
-emptyMG = ModuleGraph [] (graphReachability emptyGraph, const Nothing)
-
-isTemplateHaskellOrQQNonBoot :: ModSummary -> Bool
-isTemplateHaskellOrQQNonBoot ms =
-  (xopt LangExt.TemplateHaskell (ms_hspp_opts ms)
-    || xopt LangExt.QuasiQuotes (ms_hspp_opts ms)) &&
-  (isBootSummary ms == NotBoot)
-
--- | Add an ExtendedModSummary to ModuleGraph. Assumes that the new ModSummary is
--- not an element of the ModuleGraph.
-extendMG :: ModuleGraph -> [NodeKey] -> ModSummary -> ModuleGraph
-extendMG ModuleGraph{..} deps ms = ModuleGraph
-  { mg_mss = ModuleNode deps ms : mg_mss
-  , mg_graph = mkTransDeps (ModuleNode deps ms : mg_mss)
-  }
-
-extendMGInst :: ModuleGraph -> UnitId -> InstantiatedUnit -> ModuleGraph
-extendMGInst mg uid depUnitId = mg
-  { mg_mss = InstantiationNode uid depUnitId : mg_mss mg
-  }
-
-extendMGLink :: ModuleGraph -> UnitId -> [NodeKey] -> ModuleGraph
-extendMGLink mg uid nks = mg { mg_mss = LinkNode nks uid : mg_mss mg }
+--------------------------------------------------------------------------------
+-- ** Reachability
+--------------------------------------------------------------------------------
 
-extendMG' :: ModuleGraph -> ModuleGraphNode -> ModuleGraph
-extendMG' mg = \case
-  InstantiationNode uid depUnitId -> extendMGInst mg uid depUnitId
-  ModuleNode deps ms -> extendMG mg deps ms
-  LinkNode deps uid   -> extendMGLink mg uid deps
-
-mkModuleGraph :: [ModuleGraphNode] -> ModuleGraph
-mkModuleGraph = foldr (flip extendMG') emptyMG
-
--- | This function filters out all the instantiation nodes from each SCC of a
--- topological sort. Use this with care, as the resulting "strongly connected components"
--- may not really be strongly connected in a direct way, as instantiations have been
--- removed. It would probably be best to eliminate uses of this function where possible.
-filterToposortToModules
-  :: [SCC ModuleGraphNode] -> [SCC ModSummary]
-filterToposortToModules = mapMaybe $ mapMaybeSCC $ \case
-  InstantiationNode _ _ -> Nothing
-  LinkNode{} -> Nothing
-  ModuleNode _deps node -> Just node
-  where
-    -- This higher order function is somewhat bogus,
-    -- as the definition of "strongly connected component"
-    -- is not necessarily respected.
-    mapMaybeSCC :: (a -> Maybe b) -> SCC a -> Maybe (SCC b)
-    mapMaybeSCC f = \case
-      AcyclicSCC a -> AcyclicSCC <$> f a
-      CyclicSCC as -> case mapMaybe f as of
-        [] -> Nothing
-        [a] -> Just $ AcyclicSCC a
-        as -> Just $ CyclicSCC as
-
-showModMsg :: DynFlags -> Bool -> ModuleGraphNode -> SDoc
-showModMsg dflags _ (LinkNode {}) =
-      let staticLink = case ghcLink dflags of
-                          LinkStaticLib -> True
-                          _ -> False
-
-          platform  = targetPlatform dflags
-          arch_os   = platformArchOS platform
-          exe_file  = exeFileName arch_os staticLink (outputFile_ dflags)
-      in text exe_file
-showModMsg _ _ (InstantiationNode _uid indef_unit) =
-  ppr $ instUnitInstanceOf indef_unit
-showModMsg dflags recomp (ModuleNode _ mod_summary) =
-  if gopt Opt_HideSourcePaths dflags
-      then text mod_str
-      else hsep $
-         [ text (mod_str ++ replicate (max 0 (16 - length mod_str)) ' ')
-         , char '('
-         , text (op $ msHsFilePath mod_summary) <> char ','
-         , message, char ')' ]
-
-  where
-    op       = normalise
-    mod_str  = moduleNameString (moduleName (ms_mod mod_summary)) ++
-               hscSourceString (ms_hsc_src mod_summary)
-    dyn_file = op $ msDynObjFilePath mod_summary
-    obj_file = op $ msObjFilePath mod_summary
-    files    = [ obj_file ]
-               ++ [ dyn_file | gopt Opt_BuildDynamicToo dflags ]
-               ++ [ "interpreted" | gopt Opt_ByteCodeAndObjectCode dflags ]
-    message = case backendSpecialModuleSource (backend dflags) recomp of
-                Just special -> text special
-                Nothing -> foldr1 (\ofile rest -> ofile <> comma <+> rest) (map text files)
-
-
-
-type SummaryNode = Node Int ModuleGraphNode
+-- | Return all nodes reachable from the given 'NodeKey'.
+mgReachable :: ModuleGraph -> NodeKey -> Maybe [ModuleGraphNode]
+mgReachable mg nk = map summaryNodeSummary <$> modules_below where
+  (td_map, lookup_node) = mg_graph mg
+  modules_below =
+    allReachable td_map <$> lookup_node nk
 
-summaryNodeKey :: SummaryNode -> Int
-summaryNodeKey = node_key
+-- | Reachability Query.
+--
+-- @mgQuery(g, a, b)@ asks:
+-- Can we reach @b@ from @a@ in graph @g@?
+--
+-- Both @a@ and @b@ must be in @g at .
+mgQuery :: ModuleGraph -- ^ @g@
+        -> NodeKey -- ^ @a@
+        -> NodeKey -- ^ @b@
+        -> Bool -- ^ @b@ is reachable from @a@
+mgQuery mg nka nkb = isReachable td_map na nb where
+  (td_map, lookup_node) = mg_graph mg
+  na = expectJust "mgQuery:a" $ lookup_node nka
+  nb = expectJust "mgQuery:b" $ lookup_node nkb
 
-summaryNodeSummary :: SummaryNode -> ModuleGraphNode
-summaryNodeSummary = node_payload
+-- | Many roots reachability Query.
+--
+-- @mgQuery(g, roots, b)@ asks:
+-- Can we reach @b@ from any of the @roots@ in graph @g@?
+--
+-- Node @b@ must be in @g at .
+mgQueryMany :: ModuleGraph -- ^ @g@
+            -> [NodeKey] -- ^ @roots@
+            -> NodeKey -- ^ @b@
+            -> Bool -- ^ @b@ is reachable from @roots@
+mgQueryMany mg roots nkb = isReachableMany td_map nroots nb where
+  (td_map, lookup_node) = mg_graph mg
+  nroots = mapMaybe lookup_node roots
+  nb = expectJust "mgQuery:b" $ lookup_node nkb
 
--- | Collect the immediate dependencies of a ModuleGraphNode,
--- optionally avoiding hs-boot dependencies.
--- If the drop_hs_boot_nodes flag is False, and if this is a .hs and there is
--- an equivalent .hs-boot, add a link from the former to the latter.  This
--- has the effect of detecting bogus cases where the .hs-boot depends on the
--- .hs, by introducing a cycle.  Additionally, it ensures that we will always
--- process the .hs-boot before the .hs, and so the HomePackageTable will always
--- have the most up to date information.
-nodeDependencies :: Bool -> ModuleGraphNode -> [NodeKey]
-nodeDependencies drop_hs_boot_nodes = \case
-    LinkNode deps _uid -> deps
-    InstantiationNode uid iuid ->
-      NodeKey_Module . (\mod -> ModNodeKeyWithUid (GWIB mod NotBoot) uid)  <$> uniqDSetToList (instUnitHoles iuid)
-    ModuleNode deps _ms ->
-      map drop_hs_boot deps
-  where
-    -- Drop hs-boot nodes by using HsSrcFile as the key
-    hs_boot_key | drop_hs_boot_nodes = NotBoot -- is regular mod or signature
-                | otherwise          = IsBoot
+--------------------------------------------------------------------------------
+-- ** Other operations (read haddocks on export list)
+--------------------------------------------------------------------------------
 
-    drop_hs_boot (NodeKey_Module (ModNodeKeyWithUid (GWIB mn IsBoot) uid)) = (NodeKey_Module (ModNodeKeyWithUid (GWIB mn hs_boot_key) uid))
-    drop_hs_boot x = x
+mgModSummaries' :: ModuleGraph -> [ModuleGraphNode]
+mgModSummaries' = mg_mss
 
 -- | Turn a list of graph nodes into an efficient queriable graph.
 -- The first boolean parameter indicates whether nodes corresponding to hs-boot files
@@ -340,16 +340,16 @@ moduleGraphNodes drop_hs_boot_nodes summaries =
                 ModuleNode __deps ms | isBootSummary ms == IsBoot, drop_hs_boot_nodes
                   -- Using nodeDependencies here converts dependencies on other
                   -- boot files to dependencies on dependencies on non-boot files.
-                  -> Left (ms_mod ms, nodeDependencies drop_hs_boot_nodes s)
+                  -> Left (ms_mod ms, mgNodeDependencies drop_hs_boot_nodes s)
                 _ -> normal_case
           where
            normal_case =
-              let lkup_key = ms_mod <$> moduleGraphNodeModSum s
+              let lkup_key = ms_mod <$> mgNodeModSum s
                   extra = (lkup_key >>= \key -> Map.lookup key boot_summaries)
 
               in Right $ DigraphNode s key $ out_edge_keys $
                       (fromMaybe [] extra
-                        ++ nodeDependencies drop_hs_boot_nodes s)
+                        ++ mgNodeDependencies drop_hs_boot_nodes s)
 
     numbered_summaries = zip summaries [1..]
 
@@ -370,20 +370,6 @@ moduleGraphNodes drop_hs_boot_nodes summaries =
     out_edge_keys = mapMaybe lookup_key
         -- If we want keep_hi_boot_nodes, then we do lookup_key with
         -- IsBoot; else False
-newtype NodeMap a = NodeMap { unNodeMap :: Map.Map NodeKey a }
-  deriving (Functor, Traversable, Foldable)
-
-mkNodeKey :: ModuleGraphNode -> NodeKey
-mkNodeKey = \case
-  InstantiationNode _ iu -> NodeKey_Unit iu
-  ModuleNode _ x -> NodeKey_Module $ msKey x
-  LinkNode _ uid   -> NodeKey_Link uid
-
-msKey :: ModSummary -> ModNodeKeyWithUid
-msKey ms = ModNodeKeyWithUid (ms_mnwib ms) (ms_unitid ms)
-
-type ModNodeKey = ModuleNameWithIsBoot
-
 
 -- | This function returns all the modules belonging to the home-unit that can
 -- be reached by following the given dependencies. Additionally, if both the
@@ -409,21 +395,150 @@ moduleGraphModulesBelow mg uid mn = filtered_mods [ mn | NodeKey_Module mn <- mo
         | otherwise -> r1 : filter_mods (r2:rs)
       rs -> rs
 
-mgReachable :: ModuleGraph -> NodeKey -> Maybe [ModuleGraphNode]
-mgReachable mg nk = map summaryNodeSummary <$> modules_below where
-  (td_map, lookup_node) = mg_graph mg
-  modules_below =
-    allReachable td_map <$> lookup_node nk
+-- | This function filters out all the instantiation nodes from each SCC of a
+-- topological sort. Use this with care, as the resulting "strongly connected components"
+-- may not really be strongly connected in a direct way, as instantiations have been
+-- removed. It would probably be best to eliminate uses of this function where possible.
+filterToposortToModules
+  :: [SCC ModuleGraphNode] -> [SCC ModSummary]
+filterToposortToModules = mapMaybe $ mapMaybeSCC $ \case
+  InstantiationNode _ _ -> Nothing
+  LinkNode{} -> Nothing
+  ModuleNode _deps node -> Just node
+  where
+    -- This higher order function is somewhat bogus,
+    -- as the definition of "strongly connected component"
+    -- is not necessarily respected.
+    mapMaybeSCC :: (a -> Maybe b) -> SCC a -> Maybe (SCC b)
+    mapMaybeSCC f = \case
+      AcyclicSCC a -> AcyclicSCC <$> f a
+      CyclicSCC as -> case mapMaybe f as of
+        [] -> Nothing
+        [a] -> Just $ AcyclicSCC a
+        as -> Just $ CyclicSCC as
 
--- | Reachability Query. @mgQuery(g, a, b)@ asks: Can we reach @b@ from @a@ in
--- graph @g@?
--- INVARIANT: Both @a@ and @b@ must be in @g at .
-mgQuery :: ModuleGraph -- ^ @g@
-        -> NodeKey -- ^ @a@
-        -> NodeKey -- ^ @b@
-        -> Bool -- ^ @b@ is reachable from @a@
-mgQuery mg nka nkb = isReachable td_map na nb where
-  (td_map, lookup_node) = mg_graph mg
-  na = expectJust "mgQuery:a" $ lookup_node nka
-  nb = expectJust "mgQuery:b" $ lookup_node nkb
+--------------------------------------------------------------------------------
+-- * Keys into ModuleGraph
+--------------------------------------------------------------------------------
+
+data NodeKey = NodeKey_Unit {-# UNPACK #-} !InstantiatedUnit
+             | NodeKey_Module {-# UNPACK #-} !ModNodeKeyWithUid
+             | NodeKey_Link !UnitId
+  deriving (Eq, Ord)
+
+instance Outputable NodeKey where
+  ppr (NodeKey_Unit iu)   = ppr iu
+  ppr (NodeKey_Module mk) = ppr mk
+  ppr (NodeKey_Link uid)  = ppr uid
+
+mkNodeKey :: ModuleGraphNode -> NodeKey
+mkNodeKey = \case
+  InstantiationNode _ iu -> NodeKey_Unit iu
+  ModuleNode _ x -> NodeKey_Module $ msKey x
+  LinkNode _ uid   -> NodeKey_Link uid
+
+nodeKeyUnitId :: NodeKey -> UnitId
+nodeKeyUnitId (NodeKey_Unit iu)   = instUnitInstanceOf iu
+nodeKeyUnitId (NodeKey_Module mk) = mnkUnitId mk
+nodeKeyUnitId (NodeKey_Link uid)  = uid
+
+nodeKeyModName :: NodeKey -> Maybe ModuleName
+nodeKeyModName (NodeKey_Module mk) = Just (gwib_mod $ mnkModuleName mk)
+nodeKeyModName _ = Nothing
+
+type ModNodeKey = ModuleNameWithIsBoot
+data ModNodeKeyWithUid = ModNodeKeyWithUid { mnkModuleName :: !ModuleNameWithIsBoot
+                                           , mnkUnitId     :: !UnitId } deriving (Eq, Ord)
+
+instance Outputable ModNodeKeyWithUid where
+  ppr (ModNodeKeyWithUid mnwib uid) = ppr uid <> colon <> ppr mnwib
+
+msKey :: ModSummary -> ModNodeKeyWithUid
+msKey ms = ModNodeKeyWithUid (ms_mnwib ms) (ms_unitid ms)
+
+--------------------------------------------------------------------------------
+-- ** Internal node representation (exposed)
+--------------------------------------------------------------------------------
+
+type SummaryNode = Node Int ModuleGraphNode
+
+summaryNodeKey :: SummaryNode -> Int
+summaryNodeKey = node_key
+
+summaryNodeSummary :: SummaryNode -> ModuleGraphNode
+summaryNodeSummary = node_payload
+
+--------------------------------------------------------------------------------
+-- * Misc utilities
+--------------------------------------------------------------------------------
+
+showModMsg :: DynFlags -> Bool -> ModuleGraphNode -> SDoc
+showModMsg dflags _ (LinkNode {}) =
+      let staticLink = case ghcLink dflags of
+                          LinkStaticLib -> True
+                          _ -> False
+
+          platform  = targetPlatform dflags
+          arch_os   = platformArchOS platform
+          exe_file  = exeFileName arch_os staticLink (outputFile_ dflags)
+      in text exe_file
+showModMsg _ _ (InstantiationNode _uid indef_unit) =
+  ppr $ instUnitInstanceOf indef_unit
+showModMsg dflags recomp (ModuleNode _ mod_summary) =
+  if gopt Opt_HideSourcePaths dflags
+      then text mod_str
+      else hsep $
+         [ text (mod_str ++ replicate (max 0 (16 - length mod_str)) ' ')
+         , char '('
+         , text (op $ msHsFilePath mod_summary) <> char ','
+         , message, char ')' ]
+
+  where
+    op       = normalise
+    mod_str  = moduleNameString (moduleName (ms_mod mod_summary)) ++
+               hscSourceString (ms_hsc_src mod_summary)
+    dyn_file = op $ msDynObjFilePath mod_summary
+    obj_file = op $ msObjFilePath mod_summary
+    files    = [ obj_file ]
+               ++ [ dyn_file | gopt Opt_BuildDynamicToo dflags ]
+               ++ [ "interpreted" | gopt Opt_ByteCodeAndObjectCode dflags ]
+    message = case backendSpecialModuleSource (backend dflags) recomp of
+                Just special -> text special
+                Nothing -> foldr1 (\ofile rest -> ofile <> comma <+> rest) (map text files)
+
+--------------------------------------------------------------------------------
+-- * Internal methods for module graph
+--
+-- These are *really* meant to be internal!
+-- Don't expose them without careful consideration about the invariants
+-- described in the export list haddocks.
+--------------------------------------------------------------------------------
+
+newtype NodeMap a = NodeMap { unNodeMap :: Map.Map NodeKey a }
+  deriving (Functor, Traversable, Foldable)
+
+mkTransDeps :: [ModuleGraphNode] -> (ReachabilityIndex SummaryNode, NodeKey -> Maybe SummaryNode)
+mkTransDeps = first graphReachability {- module graph is acyclic -} . moduleGraphNodes False
+
+extendMG' :: ModuleGraph -> ModuleGraphNode -> ModuleGraph
+extendMG' mg = \case
+  InstantiationNode uid depUnitId -> extendMGInst mg uid depUnitId
+  ModuleNode deps ms -> extendMG mg deps ms
+  LinkNode deps uid   -> extendMGLink mg uid deps
+
+-- | Add an ExtendedModSummary to ModuleGraph. Assumes that the new ModSummary is
+-- not an element of the ModuleGraph.
+extendMG :: ModuleGraph -> [NodeKey] -> ModSummary -> ModuleGraph
+extendMG ModuleGraph{..} deps ms = ModuleGraph
+  { mg_mss = ModuleNode deps ms : mg_mss
+  , mg_graph = mkTransDeps (ModuleNode deps ms : mg_mss)
+  }
+
+extendMGInst :: ModuleGraph -> UnitId -> InstantiatedUnit -> ModuleGraph
+extendMGInst mg uid depUnitId = mg
+  { mg_mss = InstantiationNode uid depUnitId : mg_mss mg
+  }
+
+extendMGLink :: ModuleGraph -> UnitId -> [NodeKey] -> ModuleGraph
+extendMGLink mg uid nks = mg { mg_mss = LinkNode nks uid : mg_mss mg }
 


=====================================
compiler/GHC/Unit/Module/ModSummary.hs
=====================================
@@ -24,12 +24,14 @@ module GHC.Unit.Module.ModSummary
    , msDynObjFileOsPath
    , msDeps
    , isBootSummary
+   , isTemplateHaskellOrQQNonBoot
    , findTarget
    )
 where
 
 import GHC.Prelude
 
+import qualified GHC.LanguageExtensions as LangExt
 import GHC.Hs
 
 import GHC.Driver.DynFlags
@@ -163,6 +165,12 @@ msDynObjFileOsPath ms = ml_dyn_obj_file_ospath (ms_location ms)
 isBootSummary :: ModSummary -> IsBootInterface
 isBootSummary ms = if ms_hsc_src ms == HsBootFile then IsBoot else NotBoot
 
+isTemplateHaskellOrQQNonBoot :: ModSummary -> Bool
+isTemplateHaskellOrQQNonBoot ms =
+  (xopt LangExt.TemplateHaskell (ms_hspp_opts ms)
+    || xopt LangExt.QuasiQuotes (ms_hspp_opts ms)) &&
+  (isBootSummary ms == NotBoot)
+
 ms_mnwib :: ModSummary -> ModuleNameWithIsBoot
 ms_mnwib ms = GWIB (ms_mod_name ms) (isBootSummary ms)
 
@@ -206,4 +214,3 @@ findTarget ms ts =
     _ `matches` _
         = False
 
-


=====================================
testsuite/tests/ghc-api/downsweep/OldModLocation.hs
=====================================
@@ -61,7 +61,7 @@ main = do
     -- using the 'location' parameter we'd end up using the old location of
     -- the "B" module in this test. Make sure that doesn't happen.
 
-    hPrint stderr $ sort (map (ml_hs_file . ms_location) (mapMaybe moduleGraphNodeModSum nodes))
+    hPrint stderr $ sort (map (ml_hs_file . ms_location) (mapMaybe mgNodeModSum nodes))
 
 writeMod :: [String] -> IO ()
 writeMod src@(head -> stripPrefix "module " -> Just (takeWhile (/=' ') -> mod))


=====================================
testsuite/tests/ghc-api/downsweep/PartialDownsweep.hs
=====================================
@@ -171,7 +171,7 @@ go label mods cnd =
     hsc_env <- getSession
     (_, nodes) <- liftIO $ downsweep hsc_env mkUnknownDiagnostic Nothing [] [] False
 
-    it label $ cnd (mapMaybe moduleGraphNodeModSum nodes)
+    it label $ cnd (mapMaybe mgNodeModSum nodes)
 
 
 writeMod :: [String] -> IO ()



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/062076f1469da23aa81bcc8bf87038141f71dc2e

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/062076f1469da23aa81bcc8bf87038141f71dc2e
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241127/5f36a1f1/attachment-0001.html>


More information about the ghc-commits mailing list