[Git][ghc/ghc][wip/romes/graph-compact-easy] Fix performance issue
Matthew Pickering (@mpickering)
gitlab at gitlab.haskell.org
Fri Jan 3 13:01:15 UTC 2025
Matthew Pickering pushed to branch wip/romes/graph-compact-easy at Glasgow Haskell Compiler / GHC
Commits:
499c8c70 by Matthew Pickering at 2025-01-03T13:00:57+00:00
Fix performance issue
- - - - -
7 changed files:
- compiler/GHC/Core/Opt/Pipeline.hs
- compiler/GHC/Driver/Env.hs
- compiler/GHC/Driver/Make.hs
- compiler/GHC/Tc/Module.hs
- compiler/GHC/Unit/Env.hs
- compiler/GHC/Unit/Home/Graph.hs
- compiler/GHC/Unit/Home/PackageTable.hs
Changes:
=====================================
compiler/GHC/Core/Opt/Pipeline.hs
=====================================
@@ -99,7 +99,7 @@ core2core hsc_env guts@(ModGuts { mg_module = mod
dflags = hsc_dflags hsc_env
logger = hsc_logger hsc_env
extra_vars = interactiveInScope (hsc_IC hsc_env)
- home_pkg_rules = UnitEnv.hugRulesBelow (hsc_unit_env hsc_env) (hsc_mod_graph hsc_env) (moduleUnitId mod)
+ home_pkg_rules = rulesBelow hsc_env (moduleUnitId mod)
(GWIB { gwib_mod = moduleName mod, gwib_isBoot = NotBoot })
name_ppr_ctx = mkNamePprCtx ptc (hsc_unit_env hsc_env) rdr_env
ptc = initPromotionTickContext dflags
=====================================
compiler/GHC/Driver/Env.hs
=====================================
@@ -29,6 +29,10 @@ module GHC.Driver.Env
, lookupIfaceByModule
, mainModIs
+ , rulesBelow
+ , instancesBelow
+ , annsBelow
+
-- * Legacy API
, hscUpdateHPT
)
@@ -52,6 +56,8 @@ import GHC.Unit.Module.ModIface
import GHC.Unit.Module.ModDetails
import GHC.Unit.Home.ModInfo
import GHC.Unit.Home.PackageTable
+import GHC.Unit.Home.Graph
+import GHC.Unit.Module.Graph
import qualified GHC.Unit.Home.Graph as HUG
import GHC.Unit.Env as UnitEnv
import GHC.Unit.External
@@ -71,6 +77,13 @@ import GHC.Utils.Panic
import GHC.Utils.Misc
import GHC.Utils.Logger
+import GHC.Core.Rules
+import GHC.Types.Annotations
+import GHC.Types.CompleteMatch
+import GHC.Core.InstEnv
+import GHC.Core.FamInstEnv
+import GHC.Builtin.Names
+
import Data.IORef
import qualified Data.Set as Set
@@ -199,6 +212,126 @@ configured via command-line flags (in `GHC.setSessionDynFlags`).
hscEPS :: HscEnv -> IO ExternalPackageState
hscEPS hsc_env = readIORef (euc_eps (ue_eps (hsc_unit_env hsc_env)))
+
+--------------------------------------------------------------------------------
+-- TODO
+--------------------------------------------------------------------------------
+-- WE MAY WANT TO CACHE SOME OF THESE AS WE BUILD UP THE HPT, to make these
+-- queries O(1). But it's kind of hard because they wouldn't be rehydrated!!!!!
+-- Then we'd have the HPT itself rehydrated, but the cached fields with
+-- bad references.
+
+-- | Get annotations from all modules "below" this one (in the dependency
+-- sense) within the home units. If the module is @Nothing@, returns /all/
+-- annotations in the home units.
+annsBelow :: HscEnv -> UnitId -> ModuleNameWithIsBoot -> IO AnnEnv
+annsBelow hsc_env uid mn = hugAnnsBelow hsc_env uid mn
+
+---- | Get rules from modules "below" this one (in the dependency sense) within
+--the home units.
+rulesBelow :: HscEnv -> UnitId -> ModuleNameWithIsBoot -> IO RuleBase
+rulesBelow hsc_env uid mn = hugRulesBelow hsc_env uid mn
+
+-- | Find instances visible from the given set of imports
+instancesBelow :: HscEnv -> UnitId -> ModuleNameWithIsBoot -> IO (InstEnv, [FamInst])
+instancesBelow hsc_env uid mn = hugInstancesBelow hsc_env uid mn
+
+--------------------------------------------------------------------------------
+-- * Queries on Transitive Closure
+--------------------------------------------------------------------------------
+-- ROMES:TODO: Something else I want to do here is to receive a ModuleGraph and
+-- then use the fast reachability queries to determine whether something is
+-- reachable or not. That means we can very efficiently filter out things which
+-- are not part of the transitive closure...
+--
+-- So, e.g. it could probably be done faster by filtering out a cached list of
+-- rules using a 'ReachabilityIndex' as the filter $O(1)$ fast queries.
+
+-- ROMES:TODO: Do something about the
+--
+-- | isOneShot (ghcMode (hsc_dflags hsc_env)) = []
+--
+-- shortcut that we used when this was a function on HscEnv...
+
+-- | Find all rules in modules that are in the transitive closure of the given
+-- module.
+--
+-- $O(n)$ in the number of dependencies?
+hugRulesBelow :: HscEnv -> UnitId -> ModuleNameWithIsBoot -> IO RuleBase
+hugRulesBelow hsc uid mn = foldr (flip extendRuleBaseList) emptyRuleBase <$>
+ hugSomeThingsBelowUs (md_rules . hm_details) False hsc uid mn
+
+-- | Get annotations from modules "below" this one (in the dependency sense)
+--
+-- $O(n)$ in the number of dependencies?
+hugAnnsBelow :: HscEnv -> UnitId -> ModuleNameWithIsBoot -> IO AnnEnv
+hugAnnsBelow hsc uid mn = foldr (flip extendAnnEnvList) emptyAnnEnv <$>
+ hugSomeThingsBelowUs (md_anns . hm_details) False hsc uid mn
+
+hugInstancesBelow :: HscEnv -> UnitId -> ModuleNameWithIsBoot -> IO (InstEnv, [FamInst])
+hugInstancesBelow hsc_env uid mnwib = do
+-- ouch... improve
+ let mn = gwib_mod mnwib
+ (insts, famInsts) <-
+ unzip . concat <$>
+ hugSomeThingsBelowUs (\mod_info ->
+ let details = hm_details mod_info
+ -- Don't include instances for the current module
+ in if moduleName (mi_module (hm_iface mod_info)) == mn
+ then []
+ else [(md_insts details, md_fam_insts details)])
+ True -- Include -hi-boot
+ hsc_env
+ uid
+ mnwib
+ return (foldl' unionInstEnv emptyInstEnv insts, concat famInsts)
+
+-- | Get things from modules in the transitive closure of the given module.
+--
+-- Note: Don't expose this function. We can improve the interface further --
+-- let's keep the queries on the HPT contained in this module so we can optimise
+-- internally without breaking the API to the rest of GHC. This is a footgun if
+-- exposed!
+--
+-- NOTE: We should be able to import this considerably with the reachability
+-- index and caching?...
+--
+-- For example, easiest to go through all modules and filter out the ones in the
+-- hpt via the module graph.........
+--
+-- TODO: This include_hi_boot business is also pretty weird. Do we need it at all?
+hugSomeThingsBelowUs :: (HomeModInfo -> [a]) -> Bool -> HscEnv -> UnitId -> ModuleNameWithIsBoot -> IO [[a]]
+hugSomeThingsBelowUs extract include_hi_boot hsc_env uid mn
+ = let hug = hsc_HUG hsc_env
+ mg = hsc_mod_graph hsc_env
+ in
+ sequence
+ [ things
+ -- "Finding each non-hi-boot module below me" maybe could be cached (well,
+ -- the inverse) in the module graph to avoid filtering the boots out of
+ -- the transitive closure out every time this is called
+ | (ModNodeKeyWithUid (GWIB { gwib_mod = mod, gwib_isBoot = is_boot }) mod_uid)
+ <- Set.toList (moduleGraphModulesBelow mg uid mn)
+ , include_hi_boot || (is_boot == NotBoot)
+
+ -- unsavoury: when compiling the base package with --make, we
+ -- sometimes try to look up RULES etc for GHC.Prim. GHC.Prim won't
+ -- be in the HPT, because we never compile it; it's in the EPT
+ -- instead. ToDo: clean up, and remove this slightly bogus filter:
+ , mod /= moduleName gHC_PRIM
+ , not (mod == gwib_mod mn && uid == mod_uid)
+
+ -- Look it up in the HUG
+ , let things = lookupHug hug mod_uid mod >>= \case
+ Just info -> return $ extract info
+ Nothing -> pprTrace "WARNING in hugSomeThingsBelowUs" msg mempty
+ msg = vcat [text "missing module" <+> ppr mod,
+ text "When starting from" <+> ppr mn,
+ text "below:" <+> ppr (moduleGraphModulesBelow mg uid mn),
+ text "Probable cause: out-of-date interface files"]
+ -- This really shouldn't happen, but see #962
+ ]
+
-- | Deal with gathering annotations in from all possible places
-- and combining them into a single 'AnnEnv'
prepareAnnotations :: HscEnv -> Maybe ModGuts -> IO AnnEnv
@@ -211,7 +344,7 @@ prepareAnnotations hsc_env mb_guts = do
-- entries regardless of dependency ordering.
get_mod mg = (moduleUnitId (mg_module mg), GWIB (moduleName (mg_module mg)) NotBoot)
home_pkg_anns <- fromMaybe (hugAllAnns (hsc_unit_env hsc_env))
- $ uncurry (hugAnnsBelow (hsc_unit_env hsc_env) (hsc_mod_graph hsc_env))
+ $ uncurry (hugAnnsBelow hsc_env)
. get_mod <$> mb_guts
let
other_pkg_anns = eps_ann_env eps
=====================================
compiler/GHC/Driver/Make.hs
=====================================
@@ -1212,11 +1212,11 @@ interpretBuildPlan hug mhmi_cache old_hpt plan = do
!build_deps = getDependencies (map gwib_mod deps) build_map
let loop_action = withCurrentUnit loop_unit $ do
!_ <- wait_deps build_deps
- hsc_env_TODO <- asks hsc_env
+ hsc_env <- asks hsc_env
let mns :: [ModuleName]
mns = mapMaybe (nodeKeyModName . gwib_mod) deps
- hmis' <- liftIO $ rehydrateAfter hsc_env_TODO mns
+ hmis' <- liftIO $ rehydrateAfter hsc_env mns
checkRehydrationInvariant hmis' deps
=====================================
compiler/GHC/Tc/Module.hs
=====================================
@@ -462,7 +462,7 @@ tcRnImports hsc_env import_decls
-- modules batch (@--make@) compiled before this one, but
-- which are not below this one.
; (home_insts, home_fam_insts) <- liftIO $
- UnitEnv.hugInstancesBelow (hsc_unit_env hsc_env) (hsc_mod_graph hsc_env) unitId mnwib
+ instancesBelow hsc_env unitId mnwib
-- Record boot-file info in the EPS, so that it's
-- visible to loadHiBootInterface in tcRnSrcDecls,
=====================================
compiler/GHC/Unit/Env.hs
=====================================
@@ -99,9 +99,6 @@ module GHC.Unit.Env
, hugCompleteSigs
, hugAllInstances
, hugAllAnns
- , hugAnnsBelow
- , hugRulesBelow
- , hugInstancesBelow
-- * Legacy API
@@ -141,21 +138,6 @@ import GHC.Core.FamInstEnv
-- The hard queries
--------------------------------------------------------------------------------
--- | Get annotations from all modules "below" this one (in the dependency
--- sense) within the home units. If the module is @Nothing@, returns /all/
--- annotations in the home units.
-hugAnnsBelow :: UnitEnv -> ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> IO AnnEnv
-hugAnnsBelow = HUG.annsBelow . ue_home_unit_graph
-
----- | Get rules from modules "below" this one (in the dependency sense) within
---the home units.
-hugRulesBelow :: UnitEnv -> ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> IO RuleBase
-hugRulesBelow = HUG.rulesBelow . ue_home_unit_graph
-
--- | Find instances visible from the given set of imports
-hugInstancesBelow :: UnitEnv -> ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> IO (InstEnv, [FamInst])
-hugInstancesBelow = HUG.instancesBelow . ue_home_unit_graph
-
-- | Find all the instance declarations (of classes and families) from
-- the Home Package Table filtered by the provided predicate function.
-- Used in @tcRnImports@, to select the instances that are in the
=====================================
compiler/GHC/Unit/Home/Graph.hs
=====================================
@@ -43,9 +43,6 @@ module GHC.Unit.Home.Graph
, allFamInstances
, allAnns
, allCompleteSigs
- , rulesBelow
- , annsBelow
- , instancesBelow
-- * Utilities
, hugSCCs
@@ -94,32 +91,6 @@ import GHC.Types.CompleteMatch
import GHC.Core.InstEnv
import GHC.Types.Name.Env
---------------------------------------------------------------------------------
--- TODO
---------------------------------------------------------------------------------
--- WE MAY WANT TO CACHE SOME OF THESE AS WE BUILD UP THE HPT, to make these
--- queries O(1). But it's kind of hard because they wouldn't be rehydrated!!!!!
--- Then we'd have the HPT itself rehydrated, but the cached fields with
--- bad references.
-
--- | Get annotations from all modules "below" this one (in the dependency
--- sense) within the home units. If the module is @Nothing@, returns /all/
--- annotations in the home units.
-annsBelow :: HomeUnitGraph -> ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> IO AnnEnv
-annsBelow hug mg uid mn = foldr go (pure emptyAnnEnv) hug where
- go hue = liftA2 plusAnnEnv (hptAnnsBelow (homeUnitEnv_hpt hue) mg uid mn)
-
----- | Get rules from modules "below" this one (in the dependency sense) within
---the home units.
-rulesBelow :: HomeUnitGraph -> ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> IO RuleBase
-rulesBelow hug mg uid mn = foldr go (pure emptyRuleBase) hug where
- go hue = liftA2 plusNameEnv (hptRulesBelow (homeUnitEnv_hpt hue) mg uid mn)
-
--- | Find instances visible from the given set of imports
-instancesBelow :: HomeUnitGraph -> ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> IO (InstEnv, [FamInst])
-instancesBelow hug mg uid mn = foldr go (pure (emptyInstEnv, [])) hug where
- go hue = liftA2 (\(a,b) (a',b') -> (a `unionInstEnv` a', b ++ b'))
- (hptInstancesBelow (homeUnitEnv_hpt hue) mg uid mn)
-- | Get all 'CompleteMatches' (arising from COMPLETE pragmas) present across
-- all home units.
=====================================
compiler/GHC/Unit/Home/PackageTable.hs
=====================================
@@ -58,15 +58,6 @@ module GHC.Unit.Home.PackageTable
, hptAllFamInstances
, hptAllAnnotations
- -- ** Transitive closure queries
- --
- -- | These are the queries which also require access to the 'ModuleGraph'
- -- which describes the structure of the modules, rather than being "global queries".
- -- Typically about the transitive closure
- , hptRulesBelow
- , hptAnnsBelow
- , hptInstancesBelow
-
-- ** More Traversal-based queries
, hptCollectDependencies
, hptCollectObjects
@@ -250,99 +241,6 @@ hptAllFamInstances = fmap mkModuleEnv . concatHpt (\hmi -> [(hmiModule hmi, hmiF
hptAllAnnotations :: HomePackageTable -> IO AnnEnv
hptAllAnnotations = fmap mkAnnEnv . concatHpt (md_anns . hm_details)
---------------------------------------------------------------------------------
--- * Queries on Transitive Closure
---------------------------------------------------------------------------------
--- ROMES:TODO: Something else I want to do here is to receive a ModuleGraph and
--- then use the fast reachability queries to determine whether something is
--- reachable or not. That means we can very efficiently filter out things which
--- are not part of the transitive closure...
---
--- So, e.g. it could probably be done faster by filtering out a cached list of
--- rules using a 'ReachabilityIndex' as the filter $O(1)$ fast queries.
-
--- ROMES:TODO: Do something about the
---
--- | isOneShot (ghcMode (hsc_dflags hsc_env)) = []
---
--- shortcut that we used when this was a function on HscEnv...
-
--- | Find all rules in modules that are in the transitive closure of the given
--- module.
---
--- $O(n)$ in the number of dependencies?
-hptRulesBelow :: HomePackageTable -> ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> IO RuleBase
-hptRulesBelow hpt mg uid mn = foldr (flip extendRuleBaseList) emptyRuleBase <$>
- hptSomeThingsBelowUs (md_rules . hm_details) False hpt mg uid mn
-
--- | Get annotations from modules "below" this one (in the dependency sense)
---
--- $O(n)$ in the number of dependencies?
-hptAnnsBelow :: HomePackageTable -> ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> IO AnnEnv
-hptAnnsBelow hpt mg uid mn = foldr (flip extendAnnEnvList) emptyAnnEnv <$>
- hptSomeThingsBelowUs (md_anns . hm_details) False hpt mg uid mn
-
-hptInstancesBelow :: HomePackageTable -> ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> IO (InstEnv, [FamInst])
-hptInstancesBelow hpt mg uid mnwib = do
--- ouch... improve
- let mn = gwib_mod mnwib
- (insts, famInsts) <-
- unzip . concat <$>
- hptSomeThingsBelowUs (\mod_info ->
- let details = hm_details mod_info
- -- Don't include instances for the current module
- in if moduleName (mi_module (hm_iface mod_info)) == mn
- then []
- else [(md_insts details, md_fam_insts details)])
- True -- Include -hi-boot
- hpt
- mg
- uid
- mnwib
- return (foldl' unionInstEnv emptyInstEnv insts, concat famInsts)
-
--- | Get things from modules in the transitive closure of the given module.
---
--- Note: Don't expose this function. We can improve the interface further --
--- let's keep the queries on the HPT contained in this module so we can optimise
--- internally without breaking the API to the rest of GHC. This is a footgun if
--- exposed!
---
--- NOTE: We should be able to import this considerably with the reachability
--- index and caching?...
---
--- For example, easiest to go through all modules and filter out the ones in the
--- hpt via the module graph.........
---
--- TODO: This include_hi_boot business is also pretty weird. Do we need it at all?
-hptSomeThingsBelowUs :: (HomeModInfo -> [a]) -> Bool -> HomePackageTable -> ModuleGraph -> UnitId -> ModuleNameWithIsBoot -> IO [[a]]
-hptSomeThingsBelowUs extract include_hi_boot hpt mg uid mn
- = sequence
- [ things
- -- "Finding each non-hi-boot module below me" maybe could be cached (well,
- -- the inverse) in the module graph to avoid filtering the boots out of
- -- the transitive closure out every time this is called
- | (ModNodeKeyWithUid (GWIB { gwib_mod = mod, gwib_isBoot = is_boot }) mod_uid)
- <- Set.toList (moduleGraphModulesBelow mg uid mn)
- , include_hi_boot || (is_boot == NotBoot)
-
- -- unsavoury: when compiling the base package with --make, we
- -- sometimes try to look up RULES etc for GHC.Prim. GHC.Prim won't
- -- be in the HPT, because we never compile it; it's in the EPT
- -- instead. ToDo: clean up, and remove this slightly bogus filter:
- , mod /= moduleName gHC_PRIM
- , not (mod == gwib_mod mn && uid == mod_uid)
-
- -- Look it up in the HPT
- , let things = lookupHpt hpt mod >>= \case
- Just info -> return $ extract info
- Nothing -> pprTrace "WARNING in hptSomeThingsBelowUs" msg mempty
- msg = vcat [text "missing module" <+> ppr mod,
- text "When starting from" <+> ppr mn,
- text "below:" <+> ppr (moduleGraphModulesBelow mg uid mn),
- text "Probable cause: out-of-date interface files"]
- -- This really shouldn't happen, but see #962
- ]
--------------------------------------------------------------------------------
-- * Traversal-based queries
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/499c8c70f73010262d843da475577f416d3cf9a6
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/499c8c70f73010262d843da475577f416d3cf9a6
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250103/1212bf26/attachment-0001.html>
More information about the ghc-commits
mailing list