[Git][ghc/ghc][wip/fprof-overloaded] add -fprof-late-overloaded and -fprof-late-overloaded-calls
Finley McIlwaine (@FinleyMcIlwaine)
gitlab at gitlab.haskell.org
Mon Mar 4 17:02:53 UTC 2024
Finley McIlwaine pushed to branch wip/fprof-overloaded at Glasgow Haskell Compiler / GHC
Commits:
61bb5ff6 by Finley McIlwaine at 2024-03-04T09:01:40-08:00
add -fprof-late-overloaded and -fprof-late-overloaded-calls
* Refactor late cost centre insertion for extensibility
* Add two more late cost centre insertion methods that add SCCs to overloaded
top level bindings and call sites with dictionary arguments.
* Some tests for the basic functionality of the new insertion methods
Resolves: #24500
- - - - -
26 changed files:
- compiler/GHC/Core/LateCC.hs
- + compiler/GHC/Core/LateCC/OverloadedCalls.hs
- + compiler/GHC/Core/LateCC/TopLevelBinds.hs
- + compiler/GHC/Core/LateCC/Types.hs
- + compiler/GHC/Core/LateCC/Utils.hs
- compiler/GHC/Core/Opt/Pipeline.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/Tc/Utils/TcType.hs
- compiler/ghc.cabal.in
- docs/users_guide/9.10.1-notes.rst
- docs/users_guide/profiling.rst
- testsuite/tests/profiling/should_run/all.T
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.hs
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.prof.sample
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.stdout
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.hs
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.prof.sample
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.stdout
- + testsuite/tests/profiling/should_run/scc-prof-overloaded001.hs
- + testsuite/tests/profiling/should_run/scc-prof-overloaded001.prof.sample
- + testsuite/tests/profiling/should_run/scc-prof-overloaded001.stdout
- + testsuite/tests/profiling/should_run/scc-prof-overloaded002.hs
- + testsuite/tests/profiling/should_run/scc-prof-overloaded002.prof.sample
- + testsuite/tests/profiling/should_run/scc-prof-overloaded002.stdout
Changes:
=====================================
compiler/GHC/Core/LateCC.hs
=====================================
@@ -1,164 +1,90 @@
-{-# LANGUAGE DerivingStrategies #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE RecordWildCards #-}
--- | Adds cost-centers after the core piple has run.
+-- | Adds cost-centers after the core pipeline has run.
module GHC.Core.LateCC
- ( addLateCostCentresMG
- , addLateCostCentresPgm
- , addLateCostCentres -- Might be useful for API users
- , Env(..)
+ ( -- * Inserting cost centres
+ addLateCostCenters
) where
-import Control.Applicative
-import Control.Monad
-import qualified Data.Set as S
-
import GHC.Prelude
-import GHC.Types.CostCentre
-import GHC.Types.CostCentre.State
-import GHC.Types.Name hiding (varName)
-import GHC.Types.Tickish
-import GHC.Unit.Module.ModGuts
-import GHC.Types.Var
-import GHC.Unit.Types
-import GHC.Data.FastString
-import GHC.Core
-import GHC.Core.Opt.Monad
-import GHC.Core.Utils (mkTick)
-import GHC.Types.Id
-import GHC.Driver.DynFlags
+import GHC.Core
+import GHC.Core.LateCC.OverloadedCalls
+import GHC.Core.LateCC.TopLevelBinds
+import GHC.Core.LateCC.Types
+import GHC.Core.LateCC.Utils
+import GHC.Core.Seq
+import qualified GHC.Data.Strict as Strict
+import GHC.Core.Utils
+import GHC.Tc.Utils.TcType
+import GHC.Types.SrcLoc
+import GHC.Utils.Error
import GHC.Utils.Logger
import GHC.Utils.Outputable
-import GHC.Utils.Misc
-import GHC.Utils.Error (withTiming)
-import GHC.Utils.Monad.State.Strict
-
-
-{- Note [Collecting late cost centres]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Usually cost centres defined by a module are collected
-during tidy by collectCostCentres. However with `-fprof-late`
-we insert cost centres after inlining. So we keep a list of
-all the cost centres we inserted and combine that with the list
-of cost centres found during tidy.
-
-To avoid overhead when using -fprof-inline there is a flag to stop
-us from collecting them here when we run this pass before tidy.
-
-Note [Adding late cost centres]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The basic idea is very simple. For every top level binder
-`f = rhs` we compile it as if the user had written
-`f = {-# SCC f #-} rhs`.
-
-If we do this after unfoldings for `f` have been created this
-doesn't impact core-level optimizations at all. If we do it
-before the cost centre will be included in the unfolding and
-might inhibit optimizations at the call site. For this reason
-we provide flags for both approaches as they have different
-tradeoffs.
-
-We also don't add a cost centre for any binder that is a constructor
-worker or wrapper. These will never meaningfully enrich the resulting
-profile so we improve efficiency by omitting those.
-
--}
-
-addLateCostCentresMG :: ModGuts -> CoreM ModGuts
-addLateCostCentresMG guts = do
- dflags <- getDynFlags
- let env :: Env
- env = Env
- { thisModule = mg_module guts
- , countEntries = gopt Opt_ProfCountEntries dflags
- , collectCCs = False -- See Note [Collecting late cost centres]
- }
- let guts' = guts { mg_binds = fstOf3 (addLateCostCentres env (mg_binds guts))
- }
- return guts'
-
-addLateCostCentresPgm :: DynFlags -> Logger -> Module -> CoreProgram -> IO (CoreProgram, S.Set CostCentre, CostCentreState)
-addLateCostCentresPgm dflags logger mod binds =
- withTiming logger
- (text "LateCC"<+>brackets (ppr mod))
- (\(a,b,c) -> a `seqList` (b `seq` (c `seq` ()))) $ do
- let env = Env
- { thisModule = mod
- , countEntries = gopt Opt_ProfCountEntries dflags
- , collectCCs = True -- See Note [Collecting late cost centres]
- }
- (binds', ccs, cc_state) = addLateCostCentres env binds
- when (dopt Opt_D_dump_late_cc dflags || dopt Opt_D_verbose_core2core dflags) $
- putDumpFileMaybe logger Opt_D_dump_late_cc "LateCC" FormatCore (vcat (map ppr binds'))
- return (binds', ccs, cc_state)
-addLateCostCentres :: Env -> CoreProgram -> (CoreProgram, S.Set CostCentre, CostCentreState)
-addLateCostCentres env binds =
- let (binds', state) = runState (mapM (doBind env) binds) initLateCCState
- in (binds', lcs_ccs state, lcs_state state)
-
-
-doBind :: Env -> CoreBind -> M CoreBind
-doBind env (NonRec b rhs) = NonRec b <$> doBndr env b rhs
-doBind env (Rec bs) = Rec <$> mapM doPair bs
+-- | Late cost center insertion logic used by the driver
+addLateCostCenters ::
+ Logger
+ -- ^ Logger
+ -> LateCCConfig
+ -- ^ Late cost center configuration
+ -> CoreProgram
+ -- ^ The program
+ -> IO (CoreProgram, LateCCState (Strict.Maybe SrcSpan))
+addLateCostCenters logger LateCCConfig{..} core_binds = do
+
+ -- If top-level late CCs are enabled via either -fprof-late or
+ -- -fprof-late-overloaded, add them
+ (top_level_cc_binds, top_level_late_cc_state) <-
+ case lateCCConfig_whichBinds of
+ LateCCNone ->
+ return (core_binds, initLateCCState ())
+ _ ->
+ withTiming
+ logger
+ (text "LateTopLevelCCs" <+> brackets (ppr this_mod))
+ (\(binds, late_cc_state) -> seqBinds binds `seq` late_cc_state `seq` ())
+ $ {-# SCC lateTopLevelCCs #-} do
+ pure $
+ doLateCostCenters
+ lateCCConfig_env
+ (initLateCCState ())
+ (topLevelBindsCC top_level_cc_pred)
+ core_binds
+
+ -- If overloaded call CCs are enabled via -fprof-late-overloaded-calls, add
+ -- them
+ (late_cc_binds, late_cc_state) <-
+ if lateCCConfig_overloadedCalls then
+ withTiming
+ logger
+ (text "LateOverloadedCallsCCs" <+> brackets (ppr this_mod))
+ (\(binds, late_cc_state) -> seqBinds binds `seq` late_cc_state `seq` ())
+ $ {-# SCC lateoverloadedCallsCCs #-} do
+ pure $
+ doLateCostCenters
+ lateCCConfig_env
+ (top_level_late_cc_state { lateCCState_extra = Strict.Nothing })
+ overloadedCallsCC
+ top_level_cc_binds
+ else
+ return
+ ( top_level_cc_binds
+ , top_level_late_cc_state { lateCCState_extra = Strict.Nothing }
+ )
+
+ return (late_cc_binds, late_cc_state)
where
- doPair :: ((Id, CoreExpr) -> M (Id, CoreExpr))
- doPair (b,rhs) = (b,) <$> doBndr env b rhs
-
-doBndr :: Env -> Id -> CoreExpr -> M CoreExpr
-doBndr env bndr rhs
- -- Cost centres on constructor workers are pretty much useless
- -- so we don't emit them if we are looking at the rhs of a constructor
- -- binding.
- | Just _ <- isDataConId_maybe bndr = pure rhs
- | otherwise = doBndr' env bndr rhs
-
-
--- We want to put the cost centre below the lambda as we only care about executions of the RHS.
-doBndr' :: Env -> Id -> CoreExpr -> State LateCCState CoreExpr
-doBndr' env bndr (Lam b rhs) = Lam b <$> doBndr' env bndr rhs
-doBndr' env bndr rhs = do
- let name = idName bndr
- name_loc = nameSrcSpan name
- cc_name = getOccFS name
- count = countEntries env
- cc_flavour <- getCCFlavour cc_name
- let cc_mod = thisModule env
- bndrCC = NormalCC cc_flavour cc_name cc_mod name_loc
- note = ProfNote bndrCC count True
- addCC env bndrCC
- return $ mkTick note rhs
-
-data LateCCState = LateCCState
- { lcs_state :: !CostCentreState
- , lcs_ccs :: S.Set CostCentre
- }
-type M = State LateCCState
-
-initLateCCState :: LateCCState
-initLateCCState = LateCCState newCostCentreState mempty
-
-getCCFlavour :: FastString -> M CCFlavour
-getCCFlavour name = mkLateCCFlavour <$> getCCIndex' name
-
-getCCIndex' :: FastString -> M CostCentreIndex
-getCCIndex' name = do
- state <- get
- let (index,cc_state') = getCCIndex name (lcs_state state)
- put (state { lcs_state = cc_state'})
- return index
-
-addCC :: Env -> CostCentre -> M ()
-addCC !env cc = do
- state <- get
- when (collectCCs env) $ do
- let ccs' = S.insert cc (lcs_ccs state)
- put (state { lcs_ccs = ccs'})
-
-data Env = Env
- { thisModule :: !Module
- , countEntries:: !Bool
- , collectCCs :: !Bool
- }
-
+ top_level_cc_pred :: CoreExpr -> Bool
+ top_level_cc_pred =
+ case lateCCConfig_whichBinds of
+ LateCCAllBinds ->
+ const True
+ LateCCOverloadedBinds ->
+ isOverloadedTy . exprType
+ LateCCNone ->
+ -- This is here for completeness, we won't actually use this
+ -- predicate in this case since we'll shortcut.
+ const False
+
+ this_mod = lateCCEnv_module lateCCConfig_env
=====================================
compiler/GHC/Core/LateCC/OverloadedCalls.hs
=====================================
@@ -0,0 +1,204 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE TupleSections #-}
+
+module GHC.Core.LateCC.OverloadedCalls
+ ( overloadedCallsCC
+ ) where
+
+import GHC.Prelude
+
+import Control.Monad.Trans.Class
+import Control.Monad.Trans.Reader
+import Control.Monad.Trans.State.Strict
+import qualified GHC.Data.Strict as Strict
+
+import GHC.Data.FastString
+import GHC.Core
+import GHC.Core.LateCC.Utils
+import GHC.Core.LateCC.Types
+import GHC.Core.Make
+import GHC.Core.Predicate
+import GHC.Core.Type
+import GHC.Core.Utils
+import GHC.Tc.Utils.TcType
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Types.SrcLoc
+import GHC.Types.Tickish
+import GHC.Types.Var
+import GHC.Utils.Outputable
+
+type OverloadedCallsCCState = Strict.Maybe SrcSpan
+
+-- | Insert cost centres on function applications with dictionary arguments. The
+-- source locations attached to the cost centres are approximated based on the
+-- "closest" source note encountered in the traversal.
+overloadedCallsCC :: CoreBind -> LateCCM OverloadedCallsCCState CoreBind
+overloadedCallsCC =
+ processBind
+ where
+ processBind :: CoreBind -> LateCCM OverloadedCallsCCState CoreBind
+ processBind core_bind =
+ case core_bind of
+ NonRec b e ->
+ NonRec b <$> wrap_if_join b (processExpr e)
+ Rec es ->
+ Rec <$> mapM (\(b,e) -> (b,) <$> wrap_if_join b (processExpr e)) es
+ where
+ -- If an overloaded function is turned into a join point, we won't add
+ -- SCCs directly to calls since it makes them non-tail calls. Instead,
+ -- we look for join points here and add an SCC to their RHS if they are
+ -- overloaded.
+ wrap_if_join ::
+ CoreBndr
+ -> LateCCM OverloadedCallsCCState CoreExpr
+ -> LateCCM OverloadedCallsCCState CoreExpr
+ wrap_if_join b pexpr = do
+ expr <- pexpr
+ if isJoinId b && isOverloadedTy (exprType expr) then do
+ let
+ cc_name :: FastString
+ cc_name = fsLit "join-rhs-" `appendFS` getOccFS b
+
+ cc_srcspan <-
+ fmap (Strict.fromMaybe (UnhelpfulSpan UnhelpfulNoLocationInfo)) $
+ lift $ gets lateCCState_extra
+
+ insertCC cc_name cc_srcspan expr
+ else
+ return expr
+
+
+ processExpr :: CoreExpr -> LateCCM OverloadedCallsCCState CoreExpr
+ processExpr expr =
+ case expr of
+ -- The case we care about: Application
+ app@App{} -> do
+ -- Here we have some application like `f v1 ... vN`, where v1 ... vN
+ -- should be the function's type arguments followed by the value
+ -- arguments. To determine if the `f` is an overloaded function, we
+ -- check if any of the arguments v1 ... vN are dictionaries.
+ let
+ (f, xs) = collectArgs app
+ resultTy = applyTypeToArgs empty (exprType f) xs
+
+ -- Recursively process the arguments first for no particular reason
+ args <- mapM processExpr xs
+ let app' = mkCoreApps f args
+
+ if
+ -- Check if any of the arguments are dictionaries
+ any isDictExpr args
+
+ -- Avoid instrumenting dictionary functions, which may be
+ -- overloaded if there are superclasses, by checking if the result
+ -- type of the function is a dictionary type.
+ && not (isDictTy resultTy)
+
+ -- Avoid instrumenting constraint selectors like eq_sel
+ && (typeTypeOrConstraint resultTy /= ConstraintLike)
+
+ -- Avoid instrumenting join points.
+ -- (See comment in processBind above)
+ && not (isJoinVarExpr f)
+ then do
+ -- Extract a name and source location from the function being
+ -- applied
+ let
+ cc_name :: FastString
+ cc_name =
+ fsLit $ maybe "<no name available>" getOccString (exprName app)
+
+ cc_srcspan <-
+ fmap (Strict.fromMaybe (UnhelpfulSpan UnhelpfulNoLocationInfo)) $
+ lift $ gets lateCCState_extra
+
+ insertCC cc_name cc_srcspan app'
+ else
+ return app'
+
+ -- For recursive constructors of Expr, we traverse the nested Exprs
+ Lam b e ->
+ mkCoreLams [b] <$> processExpr e
+ Let b e ->
+ mkCoreLet <$> processBind b <*> processExpr e
+ Case e b t alts ->
+ Case
+ <$> processExpr e
+ <*> pure b
+ <*> pure t
+ <*> mapM processAlt alts
+ Cast e co ->
+ mkCast <$> processExpr e <*> pure co
+ Tick t e -> do
+ trackSourceNote t $
+ mkTick t <$> processExpr e
+
+ -- For non-recursive constructors of Expr, we do nothing
+ x -> return x
+
+ processAlt :: CoreAlt -> LateCCM OverloadedCallsCCState CoreAlt
+ processAlt (Alt c bs e) = Alt c bs <$> processExpr e
+
+ trackSourceNote :: CoreTickish -> LateCCM OverloadedCallsCCState a -> LateCCM OverloadedCallsCCState a
+ trackSourceNote tick act =
+ case tick of
+ SourceNote rss _ -> do
+ -- Prefer source notes from the current file
+ in_current_file <-
+ maybe False ((== EQ) . lexicalCompareFS (srcSpanFile rss)) <$>
+ asks lateCCEnv_file
+ if not in_current_file then
+ act
+ else do
+ loc <- lift $ gets lateCCState_extra
+ lift . modify $ \s ->
+ s { lateCCState_extra =
+ Strict.Just $ RealSrcSpan rss mempty
+ }
+ x <- act
+ lift . modify $ \s ->
+ s { lateCCState_extra = loc
+ }
+ return x
+ _ ->
+ act
+
+ -- Utility functions
+
+ -- Extract a Name from an expression. If it is an application, attempt to
+ -- extract a name from the applied function. If it is a variable, return the
+ -- Name of the variable. If it is a tick/cast, attempt to extract a Name
+ -- from the expression held in the tick/cast. Otherwise return Nothing.
+ exprName :: CoreExpr -> Maybe Name
+ exprName =
+ \case
+ App f _ ->
+ exprName f
+ Var f ->
+ Just (idName f)
+ Tick _ e ->
+ exprName e
+ Cast e _ ->
+ exprName e
+ _ ->
+ Nothing
+
+ -- Determine whether an expression is a dictionary
+ isDictExpr :: CoreExpr -> Bool
+ isDictExpr =
+ maybe False isDictTy . exprType'
+ where
+ exprType' :: CoreExpr -> Maybe Type
+ exprType' = \case
+ Type{} -> Nothing
+ expr -> Just $ exprType expr
+
+ -- Determine whether an expression is a join variable
+ isJoinVarExpr :: CoreExpr -> Bool
+ isJoinVarExpr =
+ \case
+ Var var -> isJoinId var
+ Tick _ e -> isJoinVarExpr e
+ Cast e _ -> isJoinVarExpr e
+ _ -> False
=====================================
compiler/GHC/Core/LateCC/TopLevelBinds.hs
=====================================
@@ -0,0 +1,106 @@
+{-# LANGUAGE TupleSections #-}
+module GHC.Core.LateCC.TopLevelBinds where
+
+import GHC.Prelude
+
+import GHC.Core
+-- import GHC.Core.LateCC
+import GHC.Core.LateCC.Types
+import GHC.Core.LateCC.Utils
+import GHC.Core.Opt.Monad
+import GHC.Driver.DynFlags
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Unit.Module.ModGuts
+
+{- Note [Collecting late cost centres]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Usually cost centres defined by a module are collected
+during tidy by collectCostCentres. However with `-fprof-late`
+we insert cost centres after inlining. So we keep a list of
+all the cost centres we inserted and combine that with the list
+of cost centres found during tidy.
+
+To avoid overhead when using -fprof-late-inline there is a flag to stop
+us from collecting them here when we run this pass before tidy.
+
+Note [Adding late cost centres to top level bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The basic idea is very simple. For every top level binder
+`f = rhs` we compile it as if the user had written
+`f = {-# SCC f #-} rhs`.
+
+If we do this after unfoldings for `f` have been created this
+doesn't impact core-level optimizations at all. If we do it
+before, the cost centre will be included in the unfolding and
+might inhibit optimizations at the call site. For this reason
+we provide flags for both approaches as they have different
+tradeoffs.
+
+We also don't add a cost centre for any binder that is a constructor
+worker or wrapper. These will never meaningfully enrich the resulting
+profile so we improve efficiency by omitting those.
+
+-}
+
+-- | Add late cost centres directly to the 'ModGuts'. This is used inside the
+-- core pipeline with the -fprof-late-inline flag. It should not be used after
+-- tidy, since it does not manually track inserted cost centers. See
+-- Note [Collecting late cost centres].
+topLevelBindsCCMG :: ModGuts -> CoreM ModGuts
+topLevelBindsCCMG guts = do
+ dflags <- getDynFlags
+ let
+ env =
+ LateCCEnv
+ { lateCCEnv_module = mg_module guts
+
+ -- We don't use this for topLevelBindsCC, so Nothing is okay
+ , lateCCEnv_file = Nothing
+
+ , lateCCEnv_countEntries= gopt Opt_ProfCountEntries dflags
+ , lateCCEnv_collectCCs = False
+ }
+ guts' =
+ guts
+ { mg_binds =
+ fst
+ ( doLateCostCenters
+ env
+ (initLateCCState ())
+ (topLevelBindsCC (const True))
+ (mg_binds guts)
+ )
+ }
+ return guts'
+
+-- | Insert cost centres on top-level bindings in the module, depending on
+-- whether or not they satisfy the given predicate.
+topLevelBindsCC :: (CoreExpr -> Bool) -> CoreBind -> LateCCM s CoreBind
+topLevelBindsCC pred core_bind =
+ case core_bind of
+ NonRec b rhs ->
+ NonRec b <$> doBndr b rhs
+ Rec bs ->
+ Rec <$> mapM doPair bs
+ where
+ doPair :: ((Id, CoreExpr) -> LateCCM s (Id, CoreExpr))
+ doPair (b,rhs) = (b,) <$> doBndr b rhs
+
+ doBndr :: Id -> CoreExpr -> LateCCM s CoreExpr
+ doBndr bndr rhs
+ -- Cost centres on constructor workers are pretty much useless
+ -- so we don't emit them if we are looking at the rhs of a constructor
+ -- binding.
+ | Just _ <- isDataConId_maybe bndr = pure rhs
+ | otherwise = if pred rhs then addCC bndr rhs else pure rhs
+
+ -- We want to put the cost centre below the lambda as we only care about
+ -- executions of the RHS.
+ addCC :: Id -> CoreExpr -> LateCCM s CoreExpr
+ addCC bndr (Lam b rhs) = Lam b <$> addCC bndr rhs
+ addCC bndr rhs = do
+ let name = idName bndr
+ cc_loc = nameSrcSpan name
+ cc_name = getOccFS name
+ insertCC cc_name cc_loc rhs
\ No newline at end of file
=====================================
compiler/GHC/Core/LateCC/Types.hs
=====================================
@@ -0,0 +1,74 @@
+-- | Types related to late cost center insertion
+module GHC.Core.LateCC.Types
+ ( LateCCConfig(..)
+ , LateCCBindSpec(..)
+ , LateCCEnv(..)
+ , LateCCState(..)
+ , initLateCCState
+ , LateCCM
+ ) where
+
+import GHC.Prelude
+
+import Control.Monad.Trans.Reader
+import Control.Monad.Trans.State.Strict
+import qualified Data.Set as S
+
+import GHC.Data.FastString
+import GHC.Types.CostCentre
+import GHC.Types.CostCentre.State
+import GHC.Unit.Types
+
+-- | Late cost center insertion configuration.
+--
+-- Specifies whether cost centers are added to overloaded function call sites
+-- and/or top-level bindings, and which top-level bindings they are added to.
+-- Also holds the cost center insertion environment.
+data LateCCConfig =
+ LateCCConfig
+ { lateCCConfig_whichBinds :: !LateCCBindSpec
+ , lateCCConfig_overloadedCalls :: !Bool
+ , lateCCConfig_env :: !LateCCEnv
+ }
+
+-- | The types of top-level bindings we support adding cost centers to.
+data LateCCBindSpec =
+ LateCCNone
+ | LateCCAllBinds
+ | LateCCOverloadedBinds
+
+-- | Late cost centre insertion environment
+data LateCCEnv = LateCCEnv
+ { lateCCEnv_module :: !Module
+ -- ^ Current module
+ , lateCCEnv_file :: Maybe FastString
+ -- ^ Current file, if we have one
+ , lateCCEnv_countEntries:: !Bool
+ -- ^ Whether the inserted cost centers should count entries
+ , lateCCEnv_collectCCs :: !Bool
+ -- ^ Whether to collect the cost centres we insert. See
+ -- Note [Collecting late cost centres]
+
+ }
+
+-- | Late cost centre insertion state, indexed by some extra state type that an
+-- insertion method may require.
+data LateCCState s = LateCCState
+ { lateCCState_ccs :: !(S.Set CostCentre)
+ -- ^ Cost centres that have been inserted
+ , lateCCState_ccState :: !CostCentreState
+ -- ^ Per-module state tracking for cost centre indices
+ , lateCCState_extra :: !s
+ }
+
+-- | The empty late cost centre insertion state
+initLateCCState :: s -> LateCCState s
+initLateCCState s =
+ LateCCState
+ { lateCCState_ccState = newCostCentreState
+ , lateCCState_ccs = mempty
+ , lateCCState_extra = s
+ }
+
+-- | Late cost centre insertion monad
+type LateCCM s = ReaderT LateCCEnv (State (LateCCState s))
=====================================
compiler/GHC/Core/LateCC/Utils.hs
=====================================
@@ -0,0 +1,80 @@
+module GHC.Core.LateCC.Utils
+ ( -- * Inserting cost centres
+ doLateCostCenters -- Might be useful for API users
+
+ -- ** Helpers for defining insertion methods
+ , getCCFlavour
+ , insertCC
+ ) where
+
+import GHC.Prelude
+
+import Control.Monad
+import Control.Monad.Trans.Class
+import Control.Monad.Trans.Reader
+import Control.Monad.Trans.State.Strict
+import qualified Data.Set as S
+
+import GHC.Core
+import GHC.Core.LateCC.Types
+import GHC.Core.Utils
+import GHC.Data.FastString
+import GHC.Types.CostCentre
+import GHC.Types.CostCentre.State
+import GHC.Types.SrcLoc
+import GHC.Types.Tickish
+
+-- | Insert cost centres into the 'CoreProgram' using the provided environment,
+-- initial state, and insertion method.
+doLateCostCenters
+ :: LateCCEnv
+ -- ^ Environment to run the insertion in
+ -> LateCCState s
+ -- ^ Initial state to run the insertion with
+ -> (CoreBind -> LateCCM s CoreBind)
+ -- ^ Insertion method
+ -> CoreProgram
+ -- ^ Bindings to consider
+ -> (CoreProgram, LateCCState s)
+doLateCostCenters env state method binds =
+ runLateCC env state $ mapM method binds
+
+-- | Evaluate late cost centre insertion
+runLateCC :: LateCCEnv -> LateCCState s -> LateCCM s a -> (a, LateCCState s)
+runLateCC env state = (`runState` state) . (`runReaderT` env)
+
+-- | Given the name of a cost centre, get its flavour
+getCCFlavour :: FastString -> LateCCM s CCFlavour
+getCCFlavour name = mkLateCCFlavour <$> getCCIndex' name
+ where
+ getCCIndex' :: FastString -> LateCCM s CostCentreIndex
+ getCCIndex' name = do
+ cc_state <- lift $ gets lateCCState_ccState
+ let (index, cc_state') = getCCIndex name cc_state
+ lift . modify $ \s -> s { lateCCState_ccState = cc_state'}
+ return index
+
+-- | Insert a cost centre with the specified name and source span on the given
+-- expression. The inserted cost centre will be appropriately tracked in the
+-- late cost centre state.
+insertCC
+ :: FastString
+ -- ^ Name of the cost centre to insert
+ -> SrcSpan
+ -- ^ Source location to associate with the cost centre
+ -> CoreExpr
+ -- ^ Expression to wrap in the cost centre
+ -> LateCCM s CoreExpr
+insertCC cc_name cc_loc expr = do
+ cc_flavour <- getCCFlavour cc_name
+ env <- ask
+ let
+ cc_mod = lateCCEnv_module env
+ cc = NormalCC cc_flavour cc_name cc_mod cc_loc
+ note = ProfNote cc (lateCCEnv_countEntries env) True
+ when (lateCCEnv_collectCCs env) $ do
+ lift . modify $ \s ->
+ s { lateCCState_ccs = S.insert cc (lateCCState_ccs s)
+ }
+ return $ mkTick note expr
+
=====================================
compiler/GHC/Core/Opt/Pipeline.hs
=====================================
@@ -43,7 +43,7 @@ import GHC.Core.Opt.CallArity ( callArityAnalProgram )
import GHC.Core.Opt.Exitify ( exitifyProgram )
import GHC.Core.Opt.WorkWrap ( wwTopBinds )
import GHC.Core.Opt.CallerCC ( addCallerCostCentres )
-import GHC.Core.LateCC (addLateCostCentresMG)
+import GHC.Core.LateCC.TopLevelBinds (topLevelBindsCCMG)
import GHC.Core.Seq (seqBinds)
import GHC.Core.FamInstEnv
@@ -520,7 +520,7 @@ doCorePass pass guts = do
addCallerCostCentres guts
CoreAddLateCcs -> {-# SCC "AddLateCcs" #-}
- addLateCostCentresMG guts
+ topLevelBindsCCMG guts
CoreDoPrintCore -> {-# SCC "PrintCore" #-}
liftIO $ printCore logger (mg_binds guts) >> return guts
=====================================
compiler/GHC/Driver/Flags.hs
=====================================
@@ -341,6 +341,8 @@ data GeneralFlag
| Opt_ProfCountEntries
| Opt_ProfLateInlineCcs
| Opt_ProfLateCcs
+ | Opt_ProfLateOverloadedCcs
+ | Opt_ProfLateoverloadedCallsCCs
| Opt_ProfManualCcs -- ^ Ignore manual SCC annotations
-- misc opts
=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -175,7 +175,6 @@ import GHC.Iface.Ext.Debug ( diffFile, validateScopes )
import GHC.Core
import GHC.Core.Lint.Interactive ( interactiveInScope )
import GHC.Core.Tidy ( tidyExpr )
-import GHC.Core.Type ( Type, Kind )
import GHC.Core.Utils ( exprType )
import GHC.Core.ConLike
import GHC.Core.Opt.Pipeline
@@ -185,7 +184,8 @@ import GHC.Core.InstEnv
import GHC.Core.FamInstEnv
import GHC.Core.Rules
import GHC.Core.Stats
-import GHC.Core.LateCC (addLateCostCentresPgm)
+import GHC.Core.LateCC
+import GHC.Core.LateCC.Types
import GHC.CoreToStg.Prep
@@ -197,6 +197,7 @@ import GHC.Parser.Lexer as Lexer
import GHC.Tc.Module
import GHC.Tc.Utils.Monad
+import GHC.Tc.Utils.TcType
import GHC.Tc.Zonk.Env ( ZonkFlexi (DefaultFlexi) )
import GHC.Stg.Syntax
@@ -297,7 +298,6 @@ import GHC.StgToCmm.Utils (IPEStats)
import GHC.Types.Unique.FM
import GHC.Types.Unique.DFM
import GHC.Cmm.Config (CmmConfig)
-import GHC.Types.CostCentre.State (newCostCentreState)
{- **********************************************************************
@@ -1791,22 +1791,41 @@ hscGenHardCode hsc_env cgguts location output_filename = do
-------------------
- -- Insert late cost centres if enabled.
- -- If `-fprof-late-inline` is enabled we can skip this, as it will have added
- -- a superset of cost centres we would add here already.
-
- (late_cc_binds, late_local_ccs, cc_state) <-
- if gopt Opt_ProfLateCcs dflags && not (gopt Opt_ProfLateInlineCcs dflags)
- then
- withTiming
- logger
- (text "LateCCs"<+>brackets (ppr this_mod))
- (const ())
- $ {-# SCC lateCC #-} do
- (binds, late_ccs, cc_state) <- addLateCostCentresPgm dflags logger this_mod core_binds
- return ( binds, (S.toList late_ccs `mappend` local_ccs ), cc_state)
+ -- Insert late cost centres based on the provided flags.
+ --
+ -- If -fprof-late-inline is enabled, we will skip adding CCs on any
+ -- top-level bindings here (via shortcut in `addLateCostCenters`),
+ -- since it will have already added a superset of the CCs we would add
+ -- here.
+ let
+ late_cc_config :: LateCCConfig
+ late_cc_config =
+ LateCCConfig
+ { lateCCConfig_whichBinds =
+ if gopt Opt_ProfLateInlineCcs dflags then
+ LateCCNone
+ else if gopt Opt_ProfLateCcs dflags then
+ LateCCAllBinds
+ else if gopt Opt_ProfLateOverloadedCcs dflags then
+ LateCCOverloadedBinds
else
- return (core_binds, local_ccs, newCostCentreState)
+ LateCCNone
+ , lateCCConfig_overloadedCalls =
+ gopt Opt_ProfLateoverloadedCallsCCs dflags
+ , lateCCConfig_env =
+ LateCCEnv
+ { lateCCEnv_module = this_mod
+ , lateCCEnv_file = fsLit <$> ml_hs_file location
+ , lateCCEnv_countEntries= gopt Opt_ProfCountEntries dflags
+ , lateCCEnv_collectCCs = True
+ }
+ }
+
+ (late_cc_binds, late_cc_state) <-
+ addLateCostCenters logger late_cc_config core_binds
+
+ when (dopt Opt_D_dump_late_cc dflags || dopt Opt_D_verbose_core2core dflags) $
+ putDumpFileMaybe logger Opt_D_dump_late_cc "LateCC" FormatCore (vcat (map ppr late_cc_binds))
-------------------
-- Run late plugins
@@ -1820,7 +1839,7 @@ hscGenHardCode hsc_env cgguts location output_filename = do
cg_hpc_info = hpc_info,
cg_spt_entries = spt_entries,
cg_binds = late_binds,
- cg_ccs = late_local_ccs'
+ cg_ccs = late_local_ccs
}
, _
) <-
@@ -1833,9 +1852,9 @@ hscGenHardCode hsc_env cgguts location output_filename = do
(($ hsc_env) . latePlugin)
( cgguts
{ cg_binds = late_cc_binds
- , cg_ccs = late_local_ccs
+ , cg_ccs = S.toList (lateCCState_ccs late_cc_state) ++ local_ccs
}
- , cc_state
+ , lateCCState_ccState late_cc_state
)
let
@@ -1876,7 +1895,7 @@ hscGenHardCode hsc_env cgguts location output_filename = do
let (stg_binds,_stg_deps) = unzip stg_binds_with_deps
let cost_centre_info =
- (late_local_ccs' ++ caf_ccs, caf_cc_stacks)
+ (late_local_ccs ++ caf_ccs, caf_cc_stacks)
platform = targetPlatform dflags
prof_init
| sccProfilingEnabled dflags = profilingInitCode platform this_mod cost_centre_info
=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -2444,6 +2444,8 @@ fFlagsDeps = [
flagSpec "prof-cafs" Opt_AutoSccsOnIndividualCafs,
flagSpec "prof-count-entries" Opt_ProfCountEntries,
flagSpec "prof-late" Opt_ProfLateCcs,
+ flagSpec "prof-late-overloaded" Opt_ProfLateOverloadedCcs,
+ flagSpec "prof-late-overloaded-calls" Opt_ProfLateoverloadedCallsCCs,
flagSpec "prof-manual" Opt_ProfManualCcs,
flagSpec "prof-late-inline" Opt_ProfLateInlineCcs,
flagSpec "regs-graph" Opt_RegsGraph,
@@ -3763,6 +3765,10 @@ needSourceNotes :: DynFlags -> Bool
needSourceNotes dflags = debugLevel dflags > 0
|| gopt Opt_InfoTableMap dflags
+ -- Source ticks are used to approximate the location of
+ -- overloaded call cost centers
+ || gopt Opt_ProfLateoverloadedCallsCCs dflags
+
-- -----------------------------------------------------------------------------
-- Linker/compiler information
=====================================
compiler/GHC/Tc/Utils/TcType.hs
=====================================
@@ -1907,7 +1907,7 @@ isRhoExpTy (Infer {}) = True
isOverloadedTy :: Type -> Bool
-- Yes for a type of a function that might require evidence-passing
--- Used only by bindLocalMethods
+-- Used by bindLocalMethods and for -fprof-late-overloaded
isOverloadedTy ty | Just ty' <- coreView ty = isOverloadedTy ty'
isOverloadedTy (ForAllTy _ ty) = isOverloadedTy ty
isOverloadedTy (FunTy { ft_af = af }) = isInvisibleFunArg af
=====================================
compiler/ghc.cabal.in
=====================================
@@ -336,6 +336,10 @@ Library
GHC.Core.Lint
GHC.Core.Lint.Interactive
GHC.Core.LateCC
+ GHC.Core.LateCC.Types
+ GHC.Core.LateCC.TopLevelBinds
+ GHC.Core.LateCC.Utils
+ GHC.Core.LateCC.OverloadedCalls
GHC.Core.Make
GHC.Core.Map.Expr
GHC.Core.Map.Type
=====================================
docs/users_guide/9.10.1-notes.rst
=====================================
@@ -186,6 +186,15 @@ Compiler
This means that if you are using ``-fllvm`` you now need ``llc``, ``opt`` and ``clang``
available.
+- The :ghc-flag:`-fprof-late-overloaded` flag has been introduced. It causes
+ cost centres to be added to *overloaded* top level bindings, unlike
+ :ghc-flag:`-fprof-late` which adds cost centres to all top level bindings.
+
+- The :ghc-flag:`-fprof-late-overloaded-calls` flag has been introduced. It
+ causes cost centres to be inserted at call sites that include instance dictionary
+ arguments. This may be preferred over :ghc-flag:`-fprof-late-overloaded` since
+ it may reveal whether imported functions are called overloaded.
+
JavaScript backend
~~~~~~~~~~~~~~~~~~
=====================================
docs/users_guide/profiling.rst
=====================================
@@ -518,6 +518,49 @@ of your profiled program will be different to that of the unprofiled one.
You can try this mode if :ghc-flag:`-fprof-late` results in a profile that's too hard to interpret.
+.. ghc-flag:: -fprof-late-overloaded
+ :shortdesc: Auto-add ``SCC``\\ s to all top level overloaded bindings *after* the core pipeline has run.
+ :type: dynamic
+ :reverse: -fno-prof-late-overloaded
+ :category:
+
+ :since: 9.10.1
+
+ Adds an automatic ``SCC`` annotation to all *overloaded* top level bindings
+ late in the compilation pipeline after the optimizer has run and unfoldings
+ have been created. This means these cost centres will not interfere with
+ core-level optimizations and the resulting profile will be closer to the
+ performance profile of an optimized non-profiled executable.
+
+ This flag can help determine which top level bindings encountered during a
+ program's execution are still overloaded after inlining and specialization.
+
+.. ghc-flag:: -fprof-late-overloaded-calls
+ :shortdesc: Auto-add ``SCC``\\ s to all call sites that include dictionary arguments *after* the core pipeline has run.
+ :type: dynamic
+ :reverse: -fno-prof-late-overloaded-calls
+ :category:
+
+ :since: 9.10.1
+
+ Adds an automatic ``SCC`` annotation to all call sites that include
+ dictionary arguments late in the compilation pipeline after the optimizer
+ has run and unfoldings have been created. This means these cost centres will
+ not interfere with core-level optimizations and the resulting profile will
+ be closer to the performance profile of an optimized non-profiled
+ executable.
+
+ This flag is potentially more useful than :ghc-flag:`-fprof-late-overloaded`
+ since it will also add ``SCC`` annotations to call sites of imported
+ overloaded functions.
+
+ Some overloaded calls may not be annotated, specifically in cases where the
+ optimizer turns an overloaded function into a join point. Calls to such
+ functions will not be wrapped in ``SCC`` annotations, since doing so would
+ make them non-tail calls, and join points must only be tail-called. Instead,
+ ``SCC`` annotations are added around the body of overloaded join variables
+ and given distinct names (``join-rhs-<var>``) to avoid confusion.
+
.. ghc-flag:: -fprof-cafs
:shortdesc: Auto-add ``SCC``\\ s to all CAFs
:type: dynamic
=====================================
testsuite/tests/profiling/should_run/all.T
=====================================
@@ -195,3 +195,30 @@ test('ignore_scc', [], compile_and_run,
['-fno-prof-manual'])
test('T21446', [], makefile_test, ['T21446'])
+
+
+test('scc-prof-overloaded001',
+ [],
+ compile_and_run,
+ ['-fno-prof-auto -fno-full-laziness -fprof-late-overloaded'] # See Note [consistent stacks]
+)
+
+test('scc-prof-overloaded002',
+ [],
+ compile_and_run,
+ ['-fno-prof-auto -fno-full-laziness -fprof-late-overloaded'] # See Note [consistent stacks]
+)
+
+test('scc-prof-overloaded-calls001',
+ [],
+ compile_and_run,
+ # Need optimizations to get rid of unwanted overloaded calls
+ ['-O -fno-prof-auto -fno-full-laziness -fprof-late-overloaded-calls'] # See Note [consistent stacks]
+)
+
+test('scc-prof-overloaded-calls002',
+ [],
+ compile_and_run,
+ # Need optimizations to get rid of unwanted overloaded calls
+ ['-O -fno-prof-auto -fprof-late-overloaded-calls']
+)
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.hs
=====================================
@@ -0,0 +1,24 @@
+-- Running this program should result in two calls to overloaded functions: One
+-- with the $fShowX dictionary, the next with the $fShowList dictionary
+-- constructor for X.
+--
+-- Note that although the `$fShowList` dictionary constructor is itself
+-- overloaded, it should not get an SCC since we avoid instrumenting overloaded
+-- calls that result in dictionaries.
+--
+-- With just -fprof-late-overloaded, only `invoke` should get an SCC, since it
+-- is the only overloaded top level binding. With
+-- `-fprof-late-overloaded-calls`, the calls to both `invoke` and `f` (in the
+-- body of invoke) should get SCCs.
+
+module Main where
+
+{-# NOINLINE invoke #-}
+invoke :: Show a => (Show [a] => [a] -> String) -> a -> String
+invoke f x = f [x]
+
+data X = X
+ deriving Show
+
+main :: IO ()
+main = putStrLn (invoke show X)
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.prof.sample
=====================================
@@ -0,0 +1,26 @@
+ Thu Jan 4 11:49 2024 Time and Allocation Profiling Report (Final)
+
+ scc-prof-overloaded-calls001 +RTS -hc -p -RTS
+
+ total time = 0.00 secs (0 ticks @ 1000 us, 1 processor)
+ total alloc = 48,320 bytes (excludes profiling overheads)
+
+COST CENTRE MODULE SRC %time %alloc
+
+MAIN MAIN <built-in> 0.0 20.5
+CAF GHC.IO.Handle.FD <entire-module> 0.0 71.9
+CAF GHC.IO.Encoding <entire-module> 0.0 5.1
+CAF GHC.Conc.Signal <entire-module> 0.0 1.3
+
+
+ individual inherited
+COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
+
+MAIN MAIN <built-in> 128 0 0.0 20.5 0.0 100.0
+ CAF Main <entire-module> 255 0 0.0 0.0 0.0 0.8
+ invoke Main scc-prof-overloaded-calls001.hs:24:1-31 256 1 0.0 0.3 0.0 0.8
+ f Main scc-prof-overloaded-calls001.hs:18:1-18 257 1 0.0 0.6 0.0 0.6
+ CAF GHC.Conc.Signal <entire-module> 238 0 0.0 1.3 0.0 1.3
+ CAF GHC.IO.Encoding <entire-module> 219 0 0.0 5.1 0.0 5.1
+ CAF GHC.IO.Encoding.Iconv <entire-module> 217 0 0.0 0.4 0.0 0.4
+ CAF GHC.IO.Handle.FD <entire-module> 208 0 0.0 71.9 0.0 71.9
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.stdout
=====================================
@@ -0,0 +1 @@
+[X]
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.hs
=====================================
@@ -0,0 +1,65 @@
+-- Running this program should result in seven calls to overloaded functions
+-- with increasing numbers of dictionary arguments.
+--
+-- With just -fprof-late-overloaded, no SCCs should be added, since none of the
+-- overloaded functions are top level. With `-fprof-late-overloaded-calls`, all
+-- seven calls should get *distinct* SCCs with separate source locations even
+-- though the overloaded functions share an OccName (`f`).
+
+module Main where
+
+data X = X
+
+instance Show X where
+instance Num X where
+instance Eq X where
+instance Enum X where
+instance Ord X where
+instance Real X where
+instance Integral X where
+
+-- No overloaded call
+{-# NOINLINE invoke0 #-}
+invoke0 :: (forall a. a -> a -> String) -> X -> String
+invoke0 f val = f val val
+
+{-# NOINLINE invoke1 #-}
+invoke1 :: (forall a. Show a => a -> a -> String) -> X -> String
+invoke1 f val = f val val
+
+{-# NOINLINE invoke2 #-}
+invoke2 :: (forall a. (Show a, Num a) => a -> a -> String) -> X -> String
+invoke2 f val = f val val
+
+{-# NOINLINE invoke3 #-}
+invoke3 :: (forall a. (Show a, Num a, Eq a) => a -> a -> String) -> X -> String
+invoke3 f val = f val val
+
+{-# NOINLINE invoke4 #-}
+invoke4 :: (forall a. (Show a, Num a, Eq a, Enum a) => a -> a -> String) -> X -> String
+invoke4 f val = f val val
+
+{-# NOINLINE invoke5 #-}
+invoke5 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a) => a -> a -> String) -> X -> String
+invoke5 f val = f val val
+
+{-# NOINLINE invoke6 #-}
+invoke6 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a, Real a) => a -> a -> String) -> X -> String
+invoke6 f val = f val val
+
+{-# NOINLINE invoke7 #-}
+invoke7 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a, Real a, Integral a) => a -> a -> String) -> X -> String
+invoke7 f val = f val val
+
+main :: IO ()
+main = do
+ putStrLn $ invoke0 (\_ _ -> s) X
+ putStrLn $ invoke1 (\_ _ -> s) X
+ putStrLn $ invoke2 (\_ _ -> s) X
+ putStrLn $ invoke3 (\_ _ -> s) X
+ putStrLn $ invoke4 (\_ _ -> s) X
+ putStrLn $ invoke5 (\_ _ -> s) X
+ putStrLn $ invoke6 (\_ _ -> s) X
+ putStrLn $ invoke7 (\_ _ -> s) X
+ where
+ s = "wibbly"
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.prof.sample
=====================================
@@ -0,0 +1,31 @@
+ Fri Jan 5 11:06 2024 Time and Allocation Profiling Report (Final)
+
+ scc-prof-overloaded-calls002 +RTS -p -RTS
+
+ total time = 0.00 secs (0 ticks @ 1000 us, 1 processor)
+ total alloc = 59,152 bytes (excludes profiling overheads)
+
+COST CENTRE MODULE SRC %time %alloc
+
+MAIN MAIN <built-in> 0.0 34.8
+CAF GHC.IO.Handle.FD <entire-module> 0.0 58.7
+CAF GHC.IO.Encoding <entire-module> 0.0 4.1
+CAF GHC.Conc.Signal <entire-module> 0.0 1.1
+
+
+ individual inherited
+COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
+
+MAIN MAIN <built-in> 128 0 0.0 34.8 0.0 100.0
+ CAF Main <entire-module> 255 0 0.0 0.6 0.0 0.9
+ f Main scc-prof-overloaded-calls002.hs:52:1-25 262 1 0.0 0.1 0.0 0.1
+ f Main scc-prof-overloaded-calls002.hs:48:1-25 261 1 0.0 0.1 0.0 0.1
+ f Main scc-prof-overloaded-calls002.hs:44:1-25 260 1 0.0 0.1 0.0 0.1
+ f Main scc-prof-overloaded-calls002.hs:40:1-25 259 1 0.0 0.0 0.0 0.0
+ f Main scc-prof-overloaded-calls002.hs:36:1-25 258 1 0.0 0.0 0.0 0.0
+ f Main scc-prof-overloaded-calls002.hs:32:1-25 257 1 0.0 0.0 0.0 0.0
+ f Main scc-prof-overloaded-calls002.hs:28:1-25 256 1 0.0 0.0 0.0 0.0
+ CAF GHC.Conc.Signal <entire-module> 238 0 0.0 1.1 0.0 1.1
+ CAF GHC.IO.Encoding <entire-module> 219 0 0.0 4.1 0.0 4.1
+ CAF GHC.IO.Encoding.Iconv <entire-module> 217 0 0.0 0.3 0.0 0.3
+ CAF GHC.IO.Handle.FD <entire-module> 208 0 0.0 58.7 0.0 58.7
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.stdout
=====================================
@@ -0,0 +1,8 @@
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded001.hs
=====================================
@@ -0,0 +1,24 @@
+-- Running this program should result in two calls to overloaded functions: One
+-- with the $fShowX dictionary, the next with the $fShowList dictionary
+-- constructor for X.
+--
+-- Note that although the `$fShowList` dictionary constructor is itself
+-- overloaded, it should not get an SCC since we avoid instrumenting overloaded
+-- calls that result in dictionaries.
+--
+-- With just -fprof-late-overloaded, only `invoke` should get an SCC, since it
+-- is the only overloaded top level binding. With
+-- `-fprof-late-overloaded-calls`, the calls to both `invoke` and `f` (in the
+-- body of invoke) should get SCCs.
+
+module Main where
+
+{-# NOINLINE invoke #-}
+invoke :: Show a => (Show [a] => [a] -> String) -> a -> String
+invoke f x = f [x]
+
+data X = X
+ deriving Show
+
+main :: IO ()
+main = putStrLn (invoke show X)
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded001.prof.sample
=====================================
@@ -0,0 +1,25 @@
+ Thu Jan 4 11:26 2024 Time and Allocation Profiling Report (Final)
+
+ scc-prof-overloaded001 +RTS -hc -p -RTS
+
+ total time = 0.00 secs (0 ticks @ 1000 us, 1 processor)
+ total alloc = 48,304 bytes (excludes profiling overheads)
+
+COST CENTRE MODULE SRC %time %alloc
+
+MAIN MAIN <built-in> 0.0 20.5
+CAF GHC.IO.Handle.FD <entire-module> 0.0 71.9
+CAF GHC.IO.Encoding <entire-module> 0.0 5.1
+CAF GHC.Conc.Signal <entire-module> 0.0 1.3
+
+
+ individual inherited
+COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
+
+MAIN MAIN <built-in> 128 0 0.0 20.5 0.0 100.0
+ CAF Main <entire-module> 255 0 0.0 0.0 0.0 0.8
+ invoke Main scc-prof-overloaded001.hs:18:1-6 256 1 0.0 0.8 0.0 0.8
+ CAF GHC.Conc.Signal <entire-module> 238 0 0.0 1.3 0.0 1.3
+ CAF GHC.IO.Encoding <entire-module> 219 0 0.0 5.1 0.0 5.1
+ CAF GHC.IO.Encoding.Iconv <entire-module> 217 0 0.0 0.4 0.0 0.4
+ CAF GHC.IO.Handle.FD <entire-module> 208 0 0.0 71.9 0.0 71.9
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded001.stdout
=====================================
@@ -0,0 +1 @@
+[X]
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded002.hs
=====================================
@@ -0,0 +1,65 @@
+-- Running this program should result in seven calls to overloaded functions
+-- with increasing numbers of dictionary arguments.
+--
+-- With just -fprof-late-overloaded, no SCCs should be added, since none of the
+-- overloaded functions are top level. With `-fprof-late-overloaded-calls`, all
+-- seven calls should get *distinct* SCCs with separate source locations even
+-- though the overloaded functions share an OccName (`f`).
+
+module Main where
+
+data X = X
+
+instance Show X where
+instance Num X where
+instance Eq X where
+instance Enum X where
+instance Ord X where
+instance Real X where
+instance Integral X where
+
+-- No overloaded call
+{-# NOINLINE invoke0 #-}
+invoke0 :: (forall a. a -> a -> String) -> X -> String
+invoke0 f val = f val val
+
+{-# NOINLINE invoke1 #-}
+invoke1 :: (forall a. Show a => a -> a -> String) -> X -> String
+invoke1 f val = f val val
+
+{-# NOINLINE invoke2 #-}
+invoke2 :: (forall a. (Show a, Num a) => a -> a -> String) -> X -> String
+invoke2 f val = f val val
+
+{-# NOINLINE invoke3 #-}
+invoke3 :: (forall a. (Show a, Num a, Eq a) => a -> a -> String) -> X -> String
+invoke3 f val = f val val
+
+{-# NOINLINE invoke4 #-}
+invoke4 :: (forall a. (Show a, Num a, Eq a, Enum a) => a -> a -> String) -> X -> String
+invoke4 f val = f val val
+
+{-# NOINLINE invoke5 #-}
+invoke5 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a) => a -> a -> String) -> X -> String
+invoke5 f val = f val val
+
+{-# NOINLINE invoke6 #-}
+invoke6 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a, Real a) => a -> a -> String) -> X -> String
+invoke6 f val = f val val
+
+{-# NOINLINE invoke7 #-}
+invoke7 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a, Real a, Integral a) => a -> a -> String) -> X -> String
+invoke7 f val = f val val
+
+main :: IO ()
+main = do
+ putStrLn $ invoke0 (\_ _ -> s) X
+ putStrLn $ invoke1 (\_ _ -> s) X
+ putStrLn $ invoke2 (\_ _ -> s) X
+ putStrLn $ invoke3 (\_ _ -> s) X
+ putStrLn $ invoke4 (\_ _ -> s) X
+ putStrLn $ invoke5 (\_ _ -> s) X
+ putStrLn $ invoke6 (\_ _ -> s) X
+ putStrLn $ invoke7 (\_ _ -> s) X
+ where
+ s = "wibbly"
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded002.prof.sample
=====================================
@@ -0,0 +1,23 @@
+ Thu Jan 4 11:55 2024 Time and Allocation Profiling Report (Final)
+
+ scc-prof-overloaded002 +RTS -hc -p -RTS
+
+ total time = 0.00 secs (0 ticks @ 1000 us, 1 processor)
+ total alloc = 56,472 bytes (excludes profiling overheads)
+
+COST CENTRE MODULE SRC %time %alloc
+
+MAIN MAIN <built-in> 0.0 32.7
+CAF GHC.IO.Handle.FD <entire-module> 0.0 61.5
+CAF GHC.IO.Encoding <entire-module> 0.0 4.3
+CAF GHC.Conc.Signal <entire-module> 0.0 1.1
+
+
+ individual inherited
+COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
+
+MAIN MAIN <built-in> 128 0 0.0 32.7 0.0 100.0
+ CAF GHC.Conc.Signal <entire-module> 238 0 0.0 1.1 0.0 1.1
+ CAF GHC.IO.Encoding <entire-module> 219 0 0.0 4.3 0.0 4.3
+ CAF GHC.IO.Encoding.Iconv <entire-module> 217 0 0.0 0.4 0.0 0.4
+ CAF GHC.IO.Handle.FD <entire-module> 208 0 0.0 61.5 0.0 61.5
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded002.stdout
=====================================
@@ -0,0 +1,8 @@
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/61bb5ff68630c203eaae4baba554640246df5a63
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/61bb5ff68630c203eaae4baba554640246df5a63
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240304/9e8a919e/attachment-0001.html>
More information about the ghc-commits
mailing list