[Git][ghc/ghc][wip/fprof-overloaded] add -fprof-late-overloaded and -fprof-late-overloaded-calls
Finley McIlwaine (@FinleyMcIlwaine)
gitlab at gitlab.haskell.org
Wed Feb 28 05:04:41 UTC 2024
Finley McIlwaine pushed to branch wip/fprof-overloaded at Glasgow Haskell Compiler / GHC
Commits:
e95a780d by Finley McIlwaine at 2024-02-27T21:04:16-08:00
add -fprof-late-overloaded and -fprof-late-overloaded-calls
* Refactor late cost centre insertion for extensibility
* Add two more late cost centre insertion methods that add SCCs to overloaded
top level bindings and call sites with dictionary arguments.
* Some tests for the basic functionality of the new insertion methods
- - - - -
25 changed files:
- compiler/GHC/Core/LateCC.hs
- + compiler/GHC/Core/LateCC/OverloadedCalls.hs
- + compiler/GHC/Core/LateCC/TopLevelBinds.hs
- + compiler/GHC/Core/LateCC/Types.hs
- compiler/GHC/Core/Opt/Pipeline.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/Tc/Utils/TcType.hs
- compiler/ghc.cabal.in
- docs/users_guide/9.10.1-notes.rst
- docs/users_guide/profiling.rst
- testsuite/tests/profiling/should_run/all.T
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.hs
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.prof.sample
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.stdout
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.hs
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.prof.sample
- + testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.stdout
- + testsuite/tests/profiling/should_run/scc-prof-overloaded001.hs
- + testsuite/tests/profiling/should_run/scc-prof-overloaded001.prof.sample
- + testsuite/tests/profiling/should_run/scc-prof-overloaded001.stdout
- + testsuite/tests/profiling/should_run/scc-prof-overloaded002.hs
- + testsuite/tests/profiling/should_run/scc-prof-overloaded002.prof.sample
- + testsuite/tests/profiling/should_run/scc-prof-overloaded002.stdout
Changes:
=====================================
compiler/GHC/Core/LateCC.hs
=====================================
@@ -1,164 +1,94 @@
-{-# LANGUAGE DerivingStrategies #-}
-{-# LANGUAGE TupleSections #-}
-
--- | Adds cost-centers after the core piple has run.
+-- | Adds cost-centers after the core pipline has run.
module GHC.Core.LateCC
- ( addLateCostCentresMG
- , addLateCostCentresPgm
- , addLateCostCentres -- Might be useful for API users
- , Env(..)
+ ( -- * Inserting cost centres
+ addLateCostCentres -- Might be useful for API users
+
+ -- * Helpers for defining insertion methods
+ , getCCFlavour
+ , insertCC
+
+ -- * Types
+ , LateCCEnv(..)
+ , LateCCState(..)
+ , initLateCCState
) where
-import Control.Applicative
+import GHC.Prelude
+
import Control.Monad
+import Control.Monad.Trans.Class
+import Control.Monad.Trans.Reader
+import Control.Monad.Trans.State.Strict
import qualified Data.Set as S
-import GHC.Prelude
+import GHC.Core
+import GHC.Core.LateCC.Types
+import GHC.Core.Utils
+import GHC.Data.FastString
import GHC.Types.CostCentre
import GHC.Types.CostCentre.State
-import GHC.Types.Name hiding (varName)
+import GHC.Types.SrcLoc
import GHC.Types.Tickish
-import GHC.Unit.Module.ModGuts
-import GHC.Types.Var
-import GHC.Unit.Types
-import GHC.Data.FastString
-import GHC.Core
-import GHC.Core.Opt.Monad
-import GHC.Core.Utils (mkTick)
-import GHC.Types.Id
-import GHC.Driver.DynFlags
-
-import GHC.Utils.Logger
-import GHC.Utils.Outputable
-import GHC.Utils.Misc
-import GHC.Utils.Error (withTiming)
-import GHC.Utils.Monad.State.Strict
-
-
-{- Note [Collecting late cost centres]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Usually cost centres defined by a module are collected
-during tidy by collectCostCentres. However with `-fprof-late`
-we insert cost centres after inlining. So we keep a list of
-all the cost centres we inserted and combine that with the list
-of cost centres found during tidy.
-
-To avoid overhead when using -fprof-inline there is a flag to stop
-us from collecting them here when we run this pass before tidy.
-
-Note [Adding late cost centres]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The basic idea is very simple. For every top level binder
-`f = rhs` we compile it as if the user had written
-`f = {-# SCC f #-} rhs`.
-
-If we do this after unfoldings for `f` have been created this
-doesn't impact core-level optimizations at all. If we do it
-before the cost centre will be included in the unfolding and
-might inhibit optimizations at the call site. For this reason
-we provide flags for both approaches as they have different
-tradeoffs.
-
-We also don't add a cost centre for any binder that is a constructor
-worker or wrapper. These will never meaningfully enrich the resulting
-profile so we improve efficiency by omitting those.
--}
-
-addLateCostCentresMG :: ModGuts -> CoreM ModGuts
-addLateCostCentresMG guts = do
- dflags <- getDynFlags
- let env :: Env
- env = Env
- { thisModule = mg_module guts
- , countEntries = gopt Opt_ProfCountEntries dflags
- , collectCCs = False -- See Note [Collecting late cost centres]
- }
- let guts' = guts { mg_binds = fstOf3 (addLateCostCentres env (mg_binds guts))
- }
- return guts'
-
-addLateCostCentresPgm :: DynFlags -> Logger -> Module -> CoreProgram -> IO (CoreProgram, S.Set CostCentre, CostCentreState)
-addLateCostCentresPgm dflags logger mod binds =
- withTiming logger
- (text "LateCC"<+>brackets (ppr mod))
- (\(a,b,c) -> a `seqList` (b `seq` (c `seq` ()))) $ do
- let env = Env
- { thisModule = mod
- , countEntries = gopt Opt_ProfCountEntries dflags
- , collectCCs = True -- See Note [Collecting late cost centres]
- }
- (binds', ccs, cc_state) = addLateCostCentres env binds
- when (dopt Opt_D_dump_late_cc dflags || dopt Opt_D_verbose_core2core dflags) $
- putDumpFileMaybe logger Opt_D_dump_late_cc "LateCC" FormatCore (vcat (map ppr binds'))
- return (binds', ccs, cc_state)
-
-addLateCostCentres :: Env -> CoreProgram -> (CoreProgram, S.Set CostCentre, CostCentreState)
-addLateCostCentres env binds =
- let (binds', state) = runState (mapM (doBind env) binds) initLateCCState
- in (binds', lcs_ccs state, lcs_state state)
-
-
-doBind :: Env -> CoreBind -> M CoreBind
-doBind env (NonRec b rhs) = NonRec b <$> doBndr env b rhs
-doBind env (Rec bs) = Rec <$> mapM doPair bs
+-- | Insert cost centres into the 'CoreProgram' using the provided environment,
+-- initial state, and insertion method.
+addLateCostCentres
+ :: LateCCEnv
+ -- ^ Environment to run the insertion in
+ -> LateCCState s
+ -- ^ Initial state to run the insertion with
+ -> (CoreBind -> LateCCM s CoreBind)
+ -- ^ Insertion method
+ -> CoreProgram
+ -- ^ Bindings to consider
+ -> (CoreProgram, LateCCState s)
+addLateCostCentres env state method binds =
+ runLateCC env state $ mapM method binds
+
+-- | Evaluate late cost centre insertion
+runLateCC :: LateCCEnv -> LateCCState s -> LateCCM s a -> (a, LateCCState s)
+runLateCC env state = (`runState` state) . (`runReaderT` env)
+
+-- | The empty late cost centre insertion state
+initLateCCState :: s -> LateCCState s
+initLateCCState s =
+ LateCCState
+ { lateCCState_ccState = newCostCentreState
+ , lateCCState_ccs = mempty
+ , lateCCState_extra = s
+ }
+
+-- | Given the name of a cost centre, get its flavour
+getCCFlavour :: FastString -> LateCCM s CCFlavour
+getCCFlavour name = mkLateCCFlavour <$> getCCIndex' name
where
- doPair :: ((Id, CoreExpr) -> M (Id, CoreExpr))
- doPair (b,rhs) = (b,) <$> doBndr env b rhs
-
-doBndr :: Env -> Id -> CoreExpr -> M CoreExpr
-doBndr env bndr rhs
- -- Cost centres on constructor workers are pretty much useless
- -- so we don't emit them if we are looking at the rhs of a constructor
- -- binding.
- | Just _ <- isDataConId_maybe bndr = pure rhs
- | otherwise = doBndr' env bndr rhs
-
-
--- We want to put the cost centre below the lambda as we only care about executions of the RHS.
-doBndr' :: Env -> Id -> CoreExpr -> State LateCCState CoreExpr
-doBndr' env bndr (Lam b rhs) = Lam b <$> doBndr' env bndr rhs
-doBndr' env bndr rhs = do
- let name = idName bndr
- name_loc = nameSrcSpan name
- cc_name = getOccFS name
- count = countEntries env
+ getCCIndex' :: FastString -> LateCCM s CostCentreIndex
+ getCCIndex' name = do
+ cc_state <- lift $ gets lateCCState_ccState
+ let (index, cc_state') = getCCIndex name cc_state
+ lift . modify $ \s -> s { lateCCState_ccState = cc_state'}
+ return index
+
+-- | Insert a cost centre with the specified name and source span on the given
+-- expression. The inserted cost centre will be appropriately tracked in the
+-- late cost centre state.
+insertCC
+ :: FastString
+ -- ^ Name of the cost centre to insert
+ -> SrcSpan
+ -- ^ Source location to associate with the cost centre
+ -> CoreExpr
+ -- ^ Expression to wrap in the cost centre
+ -> LateCCM s CoreExpr
+insertCC cc_name cc_loc expr = do
cc_flavour <- getCCFlavour cc_name
- let cc_mod = thisModule env
- bndrCC = NormalCC cc_flavour cc_name cc_mod name_loc
- note = ProfNote bndrCC count True
- addCC env bndrCC
- return $ mkTick note rhs
-
-data LateCCState = LateCCState
- { lcs_state :: !CostCentreState
- , lcs_ccs :: S.Set CostCentre
- }
-type M = State LateCCState
-
-initLateCCState :: LateCCState
-initLateCCState = LateCCState newCostCentreState mempty
-
-getCCFlavour :: FastString -> M CCFlavour
-getCCFlavour name = mkLateCCFlavour <$> getCCIndex' name
-
-getCCIndex' :: FastString -> M CostCentreIndex
-getCCIndex' name = do
- state <- get
- let (index,cc_state') = getCCIndex name (lcs_state state)
- put (state { lcs_state = cc_state'})
- return index
-
-addCC :: Env -> CostCentre -> M ()
-addCC !env cc = do
- state <- get
- when (collectCCs env) $ do
- let ccs' = S.insert cc (lcs_ccs state)
- put (state { lcs_ccs = ccs'})
-
-data Env = Env
- { thisModule :: !Module
- , countEntries:: !Bool
- , collectCCs :: !Bool
- }
-
+ env <- ask
+ let
+ cc_mod = lateCCEnv_module env
+ cc = NormalCC cc_flavour cc_name cc_mod cc_loc
+ note = ProfNote cc (lateCCEnv_countEntries env) True
+ when (lateCCEnv_collectCCs env) $ do
+ lift . modify $ \s ->
+ s { lateCCState_ccs = S.insert cc (lateCCState_ccs s)
+ }
+ return $ mkTick note expr
=====================================
compiler/GHC/Core/LateCC/OverloadedCalls.hs
=====================================
@@ -0,0 +1,179 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE TupleSections #-}
+
+module GHC.Core.LateCC.OverloadedCalls
+ ( overloadedCallsCC
+ ) where
+
+import GHC.Prelude
+
+import Control.Monad.Trans.Class
+import Control.Monad.Trans.Reader
+import Control.Monad.Trans.State.Strict
+import qualified GHC.Data.Strict as Strict
+
+import GHC.Data.FastString
+import GHC.Core
+import GHC.Core.LateCC
+import GHC.Core.LateCC.Types
+import GHC.Core.Make
+import GHC.Core.Predicate
+import GHC.Core.Type
+import GHC.Core.Utils
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Types.SrcLoc
+import GHC.Types.Tickish
+import GHC.Types.Var
+import GHC.Utils.Outputable
+
+type OverloadedCallsCCState = Strict.Maybe SrcSpan
+
+-- | Insert cost centres on function applications with dictionary arguments. The
+-- source locations attached to the cost centres is approximated based on the
+-- "closest" source note encountered in the traversal.
+overloadedCallsCC :: CoreBind -> LateCCM OverloadedCallsCCState CoreBind
+overloadedCallsCC =
+ processBind
+ where
+ processBind :: CoreBind -> LateCCM OverloadedCallsCCState CoreBind
+ processBind core_bind =
+ case core_bind of
+ NonRec b e ->
+ NonRec b <$> processExpr e
+ Rec es ->
+ Rec <$> mapM (\(b,e) -> (b,) <$> processExpr e) es
+
+ processExpr :: CoreExpr -> LateCCM OverloadedCallsCCState CoreExpr
+ processExpr expr =
+ case expr of
+ -- The case we care about: Application
+ app at App{} -> do
+ -- Here we have some application like `f v1 ... vN`, where v1 ... vN
+ -- should be the function's type arguments followed by the value
+ -- arguments. To determine if the `f` is an overloaded function (i.e. not
+ -- specialised), we check if any of the arguments v1 ... vN are
+ -- dictionaries.
+ let
+ (f, xs) = collectArgs app
+ resultTy = applyTypeToArgs empty (exprType f) xs
+
+ -- Recursively process the arguments first for no particular reason
+ args <- mapM processExpr xs
+ let app' = mkCoreApps f args
+
+ if
+ -- Check if any of the arguments are dictionaries
+ any isDictExpr args
+
+ -- Avoid instrumenting dictionary functions, which may be
+ -- overloaded if there are superclasses, by checking if the result
+ -- type of the function is a dictionary type.
+ && not (isDictTy resultTy)
+
+ -- Avoid instrumenting constraint selectors like eq_sel
+ && (typeTypeOrConstraint resultTy /= ConstraintLike)
+
+ -- Avoid instrumenting join points
+ && not (isJoinVarExpr f)
+ then do
+ -- Extract a name and source location from the function being
+ -- applied
+ let
+ cc_name :: FastString
+ cc_name =
+ fsLit $ maybe "<no name available>" getOccString (exprName app)
+
+ cc_srcspan <-
+ fmap (Strict.fromMaybe (UnhelpfulSpan UnhelpfulNoLocationInfo)) $
+ lift $ gets lateCCState_extra
+
+ insertCC cc_name cc_srcspan app'
+ else
+ return app'
+
+ -- For recursive constructors of Expr, we traverse the nested Exprs
+ Lam b e ->
+ mkCoreLams [b] <$> processExpr e
+ Let b e ->
+ mkCoreLet <$> processBind b <*> processExpr e
+ Case e b t alts ->
+ Case
+ <$> processExpr e
+ <*> pure b
+ <*> pure t
+ <*> mapM processAlt alts
+ Cast e co ->
+ mkCast <$> processExpr e <*> pure co
+ Tick t e -> do
+ trackSourceNote t $
+ mkTick t <$> processExpr e
+
+ -- For non-recursive constructors of Expr, we do nothing
+ x -> return x
+
+ processAlt :: CoreAlt -> LateCCM OverloadedCallsCCState CoreAlt
+ processAlt (Alt c bs e) = Alt c bs <$> processExpr e
+
+ trackSourceNote :: CoreTickish -> LateCCM OverloadedCallsCCState a -> LateCCM OverloadedCallsCCState a
+ trackSourceNote tick act =
+ case tick of
+ SourceNote rss _ -> do
+ -- Prefer source notes from the current file
+ in_current_file <-
+ maybe False ((== EQ) . lexicalCompareFS (srcSpanFile rss)) <$>
+ asks lateCCEnv_file
+ if not in_current_file then
+ act
+ else do
+ loc <- lift $ gets lateCCState_extra
+ lift . modify $ \s ->
+ s { lateCCState_extra =
+ Strict.Just $ RealSrcSpan rss mempty
+ }
+ x <- act
+ lift . modify $ \s ->
+ s { lateCCState_extra = loc
+ }
+ return x
+ _ ->
+ act
+
+ -- Utility functions
+
+ -- Extract a Name from an expression. If it is an application, attempt to
+ -- extract a name from the applied function. If it is a variable, return the
+ -- Name of the variable. If it is a tick/cast, attempt to extract a Name
+ -- from the expression held in the tick/cast. Otherwise return Nothing.
+ exprName :: CoreExpr -> Maybe Name
+ exprName =
+ \case
+ App f _ ->
+ exprName f
+ Var f ->
+ Just (idName f)
+ Tick _ e ->
+ exprName e
+ Cast e _ ->
+ exprName e
+ _ ->
+ Nothing
+
+ -- Determine whether an expression is a dictionary
+ isDictExpr :: CoreExpr -> Bool
+ isDictExpr =
+ maybe False isDictTy . exprType'
+ where
+ exprType' :: CoreExpr -> Maybe Type
+ exprType' = \case
+ Type{} -> Nothing
+ expr -> Just $ exprType expr
+
+ -- Determine whether an expression is a join variable
+ isJoinVarExpr :: CoreExpr -> Bool
+ isJoinVarExpr =
+ \case
+ Var var -> isJoinId var
+ Tick _ e -> isJoinVarExpr e
+ Cast e _ -> isJoinVarExpr e
+ _ -> False
=====================================
compiler/GHC/Core/LateCC/TopLevelBinds.hs
=====================================
@@ -0,0 +1,105 @@
+{-# LANGUAGE TupleSections #-}
+module GHC.Core.LateCC.TopLevelBinds where
+
+import GHC.Prelude
+
+import GHC.Core
+import GHC.Core.LateCC
+import GHC.Core.LateCC.Types
+import GHC.Core.Opt.Monad
+import GHC.Driver.DynFlags
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Unit.Module.ModGuts
+
+{- Note [Collecting late cost centres]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Usually cost centres defined by a module are collected
+during tidy by collectCostCentres. However with `-fprof-late`
+we insert cost centres after inlining. So we keep a list of
+all the cost centres we inserted and combine that with the list
+of cost centres found during tidy.
+
+To avoid overhead when using -fprof-inline there is a flag to stop
+us from collecting them here when we run this pass before tidy.
+
+Note [Adding late cost centres to top level bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The basic idea is very simple. For every top level binder
+`f = rhs` we compile it as if the user had written
+`f = {-# SCC f #-} rhs`.
+
+If we do this after unfoldings for `f` have been created this
+doesn't impact core-level optimizations at all. If we do it
+before the cost centre will be included in the unfolding and
+might inhibit optimizations at the call site. For this reason
+we provide flags for both approaches as they have different
+tradeoffs.
+
+We also don't add a cost centre for any binder that is a constructor
+worker or wrapper. These will never meaningfully enrich the resulting
+profile so we improve efficiency by omitting those.
+
+-}
+
+-- | Add late cost centres directly to the 'ModGuts'. This is used inside the
+-- core pipeline with the -fprof-late-inline flag. It should not be used after
+-- tidy, since it does not manually track inserted cost centers. See
+-- Note [Collecting late cost centres].
+topLevelBindsCCMG :: ModGuts -> CoreM ModGuts
+topLevelBindsCCMG guts = do
+ dflags <- getDynFlags
+ let
+ env =
+ LateCCEnv
+ { lateCCEnv_module = mg_module guts
+
+ -- We don't use this for topLevelBindsCC, so Nothing is okay
+ , lateCCEnv_file = Nothing
+
+ , lateCCEnv_countEntries= gopt Opt_ProfCountEntries dflags
+ , lateCCEnv_collectCCs = False
+ }
+ guts' =
+ guts
+ { mg_binds =
+ fst
+ ( addLateCostCentres
+ env
+ (initLateCCState ())
+ (topLevelBindsCC (const True))
+ (mg_binds guts)
+ )
+ }
+ return guts'
+
+-- | Insert cost centres on top level bindings in the module, depending on
+-- whether or not they satisfy the given predicate.
+topLevelBindsCC :: (CoreExpr -> Bool) -> CoreBind -> LateCCM s CoreBind
+topLevelBindsCC pred core_bind =
+ case core_bind of
+ NonRec b rhs ->
+ NonRec b <$> doBndr b rhs
+ Rec bs ->
+ Rec <$> mapM doPair bs
+ where
+ doPair :: ((Id, CoreExpr) -> LateCCM s (Id, CoreExpr))
+ doPair (b,rhs) = (b,) <$> doBndr b rhs
+
+ doBndr :: Id -> CoreExpr -> LateCCM s CoreExpr
+ doBndr bndr rhs
+ -- Cost centres on constructor workers are pretty much useless
+ -- so we don't emit them if we are looking at the rhs of a constructor
+ -- binding.
+ | Just _ <- isDataConId_maybe bndr = pure rhs
+ | otherwise = if pred rhs then addCC bndr rhs else pure rhs
+
+ -- We want to put the cost centre below the lambda as we only care about
+ -- executions of the RHS.
+ addCC :: Id -> CoreExpr -> LateCCM s CoreExpr
+ addCC bndr (Lam b rhs) = Lam b <$> addCC bndr rhs
+ addCC bndr rhs = do
+ let name = idName bndr
+ cc_loc = nameSrcSpan name
+ cc_name = getOccFS name
+ insertCC cc_name cc_loc rhs
\ No newline at end of file
=====================================
compiler/GHC/Core/LateCC/Types.hs
=====================================
@@ -0,0 +1,39 @@
+{-# LANGUAGE DerivingStrategies #-}
+module GHC.Core.LateCC.Types where
+
+import GHC.Prelude
+
+import Control.Monad.Trans.Reader
+import Control.Monad.Trans.State.Strict
+import qualified Data.Set as S
+
+import GHC.Data.FastString
+import GHC.Types.CostCentre
+import GHC.Types.CostCentre.State
+import GHC.Unit.Types
+
+-- | Late cost centre insertion environment
+data LateCCEnv = LateCCEnv
+ { lateCCEnv_module :: !Module
+ -- ^ Current module
+ , lateCCEnv_file :: Maybe FastString
+ -- ^ Current file, if we have one
+ , lateCCEnv_countEntries:: !Bool
+ -- ^ Whether the inserted cost centers should count entries
+ , lateCCEnv_collectCCs :: !Bool
+ -- ^ Whether to collect the cost centres we insert. See
+ -- Note [Collecting late cost centres]
+ }
+
+-- | Late cost centre insertion state, indexed by some extra state type that an
+-- insertion method may require.
+data LateCCState s = LateCCState
+ { lateCCState_ccs :: !(S.Set CostCentre)
+ -- ^ Cost centres that have been inserted
+ , lateCCState_ccState :: !CostCentreState
+ -- ^ Per-module state tracking for cost centre indices
+ , lateCCState_extra :: !s
+ }
+
+-- | Late cost centre insertion monad
+type LateCCM s = ReaderT LateCCEnv (State (LateCCState s))
=====================================
compiler/GHC/Core/Opt/Pipeline.hs
=====================================
@@ -43,7 +43,7 @@ import GHC.Core.Opt.CallArity ( callArityAnalProgram )
import GHC.Core.Opt.Exitify ( exitifyProgram )
import GHC.Core.Opt.WorkWrap ( wwTopBinds )
import GHC.Core.Opt.CallerCC ( addCallerCostCentres )
-import GHC.Core.LateCC (addLateCostCentresMG)
+import GHC.Core.LateCC.TopLevelBinds (topLevelBindsCCMG)
import GHC.Core.Seq (seqBinds)
import GHC.Core.FamInstEnv
@@ -520,7 +520,7 @@ doCorePass pass guts = do
addCallerCostCentres guts
CoreAddLateCcs -> {-# SCC "AddLateCcs" #-}
- addLateCostCentresMG guts
+ topLevelBindsCCMG guts
CoreDoPrintCore -> {-# SCC "PrintCore" #-}
liftIO $ printCore logger (mg_binds guts) >> return guts
=====================================
compiler/GHC/Driver/Flags.hs
=====================================
@@ -341,6 +341,8 @@ data GeneralFlag
| Opt_ProfCountEntries
| Opt_ProfLateInlineCcs
| Opt_ProfLateCcs
+ | Opt_ProfLateOverloadedCcs
+ | Opt_ProfLateoverloadedCallsCCs
| Opt_ProfManualCcs -- ^ Ignore manual SCC annotations
-- misc opts
=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -172,7 +172,6 @@ import GHC.Iface.Ext.Debug ( diffFile, validateScopes )
import GHC.Core
import GHC.Core.Lint.Interactive ( interactiveInScope )
import GHC.Core.Tidy ( tidyExpr )
-import GHC.Core.Type ( Type, Kind )
import GHC.Core.Utils ( exprType )
import GHC.Core.ConLike
import GHC.Core.Opt.Pipeline
@@ -182,7 +181,9 @@ import GHC.Core.InstEnv
import GHC.Core.FamInstEnv
import GHC.Core.Rules
import GHC.Core.Stats
-import GHC.Core.LateCC (addLateCostCentresPgm)
+import GHC.Core.LateCC
+import GHC.Core.LateCC.TopLevelBinds
+import GHC.Core.LateCC.OverloadedCalls
import GHC.CoreToStg.Prep
@@ -194,6 +195,7 @@ import GHC.Parser.Lexer as Lexer
import GHC.Tc.Module
import GHC.Tc.Utils.Monad
+import GHC.Tc.Utils.TcType
import GHC.Tc.Zonk.Env ( ZonkFlexi (DefaultFlexi) )
import GHC.Stg.Syntax
@@ -264,6 +266,7 @@ import GHC.Data.StringBuffer
import qualified GHC.Data.Stream as Stream
import GHC.Data.Stream (Stream)
import GHC.Data.Maybe
+import qualified GHC.Data.Strict as Strict
import GHC.SysTools (initSysTools)
import GHC.SysTools.BaseDir (findTopDir)
@@ -294,7 +297,6 @@ import GHC.StgToCmm.Utils (IPEStats)
import GHC.Types.Unique.FM
import GHC.Types.Unique.DFM
import GHC.Cmm.Config (CmmConfig)
-import GHC.Types.CostCentre.State (newCostCentreState)
{- **********************************************************************
@@ -1788,22 +1790,73 @@ hscGenHardCode hsc_env cgguts location output_filename = do
-------------------
- -- Insert late cost centres if enabled.
+ -- Insert late cost centres on top level bindings if enabled.
-- If `-fprof-late-inline` is enabled we can skip this, as it will have added
-- a superset of cost centres we would add here already.
- (late_cc_binds, late_local_ccs, cc_state) <-
- if gopt Opt_ProfLateCcs dflags && not (gopt Opt_ProfLateInlineCcs dflags)
- then
- withTiming
- logger
- (text "LateCCs"<+>brackets (ppr this_mod))
- (const ())
- $ {-# SCC lateCC #-} do
- (binds, late_ccs, cc_state) <- addLateCostCentresPgm dflags logger this_mod core_binds
- return ( binds, (S.toList late_ccs `mappend` local_ccs ), cc_state)
- else
- return (core_binds, local_ccs, newCostCentreState)
+ -- If `-fprof-late-overloaded` is enabled, only add CCs to bindings for
+ -- overloaded functions.
+ let
+ topLevelCCPred :: CoreExpr -> Bool
+ topLevelCCPred =
+ if gopt Opt_ProfLateOverloadedCcs dflags then
+ isOverloadedTy . exprType
+ else
+ const True
+
+ doLateTopLevelCcs :: Bool
+ doLateTopLevelCcs =
+ ( gopt Opt_ProfLateCcs dflags
+ || gopt Opt_ProfLateOverloadedCcs dflags
+ )
+ && not (gopt Opt_ProfLateInlineCcs dflags)
+
+ lateCCEnv :: LateCCEnv
+ lateCCEnv =
+ LateCCEnv
+ { lateCCEnv_module = this_mod
+ , lateCCEnv_file = fsLit <$> ml_hs_file location
+ , lateCCEnv_countEntries= gopt Opt_ProfCountEntries dflags
+ , lateCCEnv_collectCCs = True
+ }
+
+ (top_level_cc_binds, top_level_late_cc_state) <-
+ if doLateTopLevelCcs then do
+ withTiming
+ logger
+ (text "LateTopLevelCCs"<+>brackets (ppr this_mod))
+ (\(binds, late_cc_state) -> seqBinds binds `seq` late_cc_state)
+ $ {-# SCC lateTopLevelCCs #-} do
+ pure $
+ addLateCostCentres
+ lateCCEnv
+ (initLateCCState ())
+ (topLevelBindsCC topLevelCCPred)
+ core_binds
+ else
+ return (core_binds, initLateCCState ())
+
+ (late_cc_binds, late_cc_state) <-
+ if gopt Opt_ProfLateoverloadedCallsCCs dflags then
+ withTiming
+ logger
+ (text "LateOverloadedCallsCCs"<+>brackets (ppr this_mod))
+ (\(binds, late_cc_state) -> binds `seqList` (late_cc_state `seq` ()))
+ $ {-# SCC lateoverloadedCallsCCs #-} do
+ pure $
+ addLateCostCentres
+ lateCCEnv
+ (top_level_late_cc_state { lateCCState_extra = Strict.Nothing })
+ overloadedCallsCC
+ top_level_cc_binds
+ else
+ return
+ ( top_level_cc_binds
+ , top_level_late_cc_state { lateCCState_extra = Strict.Nothing }
+ )
+
+ when (dopt Opt_D_dump_late_cc dflags || dopt Opt_D_verbose_core2core dflags) $
+ putDumpFileMaybe logger Opt_D_dump_late_cc "LateCC" FormatCore (vcat (map ppr late_cc_binds))
-------------------
-- Run late plugins
@@ -1817,7 +1870,7 @@ hscGenHardCode hsc_env cgguts location output_filename = do
cg_hpc_info = hpc_info,
cg_spt_entries = spt_entries,
cg_binds = late_binds,
- cg_ccs = late_local_ccs'
+ cg_ccs = late_local_ccs
}
, _
) <-
@@ -1830,9 +1883,9 @@ hscGenHardCode hsc_env cgguts location output_filename = do
(($ hsc_env) . latePlugin)
( cgguts
{ cg_binds = late_cc_binds
- , cg_ccs = late_local_ccs
+ , cg_ccs = S.toList (lateCCState_ccs late_cc_state) ++ local_ccs
}
- , cc_state
+ , lateCCState_ccState late_cc_state
)
let
@@ -1873,7 +1926,7 @@ hscGenHardCode hsc_env cgguts location output_filename = do
let (stg_binds,_stg_deps) = unzip stg_binds_with_deps
let cost_centre_info =
- (late_local_ccs' ++ caf_ccs, caf_cc_stacks)
+ (late_local_ccs ++ caf_ccs, caf_cc_stacks)
platform = targetPlatform dflags
prof_init
| sccProfilingEnabled dflags = profilingInitCode platform this_mod cost_centre_info
=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -2444,6 +2444,8 @@ fFlagsDeps = [
flagSpec "prof-cafs" Opt_AutoSccsOnIndividualCafs,
flagSpec "prof-count-entries" Opt_ProfCountEntries,
flagSpec "prof-late" Opt_ProfLateCcs,
+ flagSpec "prof-late-overloaded" Opt_ProfLateOverloadedCcs,
+ flagSpec "prof-late-overloaded-calls" Opt_ProfLateoverloadedCallsCCs,
flagSpec "prof-manual" Opt_ProfManualCcs,
flagSpec "prof-late-inline" Opt_ProfLateInlineCcs,
flagSpec "regs-graph" Opt_RegsGraph,
@@ -3762,6 +3764,10 @@ needSourceNotes :: DynFlags -> Bool
needSourceNotes dflags = debugLevel dflags > 0
|| gopt Opt_InfoTableMap dflags
+ -- Source ticks are used to approximate the location of
+ -- overloaded call cost centers
+ || gopt Opt_ProfLateoverloadedCallsCCs dflags
+
-- -----------------------------------------------------------------------------
-- Linker/compiler information
=====================================
compiler/GHC/Tc/Utils/TcType.hs
=====================================
@@ -1907,7 +1907,7 @@ isRhoExpTy (Infer {}) = True
isOverloadedTy :: Type -> Bool
-- Yes for a type of a function that might require evidence-passing
--- Used only by bindLocalMethods
+-- Used by bindLocalMethods and for -fprof-late-overloaded
isOverloadedTy ty | Just ty' <- coreView ty = isOverloadedTy ty'
isOverloadedTy (ForAllTy _ ty) = isOverloadedTy ty
isOverloadedTy (FunTy { ft_af = af }) = isInvisibleFunArg af
=====================================
compiler/ghc.cabal.in
=====================================
@@ -337,6 +337,9 @@ Library
GHC.Core.Lint
GHC.Core.Lint.Interactive
GHC.Core.LateCC
+ GHC.Core.LateCC.Types
+ GHC.Core.LateCC.TopLevelBinds
+ GHC.Core.LateCC.OverloadedCalls
GHC.Core.Make
GHC.Core.Map.Expr
GHC.Core.Map.Type
=====================================
docs/users_guide/9.10.1-notes.rst
=====================================
@@ -174,6 +174,15 @@ Compiler
This means that if you are using ``-fllvm`` you now need ``llc``, ``opt`` and ``clang``
available.
+- The :ghc-flag:`-fprof-late-overloaded` flag has been introduced. It causes
+ cost centres to be added to *overloaded* top level bindings, unlike
+ :ghc-flag:`-fprof-late` which adds cost centres to all top level bindings.
+
+- The :ghc-flag:`-fprof-late-overloaded-calls` flag has been introduced. It
+ causes cost centres to be inserted at call sites including instance dictionary
+ arguments. This may be preferred over :ghc-flag:`-fprof-late-overloaded` since
+ it may reveal whether imported functions are called overloaded.
+
JavaScript backend
~~~~~~~~~~~~~~~~~~
=====================================
docs/users_guide/profiling.rst
=====================================
@@ -518,6 +518,42 @@ of your profiled program will be different to that of the unprofiled one.
You can try this mode if :ghc-flag:`-fprof-late` results in a profile that's too hard to interpret.
+.. ghc-flag:: -fprof-late-overloaded
+ :shortdesc: Auto-add ``SCC``\\ s to all top level overloaded bindings *after* the core pipeline has run.
+ :type: dynamic
+ :reverse: -fno-prof-late-overloaded
+ :category:
+
+ :since: 9.10.1
+
+ Adds an automatic ``SCC`` annotation to all *overloaded* top level bindings
+ late in the compilation pipeline after the optimizer has run and unfoldings
+ have been created. This means these cost centres will not interfere with
+ core-level optimizations and the resulting profile will be closer to the
+ performance profile of an optimized non-profiled executable.
+
+ This flag can help determine which top level bindings encountered during a
+ program's execution are still overloaded after inlining and specialization.
+
+.. ghc-flag:: -fprof-late-overloaded-calls
+ :shortdesc: Auto-add ``SCC``\\ s to all call sites that include dictionary arguments *after* the core pipeline has run.
+ :type: dynamic
+ :reverse: -fno-prof-late-overloaded-calls
+ :category:
+
+ :since: 9.10.1
+
+ Adds an automatic ``SCC`` annotation to all call sites that include
+ dictionary arguments late in the compilation pipeline after the optimizer
+ has run and unfoldings have been created. This means these cost centres will
+ not interfere with core-level optimizations and the resulting profile will
+ be closer to the performance profile of an optimized non-profiled
+ executable.
+
+ This flag is potentially more useful than :ghc-flag:`-fprof-late-overloaded`
+ since it will also add ``SCC`` annotations to call sites of imported
+ overloaded functions.
+
.. ghc-flag:: -fprof-cafs
:shortdesc: Auto-add ``SCC``\\ s to all CAFs
:type: dynamic
=====================================
testsuite/tests/profiling/should_run/all.T
=====================================
@@ -195,3 +195,30 @@ test('ignore_scc', [], compile_and_run,
['-fno-prof-manual'])
test('T21446', [], makefile_test, ['T21446'])
+
+
+test('scc-prof-overloaded001',
+ [],
+ compile_and_run,
+ ['-fno-prof-auto -fno-full-laziness -fprof-late-overloaded'] # See Note [consistent stacks]
+)
+
+test('scc-prof-overloaded002',
+ [],
+ compile_and_run,
+ ['-fno-prof-auto -fno-full-laziness -fprof-late-overloaded'] # See Note [consistent stacks]
+)
+
+test('scc-prof-overloaded-calls001',
+ [],
+ compile_and_run,
+ # Need optimizations to get rid of unwanted overloaded calls
+ ['-O -fno-prof-auto -fno-full-laziness -fprof-late-overloaded-calls'] # See Note [consistent stacks]
+)
+
+test('scc-prof-overloaded-calls002',
+ [],
+ compile_and_run,
+ # Need optimizations to get rid of unwanted overloaded calls
+ ['-O -fno-prof-auto -fprof-late-overloaded-calls']
+)
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.hs
=====================================
@@ -0,0 +1,24 @@
+-- Running this program should result in two calls to overloaded functions: One
+-- with the $fShowX dictionary, the next with the $fShowList dictionary
+-- constructor for X.
+--
+-- Note that although the `$fShowList` dictionary constructor is itself
+-- overloaded, it should not get an SCC since we avoid instrumenting overloaded
+-- calls that result in dictionaries.
+--
+-- With just -fprof-late-overloaded, only `invoke` should get an SCC, since it
+-- is the only overloaded top level binding. With
+-- `-fprof-late-overloaded-calls`, the calls to both `invoke` and `f` (in the
+-- body of invoke) should get SCCs.
+
+module Main where
+
+{-# NOINLINE invoke #-}
+invoke :: Show a => (Show [a] => [a] -> String) -> a -> String
+invoke f x = f [x]
+
+data X = X
+ deriving Show
+
+main :: IO ()
+main = putStrLn (invoke show X)
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.prof.sample
=====================================
@@ -0,0 +1,26 @@
+ Thu Jan 4 11:49 2024 Time and Allocation Profiling Report (Final)
+
+ scc-prof-overloaded-calls001 +RTS -hc -p -RTS
+
+ total time = 0.00 secs (0 ticks @ 1000 us, 1 processor)
+ total alloc = 48,320 bytes (excludes profiling overheads)
+
+COST CENTRE MODULE SRC %time %alloc
+
+MAIN MAIN <built-in> 0.0 20.5
+CAF GHC.IO.Handle.FD <entire-module> 0.0 71.9
+CAF GHC.IO.Encoding <entire-module> 0.0 5.1
+CAF GHC.Conc.Signal <entire-module> 0.0 1.3
+
+
+ individual inherited
+COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
+
+MAIN MAIN <built-in> 128 0 0.0 20.5 0.0 100.0
+ CAF Main <entire-module> 255 0 0.0 0.0 0.0 0.8
+ invoke Main scc-prof-overloaded-calls001.hs:24:1-31 256 1 0.0 0.3 0.0 0.8
+ f Main scc-prof-overloaded-calls001.hs:18:1-18 257 1 0.0 0.6 0.0 0.6
+ CAF GHC.Conc.Signal <entire-module> 238 0 0.0 1.3 0.0 1.3
+ CAF GHC.IO.Encoding <entire-module> 219 0 0.0 5.1 0.0 5.1
+ CAF GHC.IO.Encoding.Iconv <entire-module> 217 0 0.0 0.4 0.0 0.4
+ CAF GHC.IO.Handle.FD <entire-module> 208 0 0.0 71.9 0.0 71.9
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls001.stdout
=====================================
@@ -0,0 +1 @@
+[X]
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.hs
=====================================
@@ -0,0 +1,65 @@
+-- Running this program should result in seven calls to overloaded functions
+-- with increasing numbers of dictionary arguments.
+--
+-- With just -fprof-late-overloaded, no SCCs should be added, since none of the
+-- overloaded functions are top level. With `-fprof-late-overloaded-calls`, all
+-- seven calls should get *distinct* SCCs with separate source locations even
+-- though the overloaded functions share an OccName (`f`).
+
+module Main where
+
+data X = X
+
+instance Show X where
+instance Num X where
+instance Eq X where
+instance Enum X where
+instance Ord X where
+instance Real X where
+instance Integral X where
+
+-- No overloaded call
+{-# NOINLINE invoke0 #-}
+invoke0 :: (forall a. a -> a -> String) -> X -> String
+invoke0 f val = f val val
+
+{-# NOINLINE invoke1 #-}
+invoke1 :: (forall a. Show a => a -> a -> String) -> X -> String
+invoke1 f val = f val val
+
+{-# NOINLINE invoke2 #-}
+invoke2 :: (forall a. (Show a, Num a) => a -> a -> String) -> X -> String
+invoke2 f val = f val val
+
+{-# NOINLINE invoke3 #-}
+invoke3 :: (forall a. (Show a, Num a, Eq a) => a -> a -> String) -> X -> String
+invoke3 f val = f val val
+
+{-# NOINLINE invoke4 #-}
+invoke4 :: (forall a. (Show a, Num a, Eq a, Enum a) => a -> a -> String) -> X -> String
+invoke4 f val = f val val
+
+{-# NOINLINE invoke5 #-}
+invoke5 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a) => a -> a -> String) -> X -> String
+invoke5 f val = f val val
+
+{-# NOINLINE invoke6 #-}
+invoke6 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a, Real a) => a -> a -> String) -> X -> String
+invoke6 f val = f val val
+
+{-# NOINLINE invoke7 #-}
+invoke7 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a, Real a, Integral a) => a -> a -> String) -> X -> String
+invoke7 f val = f val val
+
+main :: IO ()
+main = do
+ putStrLn $ invoke0 (\_ _ -> s) X
+ putStrLn $ invoke1 (\_ _ -> s) X
+ putStrLn $ invoke2 (\_ _ -> s) X
+ putStrLn $ invoke3 (\_ _ -> s) X
+ putStrLn $ invoke4 (\_ _ -> s) X
+ putStrLn $ invoke5 (\_ _ -> s) X
+ putStrLn $ invoke6 (\_ _ -> s) X
+ putStrLn $ invoke7 (\_ _ -> s) X
+ where
+ s = "wibbly"
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.prof.sample
=====================================
@@ -0,0 +1,31 @@
+ Fri Jan 5 11:06 2024 Time and Allocation Profiling Report (Final)
+
+ scc-prof-overloaded-calls002 +RTS -p -RTS
+
+ total time = 0.00 secs (0 ticks @ 1000 us, 1 processor)
+ total alloc = 59,152 bytes (excludes profiling overheads)
+
+COST CENTRE MODULE SRC %time %alloc
+
+MAIN MAIN <built-in> 0.0 34.8
+CAF GHC.IO.Handle.FD <entire-module> 0.0 58.7
+CAF GHC.IO.Encoding <entire-module> 0.0 4.1
+CAF GHC.Conc.Signal <entire-module> 0.0 1.1
+
+
+ individual inherited
+COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
+
+MAIN MAIN <built-in> 128 0 0.0 34.8 0.0 100.0
+ CAF Main <entire-module> 255 0 0.0 0.6 0.0 0.9
+ f Main scc-prof-overloaded-calls002.hs:52:1-25 262 1 0.0 0.1 0.0 0.1
+ f Main scc-prof-overloaded-calls002.hs:48:1-25 261 1 0.0 0.1 0.0 0.1
+ f Main scc-prof-overloaded-calls002.hs:44:1-25 260 1 0.0 0.1 0.0 0.1
+ f Main scc-prof-overloaded-calls002.hs:40:1-25 259 1 0.0 0.0 0.0 0.0
+ f Main scc-prof-overloaded-calls002.hs:36:1-25 258 1 0.0 0.0 0.0 0.0
+ f Main scc-prof-overloaded-calls002.hs:32:1-25 257 1 0.0 0.0 0.0 0.0
+ f Main scc-prof-overloaded-calls002.hs:28:1-25 256 1 0.0 0.0 0.0 0.0
+ CAF GHC.Conc.Signal <entire-module> 238 0 0.0 1.1 0.0 1.1
+ CAF GHC.IO.Encoding <entire-module> 219 0 0.0 4.1 0.0 4.1
+ CAF GHC.IO.Encoding.Iconv <entire-module> 217 0 0.0 0.3 0.0 0.3
+ CAF GHC.IO.Handle.FD <entire-module> 208 0 0.0 58.7 0.0 58.7
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded-calls002.stdout
=====================================
@@ -0,0 +1,8 @@
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded001.hs
=====================================
@@ -0,0 +1,24 @@
+-- Running this program should result in two calls to overloaded functions: One
+-- with the $fShowX dictionary, the next with the $fShowList dictionary
+-- constructor for X.
+--
+-- Note that although the `$fShowList` dictionary constructor is itself
+-- overloaded, it should not get an SCC since we avoid instrumenting overloaded
+-- calls that result in dictionaries.
+--
+-- With just -fprof-late-overloaded, only `invoke` should get an SCC, since it
+-- is the only overloaded top level binding. With
+-- `-fprof-late-overloaded-calls`, the calls to both `invoke` and `f` (in the
+-- body of invoke) should get SCCs.
+
+module Main where
+
+{-# NOINLINE invoke #-}
+invoke :: Show a => (Show [a] => [a] -> String) -> a -> String
+invoke f x = f [x]
+
+data X = X
+ deriving Show
+
+main :: IO ()
+main = putStrLn (invoke show X)
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded001.prof.sample
=====================================
@@ -0,0 +1,25 @@
+ Thu Jan 4 11:26 2024 Time and Allocation Profiling Report (Final)
+
+ scc-prof-overloaded001 +RTS -hc -p -RTS
+
+ total time = 0.00 secs (0 ticks @ 1000 us, 1 processor)
+ total alloc = 48,304 bytes (excludes profiling overheads)
+
+COST CENTRE MODULE SRC %time %alloc
+
+MAIN MAIN <built-in> 0.0 20.5
+CAF GHC.IO.Handle.FD <entire-module> 0.0 71.9
+CAF GHC.IO.Encoding <entire-module> 0.0 5.1
+CAF GHC.Conc.Signal <entire-module> 0.0 1.3
+
+
+ individual inherited
+COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
+
+MAIN MAIN <built-in> 128 0 0.0 20.5 0.0 100.0
+ CAF Main <entire-module> 255 0 0.0 0.0 0.0 0.8
+ invoke Main scc-prof-overloaded001.hs:18:1-6 256 1 0.0 0.8 0.0 0.8
+ CAF GHC.Conc.Signal <entire-module> 238 0 0.0 1.3 0.0 1.3
+ CAF GHC.IO.Encoding <entire-module> 219 0 0.0 5.1 0.0 5.1
+ CAF GHC.IO.Encoding.Iconv <entire-module> 217 0 0.0 0.4 0.0 0.4
+ CAF GHC.IO.Handle.FD <entire-module> 208 0 0.0 71.9 0.0 71.9
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded001.stdout
=====================================
@@ -0,0 +1 @@
+[X]
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded002.hs
=====================================
@@ -0,0 +1,65 @@
+-- Running this program should result in seven calls to overloaded functions
+-- with increasing numbers of dictionary arguments.
+--
+-- With just -fprof-late-overloaded, no SCCs should be added, since none of the
+-- overloaded functions are top level. With `-fprof-late-overloaded-calls`, all
+-- seven calls should get *distinct* SCCs with separate source locations even
+-- though the overloaded functions share an OccName (`f`).
+
+module Main where
+
+data X = X
+
+instance Show X where
+instance Num X where
+instance Eq X where
+instance Enum X where
+instance Ord X where
+instance Real X where
+instance Integral X where
+
+-- No overloaded call
+{-# NOINLINE invoke0 #-}
+invoke0 :: (forall a. a -> a -> String) -> X -> String
+invoke0 f val = f val val
+
+{-# NOINLINE invoke1 #-}
+invoke1 :: (forall a. Show a => a -> a -> String) -> X -> String
+invoke1 f val = f val val
+
+{-# NOINLINE invoke2 #-}
+invoke2 :: (forall a. (Show a, Num a) => a -> a -> String) -> X -> String
+invoke2 f val = f val val
+
+{-# NOINLINE invoke3 #-}
+invoke3 :: (forall a. (Show a, Num a, Eq a) => a -> a -> String) -> X -> String
+invoke3 f val = f val val
+
+{-# NOINLINE invoke4 #-}
+invoke4 :: (forall a. (Show a, Num a, Eq a, Enum a) => a -> a -> String) -> X -> String
+invoke4 f val = f val val
+
+{-# NOINLINE invoke5 #-}
+invoke5 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a) => a -> a -> String) -> X -> String
+invoke5 f val = f val val
+
+{-# NOINLINE invoke6 #-}
+invoke6 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a, Real a) => a -> a -> String) -> X -> String
+invoke6 f val = f val val
+
+{-# NOINLINE invoke7 #-}
+invoke7 :: (forall a. (Show a, Num a, Eq a, Enum a, Ord a, Real a, Integral a) => a -> a -> String) -> X -> String
+invoke7 f val = f val val
+
+main :: IO ()
+main = do
+ putStrLn $ invoke0 (\_ _ -> s) X
+ putStrLn $ invoke1 (\_ _ -> s) X
+ putStrLn $ invoke2 (\_ _ -> s) X
+ putStrLn $ invoke3 (\_ _ -> s) X
+ putStrLn $ invoke4 (\_ _ -> s) X
+ putStrLn $ invoke5 (\_ _ -> s) X
+ putStrLn $ invoke6 (\_ _ -> s) X
+ putStrLn $ invoke7 (\_ _ -> s) X
+ where
+ s = "wibbly"
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded002.prof.sample
=====================================
@@ -0,0 +1,23 @@
+ Thu Jan 4 11:55 2024 Time and Allocation Profiling Report (Final)
+
+ scc-prof-overloaded002 +RTS -hc -p -RTS
+
+ total time = 0.00 secs (0 ticks @ 1000 us, 1 processor)
+ total alloc = 56,472 bytes (excludes profiling overheads)
+
+COST CENTRE MODULE SRC %time %alloc
+
+MAIN MAIN <built-in> 0.0 32.7
+CAF GHC.IO.Handle.FD <entire-module> 0.0 61.5
+CAF GHC.IO.Encoding <entire-module> 0.0 4.3
+CAF GHC.Conc.Signal <entire-module> 0.0 1.1
+
+
+ individual inherited
+COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
+
+MAIN MAIN <built-in> 128 0 0.0 32.7 0.0 100.0
+ CAF GHC.Conc.Signal <entire-module> 238 0 0.0 1.1 0.0 1.1
+ CAF GHC.IO.Encoding <entire-module> 219 0 0.0 4.3 0.0 4.3
+ CAF GHC.IO.Encoding.Iconv <entire-module> 217 0 0.0 0.4 0.0 0.4
+ CAF GHC.IO.Handle.FD <entire-module> 208 0 0.0 61.5 0.0 61.5
=====================================
testsuite/tests/profiling/should_run/scc-prof-overloaded002.stdout
=====================================
@@ -0,0 +1,8 @@
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
+wibbly
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e95a780d3a7be1d5e028cfca982e3f2eee9efcf8
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/e95a780d3a7be1d5e028cfca982e3f2eee9efcf8
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240228/48d3b91d/attachment-0001.html>
More information about the ghc-commits
mailing list