[Git][ghc/ghc][wip/llvm-debug-info] 5 commits: Add flags for switching off speculative evaluation.

Serge S. Gulin (@gulin.serge) gitlab at gitlab.haskell.org
Fri Jan 10 15:15:26 UTC 2025



Serge S. Gulin pushed to branch wip/llvm-debug-info at Glasgow Haskell Compiler / GHC


Commits:
23099752 by Luite Stegeman at 2025-01-08T00:33:33+01:00
Add flags for switching off speculative evaluation.

We found that speculative evaluation can increase the amount of
allocations in some circumstances. This patch adds new flags for
selectively disabling speculative evaluation, allowing us to
test the effect of the optimization.

The new flags are:

  -fspec-eval
     globally enable speculative evaluation

  -fspec-eval-dictfun
     enable speculative evaluation for dictionary functions (no effect
     if speculative evaluation is globally disabled)

The new flags are on by default for all optimisation levels.

See #25284

- - - - -
0161badc by Ben Gamari at 2025-01-09T17:30:05-05:00
rts/printClosure: Print IPE information for thunks and functions

This makes it considerably easier to grok the structure of the heap
when IPE information is available.

- - - - -
7b7239e1 by Ben Gamari at 2025-01-10T12:21:49+03:00
[WIP] llvmGen: Produce debug information metadata for functions

Summary:
It turns out that providing debug information in LLVM is relatively
straightforward. At this moment this only provides debug information with
procedure-level granularity.

Test Plan: Validate, look at DWARF output, try poking around in GDB

Reviewers: scpmw, simonmar, austin

Subscribers: spacekitteh, cocreature, thomie

Differential Revision: https://phabricator.haskell.org/D2343

- - - - -
51f12e32 by Ben Gamari at 2025-01-10T12:21:49+03:00
Fix distinction

- - - - -
62eab54b by Serge S. Gulin at 2025-01-10T12:21:49+03:00
Code cleanup after rebase

- - - - -


22 changed files:

- compiler/GHC/CmmToLlvm.hs
- compiler/GHC/CmmToLlvm/Base.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/CmmToLlvm/Ppr.hs
- compiler/GHC/CoreToStg/Prep.hs
- compiler/GHC/Driver/CodeOutput.hs
- compiler/GHC/Driver/Config/CoreToStg/Prep.hs
- compiler/GHC/Driver/DynFlags.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/Llvm.hs
- compiler/GHC/Llvm/MetaData.hs
- compiler/GHC/Llvm/Ppr.hs
- compiler/GHC/Llvm/Syntax.hs
- docs/users_guide/using-optimisation.rst
- rts/Printer.c
- + testsuite/tests/core-to-stg/T25284/A.hs
- + testsuite/tests/core-to-stg/T25284/B.hs
- + testsuite/tests/core-to-stg/T25284/Cls.hs
- + testsuite/tests/core-to-stg/T25284/Main.hs
- + testsuite/tests/core-to-stg/T25284/T25284.stdout
- + testsuite/tests/core-to-stg/T25284/all.T


Changes:

=====================================
compiler/GHC/CmmToLlvm.hs
=====================================
@@ -26,13 +26,17 @@ import GHC.CmmToLlvm.Version
 import GHC.StgToCmm.CgUtils ( fixStgRegisters, CgStream )
 import GHC.Cmm
 import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.DebugBlock
 
 import GHC.Types.Unique.DSM
 import GHC.Utils.BufHandle
 import GHC.Driver.DynFlags
+import GHC.Data.FastString
 import GHC.Platform ( platformArch, Arch(..) )
+
+import GHC.Unit.Module.Location
+
 import GHC.Utils.Error
-import GHC.Data.FastString
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Utils.Logger
@@ -45,12 +49,12 @@ import System.IO
 -- -----------------------------------------------------------------------------
 -- | Top-level of the LLVM Code generator
 --
-llvmCodeGen :: Logger -> LlvmCgConfig -> Handle
+llvmCodeGen :: Logger -> LlvmCgConfig -> DynFlags -> ModLocation -> Handle
             -> DUniqSupply -- ^ The deterministic uniq supply to run the CgStream.
                            -- See Note [Deterministic Uniques in the CG]
             -> CgStream RawCmmGroup a
             -> IO a
-llvmCodeGen logger cfg h dus cmm_stream
+llvmCodeGen logger cfg dflags location h dus cmm_stream
   = withTiming logger (text "LLVM CodeGen") (const ()) $ do
        bufh <- newBufHandle h
 
@@ -87,22 +91,22 @@ llvmCodeGen logger cfg h dus cmm_stream
 
        -- run code generation
        (a, _) <- runLlvm logger cfg llvm_ver bufh dus $
-         llvmCodeGen' cfg cmm_stream
+         llvmCodeGen' dflags location cfg cmm_stream
 
        bFlush bufh
 
        return a
 
-llvmCodeGen' :: LlvmCgConfig
+llvmCodeGen' :: DynFlags -> ModLocation -> LlvmCgConfig
              -> CgStream RawCmmGroup a -> LlvmM a
-llvmCodeGen' cfg cmm_stream
+llvmCodeGen' dflags location cfg cmm_stream
   = do  -- Preamble
         renderLlvm (llvmHeader cfg) (llvmHeader cfg)
         ghcInternalFunctions
         cmmMetaLlvmPrelude
 
         -- Procedures
-        a <- Stream.consume cmm_stream (GHC.CmmToLlvm.Base.liftUDSMT) (llvmGroupLlvmGens)
+        a <- Stream.consume cmm_stream (GHC.CmmToLlvm.Base.liftUDSMT) (llvmGroupLlvmGens dflags location)
 
         -- Declare aliases for forward references
         decls <- generateExternDecls
@@ -112,8 +116,53 @@ llvmCodeGen' cfg cmm_stream
         -- Postamble
         cmmUsedLlvmGens
 
+        -- Debug metadata
+        debugInfoGen dflags location
+
         return a
 
+debugInfoGen :: DynFlags -> ModLocation -> LlvmM ()
+debugInfoGen dflags location
+  = do  fileMeta <- getMetaUniqueId
+        subprogramsMeta <- getMetaUniqueId
+        cuMeta <- getMetaUniqueId
+        dwarfVersionMeta <- getMetaUniqueId
+        debugInfoVersionMeta <- getMetaUniqueId
+        cfg <- getConfig
+        metaSubs <- getMetaDecls
+        renderLlvm (ppLlvmMetas cfg metaSubs) (ppLlvmMetas cfg metaSubs)
+        subprograms <- getSubprograms
+        let metaHeader =
+              [ MetaUnnamed fileMeta NotDistinct $ MetaDIFile
+                { difFilename     = fsLit $ fromMaybe "TODO" (ml_hs_file location)
+                , difDirectory    = fsLit ""
+                }
+              , MetaUnnamed cuMeta Distinct $ MetaDICompileUnit
+                { dicuLanguage    = fsLit "DW_LANG_Haskell"
+                , dicuFile        = fileMeta
+                , dicuProducer    = fsLit "ghc"
+                , dicuIsOptimized = llvmOptLevel dflags > 0
+                , dicuSubprograms = MetaStruct $ map MetaNode subprograms
+                }
+              , MetaNamed (fsLit "llvm.dbg.cu") NotDistinct [ cuMeta ]
+              , MetaUnnamed subprogramsMeta NotDistinct $ MetaStruct []
+              , MetaNamed (fsLit "llvm.module.flags") NotDistinct
+                [ dwarfVersionMeta
+                , debugInfoVersionMeta
+                ]
+              , MetaUnnamed dwarfVersionMeta NotDistinct $ MetaStruct
+                [ MetaVar $ LMLitVar $ LMIntLit 2 i32
+                , MetaStr $ fsLit "Dwarf Version"
+                , MetaVar $ LMLitVar $ LMIntLit 4 i32
+                ]
+              , MetaUnnamed debugInfoVersionMeta NotDistinct $ MetaStruct
+                [ MetaVar $ LMLitVar $ LMIntLit 2 i32
+                , MetaStr $ fsLit "Debug Info Version"
+                , MetaVar $ LMLitVar $ LMIntLit 3 i32
+                ]
+              ]
+        renderLlvm (ppLlvmMetas cfg metaHeader) (ppLlvmMetas cfg metaHeader)
+
 llvmHeader :: IsDoc doc => LlvmCgConfig -> doc
 llvmHeader cfg =
   let target  = llvmCgLlvmTarget cfg
@@ -133,8 +182,12 @@ llvmHeader cfg =
 {-# SPECIALIZE llvmHeader :: LlvmCgConfig -> SDoc #-}
 {-# SPECIALIZE llvmHeader :: LlvmCgConfig -> HDoc #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
 
-llvmGroupLlvmGens :: RawCmmGroup -> LlvmM ()
-llvmGroupLlvmGens cmm = do
+llvmGroupLlvmGens :: DynFlags -> ModLocation -> RawCmmGroup -> LlvmM ()
+llvmGroupLlvmGens dflags location cmm = do
+        let debug_map :: LabelMap DebugBlock
+            debug_map
+              | (debugLevel dflags) >= 1 = debugToMap $ cmmDebugGen location cmm
+              | otherwise    = mapEmpty
 
         -- Insert functions into map, collect data
         let split (CmmData s d' )     = return $ Just (s, d')
@@ -151,7 +204,7 @@ llvmGroupLlvmGens cmm = do
         {-# SCC "llvm_datas_gen" #-}
           cmmDataLlvmGens cdata
         {-# SCC "llvm_procs_gen" #-}
-          mapM_ cmmLlvmGen cmm
+          mapM_ (cmmLlvmGen debug_map) cmm
 
 -- -----------------------------------------------------------------------------
 -- | Do LLVM code generation on all these Cmms data sections.
@@ -174,8 +227,8 @@ cmmDataLlvmGens statics
                   (pprLlvmData cfg (concat gss', concat tss))
 
 -- | Complete LLVM code generation phase for a single top-level chunk of Cmm.
-cmmLlvmGen ::RawCmmDecl -> LlvmM ()
-cmmLlvmGen cmm at CmmProc{} = do
+cmmLlvmGen :: LabelMap DebugBlock -> RawCmmDecl -> LlvmM ()
+cmmLlvmGen debug_map cmm at CmmProc{} = do
 
     -- rewrite assignments to global regs
     platform <- getPlatform
@@ -190,11 +243,11 @@ cmmLlvmGen cmm at CmmProc{} = do
     -- pretty print - print as we go, since we produce HDocs, we know
     -- no nesting state needs to be maintained for the SDocs.
     forM_ llvmBC (\decl -> do
-        (hdoc, sdoc) <- pprLlvmCmmDecl decl
+        (hdoc, sdoc) <- pprLlvmCmmDecl debug_map decl
         renderLlvm (hdoc $$ empty) (sdoc $$ empty)
       )
 
-cmmLlvmGen _ = return ()
+cmmLlvmGen _ _ = return ()
 
 -- -----------------------------------------------------------------------------
 -- | Generate meta data nodes
@@ -208,7 +261,7 @@ cmmMetaLlvmPrelude = do
     setUniqMeta uniq tbaaId
     parentId <- maybe (return Nothing) getUniqMeta parent
     -- Build definition
-    return $ MetaUnnamed tbaaId $ MetaStruct $
+    return $ MetaUnnamed tbaaId NotDistinct $ MetaStruct $
           case parentId of
               Just p  -> [ MetaStr name, MetaNode p ]
               -- As of LLVM 4.0, a node without parents should be rendered as
@@ -233,18 +286,19 @@ cmmMetaLlvmPrelude = do
   renderLlvm (ppLlvmMetas cfg metas)
              (ppLlvmMetas cfg metas)
 
-mkNamedMeta :: LMString -> [MetaExpr] -> LlvmM [MetaDecl]
-mkNamedMeta name exprs = do
-    (ids, decls) <- unzip <$> mapM f exprs
-    return $ decls ++ [MetaNamed name ids]
   where
-    f expr = do
-      i <- getMetaUniqueId
-      return (i, MetaUnnamed i expr)
-
-mkModuleFlagsMeta :: [ModuleFlag] -> LlvmM [MetaDecl]
-mkModuleFlagsMeta =
-    mkNamedMeta "llvm.module.flags" . map moduleFlagToMetaExpr
+    mkModuleFlagsMeta :: [ModuleFlag] -> LlvmM [MetaDecl]
+    mkModuleFlagsMeta =
+        mkNamedMeta "llvm.module.flags" . map moduleFlagToMetaExpr
+
+    mkNamedMeta :: LMString -> [MetaExpr] -> LlvmM [MetaDecl]
+    mkNamedMeta name exprs = do
+        (ids, decls) <- unzip <$> mapM f exprs
+        return $ decls ++ [MetaNamed name NotDistinct ids]
+      where
+        f expr = do
+          i <- getMetaUniqueId
+          return (i, MetaUnnamed i NotDistinct expr)
 
 mkStackAlignmentMeta :: Integer -> ModuleFlag
 mkStackAlignmentMeta alignment =


=====================================
compiler/GHC/CmmToLlvm/Base.hs
=====================================
@@ -20,6 +20,7 @@ module GHC.CmmToLlvm.Base (
         markStackReg, checkStackReg,
         funLookup, funInsert, getLlvmVer,
         dumpIfSetLlvm, renderLlvm, markUsedVar, getUsedVars,
+        addMetaDecl, getMetaDecls, addSubprogram, getSubprograms,
         ghcInternalFunctions, getPlatform, getConfig,
 
         getMetaUniqueId,
@@ -68,12 +69,13 @@ import Data.List (find, isPrefixOf)
 import qualified Data.List.NonEmpty as NE
 import Data.Ord (comparing)
 import qualified Control.Monad.IO.Class as IO
+import GHC.Cmm.Dataflow.Label (Label)
 
 -- ----------------------------------------------------------------------------
 -- * Some Data Types
 --
 
-type LlvmCmmDecl = GenCmmDecl [LlvmData] (Maybe RawCmmStatics) (ListGraph LlvmStatement)
+type LlvmCmmDecl = GenCmmDecl [LlvmData] (Label, Maybe RawCmmStatics) (ListGraph LlvmStatement)
 type LlvmBasicBlock = GenBasicBlock LlvmStatement
 
 -- | Global registers live on proc entry
@@ -287,6 +289,9 @@ data LlvmEnv = LlvmEnv
   , envFunMap    :: LlvmEnvMap       -- ^ Global functions so far, with type
   , envAliases   :: UniqSet LMString -- ^ Globals that we had to alias, see [Llvm Forward References]
   , envUsedVars  :: [LlvmVar]        -- ^ Pointers to be added to llvm.used (see @cmmUsedLlvmGens@)
+  , envMetaDecls :: [MetaDecl]     -- ^ Metadata declarations to be included in final output
+  , envSubprograms :: [MetaId]     -- ^ 'MetaId's of the @DISubprogram@ metadata
+                                   -- nodes defined in this @DICompileUnit at .
 
     -- the following get cleared for every function (see @withClearVars@)
   , envVarMap    :: LlvmEnvMap       -- ^ Local variables so far, with type
@@ -339,6 +344,8 @@ runLlvm logger cfg ver out us m = do
                       , envVarMap    = emptyUFM
                       , envStackRegs = []
                       , envUsedVars  = []
+                      , envMetaDecls = []
+                      , envSubprograms = []
                       , envAliases   = emptyUniqSet
                       , envVersion   = ver
                       , envConfig    = cfg
@@ -430,6 +437,24 @@ setUniqMeta f m = modifyEnv $ \env -> env { envUniqMeta = addToUFM (envUniqMeta
 getUniqMeta :: Unique -> LlvmM (Maybe MetaId)
 getUniqMeta s = getEnv (flip lookupUFM s . envUniqMeta)
 
+-- | Add a @DISubprogram@ metadata declaration to the current compilation unit.
+addSubprogram :: MetaId -> MetaExpr -> LlvmM ()
+addSubprogram metaId metaExpr = do
+    modifyEnv $ \env -> env { envSubprograms = metaId : envSubprograms env }
+    addMetaDecl (MetaUnnamed metaId Distinct metaExpr)
+
+getSubprograms :: LlvmM [MetaId]
+getSubprograms = LlvmM $ \env -> return (envSubprograms env, env { envSubprograms = [] })
+
+-- | Add a metadata declaration to the output.
+addMetaDecl :: MetaDecl -> LlvmM ()
+addMetaDecl x = modifyEnv $ \env -> env { envMetaDecls = x : envMetaDecls env }
+
+-- | Retreive the list of metadata declarations found in the
+-- current compilation unit.
+getMetaDecls :: LlvmM [MetaDecl]
+getMetaDecls = LlvmM $ \env -> return (envMetaDecls env, env { envMetaDecls = [] })
+
 -- ----------------------------------------------------------------------------
 -- * Internal functions
 --


=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -60,7 +60,7 @@ genLlvmProc (CmmProc infos lbl live graph) = do
 
     (lmblocks, lmdata) <- basicBlocksCodeGen live blocks
     let info = mapLookup (g_entry graph) infos
-        proc = CmmProc info lbl live (ListGraph lmblocks)
+        proc = CmmProc (g_entry graph, info) lbl live (ListGraph lmblocks)
     return (proc:lmdata)
 
 genLlvmProc _ = panic "genLlvmProc: case that shouldn't reach here!"
@@ -76,9 +76,9 @@ newtype UnreachableBlockId = UnreachableBlockId BlockId
 
 -- | Generate code for a list of blocks that make up a complete
 -- procedure. The first block in the list is expected to be the entry
--- point.
+-- point and will get the prologue.
 basicBlocksCodeGen :: LiveGlobalRegUses -> [CmmBlock]
-                      -> LlvmM ([LlvmBasicBlock], [LlvmCmmDecl])
+                   -> LlvmM ([LlvmBasicBlock], [LlvmCmmDecl])
 basicBlocksCodeGen _    []                     = panic "no entry block!"
 basicBlocksCodeGen live cmmBlocks
   = do -- Emit the prologue


=====================================
compiler/GHC/CmmToLlvm/Ppr.hs
=====================================
@@ -14,12 +14,18 @@ import GHC.CmmToLlvm.Base
 import GHC.CmmToLlvm.Data
 import GHC.CmmToLlvm.Config
 
-import GHC.Cmm.CLabel
 import GHC.Cmm
+import GHC.Cmm.CLabel
+import GHC.Cmm.Dataflow.Label ( mapLookup, LabelMap )
+import GHC.Cmm.DebugBlock
 
 import GHC.Data.FastString
 import GHC.Utils.Outputable
 import GHC.Types.Unique
+import GHC.Types.SrcLoc
+import GHC.Types.Tickish ( GenTickish(SourceNote) )
+
+import Data.Maybe ( maybeToList )
 
 -- ----------------------------------------------------------------------------
 -- * Top level
@@ -43,13 +49,13 @@ pprLlvmData cfg (globals, types) =
 -- The HDoc we return is used to produce the final LLVM file, with the
 -- SDoc being returned alongside for use when @Opt_D_dump_llvm@ is set
 -- as we can't (currently) dump HDocs.
-pprLlvmCmmDecl :: LlvmCmmDecl -> LlvmM (HDoc, SDoc)
-pprLlvmCmmDecl (CmmData _ lmdata) = do
+pprLlvmCmmDecl :: LabelMap DebugBlock -> LlvmCmmDecl -> LlvmM (HDoc, SDoc)
+pprLlvmCmmDecl _ (CmmData _ lmdata) = do
   opts <- getConfig
   return ( vcat $ map (pprLlvmData opts) lmdata
          , vcat $ map (pprLlvmData opts) lmdata)
 
-pprLlvmCmmDecl (CmmProc mb_info entry_lbl live (ListGraph blks))
+pprLlvmCmmDecl debug_map (CmmProc (label, mb_info) entry_lbl live (ListGraph blks))
   = do let lbl = case mb_info of
                      Nothing -> entry_lbl
                      Just (CmmStaticsRaw info_lbl _) -> info_lbl
@@ -74,9 +80,42 @@ pprLlvmCmmDecl (CmmProc mb_info entry_lbl live (ListGraph blks))
                        let infoTy = LMStruct $ map getStatType infoStatics
                        return $ Just $ LMStaticStruc infoStatics infoTy
 
+       -- generate debug information metadata
+       subprogAnnot <-
+           case mapLookup label debug_map >>= dblSourceTick of
+             Just (SourceNote span name) -> do
+               let disName = getLexicalFastString name
+               let defName = llvmDefLabel disName
+               subprogMeta <- getMetaUniqueId
+               fileMeta <- getMetaUniqueId
+               typeMeta <- getMetaUniqueId
+               let fileDef = MetaUnnamed fileMeta NotDistinct
+                             $ MetaDIFile { difFilename = srcSpanFile span
+                                          , difDirectory = fsLit "TODO"
+                                          }
+                   typeMetaDef =
+                       MetaUnnamed typeMeta NotDistinct
+                       $ MetaDISubroutineType [MetaVar $ LMLitVar $ LMNullLit i1]
+                   subprog =
+                       MetaDISubprogram { disName         = disName
+                                        , disLinkageName  = defName
+                                        , disScope        = fileMeta
+                                        , disFile         = fileMeta
+                                        , disLine         = srcSpanStartLine span
+                                        , disType         = typeMeta
+                                        , disIsDefinition = True
+                                        }
+               addMetaDecl fileDef
+               addMetaDecl typeMetaDef
+               addSubprogram subprogMeta subprog
+               return $ Just $ MetaAnnot (fsLit "dbg") (MetaNode subprogMeta)
+             _   -> return Nothing
+
+       let funcMetas = maybeToList subprogAnnot
+
 
        let fun = LlvmFunction funDec funArgs llvmStdFunAttrs funSect
-                              prefix lmblocks
+                              prefix funcMetas lmblocks
            name = decName $ funcDecl fun
            defName = llvmDefLabel name
            funcDecl' = (funcDecl fun) { decName = defName }


=====================================
compiler/GHC/CoreToStg/Prep.hs
=====================================
@@ -2051,6 +2051,16 @@ conceptually.
 See also Note [Floats and FloatDecision] for how we maintain whole groups of
 floats and how far they go.
 
+Note [Controlling Speculative Evaluation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Most of the time, speculative evaluation has a positive effect on performance,
+but we have found a case where speculative evaluation of dictionary functions
+leads to a performance regression #25284.
+
+Therefore we have some flags to control it. See the optimization section in
+the User's Guide for the description of these flags and when to use them.
+
 Note [Floats and FloatDecision]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We have a special datatype `Floats` for modelling a telescope of `FloatingBind`
@@ -2275,7 +2285,15 @@ mkNonRecFloat env lev bndr rhs
       }
 
     is_hnf      = exprIsHNF rhs
-    ok_for_spec = exprOkForSpecEval (not . is_rec_call) rhs
+    cfg         = cpe_config env
+
+    ok_for_spec = exprOkForSpecEval call_ok_for_spec rhs
+    -- See Note [Controlling Speculative Evaluation]
+    call_ok_for_spec x
+      | is_rec_call x                           = False
+      | not (cp_specEval cfg)                   = False
+      | not (cp_specEvalDFun cfg) && isDFunId x = False
+      | otherwise                               = True
     is_rec_call = (`elemUnVarSet` cpe_rec_ids env)
 
     -- See Note [Pin evaluatedness on floats]
@@ -2517,6 +2535,11 @@ data CorePrepConfig = CorePrepConfig
   -- ^ Configuration for arity analysis ('exprEtaExpandArity').
   -- See Note [Eta expansion of arguments in CorePrep]
   -- When 'Nothing' (e.g., -O0, -O1), use the cheaper 'exprArity' instead
+  , cp_specEval                :: !Bool
+  -- ^ Whether to perform speculative evaluation
+  -- See Note [Controlling Speculative Evaluation]
+  , cp_specEvalDFun            :: !Bool
+  -- ^ Whether to perform speculative evaluation on DFuns
   }
 
 data CorePrepEnv


=====================================
compiler/GHC/Driver/CodeOutput.hs
=====================================
@@ -131,7 +131,7 @@ codeOutput logger tmpfs llvm_config dflags unit_state this_mod filenm location g
                  NcgCodeOutput  -> outputAsm logger dflags this_mod location filenm dus1
                                              final_stream
                  ViaCCodeOutput -> outputC logger dflags filenm dus1 final_stream pkg_deps
-                 LlvmCodeOutput -> outputLlvm logger llvm_config dflags filenm dus1 final_stream
+                 LlvmCodeOutput -> outputLlvm logger llvm_config dflags location filenm dus1 final_stream
                  JSCodeOutput   -> outputJS logger llvm_config dflags filenm final_stream
         ; stubs_exist <- outputForeignStubs logger tmpfs dflags unit_state this_mod location stubs
         ; return (filenm, stubs_exist, foreign_fps, a)
@@ -224,15 +224,15 @@ outputAsm logger dflags this_mod location filenm dus cmm_stream = do
 ************************************************************************
 -}
 
-outputLlvm :: Logger -> LlvmConfigCache -> DynFlags -> FilePath
+outputLlvm :: Logger -> LlvmConfigCache -> DynFlags -> ModLocation -> FilePath
            -> DUniqSupply -- ^ The deterministic uniq supply to run the CgStream
                           -- See Note [Deterministic Uniques in the CG]
            -> CgStream RawCmmGroup a -> IO a
-outputLlvm logger llvm_config dflags filenm dus cmm_stream = do
+outputLlvm logger llvm_config dflags location filenm dus cmm_stream = do
   lcg_config <- initLlvmCgConfig logger llvm_config dflags
   {-# SCC "llvm_output" #-} doOutput filenm $
     \f -> {-# SCC "llvm_CodeGen" #-}
-      llvmCodeGen logger lcg_config f dus cmm_stream
+      llvmCodeGen logger lcg_config dflags location f dus cmm_stream
 
 {-
 ************************************************************************


=====================================
compiler/GHC/Driver/Config/CoreToStg/Prep.hs
=====================================
@@ -24,6 +24,8 @@ initCorePrepConfig hsc_env = do
       , cp_arityOpts = if gopt Opt_DoCleverArgEtaExpansion dflags
                        then Just (initArityOpts dflags)
                        else Nothing
+      , cp_specEval  = gopt Opt_SpecEval dflags
+      , cp_specEvalDFun = gopt Opt_SpecEvalDictFun dflags
       }
 
 initCorePrepPgmConfig :: DynFlags -> [Var] -> CorePrepPgmConfig


=====================================
compiler/GHC/Driver/DynFlags.hs
=====================================
@@ -1287,6 +1287,8 @@ optLevelFlags -- see Note [Documenting optimisation flags]
 --   RegsGraph suffers performance regression. See #7679
 --  , ([2],     Opt_StaticArgumentTransformation)
 --   Static Argument Transformation needs investigation. See #9374
+    , ([0,1,2], Opt_SpecEval)
+    , ([0,1,2], Opt_SpecEvalDictFun)
     ]
 
 


=====================================
compiler/GHC/Driver/Flags.hs
=====================================
@@ -674,6 +674,9 @@ data GeneralFlag
    | Opt_NumConstantFolding
    | Opt_CoreConstantFolding
    | Opt_FastPAPCalls                  -- #6084
+   | Opt_SpecEval
+   | Opt_SpecEvalDictFun   -- See Note [Controlling Speculative Evaluation]
+
 
    -- Inference flags
    | Opt_DoTagInferenceChecks
@@ -912,6 +915,8 @@ optimisationFlags = EnumSet.fromList
    , Opt_WorkerWrapper
    , Opt_WorkerWrapperUnlift
    , Opt_SolveConstantDicts
+   , Opt_SpecEval
+   , Opt_SpecEvalDictFun
    ]
 
 -- | The set of flags which affect code generation and can change a program's


=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -2544,6 +2544,8 @@ fFlagsDeps = [
   flagSpec "num-constant-folding"             Opt_NumConstantFolding,
   flagSpec "core-constant-folding"            Opt_CoreConstantFolding,
   flagSpec "fast-pap-calls"                   Opt_FastPAPCalls,
+  flagSpec "spec-eval"                        Opt_SpecEval,
+  flagSpec "spec-eval-dictfun"                Opt_SpecEvalDictFun,
   flagSpec "cmm-control-flow"                 Opt_CmmControlFlow,
   flagSpec "show-warning-groups"              Opt_ShowWarnGroups,
   flagSpec "hide-source-paths"                Opt_HideSourcePaths,


=====================================
compiler/GHC/Llvm.hs
=====================================
@@ -42,6 +42,7 @@ module GHC.Llvm (
 
         -- ** Metadata types
         MetaExpr(..), MetaAnnot(..), MetaDecl(..), MetaId(..),
+        Distinction(..),
         -- *** Module flags
         ModuleFlagBehavior(..),
         ModuleFlag(..),


=====================================
compiler/GHC/Llvm/MetaData.hs
=====================================
@@ -10,6 +10,7 @@ module GHC.Llvm.MetaData
   , ModuleFlagBehavior(..)
   , ModuleFlag(..)
   , moduleFlagToMetaExpr
+  , Distinction(..)
   ) where
 
 import GHC.Prelude
@@ -87,6 +88,24 @@ data MetaExpr = MetaStr !LMString
               | MetaNode !MetaId
               | MetaVar !LlvmVar
               | MetaStruct [MetaExpr]
+              | MetaDIFile { difFilename  :: !LMString
+                           , difDirectory :: !LMString
+                           }
+              | MetaDISubroutineType { distType     :: ![MetaExpr] }
+              | MetaDICompileUnit { dicuLanguage    :: !LMString
+                                  , dicuFile        :: !MetaId
+                                  , dicuProducer    :: !LMString
+                                  , dicuIsOptimized :: !Bool
+                                  , dicuSubprograms :: !MetaExpr
+                                  }
+              | MetaDISubprogram { disName          :: !LMString
+                                 , disLinkageName   :: !LMString
+                                 , disScope         :: !MetaId
+                                 , disFile          :: !MetaId
+                                 , disLine          :: !Int
+                                 , disType          :: !MetaId
+                                 , disIsDefinition  :: !Bool
+                                 }
               deriving (Eq)
 
 -- | Associates some metadata with a specific label for attaching to an
@@ -94,14 +113,17 @@ data MetaExpr = MetaStr !LMString
 data MetaAnnot = MetaAnnot LMString MetaExpr
                deriving (Eq)
 
+-- | Is a metadata node @distinct@?
+data Distinction = Distinct | NotDistinct
+
 -- | Metadata declarations. Metadata can only be declared in global scope.
 data MetaDecl
     -- | Named metadata. Only used for communicating module information to
     -- LLVM. ('!name = !{ [!\<n>] }' form).
-    = MetaNamed !LMString [MetaId]
+    = MetaNamed !LMString Distinction [MetaId]
     -- | Metadata node declaration.
     -- ('!0 = metadata !{ \<metadata expression> }' form).
-    | MetaUnnamed !MetaId !MetaExpr
+    | MetaUnnamed !MetaId Distinction !MetaExpr
 
 ----------------------------------------------------------------
 -- Module flags


=====================================
compiler/GHC/Llvm/Ppr.hs
=====================================
@@ -1,6 +1,8 @@
 
 {-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE OverloadedStrings #-}
 
 --------------------------------------------------------------------------------
 -- | Pretty print LLVM IR Code.
@@ -131,16 +133,19 @@ ppLlvmMetas opts metas = lines_ $ map (ppLlvmMeta opts) metas
 
 -- | Print out an LLVM metadata definition.
 ppLlvmMeta :: IsLine doc => LlvmCgConfig -> MetaDecl -> doc
-ppLlvmMeta opts (MetaUnnamed n m)
-  = ppMetaId n <+> equals <+> ppMetaExpr opts m
+ppLlvmMeta opts (MetaUnnamed n d m)
+  = ppMetaId n <+> equals <+> ppDistinction d <+> ppMetaExpr opts m
 
-ppLlvmMeta _opts (MetaNamed n m)
-  = exclamation <> ftext n <+> equals <+> exclamation <> braces nodes
+ppLlvmMeta _opts (MetaNamed n d m)
+  = exclamation <> ftext n <+> equals <+> ppDistinction d <+> exclamation <> braces nodes
   where
     nodes = hcat $ intersperse comma $ map ppMetaId m
 {-# SPECIALIZE ppLlvmMeta :: LlvmCgConfig -> MetaDecl -> SDoc #-}
 {-# SPECIALIZE ppLlvmMeta :: LlvmCgConfig -> MetaDecl -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
 
+ppDistinction :: IsLine doc => Distinction -> doc
+ppDistinction Distinct    = text "distinct"
+ppDistinction NotDistinct = empty
 
 -- | Print out a list of function definitions.
 ppLlvmFunctions :: IsDoc doc => LlvmCgConfig -> LlvmFunctions -> doc
@@ -160,7 +165,7 @@ ppLlvmFunction opts fun =
                         Nothing -> empty
     in vcat
         [line $ text "define" <+> ppLlvmFunctionHeader (funcDecl fun) (funcArgs fun)
-              <+> attrDoc <+> secDoc <+> prefixDoc
+              <+> attrDoc <+> secDoc <+> prefixDoc <+> ppMetaAnnots opts (funcMetadata fun)
         , line lbrace
         , ppLlvmBlocks opts (funcBody fun)
         , line rbrace
@@ -302,6 +307,44 @@ ppMetaExpr opts = \case
   MetaNode   n                     -> ppMetaId n
   MetaVar    v                     -> ppVar opts v
   MetaStruct es                    -> char '!' <> braces (ppCommaJoin (ppMetaExpr opts) es)
+  MetaDIFile {..} ->
+      specialMetadata "DIFile"
+      [ ("filename" , doubleQuotes $ ftext difFilename)
+      , ("directory", doubleQuotes $ ftext difDirectory)
+      ]
+  MetaDISubroutineType {..} ->
+      specialMetadata "DISubroutineType"
+      [ ("types", ppMetaExpr opts $ MetaStruct distType ) ]
+  MetaDICompileUnit {..} ->
+      specialMetadata "DICompileUnit"
+      [ ("language"   , ftext dicuLanguage)
+      , ("file"       , ppMetaId dicuFile)
+      , ("producer"   , doubleQuotes $ ftext dicuProducer)
+      , ("isOptimized", if dicuIsOptimized
+                            then text "true"
+                            else text "false")
+      , ("subprograms", ppMetaExpr opts $ dicuSubprograms)
+      ]
+  MetaDISubprogram {..} ->
+      specialMetadata "DISubprogram"
+      [ ("name"        , doubleQuotes $ ftext disName)
+      , ("linkageName" , doubleQuotes $ ftext disLinkageName)
+      , ("scope"       , ppMetaId disScope)
+      , ("file"        , ppMetaId disFile)
+      , ("line"        , int disLine)
+      , ("type"        , ppMetaId disType)
+      , ("isDefinition", if disIsDefinition
+                              then text "true"
+                              else text "false")
+      ]
+  where
+    specialMetadata :: IsLine doc => String -> [(String, doc)] -> doc
+    specialMetadata nodeName fields =
+        char '!'
+        <> text nodeName
+        <> parens (hsep $ punctuate comma $ map (\(k,v) -> text k <> colon <+> v) fields)
+
+
 {-# SPECIALIZE ppMetaExpr :: LlvmCgConfig -> MetaExpr -> SDoc #-}
 {-# SPECIALIZE ppMetaExpr :: LlvmCgConfig -> MetaExpr -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
 
@@ -589,15 +632,15 @@ ppShuffle opts v1 v2 idxs =
 
 ppMetaAnnotExpr :: IsLine doc => LlvmCgConfig -> [MetaAnnot] -> LlvmExpression -> doc
 ppMetaAnnotExpr opts meta expr =
-   ppLlvmExpression opts expr <> ppMetaAnnots opts meta
+   ppLlvmExpression opts expr <> comma <+> ppMetaAnnots opts meta
 {-# SPECIALIZE ppMetaAnnotExpr :: LlvmCgConfig -> [MetaAnnot] -> LlvmExpression -> SDoc #-}
 {-# SPECIALIZE ppMetaAnnotExpr :: LlvmCgConfig -> [MetaAnnot] -> LlvmExpression -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
 
 ppMetaAnnots :: IsLine doc => LlvmCgConfig -> [MetaAnnot] -> doc
-ppMetaAnnots opts meta = hcat $ map ppMeta meta
+ppMetaAnnots opts meta = hcat $ punctuate comma $ map ppMeta meta
   where
     ppMeta (MetaAnnot name e)
-        = comma <+> exclamation <> ftext name <+>
+        = exclamation <> ftext name <+>
           case e of
             MetaNode n    -> ppMetaId n
             MetaStruct ms -> exclamation <> braces (ppCommaJoin (ppMetaExpr opts) ms)


=====================================
compiler/GHC/Llvm/Syntax.hs
=====================================
@@ -64,6 +64,9 @@ data LlvmFunction = LlvmFunction {
     -- | Prefix data
     funcPrefix    :: Maybe LlvmStatic,
 
+    -- | Function metadata
+    funcMetadata  :: [MetaAnnot],
+
     -- | The body of the functions.
     funcBody      :: LlvmBlocks
   }


=====================================
docs/users_guide/using-optimisation.rst
=====================================
@@ -405,6 +405,55 @@ as such you shouldn't need to set any of them explicitly. A flag
     intermediate language, where it is able to common up some subexpressions
     that differ in their types, but not their representation.
 
+.. ghc-flag:: -fspec-eval
+    :shortdesc: Enables speculative evaluation.
+    :type: dynamic
+    :category:
+    :reverse: -fno-spec-eval
+
+    :default: on
+    :since: 9.14.1
+
+    Enables speculative evaluation which usually results in fewer allocations.
+    Enabling speculative evaluation should not cause performance regressions.
+    If you encounter any, please open a ticket.
+
+    Note that disabling this flag will switch off speculative evaluation
+    completely, causing :ghc-flag:`-fspec-eval-dictfun` to have
+    no effect.
+
+.. ghc-flag:: -fspec-eval-dictfun
+    :shortdesc: Enables speculative evaluation of dictionary functions.
+    :type: dynamic
+    :category:
+    :reverse: -fno-spec-eval-dictfun
+
+    :default: on
+    :since: 9.14.1
+
+    Enables speculative (strict) evaluation of dictionary functions.
+
+    This is best explained with an example ::
+
+        instance C a => D a where ...
+
+        g :: D a => a -> Int
+        g x = ...
+
+        f :: C a => a -> Int
+        f x = g x
+
+    Function `f` has to pass a `D a` dictionary to `g`, and uses a dictionary
+    function `C a => D a` to compute it. If speculative evaluation for
+    dictionary functions is enabled, this dictionary is computed
+    strictly.
+
+    Speculative evalation of dictionary functions can lead to slightly better
+    performance, because a thunk is avoided. However, it results in unnecessary
+    computation and allocation if the dictionary goes unused. This causes
+    a significant increase in allocation if the dictionary is large.
+    See (:ghc-ticket:`25284`).
+
 .. ghc-flag:: -fdicts-cheap
     :shortdesc: Make dictionary-valued expressions seem cheap to the optimiser.
     :type: dynamic


=====================================
rts/Printer.c
=====================================
@@ -151,13 +151,20 @@ printClosure( const StgClosure *obj )
     case FUN_1_0: case FUN_0_1:
     case FUN_1_1: case FUN_0_2: case FUN_2_0:
     case FUN_STATIC:
-        debugBelch("FUN/%d(",(int)itbl_to_fun_itbl(info)->f.arity);
-        printPtr((StgPtr)obj->header.info);
+        {
+            debugBelch("FUN/%d(",(int)itbl_to_fun_itbl(info)->f.arity);
+            printPtr((StgPtr)obj->header.info);
+
+            InfoProvEnt ipe;
+            if (lookupIPE(obj->header.info, &ipe)) {
+                debugBelch(", %s", ipe.prov.table_name);
+            }
 #if defined(PROFILING)
-        debugBelch(", %s", obj->header.prof.ccs->cc->label);
+            debugBelch(", %s", obj->header.prof.ccs->cc->label);
 #endif
-        printStdObjPayload(obj);
-        break;
+            printStdObjPayload(obj);
+            break;
+        }
 
     case PRIM:
         debugBelch("PRIM(");
@@ -175,13 +182,19 @@ printClosure( const StgClosure *obj )
     case THUNK_1_0: case THUNK_0_1:
     case THUNK_1_1: case THUNK_0_2: case THUNK_2_0:
     case THUNK_STATIC:
+        {
             /* ToDo: will this work for THUNK_STATIC too? */
 #if defined(PROFILING)
             printThunkObject((StgThunk *)obj,GET_PROF_DESC(info));
 #else
             printThunkObject((StgThunk *)obj,"THUNK");
+            InfoProvEnt ipe;
+            if (lookupIPE(obj->header.info, &ipe)) {
+                debugBelch(", %s", ipe.prov.table_name);
+            }
 #endif
             break;
+        }
 
     case THUNK_SELECTOR:
         printStdObjHdr(obj, "THUNK_SELECTOR");


=====================================
testsuite/tests/core-to-stg/T25284/A.hs
=====================================
@@ -0,0 +1,8 @@
+{-# OPTIONS_GHC -fspec-eval-dictfun #-}
+module A (testX) where
+
+import qualified Cls
+
+-- this creates the big dictionary strictly because of speculative evaluation
+testX :: (Show a, Cls.HasConst a) => a -> Int -> IO ()
+testX a b = Cls.printConst a b


=====================================
testsuite/tests/core-to-stg/T25284/B.hs
=====================================
@@ -0,0 +1,8 @@
+{-# OPTIONS_GHC -fno-spec-eval-dictfun #-}
+module B (testX) where
+
+import qualified Cls
+
+-- this creates the big dictionary lazily
+testX :: (Show a, Cls.HasConst a) => a -> Int -> IO ()
+testX a b = Cls.printConst a b


=====================================
testsuite/tests/core-to-stg/T25284/Cls.hs
=====================================
@@ -0,0 +1,40 @@
+{-# LANGUAGE UndecidableInstances #-}
+
+module Cls where
+
+class HasConst a where constVal :: a
+
+instance Cls.HasConst Word where constVal = 123
+
+instance Cls.HasConst Int where constVal = 456
+
+-- this class has a big dictionary
+class HasConst10 a where
+  constA :: a
+  constInt1 :: a -> Int
+  constInt1 _ = 1
+  constInt2 :: a -> Int
+  constInt2 _ = 2
+  constInt3 :: a -> Int
+  constInt3 _ = 3
+  constInt4 :: a -> Int
+  constInt4 _ = 4
+  constInt5 :: a -> Int
+  constInt5 _ = 5
+  constInt6 :: a -> Int
+  constInt6 _ = 6
+  constInt7 :: a -> Int
+  constInt7 _ = 7
+  constInt8 :: a -> Int
+  constInt8 _ = 8
+  constInt9 :: a -> Int
+  constInt9 _ = 9
+
+instance HasConst a => HasConst10 a where
+    constA = constVal
+
+-- this doesn't use the big dictionary most of the time
+printConst :: forall a. (Show a, HasConst10 a)
+           => a -> Int -> IO ()
+printConst x 5000  = print @a constA >> print (constInt8 x)
+printConst _  _    = pure ()


=====================================
testsuite/tests/core-to-stg/T25284/Main.hs
=====================================
@@ -0,0 +1,57 @@
+{-
+
+  This tests that speculative evaluation for dictionary functions works as
+  expected, with a large dictionary that goes unused.
+
+   - Module A: dictfun speculative evaluation enabled
+   - Module B: dictfun speculative evaluation disabled
+
+  Speculative evaluation causes the unused large dictionary to be allocated
+  strictly in module A, so we expect more allocations than in module B.
+
+ -}
+module Main where
+
+import qualified A
+import qualified B
+import qualified Cls
+
+import Data.Word
+import System.Mem (performGC)
+import GHC.Stats
+import Control.Monad
+
+{-# NOINLINE getAllocated #-}
+getAllocated :: IO Word64
+getAllocated = do
+  performGC
+  allocated_bytes <$> getRTSStats
+
+main :: IO ()
+main = do
+    -- warm up (just in case)
+    _       <- testMain A.testX
+    _       <- testMain B.testX
+
+    -- for real
+    a_alloc <- testMain A.testX
+    b_alloc <- testMain B.testX
+
+    -- expect B to allocate less than A
+    let alloc_ratio :: Double
+        alloc_ratio = fromIntegral b_alloc / fromIntegral a_alloc
+    putStrLn ("expected alloc: " ++ show (alloc_ratio < 0.7))
+
+iter :: (Int -> IO ()) -> Int -> Int -> IO ()
+iter m !i !j
+  | i < j = m i >> iter m (i+1) j
+  | otherwise = pure ()
+
+{-# NOINLINE testMain #-}
+testMain :: (forall b. (Show b, Cls.HasConst b) => b -> Int -> IO ())
+         -> IO Word64
+testMain f = do
+  alloc0 <- getAllocated
+  iter (\i -> f (0::Int) i >> f (0::Word) i) 1 100000
+  alloc1 <- getAllocated
+  pure (alloc1 - alloc0)


=====================================
testsuite/tests/core-to-stg/T25284/T25284.stdout
=====================================
@@ -0,0 +1,17 @@
+456
+8
+123
+8
+456
+8
+123
+8
+456
+8
+123
+8
+456
+8
+123
+8
+expected alloc: True


=====================================
testsuite/tests/core-to-stg/T25284/all.T
=====================================
@@ -0,0 +1,6 @@
+test('T25284',
+  [js_skip, # allocation counters aren't available on the JS backend
+   extra_files(['Main.hs', 'A.hs', 'B.hs', 'Cls.hs']),
+   extra_run_opts('+RTS -T -RTS')],
+  multimod_compile_and_run,
+  ['Main', ''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/2c31db33f83c765d51b535f2ad02745afd5c1d05...62eab54ba3dfedac4e3f7e76d1fc3d8d6d84d25b

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/2c31db33f83c765d51b535f2ad02745afd5c1d05...62eab54ba3dfedac4e3f7e76d1fc3d8d6d84d25b
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250110/439312cd/attachment-0001.html>


More information about the ghc-commits mailing list