[Git][ghc/ghc][wip/T23012] 3 commits: Handle top-level Addr# literals in the bytecode compiler
Simon Peyton Jones (@simonpj)
gitlab at gitlab.haskell.org
Tue Feb 21 15:45:56 UTC 2023
Simon Peyton Jones pushed to branch wip/T23012 at Glasgow Haskell Compiler / GHC
Commits:
26243de1 by Alexis King at 2023-02-20T15:27:17-05:00
Handle top-level Addr# literals in the bytecode compiler
Fixes #22376.
- - - - -
0196cc2b by romes at 2023-02-20T15:27:52-05:00
fix: Explicitly flush stdout on plugin
Because of #20791, the plugins tests often fail. This is a temporary
fix to stop the tests from failing due to unflushed outputs on windows
and the explicit flush should be removed when #20791 is fixed.
- - - - -
c583f0bd by Simon Peyton Jones at 2023-02-21T15:47:07+00:00
Fix shadowing bug in prepareAlts
As #23012 showed, GHC.CXOre.Opt.Simpllify.Utils.prepareAlts was
using an OutType to construct an InAlt. When shadowing is in play,
this is outright wrong.
See Note [Shadowing in prepareAlts].
- - - - -
19 changed files:
- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/ByteCode/Linker.hs
- compiler/GHC/ByteCode/Types.hs
- compiler/GHC/Core/Opt/Simplify/Iteration.hs
- compiler/GHC/Core/Opt/Simplify/Utils.hs
- compiler/GHC/Linker/Loader.hs
- compiler/GHC/Linker/Types.hs
- compiler/GHC/StgToByteCode.hs
- + testsuite/tests/bytecode/T22376/A.hs
- + testsuite/tests/bytecode/T22376/B.hs
- + testsuite/tests/bytecode/T22376/T22376.hs
- + testsuite/tests/bytecode/T22376/T22376.stdout
- + testsuite/tests/bytecode/T22376/all.T
- testsuite/tests/plugins/all.T
- testsuite/tests/plugins/echo-plugin/Echo.hs
- testsuite/tests/plugins/hooks-plugin/Hooks/Plugin.hs
- + testsuite/tests/simplCore/should_compile/T23012.hs
- testsuite/tests/simplCore/should_compile/all.T
Changes:
=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -97,7 +97,7 @@ assembleBCOs
-> Profile
-> [ProtoBCO Name]
-> [TyCon]
- -> [RemotePtr ()]
+ -> AddrEnv
-> Maybe ModBreaks
-> IO CompiledByteCode
assembleBCOs interp profile proto_bcos tycons top_strs modbreaks = do
@@ -105,27 +105,40 @@ assembleBCOs interp profile proto_bcos tycons top_strs modbreaks = do
-- fixed for an interpreter
itblenv <- mkITbls interp profile tycons
bcos <- mapM (assembleBCO (profilePlatform profile)) proto_bcos
- (bcos',ptrs) <- mallocStrings interp bcos
+ bcos' <- mallocStrings interp bcos
return CompiledByteCode
{ bc_bcos = bcos'
, bc_itbls = itblenv
, bc_ffis = concatMap protoBCOFFIs proto_bcos
- , bc_strs = top_strs ++ ptrs
+ , bc_strs = top_strs
, bc_breaks = modbreaks
}
--- Find all the literal strings and malloc them together. We want to
--- do this because:
+-- Note [Allocating string literals]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Our strategy for handling top-level string literal bindings is described in
+-- Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode,
+-- but not all Addr# literals in a program are guaranteed to be lifted to the
+-- top level. Our strategy for handling local Addr# literals is somewhat simpler:
+-- after assembling, we find all the BCONPtrStr arguments in the program, malloc
+-- memory for them, and bake the resulting addresses into the instruction stream
+-- in the form of BCONPtrWord arguments.
--
--- a) It should be done when we compile the module, not each time we relink it
--- b) For -fexternal-interpreter It's more efficient to malloc the strings
--- as a single batch message, especially when compiling in parallel.
+-- Since we do this when assembling, we only allocate the memory when we compile
+-- the module, not each time we relink it. However, we do want to take care to
+-- malloc the memory all in one go, since that is more efficient with
+-- -fexternal-interpreter, especially when compiling in parallel.
--
-mallocStrings :: Interp -> [UnlinkedBCO] -> IO ([UnlinkedBCO], [RemotePtr ()])
+-- Note that, as with top-level string literal bindings, this memory is never
+-- freed, so it just leaks if the BCO is unloaded. See Note [Generating code for
+-- top-level string literal bindings] in GHC.StgToByteCode for some discussion
+-- about why.
+--
+mallocStrings :: Interp -> [UnlinkedBCO] -> IO [UnlinkedBCO]
mallocStrings interp ulbcos = do
let bytestrings = reverse (execState (mapM_ collect ulbcos) [])
ptrs <- interpCmd interp (MallocStrings bytestrings)
- return (evalState (mapM splice ulbcos) ptrs, ptrs)
+ return (evalState (mapM splice ulbcos) ptrs)
where
splice bco at UnlinkedBCO{..} = do
lits <- mapM spliceLit unlinkedBCOLits
@@ -162,7 +175,7 @@ assembleOneBCO interp profile pbco = do
-- TODO: the profile should be bundled with the interpreter: the rts ways are
-- fixed for an interpreter
ubco <- assembleBCO (profilePlatform profile) pbco
- ([ubco'], _ptrs) <- mallocStrings interp [ubco]
+ [ubco'] <- mallocStrings interp [ubco]
return ubco'
assembleBCO :: Platform -> ProtoBCO Name -> IO UnlinkedBCO
@@ -411,6 +424,10 @@ assembleI platform i = case i of
PUSH_UBX lit nws -> do np <- literal lit
emit bci_PUSH_UBX [Op np, SmallOp nws]
+ -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode
+ PUSH_ADDR nm -> do np <- lit [BCONPtrAddr nm]
+ emit bci_PUSH_UBX [Op np, SmallOp 1]
+
PUSH_APPLY_N -> emit bci_PUSH_APPLY_N []
PUSH_APPLY_V -> emit bci_PUSH_APPLY_V []
PUSH_APPLY_F -> emit bci_PUSH_APPLY_F []
=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -112,6 +112,10 @@ data BCInstr
-- type, and it appears impossible to get hold of the bits of
-- an addr, even though we need to assemble BCOs.
+ -- Push a top-level Addr#. This is a pseudo-instruction assembled to PUSH_UBX,
+ -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode.
+ | PUSH_ADDR Name
+
-- various kinds of application
| PUSH_APPLY_N
| PUSH_APPLY_V
@@ -284,6 +288,7 @@ instance Outputable BCInstr where
ppr (PUSH_UBX16 lit) = text "PUSH_UBX16" <+> ppr lit
ppr (PUSH_UBX32 lit) = text "PUSH_UBX32" <+> ppr lit
ppr (PUSH_UBX lit nw) = text "PUSH_UBX" <+> parens (ppr nw) <+> ppr lit
+ ppr (PUSH_ADDR nm) = text "PUSH_ADDR" <+> ppr nm
ppr PUSH_APPLY_N = text "PUSH_APPLY_N"
ppr PUSH_APPLY_V = text "PUSH_APPLY_V"
ppr PUSH_APPLY_F = text "PUSH_APPLY_F"
@@ -397,6 +402,7 @@ bciStackUse (PUSH_UBX8 _) = 1 -- overapproximation
bciStackUse (PUSH_UBX16 _) = 1 -- overapproximation
bciStackUse (PUSH_UBX32 _) = 1 -- overapproximation on 64bit arch
bciStackUse (PUSH_UBX _ nw) = fromIntegral nw
+bciStackUse PUSH_ADDR{} = 1
bciStackUse PUSH_APPLY_N{} = 1
bciStackUse PUSH_APPLY_V{} = 1
bciStackUse PUSH_APPLY_F{} = 1
=====================================
compiler/GHC/ByteCode/Linker.hs
=====================================
@@ -8,10 +8,7 @@
-- | Bytecode assembler and linker
module GHC.ByteCode.Linker
- ( ClosureEnv
- , emptyClosureEnv
- , extendClosureEnv
- , linkBCO
+ ( linkBCO
, lookupStaticPtr
, lookupIE
, nameToCLabel
@@ -35,6 +32,8 @@ import GHC.Unit.Types
import GHC.Data.FastString
import GHC.Data.SizedSeq
+import GHC.Linker.Types
+
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
import GHC.Utils.Outputable
@@ -53,45 +52,34 @@ import GHC.Exts
Linking interpretables into something we can run
-}
-type ClosureEnv = NameEnv (Name, ForeignHValue)
-
-emptyClosureEnv :: ClosureEnv
-emptyClosureEnv = emptyNameEnv
-
-extendClosureEnv :: ClosureEnv -> [(Name,ForeignHValue)] -> ClosureEnv
-extendClosureEnv cl_env pairs
- = extendNameEnvList cl_env [ (n, (n,v)) | (n,v) <- pairs]
-
-{-
- Linking interpretables into something we can run
--}
-
linkBCO
:: Interp
- -> ItblEnv
- -> ClosureEnv
+ -> LinkerEnv
-> NameEnv Int
-> RemoteRef BreakArray
-> UnlinkedBCO
-> IO ResolvedBCO
-linkBCO interp ie ce bco_ix breakarray
+linkBCO interp le bco_ix breakarray
(UnlinkedBCO _ arity insns bitmap lits0 ptrs0) = do
-- fromIntegral Word -> Word64 should be a no op if Word is Word64
-- otherwise it will result in a cast to longlong on 32bit systems.
- lits <- mapM (fmap fromIntegral . lookupLiteral interp ie) (ssElts lits0)
- ptrs <- mapM (resolvePtr interp ie ce bco_ix breakarray) (ssElts ptrs0)
+ lits <- mapM (fmap fromIntegral . lookupLiteral interp le) (ssElts lits0)
+ ptrs <- mapM (resolvePtr interp le bco_ix breakarray) (ssElts ptrs0)
return (ResolvedBCO isLittleEndian arity insns bitmap
(listArray (0, fromIntegral (sizeSS lits0)-1) lits)
(addListToSS emptySS ptrs))
-lookupLiteral :: Interp -> ItblEnv -> BCONPtr -> IO Word
-lookupLiteral interp ie ptr = case ptr of
+lookupLiteral :: Interp -> LinkerEnv -> BCONPtr -> IO Word
+lookupLiteral interp le ptr = case ptr of
BCONPtrWord lit -> return lit
BCONPtrLbl sym -> do
Ptr a# <- lookupStaticPtr interp sym
return (W# (int2Word# (addr2Int# a#)))
BCONPtrItbl nm -> do
- Ptr a# <- lookupIE interp ie nm
+ Ptr a# <- lookupIE interp (itbl_env le) nm
+ return (W# (int2Word# (addr2Int# a#)))
+ BCONPtrAddr nm -> do
+ Ptr a# <- lookupAddr interp (addr_env le) nm
return (W# (int2Word# (addr2Int# a#)))
BCONPtrStr _ ->
-- should be eliminated during assembleBCOs
@@ -124,6 +112,20 @@ lookupIE interp ie con_nm =
(unpackFS sym_to_find1 ++ " or " ++
unpackFS sym_to_find2)
+-- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode
+lookupAddr :: Interp -> AddrEnv -> Name -> IO (Ptr ())
+lookupAddr interp ae addr_nm = do
+ case lookupNameEnv ae addr_nm of
+ Just (_, AddrPtr ptr) -> return (fromRemotePtr ptr)
+ Nothing -> do -- try looking up in the object files.
+ let sym_to_find = nameToCLabel addr_nm "bytes"
+ -- see Note [Bytes label] in GHC.Cmm.CLabel
+ m <- lookupSymbol interp sym_to_find
+ case m of
+ Just ptr -> return ptr
+ Nothing -> linkFail "GHC.ByteCode.Linker.lookupAddr"
+ (unpackFS sym_to_find)
+
lookupPrimOp :: Interp -> PrimOp -> IO (RemotePtr ())
lookupPrimOp interp primop = do
let sym_to_find = primopToCLabel primop "closure"
@@ -134,18 +136,17 @@ lookupPrimOp interp primop = do
resolvePtr
:: Interp
- -> ItblEnv
- -> ClosureEnv
+ -> LinkerEnv
-> NameEnv Int
-> RemoteRef BreakArray
-> BCOPtr
-> IO ResolvedBCOPtr
-resolvePtr interp ie ce bco_ix breakarray ptr = case ptr of
+resolvePtr interp le bco_ix breakarray ptr = case ptr of
BCOPtrName nm
| Just ix <- lookupNameEnv bco_ix nm
-> return (ResolvedBCORef ix) -- ref to another BCO in this group
- | Just (_, rhv) <- lookupNameEnv ce nm
+ | Just (_, rhv) <- lookupNameEnv (closure_env le) nm
-> return (ResolvedBCOPtr (unsafeForeignRefToRemoteRef rhv))
| otherwise
@@ -161,7 +162,7 @@ resolvePtr interp ie ce bco_ix breakarray ptr = case ptr of
-> ResolvedBCOStaticPtr <$> lookupPrimOp interp op
BCOPtrBCO bco
- -> ResolvedBCOPtrBCO <$> linkBCO interp ie ce bco_ix breakarray bco
+ -> ResolvedBCOPtrBCO <$> linkBCO interp le bco_ix breakarray bco
BCOPtrBreakArray
-> return (ResolvedBCOPtrBreakArray breakarray)
=====================================
compiler/GHC/ByteCode/Types.hs
=====================================
@@ -14,6 +14,7 @@ module GHC.ByteCode.Types
, ByteOff(..), WordOff(..)
, UnlinkedBCO(..), BCOPtr(..), BCONPtr(..)
, ItblEnv, ItblPtr(..)
+ , AddrEnv, AddrPtr(..)
, CgBreakInfo(..)
, ModBreaks (..), BreakIndex, emptyModBreaks
, CCostCentre
@@ -51,7 +52,7 @@ data CompiledByteCode = CompiledByteCode
{ bc_bcos :: [UnlinkedBCO] -- Bunch of interpretable bindings
, bc_itbls :: ItblEnv -- A mapping from DataCons to their itbls
, bc_ffis :: [FFIInfo] -- ffi blocks we allocated
- , bc_strs :: [RemotePtr ()] -- malloc'd strings
+ , bc_strs :: AddrEnv -- malloc'd top-level strings
, bc_breaks :: Maybe ModBreaks -- breakpoint info (Nothing if we're not
-- creating breakpoints, for some reason)
}
@@ -69,7 +70,7 @@ seqCompiledByteCode CompiledByteCode{..} =
rnf bc_bcos `seq`
seqEltsNameEnv rnf bc_itbls `seq`
rnf bc_ffis `seq`
- rnf bc_strs `seq`
+ seqEltsNameEnv rnf bc_strs `seq`
rnf (fmap seqModBreaks bc_breaks)
newtype ByteOff = ByteOff Int
@@ -131,11 +132,14 @@ voidPrimCallInfo :: NativeCallInfo
voidPrimCallInfo = NativeCallInfo NativePrimCall 0 emptyRegSet 0
type ItblEnv = NameEnv (Name, ItblPtr)
+type AddrEnv = NameEnv (Name, AddrPtr)
-- We need the Name in the range so we know which
-- elements to filter out when unloading a module
newtype ItblPtr = ItblPtr (RemotePtr Heap.StgInfoTable)
deriving (Show, NFData)
+newtype AddrPtr = AddrPtr (RemotePtr ())
+ deriving (NFData)
data UnlinkedBCO
= UnlinkedBCO {
@@ -166,6 +170,12 @@ data BCONPtr
= BCONPtrWord {-# UNPACK #-} !Word
| BCONPtrLbl !FastString
| BCONPtrItbl !Name
+ -- | A reference to a top-level string literal; see
+ -- Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode.
+ | BCONPtrAddr !Name
+ -- | Only used internally in the assembler in an intermediate representation;
+ -- should never appear in a fully-assembled UnlinkedBCO.
+ -- Also see Note [Allocating string literals] in GHC.ByteCode.Asm.
| BCONPtrStr !ByteString
instance NFData BCONPtr where
=====================================
compiler/GHC/Core/Opt/Simplify/Iteration.hs
=====================================
@@ -3224,9 +3224,11 @@ simplAlts env0 scrut case_bndr alts cont'
; (alt_env', scrut', case_bndr') <- improveSeq fam_envs env2 scrut
case_bndr case_bndr2 alts
- ; (imposs_deflt_cons, in_alts) <- prepareAlts scrut' case_bndr' alts
+ ; (imposs_deflt_cons, in_alts) <- prepareAlts scrut' case_bndr alts
-- NB: it's possible that the returned in_alts is empty: this is handled
- -- by the caller (rebuildCase) in the missingAlt function
+ -- by the caller (rebuildCase) in the missingAlt function
+ -- NB: pass case_bndr::InId, not case_bndr' :: OutId, to prepareAlts
+ -- See Note [Shadowing in prepareAlts] in GHC.Core.Opt.Simplify.Utils
; alts' <- mapM (simplAlt alt_env' (Just scrut') imposs_deflt_cons case_bndr' cont') in_alts
-- ; pprTrace "simplAlts" (ppr case_bndr $$ ppr alts $$ ppr cont') $ return ()
=====================================
compiler/GHC/Core/Opt/Simplify/Utils.hs
=====================================
@@ -2270,26 +2270,37 @@ h y = case y of
If we inline h into f, the default case of the inlined h can't happen.
If we don't notice this, we may end up filtering out *all* the cases
of the inner case y, which give us nowhere to go!
+
+Note [Shadowing in prepareAlts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note that we pass case_bndr::InId to prepareAlts; an /InId/, not an
+/OutId/. This is vital, because `refineDefaultAlt` uses `tys` to build
+a new /InAlt/. If you pass an OutId, we'll end up appling the
+substitution twice: disaster (#23012).
+
+However this does mean that filling in the default alt might be
+delayed by a simplifier cycle, because an InId has less info than an
+OutId. Test simplCore/should_compile/simpl013 apparently shows this
+up, although I'm not sure exactly how..
-}
-prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
+prepareAlts :: OutExpr -> InId -> [InAlt] -> SimplM ([AltCon], [InAlt])
-- The returned alternatives can be empty, none are possible
-prepareAlts scrut case_bndr' alts
- | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
- -- Case binder is needed just for its type. Note that as an
- -- OutId, it has maximum information; this is important.
- -- Test simpl013 is an example
+--
+-- Note that case_bndr is an InId; see Note [Shadowing in prepareAlts]
+prepareAlts scrut case_bndr alts
+ | Just (tc, tys) <- splitTyConApp_maybe (idType case_bndr)
= do { us <- getUniquesM
- ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
- (yes2, alts2) = refineDefaultAlt us (idMult case_bndr') tc tys idcs1 alts1
- -- the multiplicity on case_bndr's is the multiplicity of the
+ ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
+ (yes2, alts2) = refineDefaultAlt us (idMult case_bndr) tc tys idcs1 alts1
+ -- The multiplicity on case_bndr's is the multiplicity of the
-- case expression The newly introduced patterns in
-- refineDefaultAlt must be scaled by this multiplicity
(yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
-- "idcs" stands for "impossible default data constructors"
-- i.e. the constructors that can't match the default case
- ; when yes2 $ tick (FillInCaseDefault case_bndr')
- ; when yes3 $ tick (AltMerge case_bndr')
+ ; when yes2 $ tick (FillInCaseDefault case_bndr)
+ ; when yes3 $ tick (AltMerge case_bndr)
; return (idcs3, alts3) }
| otherwise -- Not a data type, so nothing interesting happens
=====================================
compiler/GHC/Linker/Loader.hs
=====================================
@@ -141,8 +141,11 @@ getLoaderState interp = readMVar (loader_state (interpLoader interp))
emptyLoaderState :: LoaderState
emptyLoaderState = LoaderState
- { closure_env = emptyNameEnv
- , itbl_env = emptyNameEnv
+ { linker_env = LinkerEnv
+ { closure_env = emptyNameEnv
+ , itbl_env = emptyNameEnv
+ , addr_env = emptyNameEnv
+ }
, pkgs_loaded = init_pkgs
, bcos_loaded = emptyModuleEnv
, objs_loaded = emptyModuleEnv
@@ -157,17 +160,16 @@ emptyLoaderState = LoaderState
extendLoadedEnv :: Interp -> [(Name,ForeignHValue)] -> IO ()
extendLoadedEnv interp new_bindings =
- modifyLoaderState_ interp $ \pls at LoaderState{..} -> do
- let new_ce = extendClosureEnv closure_env new_bindings
- return $! pls{ closure_env = new_ce }
+ modifyLoaderState_ interp $ \pls -> do
+ return $! modifyClosureEnv pls $ \ce ->
+ extendClosureEnv ce new_bindings
-- strictness is important for not retaining old copies of the pls
deleteFromLoadedEnv :: Interp -> [Name] -> IO ()
deleteFromLoadedEnv interp to_remove =
modifyLoaderState_ interp $ \pls -> do
- let ce = closure_env pls
- let new_ce = delListFromNameEnv ce to_remove
- return pls{ closure_env = new_ce }
+ return $ modifyClosureEnv pls $ \ce ->
+ delListFromNameEnv ce to_remove
-- | Load the module containing the given Name and get its associated 'HValue'.
--
@@ -185,7 +187,7 @@ loadName interp hsc_env name = do
then throwGhcExceptionIO (ProgramError "")
else return (pls', links, pkgs)
- case lookupNameEnv (closure_env pls) name of
+ case lookupNameEnv (closure_env (linker_env pls)) name of
Just (_,aa) -> return (pls,(aa, links, pkgs))
Nothing -> assertPpr (isExternalName name) (ppr name) $
do let sym_to_find = nameToCLabel name "closure"
@@ -247,10 +249,7 @@ withExtendedLoadedEnv interp new_env action
-- package), so the reset action only removes the names we
-- added earlier.
reset_old_env = liftIO $
- modifyLoaderState_ interp $ \pls ->
- let cur = closure_env pls
- new = delListFromNameEnv cur (map fst new_env)
- in return pls{ closure_env = new }
+ deleteFromLoadedEnv interp (map fst new_env)
-- | Display the loader state.
@@ -594,13 +593,11 @@ loadExpr interp hsc_env span root_ul_bco = do
then throwGhcExceptionIO (ProgramError "")
else do
-- Load the expression itself
- let ie = itbl_env pls
- ce = closure_env pls
-
-- Load the necessary packages and linkables
- let nobreakarray = error "no break array"
+ let le = linker_env pls
+ nobreakarray = error "no break array"
bco_ix = mkNameEnv [(unlinkedBCOName root_ul_bco, 0)]
- resolved <- linkBCO interp ie ce bco_ix nobreakarray root_ul_bco
+ resolved <- linkBCO interp le bco_ix nobreakarray root_ul_bco
bco_opts <- initBCOOpts (hsc_dflags hsc_env)
[root_hvref] <- createBCOs interp bco_opts [resolved]
fhv <- mkFinalizedHValue interp root_hvref
@@ -944,15 +941,16 @@ loadDecls interp hsc_env span cbc at CompiledByteCode{..} = do
then throwGhcExceptionIO (ProgramError "")
else do
-- Link the expression itself
- let ie = plusNameEnv (itbl_env pls) bc_itbls
- ce = closure_env pls
+ let le = linker_env pls
+ le2 = le { itbl_env = plusNameEnv (itbl_env le) bc_itbls
+ , addr_env = plusNameEnv (addr_env le) bc_strs }
-- Link the necessary packages and linkables
bco_opts <- initBCOOpts (hsc_dflags hsc_env)
- new_bindings <- linkSomeBCOs bco_opts interp ie ce [cbc]
+ new_bindings <- linkSomeBCOs bco_opts interp le2 [cbc]
nms_fhvs <- makeForeignNamedHValueRefs interp new_bindings
- let pls2 = pls { closure_env = extendClosureEnv ce nms_fhvs
- , itbl_env = ie }
+ let ce2 = extendClosureEnv (closure_env le2) nms_fhvs
+ !pls2 = pls { linker_env = le2 { closure_env = ce2 } }
return (pls2, (nms_fhvs, links_needed, units_needed))
where
free_names = uniqDSetToList $
@@ -1170,11 +1168,12 @@ dynLinkBCOs bco_opts interp pls bcos = do
cbcs = concatMap byteCodeOfObject unlinkeds
- ies = map bc_itbls cbcs
- gce = closure_env pls
- final_ie = foldr plusNameEnv (itbl_env pls) ies
+ le1 = linker_env pls
+ ie2 = foldr plusNameEnv (itbl_env le1) (map bc_itbls cbcs)
+ ae2 = foldr plusNameEnv (addr_env le1) (map bc_strs cbcs)
+ le2 = le1 { itbl_env = ie2, addr_env = ae2 }
- names_and_refs <- linkSomeBCOs bco_opts interp final_ie gce cbcs
+ names_and_refs <- linkSomeBCOs bco_opts interp le2 cbcs
-- We only want to add the external ones to the ClosureEnv
let (to_add, to_drop) = partition (isExternalName.fst) names_and_refs
@@ -1184,21 +1183,20 @@ dynLinkBCOs bco_opts interp pls bcos = do
-- Wrap finalizers on the ones we want to keep
new_binds <- makeForeignNamedHValueRefs interp to_add
- return pls1 { closure_env = extendClosureEnv gce new_binds,
- itbl_env = final_ie }
+ let ce2 = extendClosureEnv (closure_env le2) new_binds
+ return $! pls1 { linker_env = le2 { closure_env = ce2 } }
-- Link a bunch of BCOs and return references to their values
linkSomeBCOs :: BCOOpts
-> Interp
- -> ItblEnv
- -> ClosureEnv
+ -> LinkerEnv
-> [CompiledByteCode]
-> IO [(Name,HValueRef)]
-- The returned HValueRefs are associated 1-1 with
-- the incoming unlinked BCOs. Each gives the
-- value of the corresponding unlinked BCO
-linkSomeBCOs bco_opts interp ie ce mods = foldr fun do_link mods []
+linkSomeBCOs bco_opts interp le mods = foldr fun do_link mods []
where
fun CompiledByteCode{..} inner accum =
case bc_breaks of
@@ -1211,7 +1209,7 @@ linkSomeBCOs bco_opts interp ie ce mods = foldr fun do_link mods []
let flat = [ (breakarray, bco) | (breakarray, bcos) <- mods, bco <- bcos ]
names = map (unlinkedBCOName . snd) flat
bco_ix = mkNameEnv (zip names [0..])
- resolved <- sequence [ linkBCO interp ie ce bco_ix breakarray bco
+ resolved <- sequence [ linkBCO interp le bco_ix breakarray bco
| (breakarray, bco) <- flat ]
hvrefs <- createBCOs interp bco_opts resolved
return (zip names hvrefs)
@@ -1301,15 +1299,11 @@ unload_wkr interp keep_linkables pls at LoaderState{..} = do
let -- Note that we want to remove all *local*
-- (i.e. non-isExternal) names too (these are the
-- temporary bindings from the command line).
- keep_name :: (Name, a) -> Bool
- keep_name (n,_) = isExternalName n &&
- nameModule n `elemModuleEnv` remaining_bcos_loaded
-
- itbl_env' = filterNameEnv keep_name itbl_env
- closure_env' = filterNameEnv keep_name closure_env
+ keep_name :: Name -> Bool
+ keep_name n = isExternalName n &&
+ nameModule n `elemModuleEnv` remaining_bcos_loaded
- !new_pls = pls { itbl_env = itbl_env',
- closure_env = closure_env',
+ !new_pls = pls { linker_env = filterLinkerEnv keep_name linker_env,
bcos_loaded = remaining_bcos_loaded,
objs_loaded = remaining_objs_loaded }
=====================================
compiler/GHC/Linker/Types.hs
=====================================
@@ -10,6 +10,12 @@ module GHC.Linker.Types
( Loader (..)
, LoaderState (..)
, uninitializedLoader
+ , modifyClosureEnv
+ , LinkerEnv(..)
+ , filterLinkerEnv
+ , ClosureEnv
+ , emptyClosureEnv
+ , extendClosureEnv
, Linkable(..)
, LinkableSet
, mkLinkableSet
@@ -32,12 +38,12 @@ where
import GHC.Prelude
import GHC.Unit ( UnitId, Module )
-import GHC.ByteCode.Types ( ItblEnv, CompiledByteCode )
+import GHC.ByteCode.Types ( ItblEnv, AddrEnv, CompiledByteCode )
import GHC.Fingerprint.Type ( Fingerprint )
import GHCi.RemoteTypes ( ForeignHValue )
import GHC.Types.Var ( Id )
-import GHC.Types.Name.Env ( NameEnv )
+import GHC.Types.Name.Env ( NameEnv, emptyNameEnv, extendNameEnvList, filterNameEnv )
import GHC.Types.Name ( Name )
import GHC.Utils.Outputable
@@ -67,23 +73,16 @@ serves to ensure mutual exclusion between multiple loaded copies of the GHC
library. The Maybe may be Nothing to indicate that the linker has not yet been
initialised.
-The LoaderState maps Names to actual closures (for interpreted code only), for
+The LinkerEnv maps Names to actual closures (for interpreted code only), for
use during linking.
-}
newtype Loader = Loader { loader_state :: MVar (Maybe LoaderState) }
data LoaderState = LoaderState
- { closure_env :: ClosureEnv
+ { linker_env :: !LinkerEnv
-- ^ Current global mapping from Names to their true values
- , itbl_env :: !ItblEnv
- -- ^ The current global mapping from RdrNames of DataCons to
- -- info table addresses.
- -- When a new Unlinked is linked into the running image, or an existing
- -- module in the image is replaced, the itbl_env must be updated
- -- appropriately.
-
, bcos_loaded :: !LinkableSet
-- ^ The currently loaded interpreted modules (home package)
@@ -102,7 +101,44 @@ data LoaderState = LoaderState
uninitializedLoader :: IO Loader
uninitializedLoader = Loader <$> newMVar Nothing
+modifyClosureEnv :: LoaderState -> (ClosureEnv -> ClosureEnv) -> LoaderState
+modifyClosureEnv pls f =
+ let le = linker_env pls
+ ce = closure_env le
+ in pls { linker_env = le { closure_env = f ce } }
+
+data LinkerEnv = LinkerEnv
+ { closure_env :: !ClosureEnv
+ -- ^ Current global mapping from closure Names to their true values
+
+ , itbl_env :: !ItblEnv
+ -- ^ The current global mapping from RdrNames of DataCons to
+ -- info table addresses.
+ -- When a new Unlinked is linked into the running image, or an existing
+ -- module in the image is replaced, the itbl_env must be updated
+ -- appropriately.
+
+ , addr_env :: !AddrEnv
+ -- ^ Like 'closure_env' and 'itbl_env', but for top-level 'Addr#' literals,
+ -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode.
+ }
+
+filterLinkerEnv :: (Name -> Bool) -> LinkerEnv -> LinkerEnv
+filterLinkerEnv f le = LinkerEnv
+ { closure_env = filterNameEnv (f . fst) (closure_env le)
+ , itbl_env = filterNameEnv (f . fst) (itbl_env le)
+ , addr_env = filterNameEnv (f . fst) (addr_env le)
+ }
+
type ClosureEnv = NameEnv (Name, ForeignHValue)
+
+emptyClosureEnv :: ClosureEnv
+emptyClosureEnv = emptyNameEnv
+
+extendClosureEnv :: ClosureEnv -> [(Name,ForeignHValue)] -> ClosureEnv
+extendClosureEnv cl_env pairs
+ = extendNameEnvList cl_env [ (n, (n,v)) | (n,v) <- pairs]
+
type PkgsLoaded = UniqDFM UnitId LoadedPkgInfo
data LoadedPkgInfo
=====================================
compiler/GHC/StgToByteCode.hs
=====================================
@@ -43,6 +43,7 @@ import GHC.Types.Literal
import GHC.Builtin.PrimOps
import GHC.Builtin.PrimOps.Ids (primOpId)
import GHC.Core.Type
+import GHC.Core.TyCo.Compare (eqType)
import GHC.Types.RepType
import GHC.Core.DataCon
import GHC.Core.TyCon
@@ -64,7 +65,7 @@ import GHC.Runtime.Heap.Layout hiding (WordOff, ByteOff, wordsToBytes)
import GHC.Data.Bitmap
import GHC.Data.OrdList
import GHC.Data.Maybe
-import GHC.Types.Var.Env
+import GHC.Types.Name.Env (mkNameEnv)
import GHC.Types.Tickish
import Data.List ( genericReplicate, genericLength, intersperse
@@ -105,7 +106,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
(text "GHC.StgToByteCode"<+>brackets (ppr this_mod))
(const ()) $ do
-- Split top-level binds into strings and others.
- -- See Note [generating code for top-level string literal bindings].
+ -- See Note [Generating code for top-level string literal bindings].
let (strings, lifted_binds) = partitionEithers $ do -- list monad
bnd <- binds
case bnd of
@@ -116,7 +117,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
stringPtrs <- allocateTopStrings interp strings
(BcM_State{..}, proto_bcos) <-
- runBc hsc_env this_mod mb_modBreaks (mkVarEnv stringPtrs) $ do
+ runBc hsc_env this_mod mb_modBreaks $ do
let flattened_binds = concatMap flattenBind (reverse lifted_binds)
mapM schemeTopBind flattened_binds
@@ -127,7 +128,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
"Proto-BCOs" FormatByteCode
(vcat (intersperse (char ' ') (map ppr proto_bcos)))
- cbc <- assembleBCOs interp profile proto_bcos tycs (map snd stringPtrs)
+ cbc <- assembleBCOs interp profile proto_bcos tycs stringPtrs
(case modBreaks of
Nothing -> Nothing
Just mb -> Just mb{ modBreaks_breakInfo = breakInfo })
@@ -147,28 +148,49 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
interp = hscInterp hsc_env
profile = targetProfile dflags
+-- | see Note [Generating code for top-level string literal bindings]
allocateTopStrings
:: Interp
-> [(Id, ByteString)]
- -> IO [(Var, RemotePtr ())]
+ -> IO AddrEnv
allocateTopStrings interp topStrings = do
let !(bndrs, strings) = unzip topStrings
ptrs <- interpCmd interp $ MallocStrings strings
- return $ zip bndrs ptrs
-
-{-
-Note [generating code for top-level string literal bindings]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Here is a summary on how the byte code generator deals with top-level string
-literals:
-
-1. Top-level string literal bindings are separated from the rest of the module.
-
-2. The strings are allocated via interpCmd, in allocateTopStrings
-
-3. The mapping from binders to allocated strings (topStrings) are maintained in
- BcM and used when generating code for variable references.
--}
+ return $ mkNameEnv (zipWith mk_entry bndrs ptrs)
+ where
+ mk_entry bndr ptr = let nm = getName bndr
+ in (nm, (nm, AddrPtr ptr))
+
+{- Note [Generating code for top-level string literal bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As described in Note [Compilation plan for top-level string literals]
+in GHC.Core, the core-to-core optimizer can introduce top-level Addr#
+bindings to represent string literals. The creates two challenges for
+the bytecode compiler: (1) compiling the bindings themselves, and
+(2) compiling references to such bindings. Here is a summary on how
+we deal with them:
+
+ 1. Top-level string literal bindings are separated from the rest of
+ the module. Memory for them is allocated immediately, via
+ interpCmd, in allocateTopStrings, and the resulting AddrEnv is
+ recorded in the bc_strs field of the CompiledByteCode result.
+
+ 2. When we encounter a reference to a top-level string literal, we
+ generate a PUSH_ADDR pseudo-instruction, which is assembled to
+ a PUSH_UBX instruction with a BCONPtrAddr argument.
+
+ 3. The loader accumulates string literal bindings from loaded
+ bytecode in the addr_env field of the LinkerEnv.
+
+ 4. The BCO linker resolves BCONPtrAddr references by searching both
+ the addr_env (to find literals defined in bytecode) and the native
+ symbol table (to find literals defined in native code).
+
+This strategy works alright, but it does have one significant problem:
+we never free the memory that we allocate for the top-level strings.
+In theory, we could explicitly free it when BCOs are unloaded, but
+this comes with its own complications; see #22400 for why. For now,
+we just accept the leak, but it would nice to find something better. -}
-- -----------------------------------------------------------------------------
-- Compilation schema for the bytecode generator
@@ -1774,26 +1796,25 @@ pushAtom d p (StgVarArg var)
-- slots on to the top of the stack.
| otherwise -- var must be a global variable
- = do topStrings <- getTopStrings
- platform <- targetPlatform <$> getDynFlags
- case lookupVarEnv topStrings var of
- Just ptr -> pushAtom d p $ StgLitArg $ mkLitWord platform $
- fromIntegral $ ptrToWordPtr $ fromRemotePtr ptr
- Nothing
- -- PUSH_G doesn't tag constructors. So we use PACK here
- -- if we are dealing with nullary constructor.
- | Just con <- isDataConWorkId_maybe var
- -> do
- massert (sz == wordSize platform)
- massert (isNullaryRepDataCon con)
- return (unitOL (PACK con 0), sz)
- | otherwise
- -> do
- let
- massert (sz == wordSize platform)
- return (unitOL (PUSH_G (getName var)), sz)
- where
- !sz = idSizeCon platform var
+ = do platform <- targetPlatform <$> getDynFlags
+ let !szb = idSizeCon platform var
+ massert (szb == wordSize platform)
+
+ -- PUSH_G doesn't tag constructors. So we use PACK here
+ -- if we are dealing with nullary constructor.
+ case isDataConWorkId_maybe var of
+ Just con -> do
+ massert (isNullaryRepDataCon con)
+ return (unitOL (PACK con 0), szb)
+
+ Nothing
+ -- see Note [Generating code for top-level string literal bindings]
+ | isUnliftedType (idType var) -> do
+ massert (idType var `eqType` addrPrimTy)
+ return (unitOL (PUSH_ADDR (getName var)), szb)
+
+ | otherwise -> do
+ return (unitOL (PUSH_G (getName var)), szb)
pushAtom _ _ (StgLitArg lit) = pushLiteral True lit
@@ -2162,8 +2183,6 @@ data BcM_State
-- Should be free()d when it is GCd
, modBreaks :: Maybe ModBreaks -- info about breakpoints
, breakInfo :: IntMap CgBreakInfo
- , topStrings :: IdEnv (RemotePtr ()) -- top-level string literals
- -- See Note [generating code for top-level string literal bindings].
}
newtype BcM r = BcM (BcM_State -> IO (BcM_State, r)) deriving (Functor)
@@ -2174,11 +2193,10 @@ ioToBc io = BcM $ \st -> do
return (st, x)
runBc :: HscEnv -> Module -> Maybe ModBreaks
- -> IdEnv (RemotePtr ())
-> BcM r
-> IO (BcM_State, r)
-runBc hsc_env this_mod modBreaks topStrings (BcM m)
- = m (BcM_State hsc_env this_mod 0 [] modBreaks IntMap.empty topStrings)
+runBc hsc_env this_mod modBreaks (BcM m)
+ = m (BcM_State hsc_env this_mod 0 [] modBreaks IntMap.empty)
thenBc :: BcM a -> (a -> BcM b) -> BcM b
thenBc (BcM expr) cont = BcM $ \st0 -> do
@@ -2247,8 +2265,5 @@ newBreakInfo ix info = BcM $ \st ->
getCurrentModule :: BcM Module
getCurrentModule = BcM $ \st -> return (st, thisModule st)
-getTopStrings :: BcM (IdEnv (RemotePtr ()))
-getTopStrings = BcM $ \st -> return (st, topStrings st)
-
tickFS :: FastString
tickFS = fsLit "ticked"
=====================================
testsuite/tests/bytecode/T22376/A.hs
=====================================
@@ -0,0 +1,6 @@
+module A where
+import B
+
+foo :: String
+foo = f "bc"
+{-# NOINLINE foo #-}
=====================================
testsuite/tests/bytecode/T22376/B.hs
=====================================
@@ -0,0 +1,4 @@
+module B where
+
+f :: String -> String
+f = ("a" ++)
=====================================
testsuite/tests/bytecode/T22376/T22376.hs
=====================================
@@ -0,0 +1,6 @@
+{-# LANGUAGE TemplateHaskell #-}
+import Language.Haskell.TH.Syntax (lift)
+import A
+
+main :: IO ()
+main = putStrLn $(lift foo)
=====================================
testsuite/tests/bytecode/T22376/T22376.stdout
=====================================
@@ -0,0 +1 @@
+abc
=====================================
testsuite/tests/bytecode/T22376/all.T
=====================================
@@ -0,0 +1,2 @@
+test('T22376', [req_interp, extra_files(['A.hs', 'B.hs'])], multimod_compile_and_run,
+ ['T22376', '-O1 -fwrite-if-simplified-core -fbyte-code-and-object-code -fprefer-byte-code'])
=====================================
testsuite/tests/plugins/all.T
=====================================
@@ -294,26 +294,22 @@ test('T20803b',
test('test-echo-in-turn',
[extra_files(['echo-plugin/']),
- pre_cmd('$MAKE -s --no-print-directory -C echo-plugin package.test-echo-in-turn TOP={top}'),
- when(opsys('linux') and not ghc_dynamic(), expect_broken(20706))],
+ pre_cmd('$MAKE -s --no-print-directory -C echo-plugin package.test-echo-in-turn TOP={top}')],
makefile_test, [])
test('test-echo-in-line',
[extra_files(['echo-plugin/']),
- pre_cmd('$MAKE -s --no-print-directory -C echo-plugin package.test-echo-in-line TOP={top}'),
- when(opsys('linux') and not ghc_dynamic(), expect_broken(20706))],
+ pre_cmd('$MAKE -s --no-print-directory -C echo-plugin package.test-echo-in-line TOP={top}')],
makefile_test, [])
test('test-echo-in-turn-many-args',
[extra_files(['echo-plugin/']),
- pre_cmd('$MAKE -s --no-print-directory -C echo-plugin package.test-echo-in-turn-many-args TOP={top}'),
- when(opsys('linux') and not ghc_dynamic(), expect_broken(20706))],
+ pre_cmd('$MAKE -s --no-print-directory -C echo-plugin package.test-echo-in-turn-many-args TOP={top}')],
makefile_test, [])
test('test-echo-in-line-many-args',
[extra_files(['echo-plugin/']),
- pre_cmd('$MAKE -s --no-print-directory -C echo-plugin package.test-echo-in-line-many-args TOP={top}'),
- when(opsys('linux') and not ghc_dynamic(), expect_broken(20706))],
+ pre_cmd('$MAKE -s --no-print-directory -C echo-plugin package.test-echo-in-line-many-args TOP={top}')],
makefile_test, [])
test('plugins-external',
=====================================
testsuite/tests/plugins/echo-plugin/Echo.hs
=====================================
@@ -5,6 +5,7 @@ import GHC.Tc.Plugin
import GHC.Tc.Utils.Monad
import qualified GHC.Tc.Utils.Monad as Utils
import GHC.Types.Unique.FM ( emptyUFM )
+import System.IO
plugin :: Plugin
plugin = mkPureOptTcPlugin optCallCount
@@ -27,6 +28,10 @@ optCallCount opts = Just $
n <- unsafeTcPluginTcM $ readMutVar c
let msg = if null opts then "" else mconcat opts
tcPluginIO . putStrLn $ "Echo TcPlugin " ++ msg ++ "#" ++ show n
+
+ -- TODO: Remove #20791
+ tcPluginIO $ hFlush stdout
+
unsafeTcPluginTcM $ writeMutVar c (n + 1)
return $ TcPluginOk [] []
=====================================
testsuite/tests/plugins/hooks-plugin/Hooks/Plugin.hs
=====================================
@@ -10,6 +10,7 @@ import GHC.Hs.Lit
import GHC.Driver.Hooks
import GHC.Tc.Utils.Monad
import GHC.Parser.Annotation
+import System.IO
plugin :: Plugin
plugin = defaultPlugin { driverPlugin = hooksP }
@@ -28,6 +29,10 @@ hooksP opts hsc_env = do
fakeRunMeta :: [CommandLineOption] -> MetaHook TcM
fakeRunMeta opts (MetaE r) _ = do
liftIO . putStrLn $ "Options = " ++ show opts
+
+ -- TODO: Remove #20791
+ liftIO $ hFlush stdout
+
pure $ r zero
where zero :: LHsExpr GhcPs
=====================================
testsuite/tests/simplCore/should_compile/T23012.hs
=====================================
@@ -0,0 +1,30 @@
+{-# LANGUAGE BangPatterns, FlexibleInstances, MultiParamTypeClasses #-}
+
+module T23012 where
+
+import Data.Kind (Type)
+
+class Vector v a where
+ nothing :: v a
+ just :: a -> v a
+
+data Proxy (a :: Type) = P
+
+instance Vector Proxy a where
+ nothing = P
+ just _ = P
+
+step :: Maybe a
+step = Nothing
+{-# INLINE[0] step #-}
+
+stream :: Vector v a => v a
+stream = case step of
+ Nothing -> nothing
+ Just !x -> just x
+{-# INLINE[1] stream #-}
+
+data Id a = MkId a
+
+f :: (Proxy (Id a), Proxy a)
+f = (stream, stream)
=====================================
testsuite/tests/simplCore/should_compile/all.T
=====================================
@@ -472,5 +472,6 @@ test('T22715_2', normal, multimod_compile, ['T22715_2', '-v0 -O -fspecialise-agg
test('T22802', normal, compile, ['-O'])
test('T15205', normal, compile, ['-O -ddump-simpl -dno-typeable-binds -dsuppress-uniques'])
test('T22761', normal, multimod_compile, ['T22761', '-O2 -v0'])
+test('T23012', normal, compile, ['-O'])
test('RewriteHigherOrderPatterns', normal, compile, ['-O -ddump-rule-rewrites -dsuppress-all -dsuppress-uniques'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/194cac10a6dce7ee6f33bccba016303aaf1d5a6a...c583f0bd7de3e1c0d821e158a03a62e52b3125db
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/194cac10a6dce7ee6f33bccba016303aaf1d5a6a...c583f0bd7de3e1c0d821e158a03a62e52b3125db
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230221/f5c66ff4/attachment-0001.html>
More information about the ghc-commits
mailing list