[Git][ghc/ghc][wip/romes/rts-linker-direct-symbol-lookup] 2 commits: fixup! rts: lookupSymbolInNativeObj in Windows
Rodrigo Mesquita (@alt-romes)
gitlab at gitlab.haskell.org
Tue Apr 2 13:24:26 UTC 2024
Rodrigo Mesquita pushed to branch wip/romes/rts-linker-direct-symbol-lookup at Glasgow Haskell Compiler / GHC
Commits:
b072d397 by Rodrigo Mesquita at 2024-04-02T14:24:07+01:00
fixup! rts: lookupSymbolInNativeObj in Windows
- - - - -
896de9ca by Rodrigo Mesquita at 2024-04-02T14:24:10+01:00
Use symbol cache in internal interpreter too
This commit makes the symbol cache that was used by the external
interpreter available for the internal interpreter too.
This follows from the analysis in #23415, which suggests that the internal
interpreter could also benefit from this cache and that there is no good
reason to withhold it. Having a single symbol cache that covers both the
internal and external interpreters also makes the design more uniform.
This commit also refactors the cache logic into a function used by both
`lookupSymbol` and `lookupSymbolInDLL`, thereby extending the caching
to `lookupSymbolInDLL` as well.
- - - - -
9 changed files:
- compiler/GHC.hs
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Runtime/Interpreter.hs
- compiler/GHC/Runtime/Interpreter/JS.hs
- compiler/GHC/Runtime/Interpreter/Types.hs
- rts/Linker.c
- rts/linker/LoadNativeObjPosix.c
- rts/linker/PEi386.c
- rts/linker/PEi386.h
Changes:
=====================================
compiler/GHC.hs
=====================================
@@ -394,6 +394,7 @@ import GHC.Types.Name.Ppr
import GHC.Types.TypeEnv
import GHC.Types.BreakInfo
import GHC.Types.PkgQual
+import GHC.Types.Unique.FM
import GHC.Unit
import GHC.Unit.Env
@@ -673,6 +674,7 @@ setTopSessionDynFlags :: GhcMonad m => DynFlags -> m ()
setTopSessionDynFlags dflags = do
hsc_env <- getSession
logger <- getLogger
+ lookup_cache <- liftIO $ newMVar emptyUFM
-- Interpreter
interp <- if
@@ -702,7 +704,7 @@ setTopSessionDynFlags dflags = do
}
s <- liftIO $ newMVar InterpPending
loader <- liftIO Loader.uninitializedLoader
- return (Just (Interp (ExternalInterp (ExtIServ (ExtInterpState conf s))) loader))
+ return (Just (Interp (ExternalInterp (ExtIServ (ExtInterpState conf s))) loader lookup_cache))
-- JavaScript interpreter
| ArchJavaScript <- platformArch (targetPlatform dflags)
@@ -720,7 +722,7 @@ setTopSessionDynFlags dflags = do
, jsInterpFinderOpts = initFinderOpts dflags
, jsInterpFinderCache = hsc_FC hsc_env
}
- return (Just (Interp (ExternalInterp (ExtJS (ExtInterpState cfg s))) loader))
+ return (Just (Interp (ExternalInterp (ExtJS (ExtInterpState cfg s))) loader lookup_cache))
-- Internal interpreter
| otherwise
@@ -728,7 +730,7 @@ setTopSessionDynFlags dflags = do
#if defined(HAVE_INTERNAL_INTERPRETER)
do
loader <- liftIO Loader.uninitializedLoader
- return (Just (Interp InternalInterp loader))
+ return (Just (Interp InternalInterp loader lookup_cache))
#else
return Nothing
#endif
=====================================
compiler/GHC/Driver/Main.hs
=====================================
@@ -2665,7 +2665,7 @@ hscCompileCoreExpr' hsc_env srcspan ds_expr = do
case interp of
-- always generate JS code for the JS interpreter (no bytecode!)
- Interp (ExternalInterp (ExtJS i)) _ ->
+ Interp (ExternalInterp (ExtJS i)) _ _ ->
jsCodeGen hsc_env srcspan i this_mod stg_binds_with_deps binding_id
_ -> do
=====================================
compiler/GHC/Runtime/Interpreter.hs
=====================================
@@ -152,22 +152,22 @@ The main pieces are:
- implementation of Template Haskell (GHCi.TH)
- a few other things needed to run interpreted code
-- top-level iserv directory, containing the codefor the external
- server. This is a fairly simple wrapper, most of the functionality
+- top-level iserv directory, containing the code for the external
+ server. This is a fairly simple wrapper, most of the functionality
is provided by modules in libraries/ghci.
- This module which provides the interface to the server used
by the rest of GHC.
-GHC works with and without -fexternal-interpreter. With the flag, all
-interpreted code is run by the iserv binary. Without the flag,
+GHC works with and without -fexternal-interpreter. With the flag, all
+interpreted code is run by the iserv binary. Without the flag,
interpreted code is run in the same process as GHC.
Things that do not work with -fexternal-interpreter
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
dynCompileExpr cannot work, because we have no way to run code of an
-unknown type in the remote process. This API fails with an error
+unknown type in the remote process. This API fails with an error
message if it is used with -fexternal-interpreter.
Other Notes on Remote GHCi
@@ -441,52 +441,71 @@ initObjLinker :: Interp -> IO ()
initObjLinker interp = interpCmd interp InitLinker
lookupSymbol :: Interp -> FastString -> IO (Maybe (Ptr ()))
-lookupSymbol interp str = case interpInstance interp of
+lookupSymbol interp str = withSymbolCache interp str $
+ case interpInstance interp of
#if defined(HAVE_INTERNAL_INTERPRETER)
- InternalInterp -> fmap fromRemotePtr <$> run (LookupSymbol (unpackFS str))
+ InternalInterp -> fmap fromRemotePtr <$> run (LookupSymbol (unpackFS str))
#endif
-
- ExternalInterp ext -> case ext of
- ExtIServ i -> withIServ i $ \inst -> do
- -- Profiling of GHCi showed a lot of time and allocation spent
- -- making cross-process LookupSymbol calls, so I added a GHC-side
- -- cache which sped things up quite a lot. We have to be careful
- -- to purge this cache when unloading code though.
- cache <- readMVar (instLookupSymbolCache inst)
- case lookupUFM cache str of
- Just p -> return (Just p)
- Nothing -> do
- m <- uninterruptibleMask_ $
- sendMessage inst (LookupSymbol (unpackFS str))
- case m of
- Nothing -> return Nothing
- Just r -> do
- let p = fromRemotePtr r
- cache' = addToUFM cache str p
- modifyMVar_ (instLookupSymbolCache inst) (const (pure cache'))
- return (Just p)
-
- ExtJS {} -> pprPanic "lookupSymbol not supported by the JS interpreter" (ppr str)
+ ExternalInterp ext -> case ext of
+ ExtIServ i -> withIServ i $ \inst -> fmap fromRemotePtr <$> do
+ uninterruptibleMask_ $
+ sendMessage inst (LookupSymbol (unpackFS str))
+ ExtJS {} -> pprPanic "lookupSymbol not supported by the JS interpreter" (ppr str)
lookupSymbolInDLL :: Interp -> RemotePtr LoadedDLL -> FastString -> IO (Maybe (Ptr ()))
-lookupSymbolInDLL interp dll str = case interpInstance interp of
+lookupSymbolInDLL interp dll str = withSymbolCache interp str $
+ case interpInstance interp of
#if defined(HAVE_INTERNAL_INTERPRETER)
- InternalInterp -> fmap fromRemotePtr <$> run (LookupSymbolInDLL dll (unpackFS str))
+ InternalInterp -> fmap fromRemotePtr <$> run (LookupSymbolInDLL dll (unpackFS str))
#endif
- ExternalInterp _ -> panic "lookupSymbolInDLL: not implemented for external interpreter" -- FIXME
+ ExternalInterp ext -> case ext of
+ ExtIServ i -> withIServ i $ \inst -> fmap fromRemotePtr <$> do
+ uninterruptibleMask_ $
+ sendMessage inst (LookupSymbolInDLL dll (unpackFS str))
+ ExtJS {} -> pprPanic "lookupSymbol not supported by the JS interpreter" (ppr str)
lookupClosure :: Interp -> String -> IO (Maybe HValueRef)
lookupClosure interp str =
interpCmd interp (LookupClosure str)
+-- | 'withSymbolCache' tries to find a symbol in the 'interpLookupSymbolCache'
+-- which maps symbols to the address where they are loaded.
+-- When there's a cache hit we simply return the cached address, when there is
+-- a miss we run the action which determines the symbol's address and populate
+-- the cache with the answer.
+withSymbolCache :: Interp
+ -> FastString
+ -- ^ The symbol we are looking up in the cache
+ -> IO (Maybe (Ptr ()))
+ -- ^ An action which determines the address of the symbol we
+ -- are looking up in the cache, which is run if there is a
+ -- cache miss. The result will be cached.
+ -> IO (Maybe (Ptr ()))
+withSymbolCache interp str determine_addr = do
+
+ -- Profiling of GHCi showed a lot of time and allocation spent
+ -- making cross-process LookupSymbol calls, so I added a GHC-side
+ -- cache which sped things up quite a lot. We have to be careful
+ -- to purge this cache when unloading code though.
+ --
+ -- The analysis in #23415 further showed this cache should also benefit the
+ -- internal interpreter's loading times, and needn't be used by the external
+ -- interpreter only.
+ cache <- readMVar (interpLookupSymbolCache interp)
+ case lookupUFM cache str of
+ Just p -> return (Just p)
+ Nothing -> do
+
+ maddr <- determine_addr
+ case maddr of
+ Nothing -> return Nothing
+ Just p -> do
+ let upd_cache cache' = addToUFM cache' str p
+ modifyMVar_ (interpLookupSymbolCache interp) (pure . upd_cache)
+ return (Just p)
+
purgeLookupSymbolCache :: Interp -> IO ()
-purgeLookupSymbolCache interp = case interpInstance interp of
-#if defined(HAVE_INTERNAL_INTERPRETER)
- InternalInterp -> pure ()
-#endif
- ExternalInterp ext -> withExtInterpMaybe ext $ \case
- Nothing -> pure () -- interpreter stopped, nothing to do
- Just inst -> modifyMVar_ (instLookupSymbolCache inst) (const (pure emptyUFM))
+purgeLookupSymbolCache interp = modifyMVar_ (interpLookupSymbolCache interp) (const (pure emptyUFM))
-- | loadDLL loads a dynamic library using the OS's native linker
-- (i.e. dlopen() on Unix, LoadLibrary() on Windows). It takes either
@@ -552,11 +571,9 @@ spawnIServ conf = do
}
pending_frees <- newMVar []
- lookup_cache <- newMVar emptyUFM
let inst = ExtInterpInstance
{ instProcess = process
, instPendingFrees = pending_frees
- , instLookupSymbolCache = lookup_cache
, instExtra = ()
}
pure inst
=====================================
compiler/GHC/Runtime/Interpreter/JS.hs
=====================================
@@ -41,7 +41,6 @@ import GHC.Utils.Panic
import GHC.Utils.Error (logInfo)
import GHC.Utils.Outputable (text)
import GHC.Data.FastString
-import GHC.Types.Unique.FM
import Control.Concurrent
import Control.Monad
@@ -178,11 +177,9 @@ spawnJSInterp cfg = do
}
pending_frees <- newMVar []
- lookup_cache <- newMVar emptyUFM
let inst = ExtInterpInstance
{ instProcess = proc
, instPendingFrees = pending_frees
- , instLookupSymbolCache = lookup_cache
, instExtra = extra
}
=====================================
compiler/GHC/Runtime/Interpreter/Types.hs
=====================================
@@ -51,6 +51,9 @@ data Interp = Interp
, interpLoader :: !Loader
-- ^ Interpreter loader
+
+ , interpLookupSymbolCache :: !(MVar (UniqFM FastString (Ptr ())))
+ -- ^ LookupSymbol cache
}
data InterpInstance
@@ -108,9 +111,6 @@ data ExtInterpInstance c = ExtInterpInstance
-- Finalizers for ForeignRefs can append values to this list
-- asynchronously.
- , instLookupSymbolCache :: !(MVar (UniqFM FastString (Ptr ())))
- -- ^ LookupSymbol cache
-
, instExtra :: !c
-- ^ Instance specific extra fields
}
=====================================
rts/Linker.c
=====================================
@@ -643,7 +643,13 @@ void *lookupSymbolInNativeObj(void *handle, const char *symbol_name)
CHECK(symbol_name[0] == '_');
symbol_name = symbol_name+1;
#endif
+#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
void *result = dlsym(handle, symbol_name);
+#elif defined(OBJFORMAT_PEi386)
+ void *result = lookupSymbolInDLL_PEi386(symbol_name, handle, NULL, NULL);
+#else
+ barf("lookupSymbolInNativeObj: Unsupported platform");
+#endif
RELEASE_LOCK(&linker_mutex);
return result;
@@ -652,7 +658,6 @@ void *lookupSymbolInNativeObj(void *handle, const char *symbol_name)
const char *addDLL(pathchar* dll_name)
{
-# if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
char *errmsg;
if (loadNativeObj(dll_name, &errmsg)) {
return NULL;
@@ -660,13 +665,6 @@ const char *addDLL(pathchar* dll_name)
ASSERT(errmsg != NULL);
return errmsg;
}
-
-# elif defined(OBJFORMAT_PEi386)
- return addDLL_PEi386(dll_name);
-
-# else
- barf("addDLL: not implemented on this platform");
-# endif
}
/* -----------------------------------------------------------------------------
@@ -1861,12 +1859,19 @@ addSection (Section *s, SectionKind kind, SectionAlloc alloc,
#define UNUSED(x) (void)(x)
-#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
void * loadNativeObj (pathchar *path, char **errmsg)
{
- IF_DEBUG(linker, debugBelch("loadNativeObj: path = '%s'\n", path));
+ IF_DEBUG(linker, debugBelch("loadNativeObj: path = '%" PATH_FMT "'\n", path));
ACQUIRE_LOCK(&linker_mutex);
+
+#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
void *r = loadNativeObj_POSIX(path, errmsg);
+#elif defined(OBJFORMAT_PEi386)
+ void *r;
+ *errmsg = (char*)addDLL_PEi386(path, (HINSTANCE*)&r);
+#else
+ barf("loadNativeObj: not implemented on this platform");
+#endif
#if defined(OBJFORMAT_ELF)
if (!r) {
@@ -1879,15 +1884,6 @@ void * loadNativeObj (pathchar *path, char **errmsg)
RELEASE_LOCK(&linker_mutex);
return r;
}
-#else
-void * STG_NORETURN
-loadNativeObj (pathchar *path, char **errmsg)
-{
- UNUSED(path);
- UNUSED(errmsg);
- barf("loadNativeObj: not implemented on this platform");
-}
-#endif
static HsInt unloadNativeObj_(void *handle)
{
=====================================
rts/linker/LoadNativeObjPosix.c
=====================================
@@ -1,7 +1,10 @@
-#include "CheckUnload.h"
-#include "ForeignExports.h"
#include "LinkerInternals.h"
#include "Rts.h"
+
+#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+
+#include "CheckUnload.h"
+#include "ForeignExports.h"
#include "RtsUtils.h"
#include "Profiling.h"
@@ -208,4 +211,4 @@ success:
return retval;
}
-
+#endif /* elf + macho */
=====================================
rts/linker/PEi386.c
=====================================
@@ -1141,47 +1141,55 @@ SymbolAddr*
lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent )
{
OpenedDLL* o_dll;
- SymbolAddr* sym;
- for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
- /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
+ for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next)
+ lookupSymbolInDLL_PEi386(lbl, o_dll->instance, o_dll->name, dependent);
+ return NULL;
+}
- sym = GetProcAddress(o_dll->instance, lbl+STRIP_LEADING_UNDERSCORE);
- if (sym != NULL) {
- /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
- return sym;
- }
+SymbolAddr*
+lookupSymbolInDLL_PEi386 ( const SymbolName* lbl, HINSTANCE instance, pathchar* dll_name, ObjectCode *dependent)
+{
+ SymbolAddr* sym;
- // TODO: Drop this
- /* Ticket #2283.
- Long description: http://support.microsoft.com/kb/132044
- tl;dr:
- If C/C++ compiler sees __declspec(dllimport) ... foo ...
- it generates call *__imp_foo, and __imp_foo here has exactly
- the same semantics as in __imp_foo = GetProcAddress(..., "foo")
- */
- if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) {
- sym = GetProcAddress(o_dll->instance,
- lbl + 6 + STRIP_LEADING_UNDERSCORE);
- if (sym != NULL) {
- SymbolAddr** indirect = m32_alloc(dependent->rw_m32, sizeof(SymbolAddr*), 8);
- if (indirect == NULL) {
- barf("lookupSymbolInDLLs: Failed to allocation indirection");
- }
- *indirect = sym;
- IF_DEBUG(linker,
- debugBelch("warning: %s from %S is linked instead of %s\n",
- lbl+6+STRIP_LEADING_UNDERSCORE, o_dll->name, lbl));
- return (void*) indirect;
- }
- }
+ /* debugBelch("look in %ls for %s\n", dll_name, lbl); */
- sym = GetProcAddress(o_dll->instance, lbl);
+ sym = GetProcAddress(instance, lbl+STRIP_LEADING_UNDERSCORE);
+ if (sym != NULL) {
+ /*debugBelch("found %s in %s\n", lbl+1,dll_name);*/
+ return sym;
+ }
+
+ // TODO: Drop this
+ /* Ticket #2283.
+ Long description: http://support.microsoft.com/kb/132044
+ tl;dr:
+ If C/C++ compiler sees __declspec(dllimport) ... foo ...
+ it generates call *__imp_foo, and __imp_foo here has exactly
+ the same semantics as in __imp_foo = GetProcAddress(..., "foo")
+ */
+ if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) {
+ sym = GetProcAddress(instance,
+ lbl + 6 + STRIP_LEADING_UNDERSCORE);
if (sym != NULL) {
- /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
- return sym;
+ SymbolAddr** indirect = m32_alloc(dependent->rw_m32, sizeof(SymbolAddr*), 8);
+ if (indirect == NULL) {
+ barf("lookupSymbolInDLLs: Failed to allocation indirection");
+ }
+ *indirect = sym;
+ IF_DEBUG(linker,
+ debugBelch("warning: %s from %S is linked instead of %s\n",
+ lbl+6+STRIP_LEADING_UNDERSCORE, dll_name, lbl));
+ return (void*) indirect;
}
}
+
+ sym = GetProcAddress(instance, lbl);
+ if (sym != NULL) {
+ /*debugBelch("found %s in %s\n", lbl,dll_name);*/
+ return sym;
+ }
+
return NULL;
}
=====================================
rts/linker/PEi386.h
=====================================
@@ -60,6 +60,7 @@ bool ocRunFini_PEi386 ( ObjectCode *oc );
bool ocGetNames_PEi386 ( ObjectCode* oc );
bool ocVerifyImage_PEi386 ( ObjectCode* oc );
SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type);
+SymbolAddr *lookupSymbolInDLL_PEi386 (const SymbolName* lbl, HINSTANCE instance, pathchar* dll_name, ObjectCode *dependent);
/* See Note [mingw-w64 name decoration scheme] */
/* We use myindex to calculate array addresses, rather than
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d445f1b32208c16908707bc8a930c4b856d53b26...896de9ca275c52bff530aafd343ceb246866d2f5
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d445f1b32208c16908707bc8a930c4b856d53b26...896de9ca275c52bff530aafd343ceb246866d2f5
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240402/b7397829/attachment-0001.html>
More information about the ghc-commits mailing list