[Git][ghc/ghc][wip/backports-9.8-2] rts: Make addDLL a wrapper around loadNativeObj

Ben Gamari (@bgamari) gitlab at gitlab.haskell.org
Thu Oct 10 21:11:26 UTC 2024



Ben Gamari pushed to branch wip/backports-9.8-2 at Glasgow Haskell Compiler / GHC


Commits:
34601024 by Rodrigo Mesquita at 2024-10-10T17:09:48-04:00
rts: Make addDLL a wrapper around loadNativeObj

Rewrite the implementation of `addDLL` as a wrapper around the more
principled `loadNativeObj` rts linker function. The latter should be
preferred while the former is preserved for backwards compatibility.

`loadNativeObj` was previously only available on ELF platforms, so this
commit further refactors the rts linker to transform loadNativeObj_ELF
into loadNativeObj_POSIX, which is available in ELF and MachO platforms.

The refactor made it possible to remove the `dl_mutex` mutex in favour
of always using `linker_mutex` (rather than a combination of both).

Lastly, we implement `loadNativeObj` for Windows too.

(cherry picked from commit dcfaa190e1e1182a2efe4e2f601affbb832a49bb)

- - - - -


13 changed files:

- libraries/ghci/GHCi/ObjLink.hs
- rts/Linker.c
- rts/LinkerInternals.h
- rts/RtsSymbols.c
- rts/include/rts/Linker.h
- rts/linker/Elf.c
- rts/linker/Elf.h
- + rts/linker/LoadNativeObjPosix.c
- + rts/linker/LoadNativeObjPosix.h
- rts/linker/PEi386.c
- rts/linker/PEi386.h
- rts/rts.cabal.in
- testsuite/tests/ghci/linking/dyn/T3372.hs


Changes:

=====================================
libraries/ghci/GHCi/ObjLink.hs
=====================================
@@ -74,7 +74,7 @@ lookupSymbolInDLL :: Ptr LoadedDLL -> String -> IO (Maybe (Ptr a))
 lookupSymbolInDLL dll str_in = do
    let str = prefixUnderscore str_in
    withCAString str $ \c_str -> do
-     addr <- c_lookupSymbolInDLL dll c_str
+     addr <- c_lookupSymbolInNativeObj dll c_str
      if addr == nullPtr
        then return Nothing
        else return (Just addr)
@@ -99,8 +99,6 @@ prefixUnderscore
 -- searches the standard locations for the appropriate library.
 --
 loadDLL :: String -> IO (Either String (Ptr LoadedDLL))
--- Nothing      => success
--- Just err_msg => failure
 loadDLL str0 = do
   let
      -- On Windows, addDLL takes a filename without an extension, because
@@ -112,7 +110,7 @@ loadDLL str0 = do
   --
   (maybe_handle, maybe_errmsg) <- withFilePath (normalise str) $ \dll ->
     alloca $ \errmsg_ptr -> (,)
-      <$> c_addDLL dll errmsg_ptr
+      <$> c_loadNativeObj dll errmsg_ptr
       <*> peek errmsg_ptr
 
   if maybe_handle == nullPtr
@@ -176,8 +174,8 @@ resolveObjs = do
 -- Foreign declarations to RTS entry points which does the real work;
 -- ---------------------------------------------------------------------------
 
-foreign import ccall unsafe "addDLL"                  c_addDLL                  :: CFilePath -> Ptr CString -> IO (Ptr LoadedDLL)
-foreign import ccall unsafe "lookupSymbolInDLL"       c_lookupSymbolInDLL       :: Ptr LoadedDLL -> CString -> IO (Ptr a)
+foreign import ccall unsafe "loadNativeObj"           c_loadNativeObj           :: CFilePath -> Ptr CString -> IO (Ptr LoadedDLL)
+foreign import ccall unsafe "lookupSymbolInNativeObj" c_lookupSymbolInNativeObj :: Ptr LoadedDLL -> CString -> IO (Ptr a)
 foreign import ccall unsafe "initLinker_"             c_initLinker_             :: CInt -> IO ()
 foreign import ccall unsafe "lookupSymbol"            c_lookupSymbol            :: CString -> IO (Ptr a)
 foreign import ccall unsafe "loadArchive"             c_loadArchive             :: CFilePath -> IO Int


=====================================
rts/Linker.c
=====================================
@@ -77,10 +77,16 @@
 #  include <mach-o/fat.h>
 #endif
 
+#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+#  include "linker/LoadNativeObjPosix.h"
+#endif
+
 #if defined(dragonfly_HOST_OS)
 #include <sys/tls.h>
 #endif
 
+#define UNUSED(x) (void)(x)
+
 /*
  * Note [iconv and FreeBSD]
  * ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -130,7 +136,7 @@ extern void iconv();
    - Indexing (e.g. ocVerifyImage and ocGetNames)
    - Initialization (e.g. ocResolve)
    - RunInit (e.g. ocRunInit)
-   - Lookup (e.g. lookupSymbol)
+   - Lookup (e.g. lookupSymbol/lookupSymbolInNativeObj)
 
    This is to enable lazy loading of symbols. Eager loading is problematic
    as it means that all symbols must be available, even those which we will
@@ -417,11 +423,8 @@ static int linker_init_done = 0 ;
 
 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
 static void *dl_prog_handle;
-static regex_t re_invalid;
-static regex_t re_realso;
-#if defined(THREADED_RTS)
-Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
-#endif
+regex_t re_invalid;
+regex_t re_realso;
 #endif
 
 void initLinker (void)
@@ -455,9 +458,6 @@ initLinker_ (int retain_cafs)
 
 #if defined(THREADED_RTS)
     initMutex(&linker_mutex);
-#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
-    initMutex(&dl_mutex);
-#endif
 #endif
 
     symhash = allocStrHashTable();
@@ -520,9 +520,6 @@ exitLinker( void ) {
    if (linker_init_done == 1) {
       regfree(&re_invalid);
       regfree(&re_realso);
-#if defined(THREADED_RTS)
-      closeMutex(&dl_mutex);
-#endif
    }
 #endif
    if (linker_init_done == 1) {
@@ -556,87 +553,6 @@ exitLinker( void ) {
 
 #  if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
 
-/* Suppose in ghci we load a temporary SO for a module containing
-       f = 1
-   and then modify the module, recompile, and load another temporary
-   SO with
-       f = 2
-   Then as we don't unload the first SO, dlsym will find the
-       f = 1
-   symbol whereas we want the
-       f = 2
-   symbol. We therefore need to keep our own SO handle list, and
-   try SOs in the right order. */
-
-typedef
-   struct _OpenedSO {
-      struct _OpenedSO* next;
-      void *handle;
-   }
-   OpenedSO;
-
-/* A list thereof. */
-static OpenedSO* openedSOs = NULL;
-
-static void *
-internal_dlopen(const char *dll_name, const char **errmsg_ptr)
-{
-   OpenedSO* o_so;
-   void *hdl;
-
-   // omitted: RTLD_NOW
-   // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
-   IF_DEBUG(linker,
-      debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
-
-   //-------------- Begin critical section ------------------
-   // This critical section is necessary because dlerror() is not
-   // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
-   // Also, the error message returned must be copied to preserve it
-   // (see POSIX also)
-
-   ACQUIRE_LOCK(&dl_mutex);
-
-   // When dlopen() loads a profiled dynamic library, it calls the
-   // ctors which will call registerCcsList() to append the defined
-   // CostCentreStacks to CCS_LIST. This execution path starting from
-   // addDLL() was only protected by dl_mutex previously. However,
-   // another thread may be doing other things with the RTS linker
-   // that transitively calls refreshProfilingCCSs() which also
-   // accesses CCS_LIST, and those execution paths are protected by
-   // linker_mutex. So there's a risk of data race that may lead to
-   // segfaults (#24423), and we need to ensure the ctors are also
-   // protected by ccs_mutex.
-#if defined(PROFILING)
-   ACQUIRE_LOCK(&ccs_mutex);
-#endif
-
-   hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
-
-#if defined(PROFILING)
-   RELEASE_LOCK(&ccs_mutex);
-#endif
-
-   if (hdl == NULL) {
-      /* dlopen failed; return a ptr to the error msg. */
-      char *errmsg = dlerror();
-      if (errmsg == NULL) errmsg = "addDLL: unknown error";
-      char *errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
-      strcpy(errmsg_copy, errmsg);
-      *errmsg_ptr = errmsg_copy;
-   } else {
-      o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
-      o_so->handle = hdl;
-      o_so->next   = openedSOs;
-      openedSOs    = o_so;
-   }
-
-   RELEASE_LOCK(&dl_mutex);
-   //--------------- End critical section -------------------
-
-   return hdl;
-}
-
 /*
   Note [RTLD_LOCAL]
   ~~~~~~~~~~~~~~~~~
@@ -657,11 +573,10 @@ internal_dlopen(const char *dll_name, const char **errmsg_ptr)
 
 static void *
 internal_dlsym(const char *symbol) {
-    OpenedSO* o_so;
     void *v;
 
-    // We acquire dl_mutex as concurrent dl* calls may alter dlerror
-    ACQUIRE_LOCK(&dl_mutex);
+    // concurrent dl* calls may alter dlerror
+    ASSERT_LOCK_HELD(&linker_mutex);
 
     // clears dlerror
     dlerror();
@@ -669,20 +584,19 @@ internal_dlsym(const char *symbol) {
     // look in program first
     v = dlsym(dl_prog_handle, symbol);
     if (dlerror() == NULL) {
-        RELEASE_LOCK(&dl_mutex);
         IF_DEBUG(linker, debugBelch("internal_dlsym: found symbol '%s' in program\n", symbol));
         return v;
     }
 
-    for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
-        v = dlsym(o_so->handle, symbol);
-        if (dlerror() == NULL) {
+    for (ObjectCode *nc = loaded_objects; nc; nc = nc->next_loaded_object) {
+        if (nc->type == DYNAMIC_OBJECT) {
+          v = dlsym(nc->dlopen_handle, symbol);
+          if (dlerror() == NULL) {
             IF_DEBUG(linker, debugBelch("internal_dlsym: found symbol '%s' in shared object\n", symbol));
-            RELEASE_LOCK(&dl_mutex);
             return v;
+          }
         }
     }
-    RELEASE_LOCK(&dl_mutex);
 
     IF_DEBUG(linker, debugBelch("internal_dlsym: looking for symbol '%s' in special cases\n", symbol));
 #   define SPECIAL_SYMBOL(sym) \
@@ -722,98 +636,42 @@ internal_dlsym(const char *symbol) {
     // we failed to find the symbol
     return NULL;
 }
+#  endif
 
-void *lookupSymbolInDLL(void *handle, const char *symbol_name)
+void *lookupSymbolInNativeObj(void *handle, const char *symbol_name)
 {
+    ACQUIRE_LOCK(&linker_mutex);
+
 #if defined(OBJFORMAT_MACHO)
+    // The Mach-O standard says ccall symbols representing a function are prefixed with _
+    // https://math-atlas.sourceforge.net/devel/assembly/MachORuntime.pdf
     CHECK(symbol_name[0] == '_');
     symbol_name = symbol_name+1;
 #endif
-
-    ACQUIRE_LOCK(&dl_mutex); // dlsym alters dlerror
+#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
     void *result = dlsym(handle, symbol_name);
-    RELEASE_LOCK(&dl_mutex);
+#elif defined(OBJFORMAT_PEi386)
+    void *result = lookupSymbolInDLL_PEi386(symbol_name, handle, NULL, NULL);
+#else
+    void* result;
+    UNUSED(handle);
+    UNUSED(symbol_name);
+    barf("lookupSymbolInNativeObj: Unsupported platform");
+#endif
+
+    RELEASE_LOCK(&linker_mutex);
     return result;
 }
-#  endif
 
-void *addDLL(pathchar* dll_name, const char **errmsg_ptr)
+const char *addDLL(pathchar* dll_name)
 {
-#  if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
-   /* ------------------- ELF DLL loader ------------------- */
-
-#define NMATCH 5
-   regmatch_t match[NMATCH];
-   void *handle;
-   const char *errmsg;
-   FILE* fp;
-   size_t match_length;
-#define MAXLINE 1000
-   char line[MAXLINE];
-   int result;
-
-   IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
-   handle = internal_dlopen(dll_name, &errmsg);
-
-   if (handle != NULL) {
-      return handle;
-   }
-
-   // GHC #2615
-   // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
-   // contain linker scripts rather than ELF-format object code. This
-   // code handles the situation by recognizing the real object code
-   // file name given in the linker script.
-   //
-   // If an "invalid ELF header" error occurs, it is assumed that the
-   // .so file contains a linker script instead of ELF object code.
-   // In this case, the code looks for the GROUP ( ... ) linker
-   // directive. If one is found, the first file name inside the
-   // parentheses is treated as the name of a dynamic library and the
-   // code attempts to dlopen that file. If this is also unsuccessful,
-   // an error message is returned.
-
-   // see if the error message is due to an invalid ELF header
-   IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
-   result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
-   IF_DEBUG(linker, debugBelch("result = %i\n", result));
-   if (result == 0) {
-      // success -- try to read the named file as a linker script
-      match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
-                                 MAXLINE-1);
-      strncpy(line, (errmsg+(match[1].rm_so)),match_length);
-      line[match_length] = '\0'; // make sure string is null-terminated
-      IF_DEBUG(linker, debugBelch("file name = '%s'\n", line));
-      if ((fp = __rts_fopen(line, "r")) == NULL) {
-         *errmsg_ptr = errmsg; // return original error if open fails
-         return NULL;
-      }
-      // try to find a GROUP or INPUT ( ... ) command
-      while (fgets(line, MAXLINE, fp) != NULL) {
-         IF_DEBUG(linker, debugBelch("input line = %s", line));
-         if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
-            // success -- try to dlopen the first named file
-            IF_DEBUG(linker, debugBelch("match%s\n",""));
-            line[match[2].rm_eo] = '\0';
-            stgFree((void*)errmsg); // Free old message before creating new one
-            handle = internal_dlopen(line+match[2].rm_so, errmsg_ptr);
-            break;
-         }
-         // if control reaches here, no GROUP or INPUT ( ... ) directive
-         // was found and the original error message is returned to the
-         // caller
-      }
-      fclose(fp);
+   char *errmsg;
+   if (loadNativeObj(dll_name, &errmsg)) {
+     return NULL;
+   } else {
+     ASSERT(errmsg != NULL);
+     return errmsg;
    }
-   return handle;
-
-#  elif defined(OBJFORMAT_PEi386)
-   // FIXME
-   return addDLL_PEi386(dll_name, NULL);
-
-#  else
-   barf("addDLL: not implemented on this platform");
-#  endif
 }
 
 /* -----------------------------------------------------------------------------
@@ -1246,10 +1104,10 @@ void freeObjectCode (ObjectCode *oc)
     }
 
     if (oc->type == DYNAMIC_OBJECT) {
-#if defined(OBJFORMAT_ELF)
-        ACQUIRE_LOCK(&dl_mutex);
-        freeNativeCode_ELF(oc);
-        RELEASE_LOCK(&dl_mutex);
+#if defined(OBJFORMAT_ELF) || defined(darwin_HOST_OS)
+        ACQUIRE_LOCK(&linker_mutex);
+        freeNativeCode_POSIX(oc);
+        RELEASE_LOCK(&linker_mutex);
 #else
         barf("freeObjectCode: This shouldn't happen");
 #endif
@@ -1913,12 +1771,20 @@ HsInt purgeObj (pathchar *path)
     return r;
 }
 
+ObjectCode *lookupObjectByPath(pathchar *path) {
+  for (ObjectCode *o = objects; o; o = o->next) {
+     if (0 == pathcmp(o->fileName, path)) {
+         return o;
+     }
+  }
+  return NULL;
+}
+
 OStatus getObjectLoadStatus_ (pathchar *path)
 {
-    for (ObjectCode *o = objects; o; o = o->next) {
-       if (0 == pathcmp(o->fileName, path)) {
-           return o->status;
-       }
+    ObjectCode *oc = lookupObjectByPath(path);
+    if (oc) {
+      return oc->status;
     }
     return OBJECT_NOT_LOADED;
 }
@@ -2003,27 +1869,35 @@ addSection (Section *s, SectionKind kind, SectionAlloc alloc,
                        size, kind ));
 }
 
-#define UNUSED(x) (void)(x)
-
-#if defined(OBJFORMAT_ELF)
 void * loadNativeObj (pathchar *path, char **errmsg)
 {
+   IF_DEBUG(linker, debugBelch("loadNativeObj: path = '%" PATH_FMT "'\n", path));
    ACQUIRE_LOCK(&linker_mutex);
-   void *r = loadNativeObj_ELF(path, errmsg);
-   RELEASE_LOCK(&linker_mutex);
-   return r;
-}
+
+#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+   void *r = loadNativeObj_POSIX(path, errmsg);
+#elif defined(OBJFORMAT_PEi386)
+   void *r = NULL;
+   *errmsg = (char*)addDLL_PEi386(path, (HINSTANCE*)&r);
 #else
-void * STG_NORETURN
-loadNativeObj (pathchar *path, char **errmsg)
-{
-   UNUSED(path);
+   void *r;
    UNUSED(errmsg);
    barf("loadNativeObj: not implemented on this platform");
-}
 #endif
 
-HsInt unloadNativeObj (void *handle)
+#if defined(OBJFORMAT_ELF)
+   if (!r) {
+       // Check if native object may be a linker script and try loading a native
+       // object from it
+       r = loadNativeObjFromLinkerScript_ELF(errmsg);
+   }
+#endif
+
+   RELEASE_LOCK(&linker_mutex);
+   return r;
+}
+
+static HsInt unloadNativeObj_(void *handle)
 {
     bool unloadedAnyObj = false;
 
@@ -2056,11 +1930,18 @@ HsInt unloadNativeObj (void *handle)
     if (unloadedAnyObj) {
         return 1;
     } else {
-        errorBelch("unloadObjNativeObj_ELF: can't find `%p' to unload", handle);
+        errorBelch("unloadObjNativeObj_: can't find `%p' to unload", handle);
         return 0;
     }
 }
 
+HsInt unloadNativeObj(void *handle) {
+  ACQUIRE_LOCK(&linker_mutex);
+  HsInt r = unloadNativeObj_(handle);
+  RELEASE_LOCK(&linker_mutex);
+  return r;
+}
+
 /* -----------------------------------------------------------------------------
  * Segment management
  */


=====================================
rts/LinkerInternals.h
=====================================
@@ -412,10 +412,6 @@ extern Elf_Word shndx_table_uninit_label;
 
 #if defined(THREADED_RTS)
 extern Mutex linker_mutex;
-
-#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
-extern Mutex dl_mutex;
-#endif
 #endif /* THREADED_RTS */
 
 /* Type of an initializer */
@@ -515,9 +511,9 @@ HsInt loadArchive_ (pathchar *path);
 #define USE_CONTIGUOUS_MMAP 0
 #endif
 
-
 HsInt isAlreadyLoaded( pathchar *path );
 OStatus getObjectLoadStatus_ (pathchar *path);
+ObjectCode *lookupObjectByPath(pathchar *path);
 HsInt loadOc( ObjectCode* oc );
 ObjectCode* mkOc( ObjectType type, pathchar *path, char *image, int imageSize,
                   bool mapped, pathchar *archiveMemberName,


=====================================
rts/RtsSymbols.c
=====================================
@@ -508,6 +508,7 @@ extern char **environ;
       SymI_HasDataProto(stg_block_putmvar)                                  \
       MAIN_CAP_SYM                                                      \
       SymI_HasProto(addDLL)                                             \
+      SymI_HasProto(loadNativeObj)                                      \
       SymI_HasProto(addLibrarySearchPath)                               \
       SymI_HasProto(removeLibrarySearchPath)                            \
       SymI_HasProto(findSystemLibrary)                                  \
@@ -618,7 +619,7 @@ extern char **environ;
       SymI_HasProto(purgeObj)                                           \
       SymI_HasProto(insertSymbol)                                       \
       SymI_HasProto(lookupSymbol)                                       \
-      SymI_HasProto(lookupSymbolInDLL)                                  \
+      SymI_HasProto(lookupSymbolInNativeObj)                            \
       SymI_HasDataProto(stg_makeStablePtrzh)                                \
       SymI_HasDataProto(stg_mkApUpd0zh)                                     \
       SymI_HasDataProto(stg_labelThreadzh)                                  \


=====================================
rts/include/rts/Linker.h
=====================================
@@ -90,10 +90,10 @@ void *loadNativeObj( pathchar *path, char **errmsg );
    Takes the handle returned from loadNativeObj() as an argument. */
 HsInt unloadNativeObj( void *handle );
 
-/* load a dynamic library */
-void *addDLL(pathchar* dll_name, const char **errmsg);
+void *lookupSymbolInNativeObj(void *handle, const char *symbol_name);
 
-void *lookupSymbolInDLL(void *handle, const char *symbol_name);
+/* load a dynamic library */
+const char *addDLL(pathchar* dll_name);
 
 /* add a path to the library search path */
 HsPtr addLibrarySearchPath(pathchar* dll_path);


=====================================
rts/linker/Elf.c
=====================================
@@ -27,11 +27,15 @@
 #include "sm/OSMem.h"
 #include "linker/util.h"
 #include "linker/elf_util.h"
+#include "linker/LoadNativeObjPosix.h"
 
+#include <fs_rts.h>
 #include <link.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
+#include <regex.h>    // regex is already used by dlopen() so this is OK
+                      // to use here without requiring an additional lib
 #if defined(HAVE_DLFCN_H)
 #include <dlfcn.h>
 #endif
@@ -2071,159 +2075,6 @@ int ocRunFini_ELF( ObjectCode *oc )
     return true;
 }
 
-/*
- * Shared object loading
- */
-
-#if defined(HAVE_DLINFO)
-struct piterate_cb_info {
-  ObjectCode *nc;
-  void *l_addr;   /* base virtual address of the loaded code */
-};
-
-static int loadNativeObjCb_(struct dl_phdr_info *info,
-    size_t _size STG_UNUSED, void *data) {
-  struct piterate_cb_info *s = (struct piterate_cb_info *) data;
-
-  // This logic mimicks _dl_addr_inside_object from glibc
-  // For reference:
-  // int
-  // internal_function
-  // _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr)
-  // {
-  //   int n = l->l_phnum;
-  //   const ElfW(Addr) reladdr = addr - l->l_addr;
-  //
-  //   while (--n >= 0)
-  //     if (l->l_phdr[n].p_type == PT_LOAD
-  //         && reladdr - l->l_phdr[n].p_vaddr >= 0
-  //         && reladdr - l->l_phdr[n].p_vaddr < l->l_phdr[n].p_memsz)
-  //       return 1;
-  //   return 0;
-  // }
-
-  if ((void*) info->dlpi_addr == s->l_addr) {
-    int n = info->dlpi_phnum;
-    while (--n >= 0) {
-      if (info->dlpi_phdr[n].p_type == PT_LOAD) {
-        NativeCodeRange* ncr =
-          stgMallocBytes(sizeof(NativeCodeRange), "loadNativeObjCb_");
-        ncr->start = (void*) ((char*) s->l_addr + info->dlpi_phdr[n].p_vaddr);
-        ncr->end = (void*) ((char*) ncr->start + info->dlpi_phdr[n].p_memsz);
-
-        ncr->next = s->nc->nc_ranges;
-        s->nc->nc_ranges = ncr;
-      }
-    }
-  }
-  return 0;
-}
-#endif /* defined(HAVE_DLINFO) */
-
-static void copyErrmsg(char** errmsg_dest, char* errmsg) {
-  if (errmsg == NULL) errmsg = "loadNativeObj_ELF: unknown error";
-  *errmsg_dest = stgMallocBytes(strlen(errmsg)+1, "loadNativeObj_ELF");
-  strcpy(*errmsg_dest, errmsg);
-}
-
-// need dl_mutex
-void freeNativeCode_ELF (ObjectCode *nc) {
-  dlclose(nc->dlopen_handle);
-
-  NativeCodeRange *ncr = nc->nc_ranges;
-  while (ncr) {
-    NativeCodeRange* last_ncr = ncr;
-    ncr = ncr->next;
-    stgFree(last_ncr);
-  }
-}
-
-void * loadNativeObj_ELF (pathchar *path, char **errmsg)
-{
-   ObjectCode* nc;
-   void *hdl, *retval;
-
-   IF_DEBUG(linker, debugBelch("loadNativeObj_ELF %" PATH_FMT "\n", path));
-
-   retval = NULL;
-   ACQUIRE_LOCK(&dl_mutex);
-
-   /* Loading the same object multiple times will lead to chaos
-    * as we will have two ObjectCodes but one underlying dlopen
-    * handle. Fail if this happens.
-    */
-   if (getObjectLoadStatus_(path) != OBJECT_NOT_LOADED) {
-     copyErrmsg(errmsg, "loadNativeObj_ELF: Already loaded");
-     goto dlopen_fail;
-   }
-
-   nc = mkOc(DYNAMIC_OBJECT, path, NULL, 0, false, NULL, 0);
-
-   foreignExportsLoadingObject(nc);
-   hdl = dlopen(path, RTLD_NOW|RTLD_LOCAL);
-   nc->dlopen_handle = hdl;
-   foreignExportsFinishedLoadingObject();
-   if (hdl == NULL) {
-     /* dlopen failed; save the message in errmsg */
-     copyErrmsg(errmsg, dlerror());
-     goto dlopen_fail;
-   }
-
-#if defined(HAVE_DLINFO)
-   struct link_map *map;
-   if (dlinfo(hdl, RTLD_DI_LINKMAP, &map) == -1) {
-     /* dlinfo failed; save the message in errmsg */
-     copyErrmsg(errmsg, dlerror());
-     goto dlinfo_fail;
-   }
-
-   hdl = NULL; // pass handle ownership to nc
-
-   struct piterate_cb_info piterate_info = {
-     .nc = nc,
-     .l_addr = (void *) map->l_addr
-   };
-   dl_iterate_phdr(loadNativeObjCb_, &piterate_info);
-   if (!nc->nc_ranges) {
-     copyErrmsg(errmsg, "dl_iterate_phdr failed to find obj");
-     goto dl_iterate_phdr_fail;
-   }
-   nc->unloadable = true;
-#else
-   nc->nc_ranges = NULL;
-   nc->unloadable = false;
-#endif /* defined (HAVE_DLINFO) */
-
-   insertOCSectionIndices(nc);
-
-   nc->next_loaded_object = loaded_objects;
-   loaded_objects = nc;
-
-   retval = nc->dlopen_handle;
-
-#if defined(PROFILING)
-  // collect any new cost centres that were defined in the loaded object.
-  refreshProfilingCCSs();
-#endif
-
-   goto success;
-
-dl_iterate_phdr_fail:
-   // already have dl_mutex
-   freeNativeCode_ELF(nc);
-dlinfo_fail:
-   if (hdl) dlclose(hdl);
-dlopen_fail:
-success:
-
-   RELEASE_LOCK(&dl_mutex);
-
-   IF_DEBUG(linker, debugBelch("loadNativeObj_ELF result=%p\n", retval));
-
-   return retval;
-}
-
-
 /*
  * PowerPC & X86_64 ELF specifics
  */
@@ -2273,4 +2124,80 @@ int ocAllocateExtras_ELF( ObjectCode *oc )
 
 #endif /* NEED_SYMBOL_EXTRAS */
 
+extern regex_t re_invalid;
+extern regex_t re_realso;
+
+// Try interpreting an object which couldn't be loaded as a linker script and
+// load the first object in the linker GROUP ( ... ) directive (see comment below).
+//
+// Receives the non-NULL error message outputted from an attempt to load an
+// object (eg `loadNativeObj_POSIX` ).
+//
+// Returns the handle to the loaded object first mentioned in the linker script.
+// If this process fails at any point, the function returns NULL and outputs a
+// new error message.
+void * loadNativeObjFromLinkerScript_ELF(char **errmsg)
+{
+   // GHC #2615
+   // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
+   // contain linker scripts rather than ELF-format object code. This
+   // code handles the situation by recognizing the real object code
+   // file name given in the linker script.
+   //
+   // If an "invalid ELF header" error occurs, it is assumed that the
+   // .so file contains a linker script instead of ELF object code.
+   // In this case, the code looks for the GROUP ( ... ) linker
+   // directive. If one is found, the first file name inside the
+   // parentheses is treated as the name of a dynamic library and the
+   // code attempts to dlopen that file. If this is also unsuccessful,
+   // an error message is returned.
+
+#define NMATCH 5
+   regmatch_t match[NMATCH];
+   FILE* fp;
+   size_t match_length;
+#define MAXLINE 1000
+   char line[MAXLINE];
+   int result;
+   void* r = NULL;
+
+   ASSERT_LOCK_HELD(&linker_mutex);
+
+   // see if the error message is due to an invalid ELF header
+   IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", *errmsg));
+   result = regexec(&re_invalid, *errmsg, (size_t) NMATCH, match, 0);
+   IF_DEBUG(linker, debugBelch("result = %i\n", result));
+   if (result == 0) {
+      // success -- try to read the named file as a linker script
+      match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
+                                 MAXLINE-1);
+      strncpy(line, (*errmsg+(match[1].rm_so)),match_length);
+      line[match_length] = '\0'; // make sure string is null-terminated
+      IF_DEBUG(linker, debugBelch("file name = '%s'\n", line));
+      if ((fp = __rts_fopen(line, "r")) == NULL) {
+         // return original error if open fails
+         return NULL;
+      }
+      // try to find a GROUP or INPUT ( ... ) command
+      while (fgets(line, MAXLINE, fp) != NULL) {
+         IF_DEBUG(linker, debugBelch("input line = %s", line));
+         if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
+            // success -- try to dlopen the first named file
+            IF_DEBUG(linker, debugBelch("match%s\n",""));
+            line[match[2].rm_eo] = '\0';
+            stgFree((void*)*errmsg); // Free old message before creating new one
+            r = loadNativeObj_POSIX(line+match[2].rm_so, errmsg);
+            break;
+         }
+         // if control reaches here, no GROUP or INPUT ( ... ) directive
+         // was found and the original error message is returned to the
+         // caller
+      }
+      fclose(fp);
+   }
+
+   return r;
+}
+
+
 #endif /* elf */


=====================================
rts/linker/Elf.h
=====================================
@@ -14,7 +14,6 @@ int ocResolve_ELF        ( ObjectCode* oc );
 int ocRunInit_ELF        ( ObjectCode* oc );
 int ocRunFini_ELF        ( ObjectCode* oc );
 int ocAllocateExtras_ELF ( ObjectCode *oc );
-void freeNativeCode_ELF  ( ObjectCode *nc );
-void *loadNativeObj_ELF  ( pathchar *path, char **errmsg );
+void *loadNativeObjFromLinkerScript_ELF( char **errmsg );
 
 #include "EndPrivate.h"


=====================================
rts/linker/LoadNativeObjPosix.c
=====================================
@@ -0,0 +1,214 @@
+#include "LinkerInternals.h"
+#include "Rts.h"
+
+#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+
+#include "CheckUnload.h"
+#include "ForeignExports.h"
+#include "RtsUtils.h"
+#include "Profiling.h"
+
+#include "linker/LoadNativeObjPosix.h"
+
+#if defined(HAVE_DLFCN_H)
+#include <dlfcn.h>
+#endif
+
+#if defined(HAVE_DLINFO)
+#include <link.h>
+#endif
+
+#include <string.h>
+
+/*
+ * Shared object loading
+ */
+
+#if defined(HAVE_DLINFO)
+struct piterate_cb_info {
+  ObjectCode *nc;
+  void *l_addr;   /* base virtual address of the loaded code */
+};
+
+static int loadNativeObjCb_(struct dl_phdr_info *info,
+    size_t _size STG_UNUSED, void *data) {
+  struct piterate_cb_info *s = (struct piterate_cb_info *) data;
+
+  // This logic mimicks _dl_addr_inside_object from glibc
+  // For reference:
+  // int
+  // internal_function
+  // _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr)
+  // {
+  //   int n = l->l_phnum;
+  //   const ElfW(Addr) reladdr = addr - l->l_addr;
+  //
+  //   while (--n >= 0)
+  //     if (l->l_phdr[n].p_type == PT_LOAD
+  //         && reladdr - l->l_phdr[n].p_vaddr >= 0
+  //         && reladdr - l->l_phdr[n].p_vaddr < l->l_phdr[n].p_memsz)
+  //       return 1;
+  //   return 0;
+  // }
+
+  if ((void*) info->dlpi_addr == s->l_addr) {
+    int n = info->dlpi_phnum;
+    while (--n >= 0) {
+      if (info->dlpi_phdr[n].p_type == PT_LOAD) {
+        NativeCodeRange* ncr =
+          stgMallocBytes(sizeof(NativeCodeRange), "loadNativeObjCb_");
+        ncr->start = (void*) ((char*) s->l_addr + info->dlpi_phdr[n].p_vaddr);
+        ncr->end = (void*) ((char*) ncr->start + info->dlpi_phdr[n].p_memsz);
+
+        ncr->next = s->nc->nc_ranges;
+        s->nc->nc_ranges = ncr;
+      }
+    }
+  }
+  return 0;
+}
+#endif /* defined(HAVE_DLINFO) */
+
+static void copyErrmsg(char** errmsg_dest, char* errmsg) {
+  if (errmsg == NULL) errmsg = "loadNativeObj_POSIX: unknown error";
+  *errmsg_dest = stgMallocBytes(strlen(errmsg)+1, "loadNativeObj_POSIX");
+  strcpy(*errmsg_dest, errmsg);
+}
+
+void freeNativeCode_POSIX (ObjectCode *nc) {
+  ASSERT_LOCK_HELD(&linker_mutex);
+
+  dlclose(nc->dlopen_handle);
+
+  NativeCodeRange *ncr = nc->nc_ranges;
+  while (ncr) {
+    NativeCodeRange* last_ncr = ncr;
+    ncr = ncr->next;
+    stgFree(last_ncr);
+  }
+}
+
+void * loadNativeObj_POSIX (pathchar *path, char **errmsg)
+{
+   ObjectCode* nc;
+   void *hdl, *retval;
+
+   ASSERT_LOCK_HELD(&linker_mutex);
+
+   IF_DEBUG(linker, debugBelch("loadNativeObj_POSIX %" PATH_FMT "\n", path));
+
+   retval = NULL;
+
+
+   /* If we load the same object multiple times, just return the
+    * already-loaded handle. Note that this is broken if unloadNativeObj
+    * is used, as we don’t do any reference counting; see #24345.
+    */
+   ObjectCode *existing_oc = lookupObjectByPath(path);
+   if (existing_oc && existing_oc->status != OBJECT_UNLOADED) {
+     if (existing_oc->type == DYNAMIC_OBJECT) {
+       retval = existing_oc->dlopen_handle;
+       goto success;
+     }
+     copyErrmsg(errmsg, "loadNativeObj_POSIX: already loaded as non-dynamic object");
+     goto dlopen_fail;
+   }
+
+   nc = mkOc(DYNAMIC_OBJECT, path, NULL, 0, false, NULL, 0);
+
+   foreignExportsLoadingObject(nc);
+
+   // When dlopen() loads a profiled dynamic library, it calls the ctors which
+   // will call registerCcsList() to append the defined CostCentreStacks to
+   // CCS_LIST. However, another thread may be doing other things with the RTS
+   // linker that transitively calls refreshProfilingCCSs() which also accesses
+   // CCS_LIST. So there's a risk of data race that may lead to segfaults
+   // (#24423), and we need to ensure the ctors are also protected by
+   // ccs_mutex.
+#if defined(PROFILING)
+   ACQUIRE_LOCK(&ccs_mutex);
+#endif
+
+   // If we HAVE_DLINFO, we use RTLD_NOW rather than RTLD_LAZY because we want
+   // to learn eagerly about all external functions. Otherwise, there is no
+   // additional advantage to being eager, so it is better to be lazy and only bind
+   // functions when needed for better performance.
+   int dlopen_mode;
+#if defined(HAVE_DLINFO)
+   dlopen_mode = RTLD_NOW;
+#else
+   dlopen_mode = RTLD_LAZY;
+#endif
+
+   hdl = dlopen(path, dlopen_mode|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
+   nc->dlopen_handle = hdl;
+   nc->status = OBJECT_READY;
+
+#if defined(PROFILING)
+   RELEASE_LOCK(&ccs_mutex);
+#endif
+
+   foreignExportsFinishedLoadingObject();
+
+   if (hdl == NULL) {
+     /* dlopen failed; save the message in errmsg */
+     copyErrmsg(errmsg, dlerror());
+     goto dlopen_fail;
+   }
+
+#if defined(HAVE_DLINFO)
+   struct link_map *map;
+   if (dlinfo(hdl, RTLD_DI_LINKMAP, &map) == -1) {
+     /* dlinfo failed; save the message in errmsg */
+     copyErrmsg(errmsg, dlerror());
+     goto dlinfo_fail;
+   }
+
+   hdl = NULL; // pass handle ownership to nc
+
+   struct piterate_cb_info piterate_info = {
+     .nc = nc,
+     .l_addr = (void *) map->l_addr
+   };
+   dl_iterate_phdr(loadNativeObjCb_, &piterate_info);
+   if (!nc->nc_ranges) {
+     copyErrmsg(errmsg, "dl_iterate_phdr failed to find obj");
+     goto dl_iterate_phdr_fail;
+   }
+   nc->unloadable = true;
+#else
+   nc->nc_ranges = NULL;
+   nc->unloadable = false;
+#endif /* defined (HAVE_DLINFO) */
+
+   insertOCSectionIndices(nc);
+
+   nc->next_loaded_object = loaded_objects;
+   loaded_objects = nc;
+
+   retval = nc->dlopen_handle;
+
+#if defined(PROFILING)
+  // collect any new cost centres that were defined in the loaded object.
+  refreshProfilingCCSs();
+#endif
+
+   goto success;
+
+#if defined(HAVE_DLINFO)
+dl_iterate_phdr_fail:
+#endif
+   freeNativeCode_POSIX(nc);
+#if defined(HAVE_DLINFO)
+dlinfo_fail:
+#endif
+   if (hdl) dlclose(hdl);
+dlopen_fail:
+success:
+
+   IF_DEBUG(linker, debugBelch("loadNativeObj_POSIX result=%p\n", retval));
+
+   return retval;
+}
+
+#endif /* elf + macho */


=====================================
rts/linker/LoadNativeObjPosix.h
=====================================
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "Rts.h"
+#include "LinkerInternals.h"
+
+#include "BeginPrivate.h"
+
+void freeNativeCode_POSIX  ( ObjectCode *nc );
+void *loadNativeObj_POSIX  ( pathchar *path, char **errmsg );
+
+#include "EndPrivate.h"


=====================================
rts/linker/PEi386.c
=====================================
@@ -867,6 +867,7 @@ error:
     stgFree(buf);
 
     char* errormsg = stgMallocBytes(sizeof(char) * 80, "addDLL_PEi386");
+    if (loaded) *loaded = NULL;
     snprintf(errormsg, 80, "addDLL: %" PATH_FMT " or dependencies not loaded. (Win32 error %lu)", dll_name, GetLastError());
     /* LoadLibrary failed; return a ptr to the error msg. */
     return errormsg;
@@ -1014,7 +1015,10 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f
     stgFree(dllName);
 
     IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%" PATH_FMT "'\n", symbol, dll));
-    const char* result = addDLL(dll);
+    // We must call `addDLL_PEi386` directly rather than `addDLL` because `addDLL`
+    // is now a wrapper around `loadNativeObj` which acquires a lock which we
+    // already have here.
+    const char* result = addDLL_PEi386(dll, NULL);
 
     stgFree(image);
 
@@ -1138,47 +1142,57 @@ SymbolAddr*
 lookupSymbolInDLLs ( const SymbolName* lbl, ObjectCode *dependent )
 {
     OpenedDLL* o_dll;
-    SymbolAddr* sym;
+    SymbolAddr* res;
 
-    for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
-        /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
+    for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next)
+        if ((res = lookupSymbolInDLL_PEi386(lbl, o_dll->instance, o_dll->name, dependent)))
+            return res;
+    return NULL;
+}
 
-        sym = GetProcAddress(o_dll->instance, lbl+STRIP_LEADING_UNDERSCORE);
-        if (sym != NULL) {
-            /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
-            return sym;
-        }
+SymbolAddr*
+lookupSymbolInDLL_PEi386 ( const SymbolName* lbl, HINSTANCE instance, pathchar* dll_name STG_UNUSED, ObjectCode *dependent)
+{
+    SymbolAddr* sym;
 
-        // TODO: Drop this
-        /* Ticket #2283.
-           Long description: http://support.microsoft.com/kb/132044
-           tl;dr:
-             If C/C++ compiler sees __declspec(dllimport) ... foo ...
-             it generates call *__imp_foo, and __imp_foo here has exactly
-             the same semantics as in __imp_foo = GetProcAddress(..., "foo")
-         */
-        if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) {
-            sym = GetProcAddress(o_dll->instance,
-                                 lbl + 6 + STRIP_LEADING_UNDERSCORE);
-            if (sym != NULL) {
-                SymbolAddr** indirect = m32_alloc(dependent->rw_m32, sizeof(SymbolAddr*), 8);
-                if (indirect == NULL) {
-                    barf("lookupSymbolInDLLs: Failed to allocation indirection");
-                }
-                *indirect = sym;
-                IF_DEBUG(linker,
-                  debugBelch("warning: %s from %S is linked instead of %s\n",
-                             lbl+6+STRIP_LEADING_UNDERSCORE, o_dll->name, lbl));
-                return (void*) indirect;
-               }
-        }
+    /* debugBelch("look in %ls for %s\n", dll_name, lbl); */
 
-        sym = GetProcAddress(o_dll->instance, lbl);
+    sym = GetProcAddress(instance, lbl+STRIP_LEADING_UNDERSCORE);
+    if (sym != NULL) {
+        /*debugBelch("found %s in %ls\n", lbl+STRIP_LEADING_UNDERSCORE,dll_name);*/
+        return sym;
+    }
+
+    // TODO: Drop this
+    /* Ticket #2283.
+       Long description: http://support.microsoft.com/kb/132044
+       tl;dr:
+         If C/C++ compiler sees __declspec(dllimport) ... foo ...
+         it generates call *__imp_foo, and __imp_foo here has exactly
+         the same semantics as in __imp_foo = GetProcAddress(..., "foo")
+     */
+    if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) {
+        sym = GetProcAddress(instance,
+                             lbl + 6 + STRIP_LEADING_UNDERSCORE);
         if (sym != NULL) {
-            /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
-            return sym;
+            SymbolAddr** indirect = m32_alloc(dependent->rw_m32, sizeof(SymbolAddr*), 8);
+            if (indirect == NULL) {
+                barf("lookupSymbolInDLLs: Failed to allocation indirection");
+            }
+            *indirect = sym;
+            IF_DEBUG(linker,
+              debugBelch("warning: %s from %S is linked instead of %s\n",
+                         lbl+6+STRIP_LEADING_UNDERSCORE, dll_name, lbl));
+            return (void*) indirect;
            }
     }
+
+    sym = GetProcAddress(instance, lbl);
+    if (sym != NULL) {
+        /*debugBelch("found %s in %s\n", lbl,dll_name);*/
+        return sym;
+       }
+
     return NULL;
 }
 


=====================================
rts/linker/PEi386.h
=====================================
@@ -60,6 +60,7 @@ bool ocRunFini_PEi386     ( ObjectCode *oc );
 bool ocGetNames_PEi386    ( ObjectCode* oc );
 bool ocVerifyImage_PEi386 ( ObjectCode* oc );
 SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl, ObjectCode *dependent, SymType *type);
+SymbolAddr *lookupSymbolInDLL_PEi386 (const SymbolName* lbl, HINSTANCE instance, pathchar* dll_name, ObjectCode *dependent);
 
 /* See Note [mingw-w64 name decoration scheme] */
 /* We use myindex to calculate array addresses, rather than


=====================================
rts/rts.cabal.in
=====================================
@@ -624,6 +624,7 @@ library
                  linker/Elf.c
                  linker/InitFini.c
                  linker/LoadArchive.c
+                 linker/LoadNativeObjPosix.c
                  linker/M32Alloc.c
                  linker/MMap.c
                  linker/MachO.c


=====================================
testsuite/tests/ghci/linking/dyn/T3372.hs
=====================================
@@ -1,3 +1,6 @@
+-- Note: This test exercises running concurrent GHCi sessions, but
+-- although this test is expected to pass, running concurrent GHCi
+-- sessions is currently broken in other ways; see #24345.
 {-# LANGUAGE MagicHash #-}
 
 module Main where



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/34601024f6164efbf7dfd8ede7e5d820e55007fa

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/34601024f6164efbf7dfd8ede7e5d820e55007fa
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241010/480df10a/attachment-0001.html>


More information about the ghc-commits mailing list