[Git][ghc/ghc][wip/T22166] linker: Fix BFD import libraries

Tamar Christina (@Phyx) gitlab at gitlab.haskell.org
Sun Dec 4 17:26:08 UTC 2022



Tamar Christina pushed to branch wip/T22166 at Glasgow Haskell Compiler / GHC


Commits:
3703b7b5 by Tamar Christina at 2022-12-04T17:25:53+00:00
linker: Fix BFD import libraries

- - - - -


8 changed files:

- hadrian/src/Settings/Packages.hs
- rts/Linker.c
- rts/LinkerInternals.h
- rts/linker/PEi386.c
- testsuite/tests/ghci/linking/dyn/Makefile
- testsuite/tests/ghci/linking/dyn/all.T
- + testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll
- + testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a


Changes:

=====================================
hadrian/src/Settings/Packages.hs
=====================================
@@ -403,7 +403,7 @@ rtsPackageArgs = package rts ? do
         , builder HsCpp ? pure
           [ "-DTOP="             ++ show top ]
 
-        , builder HsCpp ? flag UseLibdw ? arg "-DUSE_LIBDW"
+        , builder HsCpp ? flag UseLibdw ? arg "-DUSE_LIBDW" ]
 
 -- Compile various performance-critical pieces *without* -fPIC -dynamic
 -- even when building a shared library.  If we don't do this, then the


=====================================
rts/Linker.c
=====================================
@@ -135,8 +135,7 @@ extern void iconv();
    This is to enable lazy loading of symbols. Eager loading is problematic
    as it means that all symbols must be available, even those which we will
    never use. This is especially painful on Windows, where the number of
-   libraries required to link things like mingwex (TODO: We no longer depend
-   on mingwex, so think of a different example here) grows to be quite high.
+   libraries required to link things like QT or WxWidgets grows to be quite high.
 
    We proceed through these stages as follows,
 
@@ -194,8 +193,7 @@ extern void iconv();
 
    1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
       then we don't need to load libB. This means the dependency chain for libraries
-      such as mingw32 and mingwex (TODO: We no longer depend on mingwex, so think of
-      a different example here) can be broken down.
+      such as ucrt can be broken down.
 
    2) The number of duplicate symbols, since now only symbols that are
       true duplicates will display the error.
@@ -228,7 +226,7 @@ static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
 static const char *
 symbolTypeString (SymType type)
 {
-    switch (type) {
+    switch (type & ~SYM_TYPE_DUP_DISCARD) {
         case SYM_TYPE_CODE: return "code";
         case SYM_TYPE_DATA: return "data";
         case SYM_TYPE_INDIRECT_DATA: return "indirect-data";
@@ -277,14 +275,19 @@ int ghciInsertSymbolTable(
       insertStrHashTable(table, key, pinfo);
       return 1;
    }
-   else if (pinfo->type != type)
+   else if (pinfo->type ^ type)
    {
-       debugBelch("Symbol type mismatch.\n");
-       debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
-                  key, obj_name, symbolTypeString(type));
-       debugBelch("      yet was defined by %" PATH_FMT " to be a %s symbol.\n",
-                  pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
-                  symbolTypeString(pinfo->type));
+       /* We were asked to discard the symbol on duplicates, do so quietly.  */
+       if (!(type & SYM_TYPE_DUP_DISCARD))
+       {
+         DebugBreak ();
+         debugBelch("Symbol type mismatch.\n");
+         debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
+                    key, obj_name, symbolTypeString(type));
+         debugBelch("      yet was defined by %" PATH_FMT " to be a %s symbol.\n",
+                    pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
+                    symbolTypeString(pinfo->type));
+       }
        return 1;
    }
    else if (pinfo->strength == STRENGTH_STRONG)


=====================================
rts/LinkerInternals.h
=====================================
@@ -56,9 +56,12 @@ typedef struct _Section    Section;
 /* What kind of thing a symbol identifies. We need to know this to determine how
  * to process overflowing relocations. See Note [Processing overflowed relocations]. */
 typedef enum _SymType {
-    SYM_TYPE_CODE, /* the symbol is a function and can be relocated via a jump island */
-    SYM_TYPE_DATA, /* the symbol is data */
-    SYM_TYPE_INDIRECT_DATA, /* see Note [_iob_func symbol] */
+    SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
+    SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
+    SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
+    SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
+                                      however if a duplicate is found with a mismatching
+                                      SymType then discard this one.  */
 } SymType;
 
 


=====================================
rts/linker/PEi386.c
=====================================
@@ -261,6 +261,54 @@
             .asciiz "libfoo_data"
 
 
+   Note [GHC Linking model and import libraries]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   The above describes how import libraries work for static linking.
+   Fundamentally this does not apply to dynamic linking as we do in GHC.
+   The issue is two-folds:
+
+   1. In the linking model above it is expected that the .idata sections be
+      materialized into PLTs during linking.  However in GHC we never create
+      PLTs,  but have out own mechanism for this which is the jump island
+      machinery.   This is required for efficiency.  For one materializing the
+      .idata sections would result in wasting pages.   We'd use one page for
+      every ~100 bytes.  This is extremely wasteful and also fragments the
+      memory.  Secondly the dynamic linker is lazy.  We only perform the final
+      loading if the symbol is used, however with an import library we can
+      discard the actual OC immediately after reading it.   This prevents us from
+      keeping ~1k in memory per symbol for no reason.
+
+   2. GHC itself does not observe symbol visibility correctly during NGC.   This
+      in itself isn't an academic exercise.  The issue stems from GHC using one
+      mechanism for providing two incompatible linking modes:
+      a)  The first mode is generating Haskell shared libraries which are
+           intended to be used by other Haskell code.   This requires us to
+           export the info, data and closures.   For this GHC just re-exports
+           all symbols.  But it doesn't correcly mark data/code.  Symbol
+           visibility is overwritten by telling the linker to export all
+           symbols.
+      b)  The second code is producing code that's supposed to be call-able
+          through a C insterface.   This in reality does not require the
+          export of closures and info tables.  But also does not require the
+          inclusion of the RTS inside the DLL.  Hover this is done today
+          because we don't properly have the RTS as a dynamic library.
+          i.e.  GHC does not only export symbols denoted by foreign export.
+          Also GHC should depend on an RTS library, but at the moment it
+          cannot because of TNTC is incompatible with dynamic linking.
+
+   These two issues mean that for GHC we need to take a different approach
+   to handling import libraries.  For normal C libraries we have proper
+   differentiation between CODE and DATA.   For GHC produced import libraries
+   we do not.   As such the SYM_TYPE_DUP_DISCARD tells the linker that if a
+   duplicate symbol is found, and we were going to discard it anyway, just do
+   so quitely.  This works because the RTS symbols themselves are provided by
+   the currently loaded RTS as built-in symbols.
+
+   Secondly we cannot rely on a text symbol being available.   As such we
+   should only depend on the symbols as defined in the .idata sections,
+   otherwise we would not be able to correctly link against GHC produced
+   import libraries.
+
    Note [Memory allocation]
    ~~~~~~~~~~~~~~~~~~~~~~~~
    The loading of an object begins in `preloadObjectFile`, which allocates a buffer,
@@ -1658,7 +1706,10 @@ ocGetNames_PEi386 ( ObjectCode* oc )
       if (   secNumber != IMAGE_SYM_UNDEFINED
           && secNumber > 0
           && section
-          && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY) {
+          /* Skip all BFD import sections.  */
+          && section->kind != SECTIONKIND_IMPORT
+          && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY
+          && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) {
          /* This symbol is global and defined, viz, exported */
          /* for IMAGE_SYMCLASS_EXTERNAL
                 && !IMAGE_SYM_UNDEFINED,
@@ -1691,12 +1742,49 @@ ocGetNames_PEi386 ( ObjectCode* oc )
           IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue));
       }
       else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) {
-          setImportSymbol(oc, sname);
+          /* Disassembly of section .idata$5:
+
+             0000000000000000 <__imp_Insert>:
+             ...
+                        0: IMAGE_REL_AMD64_ADDR32NB     .idata$6
+
+             The first two bytes contain the ordinal of the function
+             in the format of lowpart highpart. The two bytes combined
+             for the total range of 16 bits which is the function export limit
+             of DLLs.  See note [GHC Linking model and import libraries].  */
+          sname = (SymbolName*)section->start+2;
+          COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
+          addr = get_sym_name( getSymShortName (info, sym), oc);
+
+          IF_DEBUG(linker,
+                   debugBelch("addImportSymbol `%s' => `%s'\n",
+                              sname, (char*)addr));
+          /* We're going to free the any data associated with the import
+             library without copying the sections.  So we have to duplicate
+             the symbol name and values before the pointers become invalid.  */
+          sname = strdup (sname);
+          addr  = strdup (addr);
+          type = has_code_section ? SYM_TYPE_CODE : SYM_TYPE_DATA;
+          type |= SYM_TYPE_DUP_DISCARD;
+          if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+                                     addr, false, type, oc)) {
+             releaseOcInfo (oc);
+             stgFree (oc->image);
+             oc->image = NULL;
+             return false;
+          }
+          setImportSymbol (oc, sname);
+
+          /* Don't process this oc any further. Just exit.  */
+          oc->n_symbols = 0;
+          oc->symbols   = NULL;
+          stgFree (oc->image);
+          oc->image = NULL;
+          releaseOcInfo (oc);
           // There is nothing that we need to resolve in this object since we
           // will never call the import stubs in its text section
           oc->status = OBJECT_DONT_RESOLVE;
-
-          IF_DEBUG(linker_verbose, debugBelch("import symbol %s\n", sname));
+          return true;
       }
       else if (secNumber > 0
                && section


=====================================
testsuite/tests/ghci/linking/dyn/Makefile
=====================================
@@ -88,10 +88,6 @@ compile_libAB_dyn:
 
 .PHONY: compile_libAS_impl_gcc
 compile_libAS_impl_gcc:
-	rm -rf bin_impl_gcc
-	mkdir bin_impl_gcc
-	'$(TEST_HC)' $(MY_TEST_HC_OPTS) -odir "bin_impl_gcc" -shared A.c -o "bin_impl_gcc/$(call DLL,ASimpL)"
-	mv bin_impl_gcc/libASimpL.dll.a bin_impl_gcc/libASx.dll.a
 	echo "main" | '$(TEST_HC)' $(TEST_HC_OPTS_INTERACTIVE) T11072.hs -lASx -L./bin_impl_gcc
 
 .PHONY: compile_libAS_impl_msvc


=====================================
testsuite/tests/ghci/linking/dyn/all.T
=====================================
@@ -41,9 +41,9 @@ test('T10458',
       extra_hc_opts('-L"$PWD/T10458dir" -lAS')],
      ghci_script, ['T10458.script'])
 
-test('T11072gcc', [extra_files(['A.c', 'T11072.hs']),
-                   expect_broken(18718),
-                   unless(doing_ghci, skip), unless(opsys('mingw32'), skip)],
+test('T11072gcc', [extra_files(['A.c', 'T11072.hs', 'bin_impl_gcc/']),
+                   unless(doing_ghci, skip), unless(opsys('mingw32'), skip),
+                   unless(arch('x86_64'), skip)],
      makefile_test, ['compile_libAS_impl_gcc'])
 
 test('T11072msvc', [extra_files(['A.c', 'T11072.hs', 'libAS.def', 'i686/', 'x86_64/']),


=====================================
testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll
=====================================
Binary files /dev/null and b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll differ


=====================================
testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a
=====================================
Binary files /dev/null and b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a differ



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3703b7b56b9e48b0b65b4a3ba9c2f0006c406ef8

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3703b7b56b9e48b0b65b4a3ba9c2f0006c406ef8
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221204/4f907a4c/attachment-0001.html>


More information about the ghc-commits mailing list