[Git][ghc/ghc][wip/T22166] linker: Fix BFD import libraries

Tamar Christina (@Phyx) gitlab at gitlab.haskell.org
Sun Dec 4 15:59:03 UTC 2022



Tamar Christina pushed to branch wip/T22166 at Glasgow Haskell Compiler / GHC


Commits:
f296eae7 by Tamar Christina at 2022-12-04T15:55:43+00:00
linker: Fix BFD import libraries

- - - - -


5 changed files:

- rts/Linker.c
- rts/LinkerInternals.h
- rts/linker/PEi386.c
- testsuite/tests/ghci/linking/dyn/Makefile
- testsuite/tests/ghci/linking/dyn/all.T


Changes:

=====================================
rts/Linker.c
=====================================
@@ -228,7 +228,7 @@ static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
 static const char *
 symbolTypeString (SymType type)
 {
-    switch (type) {
+    switch (type & ~SYM_TYPE_DUP_DISCARD) {
         case SYM_TYPE_CODE: return "code";
         case SYM_TYPE_DATA: return "data";
         case SYM_TYPE_INDIRECT_DATA: return "indirect-data";
@@ -277,14 +277,19 @@ int ghciInsertSymbolTable(
       insertStrHashTable(table, key, pinfo);
       return 1;
    }
-   else if (pinfo->type != type)
+   else if (pinfo->type ^ type)
    {
-       debugBelch("Symbol type mismatch.\n");
-       debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
-                  key, obj_name, symbolTypeString(type));
-       debugBelch("      yet was defined by %" PATH_FMT " to be a %s symbol.\n",
-                  pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
-                  symbolTypeString(pinfo->type));
+       /* We were asked to discard the symbol on duplicates, do so quietly.  */
+       if (!(type & SYM_TYPE_DUP_DISCARD))
+       {
+         DebugBreak ();
+         debugBelch("Symbol type mismatch.\n");
+         debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
+                    key, obj_name, symbolTypeString(type));
+         debugBelch("      yet was defined by %" PATH_FMT " to be a %s symbol.\n",
+                    pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
+                    symbolTypeString(pinfo->type));
+       }
        return 1;
    }
    else if (pinfo->strength == STRENGTH_STRONG)


=====================================
rts/LinkerInternals.h
=====================================
@@ -54,9 +54,12 @@ typedef struct _Section    Section;
 /* What kind of thing a symbol identifies. We need to know this to determine how
  * to process overflowing relocations. See Note [Processing overflowed relocations]. */
 typedef enum _SymType {
-    SYM_TYPE_CODE, /* the symbol is a function and can be relocated via a jump island */
-    SYM_TYPE_DATA, /* the symbol is data */
-    SYM_TYPE_INDIRECT_DATA, /* see Note [_iob_func symbol] */
+    SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
+    SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
+    SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
+    SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
+                                      however if a duplicate is found with a mismatching
+                                      SymType then discard this one.  */
 } SymType;
 
 


=====================================
rts/linker/PEi386.c
=====================================
@@ -261,6 +261,54 @@
             .asciiz "libfoo_data"
 
 
+   Note [GHC Linking model and import libraries]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   The above describes how import libraries work for static linking.
+   Fundamentally this does not apply to Dynamic linknig as we do in GHC.
+   The issue is two folds:
+
+   1. In the linking model above it is expected that the .idata sections be
+      materialized into PLTs during linking.  However in GHC we never create
+      PLTs,  but have out own mechanism for this which is the jump island
+      machinery.   This is required for efficiency.  For one materializing the
+      idata sections would result in wasting pages.   We'd use one page for
+      every ~100 bytes.  This is extremely wasteful and also fragments the
+      memory.  Secondly the dynamic linker is lazy.  We only perform the final
+      loading if the symbol is used, however with an import library we can
+      discard the actual OC immediately after reading it.   This prevents us from
+      keeping ~1k in memory per symbol for no reason.
+
+   2. GHC itself does not observ symbol visibility correctly during NGC.   This
+      in itself isn't an academic exercise.  The issue stems from GHC using one
+      mechanism for providing two incompatible linking modes:
+      a)  The first mode is generating Haskell shared libraries which are
+           intended to be used by other Haskell code.   This requires us to
+           export the info, data and closures.   For this GHC just re-exports
+           all symbols.  But it doesn't correcly mark data/code.  Symbol
+           visibility is overwritten by telling the linker to export all
+           symbols.
+      b)  The second code is producing code that's supposed to be call-able
+          through a C insterface.   This in reality does not require the
+          export of closures and info tables.  But also does not require the
+          inclusion of the RTS inside the DLL.  Hover this is done today
+          because we don't properly have the RTS as a dynamic library.
+          i.e.  GHC does not only export symbols denoted by foreign export.
+          Also GHC should depend on an RTS library, but at the moment it
+          cannot because of TNTC is incompatible with dynamic linking.
+
+   These two issues mean that for GHC we need to take a different approach
+   to handling import libraries.  For normal C libraries we have proper
+   differentiation between CODE and DATA.   For GHC produced import libraries
+   we do not.   As such the SYM_TYPE_DUP_DISCARD tells the linker that if a
+   duplictae symbol is found, and we were going to discard it anyway, just do
+   so quitely.  This works because the RTS symbols themselves are provided by
+   the currently loaded RTS as built-in symbols.
+
+   Secondly we cannot rely on a text symbol being available.   As such we
+   should only depend on the symbols as definited in the .idata sections,
+   otherwise we would not be able to correctly link against GHC produced
+   import libraries.
+
    Note [Memory allocation]
    ~~~~~~~~~~~~~~~~~~~~~~~~
    The loading of an object begins in `preloadObjectFile`, which allocates a buffer,
@@ -1700,7 +1748,10 @@ ocGetNames_PEi386 ( ObjectCode* oc )
       if (   secNumber != IMAGE_SYM_UNDEFINED
           && secNumber > 0
           && section
-          && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY) {
+          /* Skip all BFD import sections.  */
+          && section->kind != SECTIONKIND_IMPORT
+          && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY
+          && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) {
          /* This symbol is global and defined, viz, exported */
          /* for IMAGE_SYMCLASS_EXTERNAL
                 && !IMAGE_SYM_UNDEFINED,
@@ -1733,12 +1784,49 @@ ocGetNames_PEi386 ( ObjectCode* oc )
           IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue));
       }
       else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) {
-          setImportSymbol(oc, sname);
+          /* Disassembly of section .idata$5:
+
+             0000000000000000 <__imp_Insert>:
+             ...
+                        0: IMAGE_REL_AMD64_ADDR32NB     .idata$6
+
+             The first two bytes contain the ordinal of the function
+             in the format of lowpart highpart. The two bytes combined
+             for the total range of 16 bits which is the function export limit
+             of DLLs.  See note [GHC Linking model and import libraries].  */
+          sname = (SymbolName*)section->start+2;
+          COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
+          addr = get_sym_name( getSymShortName (info, sym), oc);
+
+          IF_DEBUG(linker,
+                   debugBelch("addImportSymbol `%s' => `%s'\n",
+                              sname, (char*)addr));
+          /* We're going to free the any data associated with the import
+             library without copying the sections.  So we have to duplicate
+             the symbol name and values before the pointers become invalid.  */
+          sname = strdup (sname);
+          addr  = strdup (addr);
+          type = has_code_section ? SYM_TYPE_CODE : SYM_TYPE_DATA;
+          type |= SYM_TYPE_DUP_DISCARD;
+          if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+                                     addr, false, type, oc)) {
+             releaseOcInfo (oc);
+             stgFree (oc->image);
+             oc->image = NULL;
+             return false;
+          }
+          setImportSymbol (oc, sname);
+
+          /* Don't process this oc any further. Just exit.  */
+          oc->n_symbols = 0;
+          oc->symbols   = NULL;
+          stgFree (oc->image);
+          oc->image = NULL;
+          releaseOcInfo (oc);
           // There is nothing that we need to resolve in this object since we
           // will never call the import stubs in its text section
           oc->status = OBJECT_DONT_RESOLVE;
-
-          IF_DEBUG(linker_verbose, debugBelch("import symbol %s\n", sname));
+          return true;
       }
       else if (secNumber > 0
                && section


=====================================
testsuite/tests/ghci/linking/dyn/Makefile
=====================================
@@ -88,10 +88,6 @@ compile_libAB_dyn:
 
 .PHONY: compile_libAS_impl_gcc
 compile_libAS_impl_gcc:
-	rm -rf bin_impl_gcc
-	mkdir bin_impl_gcc
-	'$(TEST_HC)' $(MY_TEST_HC_OPTS) -odir "bin_impl_gcc" -shared A.c -o "bin_impl_gcc/$(call DLL,ASimpL)"
-	mv bin_impl_gcc/libASimpL.dll.a bin_impl_gcc/libASx.dll.a
 	echo "main" | '$(TEST_HC)' $(TEST_HC_OPTS_INTERACTIVE) T11072.hs -lASx -L./bin_impl_gcc
 
 .PHONY: compile_libAS_impl_msvc


=====================================
testsuite/tests/ghci/linking/dyn/all.T
=====================================
@@ -33,9 +33,9 @@ test('T10458',
       extra_hc_opts('-L"$PWD/T10458dir" -lAS')],
      ghci_script, ['T10458.script'])
 
-test('T11072gcc', [extra_files(['A.c', 'T11072.hs']),
-                   expect_broken(18718),
-                   unless(doing_ghci, skip), unless(opsys('mingw32'), skip)],
+test('T11072gcc', [extra_files(['A.c', 'T11072.hs', 'bin_impl_gcc/']),
+                   unless(doing_ghci, skip), unless(opsys('mingw32'), skip),
+                   unless(arch('x86_64'), skip)],
      makefile_test, ['compile_libAS_impl_gcc'])
 
 test('T11072msvc', [extra_files(['A.c', 'T11072.hs', 'libAS.def', 'i686/', 'x86_64/']),



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/f296eae73fbca3c53a510fec667e2732a009bc1f

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/f296eae73fbca3c53a510fec667e2732a009bc1f
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221204/ac207856/attachment-0001.html>


More information about the ghc-commits mailing list