[Git][ghc/ghc][wip/T22166] linker: Fix BFD import libraries
Tamar Christina (@Phyx)
gitlab at gitlab.haskell.org
Sun Dec 4 15:59:03 UTC 2022
Tamar Christina pushed to branch wip/T22166 at Glasgow Haskell Compiler / GHC
Commits:
f296eae7 by Tamar Christina at 2022-12-04T15:55:43+00:00
linker: Fix BFD import libraries
- - - - -
5 changed files:
- rts/Linker.c
- rts/LinkerInternals.h
- rts/linker/PEi386.c
- testsuite/tests/ghci/linking/dyn/Makefile
- testsuite/tests/ghci/linking/dyn/all.T
Changes:
=====================================
rts/Linker.c
=====================================
@@ -228,7 +228,7 @@ static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
static const char *
symbolTypeString (SymType type)
{
- switch (type) {
+ switch (type & ~SYM_TYPE_DUP_DISCARD) {
case SYM_TYPE_CODE: return "code";
case SYM_TYPE_DATA: return "data";
case SYM_TYPE_INDIRECT_DATA: return "indirect-data";
@@ -277,14 +277,19 @@ int ghciInsertSymbolTable(
insertStrHashTable(table, key, pinfo);
return 1;
}
- else if (pinfo->type != type)
+ else if (pinfo->type ^ type)
{
- debugBelch("Symbol type mismatch.\n");
- debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
- key, obj_name, symbolTypeString(type));
- debugBelch(" yet was defined by %" PATH_FMT " to be a %s symbol.\n",
- pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
- symbolTypeString(pinfo->type));
+ /* We were asked to discard the symbol on duplicates, do so quietly. */
+ if (!(type & SYM_TYPE_DUP_DISCARD))
+ {
+ DebugBreak ();
+ debugBelch("Symbol type mismatch.\n");
+ debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
+ key, obj_name, symbolTypeString(type));
+ debugBelch(" yet was defined by %" PATH_FMT " to be a %s symbol.\n",
+ pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
+ symbolTypeString(pinfo->type));
+ }
return 1;
}
else if (pinfo->strength == STRENGTH_STRONG)
=====================================
rts/LinkerInternals.h
=====================================
@@ -54,9 +54,12 @@ typedef struct _Section Section;
/* What kind of thing a symbol identifies. We need to know this to determine how
* to process overflowing relocations. See Note [Processing overflowed relocations]. */
typedef enum _SymType {
- SYM_TYPE_CODE, /* the symbol is a function and can be relocated via a jump island */
- SYM_TYPE_DATA, /* the symbol is data */
- SYM_TYPE_INDIRECT_DATA, /* see Note [_iob_func symbol] */
+ SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
+ SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
+ SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
+ SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
+ however if a duplicate is found with a mismatching
+ SymType then discard this one. */
} SymType;
=====================================
rts/linker/PEi386.c
=====================================
@@ -261,6 +261,54 @@
.asciiz "libfoo_data"
+ Note [GHC Linking model and import libraries]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The above describes how import libraries work for static linking.
+ Fundamentally this does not apply to Dynamic linknig as we do in GHC.
+ The issue is two folds:
+
+ 1. In the linking model above it is expected that the .idata sections be
+ materialized into PLTs during linking. However in GHC we never create
+ PLTs, but have out own mechanism for this which is the jump island
+ machinery. This is required for efficiency. For one materializing the
+ idata sections would result in wasting pages. We'd use one page for
+ every ~100 bytes. This is extremely wasteful and also fragments the
+ memory. Secondly the dynamic linker is lazy. We only perform the final
+ loading if the symbol is used, however with an import library we can
+ discard the actual OC immediately after reading it. This prevents us from
+ keeping ~1k in memory per symbol for no reason.
+
+ 2. GHC itself does not observ symbol visibility correctly during NGC. This
+ in itself isn't an academic exercise. The issue stems from GHC using one
+ mechanism for providing two incompatible linking modes:
+ a) The first mode is generating Haskell shared libraries which are
+ intended to be used by other Haskell code. This requires us to
+ export the info, data and closures. For this GHC just re-exports
+ all symbols. But it doesn't correcly mark data/code. Symbol
+ visibility is overwritten by telling the linker to export all
+ symbols.
+ b) The second code is producing code that's supposed to be call-able
+ through a C insterface. This in reality does not require the
+ export of closures and info tables. But also does not require the
+ inclusion of the RTS inside the DLL. Hover this is done today
+ because we don't properly have the RTS as a dynamic library.
+ i.e. GHC does not only export symbols denoted by foreign export.
+ Also GHC should depend on an RTS library, but at the moment it
+ cannot because of TNTC is incompatible with dynamic linking.
+
+ These two issues mean that for GHC we need to take a different approach
+ to handling import libraries. For normal C libraries we have proper
+ differentiation between CODE and DATA. For GHC produced import libraries
+ we do not. As such the SYM_TYPE_DUP_DISCARD tells the linker that if a
+ duplictae symbol is found, and we were going to discard it anyway, just do
+ so quitely. This works because the RTS symbols themselves are provided by
+ the currently loaded RTS as built-in symbols.
+
+ Secondly we cannot rely on a text symbol being available. As such we
+ should only depend on the symbols as definited in the .idata sections,
+ otherwise we would not be able to correctly link against GHC produced
+ import libraries.
+
Note [Memory allocation]
~~~~~~~~~~~~~~~~~~~~~~~~
The loading of an object begins in `preloadObjectFile`, which allocates a buffer,
@@ -1700,7 +1748,10 @@ ocGetNames_PEi386 ( ObjectCode* oc )
if ( secNumber != IMAGE_SYM_UNDEFINED
&& secNumber > 0
&& section
- && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY) {
+ /* Skip all BFD import sections. */
+ && section->kind != SECTIONKIND_IMPORT
+ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY
+ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) {
/* This symbol is global and defined, viz, exported */
/* for IMAGE_SYMCLASS_EXTERNAL
&& !IMAGE_SYM_UNDEFINED,
@@ -1733,12 +1784,49 @@ ocGetNames_PEi386 ( ObjectCode* oc )
IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue));
}
else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) {
- setImportSymbol(oc, sname);
+ /* Disassembly of section .idata$5:
+
+ 0000000000000000 <__imp_Insert>:
+ ...
+ 0: IMAGE_REL_AMD64_ADDR32NB .idata$6
+
+ The first two bytes contain the ordinal of the function
+ in the format of lowpart highpart. The two bytes combined
+ for the total range of 16 bits which is the function export limit
+ of DLLs. See note [GHC Linking model and import libraries]. */
+ sname = (SymbolName*)section->start+2;
+ COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
+ addr = get_sym_name( getSymShortName (info, sym), oc);
+
+ IF_DEBUG(linker,
+ debugBelch("addImportSymbol `%s' => `%s'\n",
+ sname, (char*)addr));
+ /* We're going to free the any data associated with the import
+ library without copying the sections. So we have to duplicate
+ the symbol name and values before the pointers become invalid. */
+ sname = strdup (sname);
+ addr = strdup (addr);
+ type = has_code_section ? SYM_TYPE_CODE : SYM_TYPE_DATA;
+ type |= SYM_TYPE_DUP_DISCARD;
+ if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+ addr, false, type, oc)) {
+ releaseOcInfo (oc);
+ stgFree (oc->image);
+ oc->image = NULL;
+ return false;
+ }
+ setImportSymbol (oc, sname);
+
+ /* Don't process this oc any further. Just exit. */
+ oc->n_symbols = 0;
+ oc->symbols = NULL;
+ stgFree (oc->image);
+ oc->image = NULL;
+ releaseOcInfo (oc);
// There is nothing that we need to resolve in this object since we
// will never call the import stubs in its text section
oc->status = OBJECT_DONT_RESOLVE;
-
- IF_DEBUG(linker_verbose, debugBelch("import symbol %s\n", sname));
+ return true;
}
else if (secNumber > 0
&& section
=====================================
testsuite/tests/ghci/linking/dyn/Makefile
=====================================
@@ -88,10 +88,6 @@ compile_libAB_dyn:
.PHONY: compile_libAS_impl_gcc
compile_libAS_impl_gcc:
- rm -rf bin_impl_gcc
- mkdir bin_impl_gcc
- '$(TEST_HC)' $(MY_TEST_HC_OPTS) -odir "bin_impl_gcc" -shared A.c -o "bin_impl_gcc/$(call DLL,ASimpL)"
- mv bin_impl_gcc/libASimpL.dll.a bin_impl_gcc/libASx.dll.a
echo "main" | '$(TEST_HC)' $(TEST_HC_OPTS_INTERACTIVE) T11072.hs -lASx -L./bin_impl_gcc
.PHONY: compile_libAS_impl_msvc
=====================================
testsuite/tests/ghci/linking/dyn/all.T
=====================================
@@ -33,9 +33,9 @@ test('T10458',
extra_hc_opts('-L"$PWD/T10458dir" -lAS')],
ghci_script, ['T10458.script'])
-test('T11072gcc', [extra_files(['A.c', 'T11072.hs']),
- expect_broken(18718),
- unless(doing_ghci, skip), unless(opsys('mingw32'), skip)],
+test('T11072gcc', [extra_files(['A.c', 'T11072.hs', 'bin_impl_gcc/']),
+ unless(doing_ghci, skip), unless(opsys('mingw32'), skip),
+ unless(arch('x86_64'), skip)],
makefile_test, ['compile_libAS_impl_gcc'])
test('T11072msvc', [extra_files(['A.c', 'T11072.hs', 'libAS.def', 'i686/', 'x86_64/']),
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/f296eae73fbca3c53a510fec667e2732a009bc1f
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/f296eae73fbca3c53a510fec667e2732a009bc1f
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221204/ac207856/attachment-0001.html>
More information about the ghc-commits
mailing list