[Git][ghc/ghc][wip/T22166] linker: Fix BFD import libraries
Tamar Christina (@Phyx)
gitlab at gitlab.haskell.org
Tue Jan 3 23:04:26 UTC 2023
Tamar Christina pushed to branch wip/T22166 at Glasgow Haskell Compiler / GHC
Commits:
4f9eecf5 by Tamar Christina at 2023-01-03T23:03:12+00:00
linker: Fix BFD import libraries
This commit fixes the BFD style import library support in the runtime
linker. This was accidentally broken during the refactoring to clang
and went unnoticed because clang itself is unable to generate the BFD
style import libraries.
With this change we can not link against both GCC or Clang produced
libraries again and intermix code produced by both compilers.
- - - - -
10 changed files:
- hadrian/src/Settings/Packages.hs
- libraries/base/System/Posix/Internals.hs
- rts/Linker.c
- rts/LinkerInternals.h
- rts/linker/PEi386.c
- testsuite/tests/ghci/linking/dyn/Makefile
- testsuite/tests/ghci/linking/dyn/all.T
- + testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll
- + testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a
- testsuite/tests/rts/all.T
Changes:
=====================================
hadrian/src/Settings/Packages.hs
=====================================
@@ -404,7 +404,7 @@ rtsPackageArgs = package rts ? do
, builder HsCpp ? pure
[ "-DTOP=" ++ show top ]
- , builder HsCpp ? flag UseLibdw ? arg "-DUSE_LIBDW"
+ , builder HsCpp ? flag UseLibdw ? arg "-DUSE_LIBDW" ]
-- Compile various performance-critical pieces *without* -fPIC -dynamic
-- even when building a shared library. If we don't do this, then the
=====================================
libraries/base/System/Posix/Internals.hs
=====================================
@@ -904,6 +904,7 @@ foreign import capi unsafe "stdio.h value SEEK_END" sEEK_END :: CInt
{-
Note [Windows types]
+~~~~~~~~~~~~~~~~~~~~
Windows' _read and _write have types that differ from POSIX. They take an
unsigned int for length and return a signed int where POSIX uses size_t and
=====================================
rts/Linker.c
=====================================
@@ -135,8 +135,7 @@ extern void iconv();
This is to enable lazy loading of symbols. Eager loading is problematic
as it means that all symbols must be available, even those which we will
never use. This is especially painful on Windows, where the number of
- libraries required to link things like mingwex (TODO: We no longer depend
- on mingwex, so think of a different example here) grows to be quite high.
+ libraries required to link things like QT or WxWidgets grows to be quite high.
We proceed through these stages as follows,
@@ -194,8 +193,7 @@ extern void iconv();
1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
then we don't need to load libB. This means the dependency chain for libraries
- such as mingw32 and mingwex (TODO: We no longer depend on mingwex, so think of
- a different example here) can be broken down.
+ such as ucrt can be broken down.
2) The number of duplicate symbols, since now only symbols that are
true duplicates will display the error.
@@ -228,7 +226,7 @@ static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
static const char *
symbolTypeString (SymType type)
{
- switch (type) {
+ switch (type & ~SYM_TYPE_DUP_DISCARD) {
case SYM_TYPE_CODE: return "code";
case SYM_TYPE_DATA: return "data";
case SYM_TYPE_INDIRECT_DATA: return "indirect-data";
@@ -277,14 +275,18 @@ int ghciInsertSymbolTable(
insertStrHashTable(table, key, pinfo);
return 1;
}
- else if (pinfo->type != type)
+ else if (pinfo->type ^ type)
{
- debugBelch("Symbol type mismatch.\n");
- debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
- key, obj_name, symbolTypeString(type));
- debugBelch(" yet was defined by %" PATH_FMT " to be a %s symbol.\n",
- pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
- symbolTypeString(pinfo->type));
+ /* We were asked to discard the symbol on duplicates, do so quietly. */
+ if (!(type & SYM_TYPE_DUP_DISCARD))
+ {
+ debugBelch("Symbol type mismatch.\n");
+ debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
+ key, obj_name, symbolTypeString(type));
+ debugBelch(" yet was defined by %" PATH_FMT " to be a %s symbol.\n",
+ pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
+ symbolTypeString(pinfo->type));
+ }
return 1;
}
else if (pinfo->strength == STRENGTH_STRONG)
=====================================
rts/LinkerInternals.h
=====================================
@@ -54,11 +54,16 @@ typedef struct _Section Section;
*/
/* What kind of thing a symbol identifies. We need to know this to determine how
- * to process overflowing relocations. See Note [Processing overflowed relocations]. */
+ * to process overflowing relocations. See Note [Processing overflowed relocations].
+ * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined
+ * with the other values. */
typedef enum _SymType {
- SYM_TYPE_CODE, /* the symbol is a function and can be relocated via a jump island */
- SYM_TYPE_DATA, /* the symbol is data */
- SYM_TYPE_INDIRECT_DATA, /* see Note [_iob_func symbol] */
+ SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
+ SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
+ SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
+ SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
+ however if a duplicate is found with a mismatching
+ SymType then discard this one. */
} SymType;
=====================================
rts/linker/PEi386.c
=====================================
@@ -261,6 +261,54 @@
.asciiz "libfoo_data"
+ Note [GHC Linking model and import libraries]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The above describes how import libraries work for static linking.
+ Fundamentally this does not apply to dynamic linking as we do in GHC.
+ The issue is two-folds:
+
+ 1. In the linking model above it is expected that the .idata sections be
+ materialized into PLTs during linking. However in GHC we never create
+ PLTs, but have out own mechanism for this which is the jump island
+ machinery. This is required for efficiency. For one materializing the
+ .idata sections would result in wasting pages. We'd use one page for
+ every ~100 bytes. This is extremely wasteful and also fragments the
+ memory. Secondly the dynamic linker is lazy. We only perform the final
+ loading if the symbol is used, however with an import library we can
+ discard the actual OC immediately after reading it. This prevents us from
+ keeping ~1k in memory per symbol for no reason.
+
+ 2. GHC itself does not observe symbol visibility correctly during NGC. This
+ in itself isn't an academic exercise. The issue stems from GHC using one
+ mechanism for providing two incompatible linking modes:
+ a) The first mode is generating Haskell shared libraries which are
+ intended to be used by other Haskell code. This requires us to
+ export the info, data and closures. For this GHC just re-exports
+ all symbols. But it doesn't correcly mark data/code. Symbol
+ visibility is overwritten by telling the linker to export all
+ symbols.
+ b) The second code is producing code that's supposed to be call-able
+ through a C insterface. This in reality does not require the
+ export of closures and info tables. But also does not require the
+ inclusion of the RTS inside the DLL. Hover this is done today
+ because we don't properly have the RTS as a dynamic library.
+ i.e. GHC does not only export symbols denoted by foreign export.
+ Also GHC should depend on an RTS library, but at the moment it
+ cannot because of TNTC is incompatible with dynamic linking.
+
+ These two issues mean that for GHC we need to take a different approach
+ to handling import libraries. For normal C libraries we have proper
+ differentiation between CODE and DATA. For GHC produced import libraries
+ we do not. As such the SYM_TYPE_DUP_DISCARD tells the linker that if a
+ duplicate symbol is found, and we were going to discard it anyway, just do
+ so quitely. This works because the RTS symbols themselves are provided by
+ the currently loaded RTS as built-in symbols.
+
+ Secondly we cannot rely on a text symbol being available. As such we
+ should only depend on the symbols as defined in the .idata sections,
+ otherwise we would not be able to correctly link against GHC produced
+ import libraries.
+
Note [Memory allocation]
~~~~~~~~~~~~~~~~~~~~~~~~
The loading of an object begins in `preloadObjectFile`, which allocates a buffer,
@@ -1658,7 +1706,10 @@ ocGetNames_PEi386 ( ObjectCode* oc )
if ( secNumber != IMAGE_SYM_UNDEFINED
&& secNumber > 0
&& section
- && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY) {
+ /* Skip all BFD import sections. */
+ && section->kind != SECTIONKIND_IMPORT
+ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY
+ && section->kind != SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD) {
/* This symbol is global and defined, viz, exported */
/* for IMAGE_SYMCLASS_EXTERNAL
&& !IMAGE_SYM_UNDEFINED,
@@ -1691,12 +1742,49 @@ ocGetNames_PEi386 ( ObjectCode* oc )
IF_DEBUG(linker_verbose, debugBelch("bss symbol @ %p %u\n", addr, symValue));
}
else if (section && section->kind == SECTIONKIND_BFD_IMPORT_LIBRARY) {
- setImportSymbol(oc, sname);
+ /* Disassembly of section .idata$5:
+
+ 0000000000000000 <__imp_Insert>:
+ ...
+ 0: IMAGE_REL_AMD64_ADDR32NB .idata$6
+
+ The first two bytes contain the ordinal of the function
+ in the format of lowpart highpart. The two bytes combined
+ for the total range of 16 bits which is the function export limit
+ of DLLs. See note [GHC Linking model and import libraries]. */
+ sname = (SymbolName*)section->start+2;
+ COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
+ addr = get_sym_name( getSymShortName (info, sym), oc);
+
+ IF_DEBUG(linker,
+ debugBelch("addImportSymbol `%s' => `%s'\n",
+ sname, (char*)addr));
+ /* We're going to free the any data associated with the import
+ library without copying the sections. So we have to duplicate
+ the symbol name and values before the pointers become invalid. */
+ sname = strdup (sname);
+ addr = strdup (addr);
+ type = has_code_section ? SYM_TYPE_CODE : SYM_TYPE_DATA;
+ type |= SYM_TYPE_DUP_DISCARD;
+ if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
+ addr, false, type, oc)) {
+ releaseOcInfo (oc);
+ stgFree (oc->image);
+ oc->image = NULL;
+ return false;
+ }
+ setImportSymbol (oc, sname);
+
+ /* Don't process this oc any further. Just exit. */
+ oc->n_symbols = 0;
+ oc->symbols = NULL;
+ stgFree (oc->image);
+ oc->image = NULL;
+ releaseOcInfo (oc);
// There is nothing that we need to resolve in this object since we
// will never call the import stubs in its text section
oc->status = OBJECT_DONT_RESOLVE;
-
- IF_DEBUG(linker_verbose, debugBelch("import symbol %s\n", sname));
+ return true;
}
else if (secNumber > 0
&& section
=====================================
testsuite/tests/ghci/linking/dyn/Makefile
=====================================
@@ -88,10 +88,6 @@ compile_libAB_dyn:
.PHONY: compile_libAS_impl_gcc
compile_libAS_impl_gcc:
- rm -rf bin_impl_gcc
- mkdir bin_impl_gcc
- '$(TEST_HC)' $(MY_TEST_HC_OPTS) -odir "bin_impl_gcc" -shared A.c -o "bin_impl_gcc/$(call DLL,ASimpL)"
- mv bin_impl_gcc/libASimpL.dll.a bin_impl_gcc/libASx.dll.a
echo "main" | '$(TEST_HC)' $(TEST_HC_OPTS_INTERACTIVE) T11072.hs -lASx -L./bin_impl_gcc
.PHONY: compile_libAS_impl_msvc
=====================================
testsuite/tests/ghci/linking/dyn/all.T
=====================================
@@ -41,9 +41,9 @@ test('T10458',
extra_hc_opts('-L"$PWD/T10458dir" -lAS')],
ghci_script, ['T10458.script'])
-test('T11072gcc', [extra_files(['A.c', 'T11072.hs']),
- expect_broken(18718),
- unless(doing_ghci, skip), unless(opsys('mingw32'), skip)],
+test('T11072gcc', [extra_files(['A.c', 'T11072.hs', 'bin_impl_gcc/']),
+ unless(doing_ghci, skip), unless(opsys('mingw32'), skip),
+ unless(arch('x86_64'), skip)],
makefile_test, ['compile_libAS_impl_gcc'])
test('T11072msvc', [extra_files(['A.c', 'T11072.hs', 'libAS.def', 'i686/', 'x86_64/']),
=====================================
testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll
=====================================
Binary files /dev/null and b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/ASimpL.dll differ
=====================================
testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a
=====================================
Binary files /dev/null and b/testsuite/tests/ghci/linking/dyn/bin_impl_gcc/libASx.dll.a differ
=====================================
testsuite/tests/rts/all.T
=====================================
@@ -410,7 +410,7 @@ test('T10296b', [only_ways(['threaded2'])], compile_and_run, [''])
test('numa001', [ extra_run_opts('8'), unless(unregisterised(), extra_ways(['debug_numa'])) ]
, compile_and_run, [''])
-test('T12497', [ unless(opsys('mingw32'), skip)
+test('T12497', [ unless(opsys('mingw32'), skip), expect_broken(22694)
],
makefile_test, ['T12497'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4f9eecf55fb9c10334478859dd87da76481252e9
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4f9eecf55fb9c10334478859dd87da76481252e9
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230103/88738258/attachment-0001.html>
More information about the ghc-commits
mailing list