[Git][ghc/ghc][master] Linker: add support for extra built-in symbols (#25155)
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Mon Sep 16 14:33:29 UTC 2024
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
49ac3fb8 by Sylvain Henry at 2024-09-16T10:33:01-04:00
Linker: add support for extra built-in symbols (#25155)
See added Note [Extra RTS symbols] and new user guide entry.
Co-authored-by: Hamish Mackenzie <Hamish.K.Mackenzie at gmail.com>
Co-authored-by: Moritz Angermann <moritz.angermann at gmail.com>
- - - - -
13 changed files:
- docs/users_guide/ghci.rst
- rts/Linker.c
- rts/LinkerInternals.h
- rts/RtsSymbols.c
- rts/RtsSymbols.h
- rts/rts.cabal
- testsuite/tests/ghci/linking/Makefile
- + testsuite/tests/ghci/linking/T25155.hs
- + testsuite/tests/ghci/linking/T25155.stdout
- + testsuite/tests/ghci/linking/T25155_TH.hs
- + testsuite/tests/ghci/linking/T25155_iserv.hs
- + testsuite/tests/ghci/linking/T25155_iserv_main.c
- testsuite/tests/ghci/linking/all.T
Changes:
=====================================
docs/users_guide/ghci.rst
=====================================
@@ -3487,10 +3487,48 @@ dynamically-linked) from GHC itself. So for example:
This feature is experimental in GHC 8.0.x, but it may become the
default in future releases.
+Building an external interpreter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The source code for the external interpreter program is in `utils/iserv`. It is
+very simple because most of the heavy lifting code is from the `ghci` library.
+
+It is sometimes desirable to customize the external interpreter program. For
+example, it is possible to add symbols to the RTS linker used by the external
+interpreter. This is done simply at link time by linking an additional `.o` that
+defines a `rtsExtraSyms` function returning the extra symbols. Doing it this way
+avoids the need to recompile the RTS with symbols added to its built-in list.
+A typical C file would look like this:
+
+.. code:: C
+
+ #include <RtsSymbols.h>
+
+ #define CODE_SYM(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
+ (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
+ #define DATA_SYM(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
+ (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
+
+ RtsSymbolVal my_iserv_syms[] = {
+ CODE_SYM(malloc)
+ CODE_SYM(getauxval)
+ CODE_SYM(posix_spawn_file_actions_init)
+ ...
+ { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
+ };
+
+ RtsSymbolVal* rtsExtraSyms() {
+ return my_iserv_syms;
+ }
+
+For more information, read the Note [Extra RTS symbols] in the RTS.
+
+
+
.. _external-interpreter-proxy:
Running the interpreter on a different host
--------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When using the flag :ghc-flag:`-fexternal-interpreter` GHC will
spawn and communicate with the separate process using pipes. There
@@ -3520,6 +3558,7 @@ will be executed on the target. As such packages like ``git-embed``,
``file-embed`` and others might not behave as expected if the target
and host do not share the same filesystem.
+
.. _building-ghci-libraries:
Building GHCi libraries
=====================================
rts/Linker.c
=====================================
@@ -472,14 +472,35 @@ initLinker_ (int retain_cafs)
symhash = allocStrHashTable();
/* populate the symbol table with stuff from the RTS */
+ IF_DEBUG(linker, debugBelch("populating linker symbol table with built-in RTS symbols\n"));
for (const RtsSymbolVal *sym = rtsSyms; sym->lbl != NULL; sym++) {
+ IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
symhash, sym->lbl, sym->addr,
sym->strength, sym->type, NULL)) {
barf("ghciInsertSymbolTable failed");
}
- IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
}
+ IF_DEBUG(linker, debugBelch("done with built-in RTS symbols\n"));
+
+ /* Add extra symbols. rtsExtraSyms() is a weakly defined symbol in the rts,
+ * that can be overrided by linking in an object with a corresponding
+ * definition later. This is useful to build an external-interpreter or some
+ * other process with extra symbols (mostly libc, or similar).
+ * See Note [Extra RTS symbols]
+ */
+ IF_DEBUG(linker, debugBelch("populating linker symbol table with extra RTS symbols\n"));
+ if(rtsExtraSyms && rtsExtraSyms() != NULL) {
+ for(RtsSymbolVal *sym = rtsExtraSyms(); sym->lbl != NULL; sym++) {
+ IF_DEBUG(linker, debugBelch("initLinker: inserting extra rts symbol %s, %p\n", sym->lbl, sym->addr));
+ if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
+ symhash, sym->lbl, sym->addr,
+ sym->strength, sym->type, NULL)) {
+ barf("ghciInsertSymbolTable failed");
+ }
+ }
+ }
+ IF_DEBUG(linker, debugBelch("done with extra RTS symbols\n"));
// Redirect newCAF to newRetainedCAF if retain_cafs is true.
if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
=====================================
rts/LinkerInternals.h
=====================================
@@ -9,6 +9,7 @@
#pragma once
#include "Rts.h"
+#include "RtsSymbols.h"
#include "Hash.h"
#include "linker/M32Alloc.h"
@@ -33,8 +34,6 @@ void printLoadedObjects(void);
# define OBJFORMAT_WASM32
#endif
-typedef void SymbolAddr;
-typedef char SymbolName;
typedef struct _ObjectCode ObjectCode;
typedef struct _Section Section;
@@ -53,22 +52,6 @@ typedef struct _Section Section;
* to recompile their program as position-independent.
*/
-/* What kind of thing a symbol identifies. We need to know this to determine how
- * to process overflowing relocations. See Note [Processing overflowed relocations].
- * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined
- * with the other values. */
-typedef enum _SymType {
- SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
- SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
- SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
- SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
- however if a duplicate is found with a mismatching
- SymType then discard this one. */
- SYM_TYPE_HIDDEN = 1 << 4, /* the symbol is hidden and should not be exported */
-
-} SymType;
-
-
#if defined(OBJFORMAT_ELF)
# include "linker/ElfTypes.h"
#elif defined(OBJFORMAT_PEi386)
@@ -425,12 +408,6 @@ typedef void (*init_t) (int argc, char **argv, char **env);
/* Type of a finalizer */
typedef void (*fini_t) (void);
-typedef enum _SymStrength {
- STRENGTH_NORMAL,
- STRENGTH_WEAK,
- STRENGTH_STRONG,
-} SymStrength;
-
/* SymbolInfo tracks a symbol's address, the object code from which
it originated, and whether or not it's weak.
=====================================
rts/RtsSymbols.c
=====================================
@@ -1090,3 +1090,104 @@ RtsSymbolVal rtsSyms[] = {
SymI_HasDataProto(nonmoving_write_barrier_enabled)
{ 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
};
+
+
+// Note [Extra RTS symbols]
+// ~~~~~~~~~~~~~~~~~~~~~~~~
+// How does the RTS linker know the location of the exposed RTS functions in the
+// current Haskell process? Especially if it is statically linked, there is no
+// dynamic table containing a symbol->address mapping. What we do is that we
+// compile the following static "rtsSyms" array:
+//
+// RtsSymbolVar rtsSyms[] = {
+// { "rts_foo", &rts_foo, ...}
+// ...
+// };
+//
+// This array associates a predefined set of symbol names (e.g. "rts_foo") to
+// their address (e.g. &rts_foo) as determined by the linker (static or
+// dynamic).
+//
+// Note that also use this mechanism for libraries other than the RTS, e.g.
+// libc.
+//
+// Why do we need this mapping?
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+// Suppose we have a Haskell program statically linked against the "libfoo"
+// library and using the "foo" symbol. If we try to dynamically link some code
+// using "foo" with the RTS linker, we will load another instance of "libfoo" in
+// memory and use the "foo" symbol in it for the dynamically loaded code.
+// However the statically linked code still uses the statically linked instance
+// of libfoo and its foo symbol.
+//
+// This isn't a problem as long a we use side-effect free functions. However
+// both instances of a "foo" function using global variables may use different
+// global variables!
+//
+// This was a real issue with programs linked against the ghc library and
+// also loading it dynamically: the GHC lib uses global variables (e.g. for the
+// unique counter), and those weren't shared... This wasn't solved by adding
+// GHC's global variables to the rtsSyms list but it could have been (instead a
+// generic mechanism to explicit manage Haskell global variables through the RTS
+// was added and used). However for other libraries, especially foreign ones,
+// the issue remains!
+//
+// Which symbols should go into the list?
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+// Ideally all problematic symbols (wrt to the double loading issue mentioned
+// above) should go into the list. However:
+// - we can't easily predict all the symbols that will be problematic/needed
+// - symbols in the list prevent some dead code elimination! If symbol "foo"
+// is in the list, even if we never load any code using "foo", we'll carry the
+// code for "foo" and its dependencies.
+//
+// The second point is particularly problematic if some symbols are only needed
+// to run Template Haskell splices at compilation time: we need them in the RTS
+// used by the interpreter but not in the RTS of the target program!
+//
+// Extra symbols
+// ~~~~~~~~~~~~~
+// Since #25155 we also support specifying a second list of "extra" symbols
+// which is empty by default. By overriding the weak `rtsExtraSyms` function,
+// build tools can extend the list of symbols linked into the RTS. This feature
+// coupled with the external-interpreter feature allows specific external
+// interpreter programs to be built easily with extra symbols supported.
+//
+// As a concrete example, one can build an `iserv-proxy-interpreter` program
+// with the following additional Cabal stanza (see [1]):
+//
+// if os(linux) && arch(aarch64)
+// c-sources: cbits/symbols.aarch64-musl.c
+//
+// Where the C file contains (see [2] and [3]):
+//
+// #include <RtsSymbols.h>
+//
+// #define CODE_SYM(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
+// #define DATA_SYM(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
+//
+// RtsSymbolVal my_iserv_syms[] = {
+// CODE_SYM(malloc)
+// CODE_SYM(getauxval)
+// CODE_SYM(posix_spawn_file_actions_init)
+// ...
+// { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
+// };
+//
+// RtsSymbolVal* rtsExtraSyms() {
+// return my_iserv_syms;
+// }
+//
+// [1] https://github.com/stable-haskell/iserv-proxy/blob/2ed34002247213fc435d0062350b91bab920626e/iserv-proxy.cabal#L110-L111
+// [2] https://github.com/stable-haskell/iserv-proxy/blob/2ed34002247213fc435d0062350b91bab920626e/cbits/symbols.aarch64-musl.c
+// [3] https://github.com/stable-haskell/iserv-proxy/blob/2ed34002247213fc435d0062350b91bab920626e/cbits/symbols.aarch64-musl.h
+//
+
+static RtsSymbolVal default_extra_syms[] = {
+ { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
+};
+
+/* Default empty extra RTS symbols. See Note[Extra RTS symbols] */
+RtsSymbolVal* __attribute__((weak)) rtsExtraSyms(void) {
+ return default_extra_syms;
+}
=====================================
rts/RtsSymbols.h
=====================================
@@ -9,8 +9,6 @@
#pragma once
#include "ghcautoconf.h"
-#include "LinkerInternals.h"
-#include <stdbool.h>
#if defined(LEADING_UNDERSCORE)
#define MAYBE_LEADING_UNDERSCORE_STR(s) ("_" s)
@@ -18,6 +16,29 @@
#define MAYBE_LEADING_UNDERSCORE_STR(s) (s)
#endif
+typedef void SymbolAddr;
+typedef char SymbolName;
+
+/* What kind of thing a symbol identifies. We need to know this to determine how
+ * to process overflowing relocations. See Note [Processing overflowed relocations].
+ * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined
+ * with the other values. */
+typedef enum _SymType {
+ SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
+ SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
+ SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
+ SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
+ however if a duplicate is found with a mismatching
+ SymType then discard this one. */
+ SYM_TYPE_HIDDEN = 1 << 4, /* the symbol is hidden and should not be exported */
+} SymType;
+
+typedef enum _SymStrength {
+ STRENGTH_NORMAL,
+ STRENGTH_WEAK,
+ STRENGTH_STRONG,
+} SymStrength;
+
typedef struct _RtsSymbolVal {
const SymbolName* lbl;
SymbolAddr* addr;
@@ -27,6 +48,8 @@ typedef struct _RtsSymbolVal {
extern RtsSymbolVal rtsSyms[];
+extern RtsSymbolVal* __attribute__((weak)) rtsExtraSyms(void);
+
/* See Note [_iob_func symbol]. */
#if defined(mingw32_HOST_OS)
extern const void* __rts_iob_func;
=====================================
rts/rts.cabal
=====================================
@@ -250,7 +250,7 @@ library
include-dirs: include
includes: Rts.h
autogen-includes: ghcautoconf.h ghcplatform.h
- install-includes: Cmm.h HsFFI.h MachDeps.h Rts.h RtsAPI.h Stg.h
+ install-includes: Cmm.h HsFFI.h MachDeps.h Rts.h RtsAPI.h RtsSymbols.h Stg.h
ghcautoconf.h ghcconfig.h ghcplatform.h ghcversion.h
-- ^ from include
DerivedConstants.h
=====================================
testsuite/tests/ghci/linking/Makefile
=====================================
@@ -145,3 +145,8 @@ T15729:
big-obj:
'$(TEST_CC)' -c -Wa,-mbig-obj big-obj-c.c -o big-obj-c.o
echo "main" | '$(TEST_HC)' $(TEST_HC_OPTS_INTERACTIVE) big-obj-c.o big-obj.hs
+
+.PHONY: T25155
+T25155:
+ '$(TEST_HC)' T25155_iserv_main.c T25155_iserv.hs -package ghci -no-hs-main -v0 -o T25155_iserv
+ '$(TEST_HC)' -fexternal-interpreter -pgmi ./T25155_iserv -v0 T25155.hs
=====================================
testsuite/tests/ghci/linking/T25155.hs
=====================================
@@ -0,0 +1,8 @@
+{-# LANGUAGE TemplateHaskell #-}
+
+module T25155 where
+
+import Language.Haskell.TH (runIO)
+import T25155_TH
+
+$(runIO (foobar 7) >> pure [])
=====================================
testsuite/tests/ghci/linking/T25155.stdout
=====================================
@@ -0,0 +1 @@
+Called foobar with 7
=====================================
testsuite/tests/ghci/linking/T25155_TH.hs
=====================================
@@ -0,0 +1,3 @@
+module T25155_TH (foobar) where
+
+foreign import ccall "foobar" foobar :: Int -> IO Int
=====================================
testsuite/tests/ghci/linking/T25155_iserv.hs
=====================================
@@ -0,0 +1,6 @@
+module Main (main) where
+
+import GHCi.Server (defaultServer)
+
+main :: IO ()
+main = defaultServer
=====================================
testsuite/tests/ghci/linking/T25155_iserv_main.c
=====================================
@@ -0,0 +1,41 @@
+#include <ghcversion.h>
+#include <rts/PosixSource.h>
+#include <Rts.h>
+#include <RtsSymbols.h>
+#include <HsFFI.h>
+
+int main (int argc, char *argv[])
+{
+ RtsConfig conf = defaultRtsConfig;
+
+ // We never know what symbols GHC will look up in the future, so
+ // we must retain CAFs for running interpreted code.
+ conf.keep_cafs = 1;
+
+ conf.rts_opts_enabled = RtsOptsAll;
+ extern StgClosure ZCMain_main_closure;
+ hs_main(argc, argv, &ZCMain_main_closure, conf);
+}
+
+// Inject "foobar" in the linker symbol table.
+//
+// The target object isn't compiled against any object defining foobar, yet we
+// can use the FFI call to foobar in a TH splice.
+
+#define SymI_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
+ (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
+
+
+void foobar(int x) {
+ printf("Called foobar with %d\n", x);
+}
+
+RtsSymbolVal extra_syms[] = {
+ SymI_HasProto(foobar)
+ { NULL, NULL, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
+ };
+
+/* Extend the list of built-in symbols */
+RtsSymbolVal* rtsExtraSyms() {
+ return extra_syms;
+}
=====================================
testsuite/tests/ghci/linking/all.T
=====================================
@@ -76,3 +76,4 @@ test('big-obj', [extra_files(['big-obj-c.c', 'big-obj.hs']),
unless(doing_ghci, skip), unless(opsys('mingw32'), skip)],
makefile_test, ['big-obj'])
+test('T25155', [req_c, req_th, req_interp, extra_files(['T25155_iserv_main.c', 'T25155_iserv.hs', 'T25155_TH.hs'])], makefile_test, [])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/49ac3fb8cabc0ed584f3261296feb656d32a9ae7
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/49ac3fb8cabc0ed584f3261296feb656d32a9ae7
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240916/fd7009f5/attachment-0001.html>
More information about the ghc-commits
mailing list