[Git][ghc/ghc][master] Linker: add support for extra built-in symbols (#25155)

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Mon Sep 16 14:33:29 UTC 2024



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
49ac3fb8 by Sylvain Henry at 2024-09-16T10:33:01-04:00
Linker: add support for extra built-in symbols (#25155)

See added Note [Extra RTS symbols] and new user guide entry.

Co-authored-by: Hamish Mackenzie <Hamish.K.Mackenzie at gmail.com>
Co-authored-by: Moritz Angermann <moritz.angermann at gmail.com>

- - - - -


13 changed files:

- docs/users_guide/ghci.rst
- rts/Linker.c
- rts/LinkerInternals.h
- rts/RtsSymbols.c
- rts/RtsSymbols.h
- rts/rts.cabal
- testsuite/tests/ghci/linking/Makefile
- + testsuite/tests/ghci/linking/T25155.hs
- + testsuite/tests/ghci/linking/T25155.stdout
- + testsuite/tests/ghci/linking/T25155_TH.hs
- + testsuite/tests/ghci/linking/T25155_iserv.hs
- + testsuite/tests/ghci/linking/T25155_iserv_main.c
- testsuite/tests/ghci/linking/all.T


Changes:

=====================================
docs/users_guide/ghci.rst
=====================================
@@ -3487,10 +3487,48 @@ dynamically-linked) from GHC itself.  So for example:
 This feature is experimental in GHC 8.0.x, but it may become the
 default in future releases.
 
+Building an external interpreter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The source code for the external interpreter program is in `utils/iserv`. It is
+very simple because most of the heavy lifting code is from the `ghci` library.
+
+It is sometimes desirable to customize the external interpreter program. For
+example, it is possible to add symbols to the RTS linker used by the external
+interpreter. This is done simply at link time by linking an additional `.o` that
+defines a `rtsExtraSyms` function returning the extra symbols. Doing it this way
+avoids the need to recompile the RTS with symbols added to its built-in list.
+A typical C file would look like this:
+
+.. code:: C
+
+  #include <RtsSymbols.h>
+
+  #define CODE_SYM(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
+                        (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
+  #define DATA_SYM(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
+                        (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
+
+  RtsSymbolVal my_iserv_syms[] = {
+      CODE_SYM(malloc)
+      CODE_SYM(getauxval)
+      CODE_SYM(posix_spawn_file_actions_init)
+      ...
+      { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
+  };
+
+  RtsSymbolVal* rtsExtraSyms() {
+      return my_iserv_syms;
+  }
+
+For more information, read the Note [Extra RTS symbols] in the RTS.
+
+
+
 .. _external-interpreter-proxy:
 
 Running the interpreter on a different host
--------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 When using the flag :ghc-flag:`-fexternal-interpreter` GHC will
 spawn and communicate with the separate process using pipes.  There
@@ -3520,6 +3558,7 @@ will be executed on the target. As such packages like ``git-embed``,
 ``file-embed`` and others might not behave as expected if the target
 and host do not share the same filesystem.
 
+
 .. _building-ghci-libraries:
 
 Building GHCi libraries


=====================================
rts/Linker.c
=====================================
@@ -472,14 +472,35 @@ initLinker_ (int retain_cafs)
     symhash = allocStrHashTable();
 
     /* populate the symbol table with stuff from the RTS */
+    IF_DEBUG(linker, debugBelch("populating linker symbol table with built-in RTS symbols\n"));
     for (const RtsSymbolVal *sym = rtsSyms; sym->lbl != NULL; sym++) {
+        IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
         if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
                                     symhash, sym->lbl, sym->addr,
                                     sym->strength, sym->type, NULL)) {
             barf("ghciInsertSymbolTable failed");
         }
-        IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
     }
+    IF_DEBUG(linker, debugBelch("done with built-in RTS symbols\n"));
+
+    /* Add extra symbols. rtsExtraSyms() is a weakly defined symbol in the rts,
+     * that can be overrided by linking in an object with a corresponding
+     * definition later. This is useful to build an external-interpreter or some
+     * other process with extra symbols (mostly libc, or similar).
+     * See Note [Extra RTS symbols]
+     */
+    IF_DEBUG(linker, debugBelch("populating linker symbol table with extra RTS symbols\n"));
+    if(rtsExtraSyms && rtsExtraSyms() != NULL) {
+        for(RtsSymbolVal *sym = rtsExtraSyms(); sym->lbl != NULL; sym++) {
+            IF_DEBUG(linker, debugBelch("initLinker: inserting extra rts symbol %s, %p\n", sym->lbl, sym->addr));
+            if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
+                                        symhash, sym->lbl, sym->addr,
+                                        sym->strength, sym->type, NULL)) {
+                barf("ghciInsertSymbolTable failed");
+            }
+        }
+    }
+    IF_DEBUG(linker, debugBelch("done with extra RTS symbols\n"));
 
     // Redirect newCAF to newRetainedCAF if retain_cafs is true.
     if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,


=====================================
rts/LinkerInternals.h
=====================================
@@ -9,6 +9,7 @@
 #pragma once
 
 #include "Rts.h"
+#include "RtsSymbols.h"
 #include "Hash.h"
 #include "linker/M32Alloc.h"
 
@@ -33,8 +34,6 @@ void printLoadedObjects(void);
 #  define OBJFORMAT_WASM32
 #endif
 
-typedef void SymbolAddr;
-typedef char SymbolName;
 typedef struct _ObjectCode ObjectCode;
 typedef struct _Section    Section;
 
@@ -53,22 +52,6 @@ typedef struct _Section    Section;
  * to recompile their program as position-independent.
  */
 
-/* What kind of thing a symbol identifies. We need to know this to determine how
- * to process overflowing relocations. See Note [Processing overflowed relocations].
- * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined
- * with the other values. */
-typedef enum _SymType {
-    SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
-    SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
-    SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
-    SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
-                                      however if a duplicate is found with a mismatching
-                                      SymType then discard this one.  */
-    SYM_TYPE_HIDDEN = 1 << 4, /* the symbol is hidden and should not be exported */
-
-} SymType;
-
-
 #if defined(OBJFORMAT_ELF)
 #  include "linker/ElfTypes.h"
 #elif defined(OBJFORMAT_PEi386)
@@ -425,12 +408,6 @@ typedef void (*init_t) (int argc, char **argv, char **env);
 /* Type of a finalizer */
 typedef void (*fini_t) (void);
 
-typedef enum _SymStrength {
-    STRENGTH_NORMAL,
-    STRENGTH_WEAK,
-    STRENGTH_STRONG,
-} SymStrength;
-
 /* SymbolInfo tracks a symbol's address, the object code from which
    it originated, and whether or not it's weak.
 


=====================================
rts/RtsSymbols.c
=====================================
@@ -1090,3 +1090,104 @@ RtsSymbolVal rtsSyms[] = {
       SymI_HasDataProto(nonmoving_write_barrier_enabled)
       { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
 };
+
+
+// Note [Extra RTS symbols]
+// ~~~~~~~~~~~~~~~~~~~~~~~~
+// How does the RTS linker know the location of the exposed RTS functions in the
+// current Haskell process? Especially if it is statically linked, there is no
+// dynamic table containing a symbol->address mapping. What we do is that we
+// compile the following static "rtsSyms" array:
+//
+//  RtsSymbolVar rtsSyms[] = {
+//    { "rts_foo", &rts_foo, ...}
+//    ...
+//  };
+//
+// This array associates a predefined set of symbol names (e.g. "rts_foo") to
+// their address (e.g. &rts_foo) as determined by the linker (static or
+// dynamic).
+//
+// Note that also use this mechanism for libraries other than the RTS, e.g.
+// libc.
+//
+// Why do we need this mapping?
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+// Suppose we have a Haskell program statically linked against the "libfoo"
+// library and using the "foo" symbol. If we try to dynamically link some code
+// using "foo" with the RTS linker, we will load another instance of "libfoo" in
+// memory and use the "foo" symbol in it for the dynamically loaded code.
+// However the statically linked code still uses the statically linked instance
+// of libfoo and its foo symbol.
+//
+// This isn't a problem as long a we use side-effect free functions. However
+// both instances of a "foo" function using global variables may use different
+// global variables!
+//
+// This was a real issue with programs linked against the ghc library and
+// also loading it dynamically: the GHC lib uses global variables (e.g. for the
+// unique counter), and those weren't shared... This wasn't solved by adding
+// GHC's global variables to the rtsSyms list but it could have been (instead a
+// generic mechanism to explicit manage Haskell global variables through the RTS
+// was added and used). However for other libraries, especially foreign ones,
+// the issue remains!
+//
+// Which symbols should go into the list?
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+// Ideally all problematic symbols (wrt to the double loading issue mentioned
+// above) should go into the list. However:
+//   - we can't easily predict all the symbols that will be problematic/needed
+//   - symbols in the list prevent some dead code elimination! If symbol "foo"
+//   is in the list, even if we never load any code using "foo", we'll carry the
+//   code for "foo" and its dependencies.
+//
+// The second point is particularly problematic if some symbols are only needed
+// to run Template Haskell splices at compilation time: we need them in the RTS
+// used by the interpreter but not in the RTS of the target program!
+//
+// Extra symbols
+// ~~~~~~~~~~~~~
+// Since #25155 we also support specifying a second list of "extra" symbols
+// which is empty by default. By overriding the weak `rtsExtraSyms` function,
+// build tools can extend the list of symbols linked into the RTS. This feature
+// coupled with the external-interpreter feature allows specific external
+// interpreter programs to be built easily with extra symbols supported.
+//
+// As a concrete example, one can build an `iserv-proxy-interpreter` program
+// with the following additional Cabal stanza (see [1]):
+//
+//      if os(linux) && arch(aarch64)
+//         c-sources: cbits/symbols.aarch64-musl.c
+//
+// Where the C file contains (see [2] and [3]):
+//
+//      #include <RtsSymbols.h>
+//
+//      #define CODE_SYM(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
+//      #define DATA_SYM(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
+//
+//      RtsSymbolVal my_iserv_syms[] = {
+//          CODE_SYM(malloc)
+//          CODE_SYM(getauxval)
+//          CODE_SYM(posix_spawn_file_actions_init)
+//          ...
+//          { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
+//      };
+//
+//      RtsSymbolVal* rtsExtraSyms() {
+//          return my_iserv_syms;
+//      }
+//
+// [1] https://github.com/stable-haskell/iserv-proxy/blob/2ed34002247213fc435d0062350b91bab920626e/iserv-proxy.cabal#L110-L111
+// [2] https://github.com/stable-haskell/iserv-proxy/blob/2ed34002247213fc435d0062350b91bab920626e/cbits/symbols.aarch64-musl.c
+// [3] https://github.com/stable-haskell/iserv-proxy/blob/2ed34002247213fc435d0062350b91bab920626e/cbits/symbols.aarch64-musl.h
+//
+
+static RtsSymbolVal default_extra_syms[] = {
+      { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
+};
+
+/* Default empty extra RTS symbols. See Note[Extra RTS symbols] */
+RtsSymbolVal* __attribute__((weak)) rtsExtraSyms(void) {
+    return default_extra_syms;
+}


=====================================
rts/RtsSymbols.h
=====================================
@@ -9,8 +9,6 @@
 #pragma once
 
 #include "ghcautoconf.h"
-#include "LinkerInternals.h"
-#include <stdbool.h>
 
 #if defined(LEADING_UNDERSCORE)
 #define MAYBE_LEADING_UNDERSCORE_STR(s) ("_" s)
@@ -18,6 +16,29 @@
 #define MAYBE_LEADING_UNDERSCORE_STR(s) (s)
 #endif
 
+typedef void SymbolAddr;
+typedef char SymbolName;
+
+/* What kind of thing a symbol identifies. We need to know this to determine how
+ * to process overflowing relocations. See Note [Processing overflowed relocations].
+ * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined
+ * with the other values. */
+typedef enum _SymType {
+    SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
+    SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
+    SYM_TYPE_INDIRECT_DATA = 1 << 2, /* see Note [_iob_func symbol] */
+    SYM_TYPE_DUP_DISCARD = 1 << 3, /* the symbol is a symbol in a BFD import library
+                                      however if a duplicate is found with a mismatching
+                                      SymType then discard this one.  */
+    SYM_TYPE_HIDDEN = 1 << 4, /* the symbol is hidden and should not be exported */
+} SymType;
+
+typedef enum _SymStrength {
+    STRENGTH_NORMAL,
+    STRENGTH_WEAK,
+    STRENGTH_STRONG,
+} SymStrength;
+
 typedef struct _RtsSymbolVal {
     const SymbolName* lbl;
     SymbolAddr* addr;
@@ -27,6 +48,8 @@ typedef struct _RtsSymbolVal {
 
 extern RtsSymbolVal rtsSyms[];
 
+extern RtsSymbolVal* __attribute__((weak)) rtsExtraSyms(void);
+
 /* See Note [_iob_func symbol].  */
 #if defined(mingw32_HOST_OS)
 extern const void* __rts_iob_func;


=====================================
rts/rts.cabal
=====================================
@@ -250,7 +250,7 @@ library
       include-dirs: include
       includes: Rts.h
       autogen-includes: ghcautoconf.h ghcplatform.h
-      install-includes: Cmm.h HsFFI.h MachDeps.h Rts.h RtsAPI.h Stg.h
+      install-includes: Cmm.h HsFFI.h MachDeps.h Rts.h RtsAPI.h RtsSymbols.h Stg.h
                         ghcautoconf.h ghcconfig.h ghcplatform.h ghcversion.h
                         -- ^ from include
                         DerivedConstants.h


=====================================
testsuite/tests/ghci/linking/Makefile
=====================================
@@ -145,3 +145,8 @@ T15729:
 big-obj:
 	'$(TEST_CC)' -c -Wa,-mbig-obj big-obj-c.c -o big-obj-c.o
 	echo "main" | '$(TEST_HC)' $(TEST_HC_OPTS_INTERACTIVE) big-obj-c.o big-obj.hs
+
+.PHONY: T25155
+T25155:
+	'$(TEST_HC)' T25155_iserv_main.c T25155_iserv.hs -package ghci -no-hs-main -v0 -o T25155_iserv
+	'$(TEST_HC)' -fexternal-interpreter -pgmi ./T25155_iserv -v0 T25155.hs


=====================================
testsuite/tests/ghci/linking/T25155.hs
=====================================
@@ -0,0 +1,8 @@
+{-# LANGUAGE TemplateHaskell #-}
+
+module T25155 where
+
+import Language.Haskell.TH (runIO)
+import T25155_TH
+
+$(runIO (foobar 7) >> pure [])


=====================================
testsuite/tests/ghci/linking/T25155.stdout
=====================================
@@ -0,0 +1 @@
+Called foobar with 7


=====================================
testsuite/tests/ghci/linking/T25155_TH.hs
=====================================
@@ -0,0 +1,3 @@
+module T25155_TH (foobar) where
+
+foreign import ccall "foobar" foobar :: Int -> IO Int


=====================================
testsuite/tests/ghci/linking/T25155_iserv.hs
=====================================
@@ -0,0 +1,6 @@
+module Main (main) where
+
+import GHCi.Server (defaultServer)
+
+main :: IO ()
+main = defaultServer


=====================================
testsuite/tests/ghci/linking/T25155_iserv_main.c
=====================================
@@ -0,0 +1,41 @@
+#include <ghcversion.h>
+#include <rts/PosixSource.h>
+#include <Rts.h>
+#include <RtsSymbols.h>
+#include <HsFFI.h>
+
+int main (int argc, char *argv[])
+{
+    RtsConfig conf = defaultRtsConfig;
+
+    // We never know what symbols GHC will look up in the future, so
+    // we must retain CAFs for running interpreted code.
+    conf.keep_cafs = 1;
+
+    conf.rts_opts_enabled = RtsOptsAll;
+    extern StgClosure ZCMain_main_closure;
+    hs_main(argc, argv, &ZCMain_main_closure, conf);
+}
+
+// Inject "foobar" in the linker symbol table.
+//
+// The target object isn't compiled against any object defining foobar, yet we
+// can use the FFI call to foobar in a TH splice.
+
+#define SymI_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
+                    (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
+
+
+void foobar(int x) {
+    printf("Called foobar with %d\n", x);
+}
+
+RtsSymbolVal extra_syms[] = {
+    SymI_HasProto(foobar)
+    { NULL, NULL, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
+  };
+
+/* Extend the list of built-in symbols */
+RtsSymbolVal* rtsExtraSyms() {
+    return extra_syms;
+}


=====================================
testsuite/tests/ghci/linking/all.T
=====================================
@@ -76,3 +76,4 @@ test('big-obj', [extra_files(['big-obj-c.c', 'big-obj.hs']),
                     unless(doing_ghci, skip), unless(opsys('mingw32'), skip)],
      makefile_test, ['big-obj'])
 
+test('T25155', [req_c, req_th, req_interp, extra_files(['T25155_iserv_main.c', 'T25155_iserv.hs', 'T25155_TH.hs'])], makefile_test, [])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/49ac3fb8cabc0ed584f3261296feb656d32a9ae7

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/49ac3fb8cabc0ed584f3261296feb656d32a9ae7
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240916/fd7009f5/attachment-0001.html>


More information about the ghc-commits mailing list