[Git][ghc/ghc][wip/supersven/ghc-9.10-riscv-ncg] 8 commits: Add RTS linker for RISCV64

Sven Tennie (@supersven) gitlab at gitlab.haskell.org
Mon Aug 5 06:39:30 UTC 2024



Sven Tennie pushed to branch wip/supersven/ghc-9.10-riscv-ncg at Glasgow Haskell Compiler / GHC


Commits:
2b03e102 by Sven Tennie at 2024-08-05T08:37:32+02:00
Add RTS linker for RISCV64

This architecture wasn't supported before.

Co-authored-by: Moritz Angermann <moritz.angermann at gmail.com>

- - - - -
dc252e4d by Sven Tennie at 2024-08-05T08:37:49+02:00
Ignore divbyzero test for RISCV64

The architecture's behaviour differs from the test's expectations. See the
comment added to testsuite/tests/rts/all.T for why this is okay.
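
For illustration, a minimal sketch (not part of this patch) of the behaviour the
test trips over. Per the RISC-V ISA, integer division never traps: dividing by
zero yields an all-bits-set quotient and the remainder operation returns the
dividend.

    #include <stdint.h>
    #include <stdio.h>

    // Division by zero is undefined behaviour in C, but with the division
    // actually emitted (volatile operands, no constant folding), RISC-V
    // hardware defines the result: DIV returns -1 (all bits set) and REM
    // returns the dividend. The divbyzero test expects a trap instead.
    int main(void) {
      volatile int64_t x = 42, zero = 0;
      printf("x / 0 = %lld\n", (long long)(x / zero));  // -1 on RISC-V
      printf("x %% 0 = %lld\n", (long long)(x % zero)); // 42 on RISC-V
      return 0;
    }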

- - - - -
d73b06ee by Sven Tennie at 2024-08-05T08:37:49+02:00
Enable MulMayOflo_full test for RISCV64

It works and thus can be tested.

- - - - -
a37e2832 by Sven Tennie at 2024-08-05T08:37:49+02:00
LibffiAdjustor: Ensure code caches are flushed (RISCV64)

RISCV64 needs a specific cache-flushing sequence (involving fence.i) whenever
new code is created or loaded.

- - - - -
636a7359 by Sven Tennie at 2024-08-05T08:37:49+02:00
Add additional linker symbols for builtins (RISCV64)

We rely on some GCC/Clang builtins. These need to be visible to the
linker and must not be stripped away.
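
For illustration, a hedged sketch (not part of this patch; the helper names are
made up) of how such calls typically arise: on targets without a suitable single
instruction, GCC/Clang may lower the count-leading/trailing-zeros builtins to
out-of-line calls into libgcc, which then show up as undefined symbols the RTS
linker has to resolve.

    #include <stdint.h>

    // May compile to calls to __clzdi2 / __ctzdi2 on some RISC-V targets
    // (e.g. without the Zbb extension), hence the new SymI_NeedsProto
    // entries in RtsSymbols.c.
    uint64_t leading_zeros(uint64_t x)  { return x ? (uint64_t)__builtin_clzll(x) : 64; }
    uint64_t trailing_zeros(uint64_t x) { return x ? (uint64_t)__builtin_ctzll(x) : 64; }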

- - - - -
e95177db by Sven Tennie at 2024-08-05T08:37:49+02:00
Add GHCi support for RISCV64

Now that we have an RTS linker for this architecture, we can enable GHCi
for it.

- - - - -
d5a3e8ab by Sven Tennie at 2024-08-05T08:37:49+02:00
Set codeowners of the RISCV64 NCG

- - - - -
253bd61c by Sven Tennie at 2024-08-05T08:37:49+02:00
Add test for C calling convention

Ensure that parameters and return values are correctly processed. A
dedicated test like this makes it easier to get the subtleties of
calling conventions right.

- - - - -


25 changed files:

- CODEOWNERS
- hadrian/bindist/config.mk.in
- rts/LinkerInternals.h
- rts/RtsSymbols.c
- rts/adjustor/LibffiAdjustor.c
- rts/linker/Elf.c
- rts/linker/ElfTypes.h
- rts/linker/SymbolExtras.c
- rts/linker/SymbolExtras.h
- rts/linker/elf_plt.c
- rts/linker/elf_plt.h
- + rts/linker/elf_plt_riscv64.c
- + rts/linker/elf_plt_riscv64.h
- rts/linker/elf_reloc.c
- rts/linker/elf_reloc.h
- rts/linker/elf_reloc_aarch64.c
- rts/linker/elf_reloc_aarch64.h
- + rts/linker/elf_reloc_riscv64.c
- + rts/linker/elf_reloc_riscv64.h
- rts/rts.cabal
- + testsuite/tests/codeGen/should_run/CCallConv.hs
- + testsuite/tests/codeGen/should_run/CCallConv.stdout
- + testsuite/tests/codeGen/should_run/CCallConv_c.c
- testsuite/tests/codeGen/should_run/all.T
- testsuite/tests/rts/all.T


Changes:

=====================================
CODEOWNERS
=====================================
@@ -40,6 +40,7 @@
 /compiler/GHC/HsToCore/Foreign/Wasm.hs @TerrorJack
 /compiler/GHC/Tc/Deriv/            @RyanGlScott
 /compiler/GHC/CmmToAsm/            @simonmar @bgamari @AndreasK
+/compiler/GHC/CmmToAsm/RV64/       @supersven @angerman
 /compiler/GHC/CmmToAsm/Wasm/       @TerrorJack
 /compiler/GHC/CmmToLlvm/           @angerman
 /compiler/GHC/StgToCmm/            @simonmar @osa1


=====================================
hadrian/bindist/config.mk.in
=====================================
@@ -152,7 +152,7 @@ GhcWithSMP := $(strip $(if $(filter YESNO, $(ArchSupportsSMP)$(GhcUnregisterised
 # Whether to include GHCi in the compiler.  Depends on whether the RTS linker
 # has support for this OS/ARCH combination.
 OsSupportsGHCi=$(strip $(patsubst $(TargetOS_CPP), YES, $(findstring $(TargetOS_CPP), mingw32 linux solaris2 freebsd dragonfly netbsd openbsd darwin kfreebsdgnu)))
-ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64)))
+ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64 riscv64)))
 
 ifeq "$(OsSupportsGHCi)$(ArchSupportsGHCi)" "YESYES"
 GhcWithInterpreter=YES


=====================================
rts/LinkerInternals.h
=====================================
@@ -208,7 +208,7 @@ typedef struct _Segment {
     int n_sections;
 } Segment;
 
-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
 #define NEED_SYMBOL_EXTRAS 1
 #endif
 
@@ -220,8 +220,9 @@ typedef struct _Segment {
 #define NEED_M32 1
 #endif
 
-/* Jump Islands are sniplets of machine code required for relative
- * address relocations on the PowerPC, x86_64 and ARM.
+/* Jump Islands are snippets of machine code required for relative address
+ * relocations on the PowerPC, x86_64 and ARM. On RISCV64 we use symbol extras
+ * like a GOT for local symbols, where each SymbolExtra represents one entry.
  */
 typedef struct {
 #if defined(powerpc_HOST_ARCH)
@@ -237,6 +238,8 @@ typedef struct {
     uint8_t     jumpIsland[8];
 #elif defined(arm_HOST_ARCH)
     uint8_t     jumpIsland[16];
+#elif defined(riscv64_HOST_ARCH)
+    uint64_t    addr;
 #endif
 } SymbolExtra;
 


=====================================
rts/RtsSymbols.c
=====================================
@@ -980,6 +980,17 @@ extern char **environ;
 #define RTS_LIBGCC_SYMBOLS
 #endif
 
+#if defined(riscv64_HOST_ARCH)
+// See https://gcc.gnu.org/onlinedocs/gccint/Integer-library-routines.html as a
+// reference for the following built-ins. __clzdi2 and __ctzdi2 probably relate
+// to the __builtin_clz/__builtin_ctz calls in libraries/ghc-prim/cbits/ctz.c.
+#define RTS_ARCH_LIBGCC_SYMBOLS \
+  SymI_NeedsProto(__clzdi2) \
+  SymI_NeedsProto(__ctzdi2)
+#else
+#define RTS_ARCH_LIBGCC_SYMBOLS
+#endif
+
 // Symbols defined by libgcc/compiler-rt for AArch64's outline atomics.
 #if defined(HAVE_ARM_OUTLINE_ATOMICS)
 #include "ARMOutlineAtomicsSymbols.h"
@@ -1032,6 +1043,7 @@ RTS_DARWIN_ONLY_SYMBOLS
 RTS_OPENBSD_ONLY_SYMBOLS
 RTS_LIBC_SYMBOLS
 RTS_LIBGCC_SYMBOLS
+RTS_ARCH_LIBGCC_SYMBOLS
 RTS_FINI_ARRAY_SYMBOLS
 RTS_LIBFFI_SYMBOLS
 RTS_ARM_OUTLINE_ATOMIC_SYMBOLS
@@ -1074,6 +1086,7 @@ RtsSymbolVal rtsSyms[] = {
       RTS_DARWIN_ONLY_SYMBOLS
       RTS_OPENBSD_ONLY_SYMBOLS
       RTS_LIBGCC_SYMBOLS
+      RTS_ARCH_LIBGCC_SYMBOLS
       RTS_FINI_ARRAY_SYMBOLS
       RTS_LIBFFI_SYMBOLS
       RTS_ARM_OUTLINE_ATOMIC_SYMBOLS


=====================================
rts/adjustor/LibffiAdjustor.c
=====================================
@@ -12,6 +12,7 @@
 #include "Adjustor.h"
 
 #include "rts/ghc_ffi.h"
+#include <stdint.h>
 #include <string.h>
 
 // Note that ffi_alloc_prep_closure is a non-standard libffi closure
@@ -187,5 +188,21 @@ createAdjustor (int cconv,
         barf("createAdjustor: failed to allocate memory");
     }
 
-    return (void*)code;
+#if defined(riscv64_HOST_ARCH)
+    // Synchronize the memory and instruction cache to prevent illegal
+    // instruction exceptions.
+
+    // We expect two instructions for address loading, one for the jump.
+    int instrCount = 3;
+    // On Linux the parameters of __builtin___clear_cache are currently unused.
+    // Add them anyway for future compatibility. (That is, the parameters could
+    // not be checked during development.)
+    // TODO: Check the upper boundary e.g. with a debugger.
+    __builtin___clear_cache((void *)code,
+                            (void *)((uint64_t *) code + instrCount));
+    // Memory barrier to ensure nothing circumvents the fence.i / cache flush.
+    SEQ_CST_FENCE();
+#endif
+
+    return (void *)code;
 }
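
As background, a self-contained sketch of the idiom this change relies on: write
machine code into executable memory, then flush before calling it. The mmap
setup and the RV64 instruction encodings (li a0,42 and ret) are illustrative
assumptions, not taken from the patch.

    #include <stdint.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void) {
      // Assumed encodings: addi a0, zero, 42 and jalr zero, 0(ra) ("ret").
      uint32_t code[] = { 0x02a00513u, 0x00008067u };
      void *buf = mmap(NULL, sizeof code, PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (buf == MAP_FAILED) return 1;
      memcpy(buf, code, sizeof code);
      // Without this fence.i-backed flush, the instruction fetch may still
      // see stale cache contents and raise an illegal instruction exception.
      __builtin___clear_cache((char *)buf, (char *)buf + sizeof code);
      int (*fn)(void) = (int (*)(void))buf;
      return fn() == 42 ? 0 : 1;
    }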


=====================================
rts/linker/Elf.c
=====================================
@@ -103,7 +103,8 @@
 
 #include "elf_got.h"
 
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH)
+#  define NEED_GOT
 #  define NEED_PLT
 #  include "elf_plt.h"
 #  include "elf_reloc.h"
@@ -430,10 +431,7 @@ ocVerifyImage_ELF ( ObjectCode* oc )
       case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break;
 #endif
 #if defined(EM_RISCV)
-      case EM_RISCV:  IF_DEBUG(linker,debugBelch( "riscv" ));
-          errorBelch("%s: RTS linker not implemented on riscv",
-                     oc->fileName);
-          return 0;
+      case EM_RISCV:  IF_DEBUG(linker,debugBelch( "riscv" )); break;
 #endif
 #if defined(EM_LOONGARCH)
       case EM_LOONGARCH:  IF_DEBUG(linker,debugBelch( "loongarch64" ));
@@ -1130,9 +1128,10 @@ end:
    return result;
 }
 
-// the aarch64 linker uses relocacteObjectCodeAarch64,
-// see elf_reloc_aarch64.{h,c}
-#if !defined(aarch64_HOST_ARCH)
+// the aarch64 and riscv64 linkers use relocateObjectCodeAarch64() and
+// relocateObjectCodeRISCV64() (respectively), see elf_reloc_aarch64.{h,c} and
+// elf_reloc_riscv64.{h,c}
+#if !defined(aarch64_HOST_ARCH) && !defined(riscv64_HOST_ARCH)
 
 /* Do ELF relocations which lack an explicit addend.  All x86-linux
    and arm-linux relocations appear to be of this form. */
@@ -1359,7 +1358,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
                /* try to locate an existing stub for this target */
                if(findStub(&oc->sections[target_shndx], (void**)&S, 0)) {
                    /* didn't find any. Need to create one */
-                   if(makeStub(&oc->sections[target_shndx], (void**)&S, 0)) {
+                   if(makeStub(&oc->sections[target_shndx], (void**)&S, NULL, 0)) {
                        errorBelch("Unable to create veneer for ARM_CALL\n");
                        return 0;
                    }
@@ -1451,7 +1450,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
                /* try to locate an existing stub for this target */
                if(findStub(&oc->sections[target_shndx], (void**)&S, 1)) {
                    /* didn't find any. Need to create one */
-                   if(makeStub(&oc->sections[target_shndx], (void**)&S, 1)) {
+                   if(makeStub(&oc->sections[target_shndx], (void**)&S, NULL, 1)) {
                        errorBelch("Unable to create veneer for ARM_THM_CALL\n");
                        return 0;
                    }
@@ -1991,7 +1990,7 @@ ocResolve_ELF ( ObjectCode* oc )
     (void) shnum;
     (void) shdr;
 
-#if defined(aarch64_HOST_ARCH)
+#if defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
     /* use new relocation design */
     if(relocateObjectCode( oc ))
         return 0;
@@ -2014,6 +2013,9 @@ ocResolve_ELF ( ObjectCode* oc )
 
 #if defined(powerpc_HOST_ARCH)
     ocFlushInstructionCache( oc );
+#elif defined(riscv64_HOST_ARCH)
+    /* New-style call, dispatched by architecture (see elf_reloc.c) */
+    flushInstructionCache( oc );
 #endif
 
     return ocMprotect_Elf(oc);


=====================================
rts/linker/ElfTypes.h
=====================================
@@ -150,6 +150,7 @@ typedef
 struct _Stub {
     void * addr;
     void * target;
+    void* got_addr;
     /* flags can hold architecture specific information they are used during
      * lookup of stubs as well. Thus two stubs for the same target with
      * different flags are considered unequal.


=====================================
rts/linker/SymbolExtras.c
=====================================
@@ -153,7 +153,7 @@ void ocProtectExtras(ObjectCode* oc)
 }
 
 
-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
 SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
                               unsigned long symbolNumber,
                               unsigned long target )
@@ -189,9 +189,12 @@ SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
     extra->addr = target;
     memcpy(extra->jumpIsland, jmp, 8);
 #endif /* x86_64_HOST_ARCH */
-
+#if defined(riscv64_HOST_ARCH)
+    // Fake GOT entry (used like GOT, but located in symbol extras)
+    extra->addr = target;
+#endif
     return extra;
 }
-#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH */
+#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH || riscv64_HOST_ARCH */
 #endif /* !x86_64_HOST_ARCH) || !mingw32_HOST_OS */
 #endif // NEED_SYMBOL_EXTRAS


=====================================
rts/linker/SymbolExtras.h
=====================================
@@ -16,7 +16,7 @@ SymbolExtra* makeArmSymbolExtra( ObjectCode const* oc,
                                  unsigned long target,
                                  bool fromThumb,
                                  bool toThumb );
-#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
 SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
                               unsigned long symbolNumber,
                               unsigned long target );


=====================================
rts/linker/elf_plt.c
=====================================
@@ -5,7 +5,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
 #if defined(OBJFORMAT_ELF)
 
 #define STRINGIFY(x) #x
@@ -49,11 +49,13 @@ findStub(Section * section,
 bool
 makeStub(Section * section,
           void* * addr,
+          void* got_addr,
           uint8_t flags) {
 
     Stub * s = calloc(1, sizeof(Stub));
     ASSERT(s != NULL);
     s->target = *addr;
+    s->got_addr = got_addr;
     s->flags  = flags;
     s->next = NULL;
     s->addr = (uint8_t *)section->info->stub_offset + 8


=====================================
rts/linker/elf_plt.h
=====================================
@@ -4,8 +4,9 @@
 
 #include "elf_plt_arm.h"
 #include "elf_plt_aarch64.h"
+#include "elf_plt_riscv64.h"
 
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH)
 
 #if defined(OBJFORMAT_ELF)
 
@@ -21,6 +22,8 @@
 #define __suffix__ Arm
 #elif defined(__mips__)
 #define __suffix__ Mips
+#elif defined(__riscv)
+#define __suffix__ RISCV64
 #else
 #error "unknown architecture"
 #endif
@@ -34,10 +37,10 @@ unsigned  numberOfStubsForSection( ObjectCode *oc, unsigned sectionIndex);
 #define STUB_SIZE          ADD_SUFFIX(stubSize)
 
 bool findStub(Section * section, void* * addr, uint8_t flags);
-bool makeStub(Section * section, void* * addr, uint8_t flags);
+bool makeStub(Section * section, void* * addr, void* got_addr, uint8_t flags);
 
 void freeStubs(Section * section);
 
 #endif // OBJECTFORMAT_ELF
 
-#endif // arm/aarch64_HOST_ARCH
+#endif // arm/aarch64_HOST_ARCH/riscv64_HOST_ARCH


=====================================
rts/linker/elf_plt_riscv64.c
=====================================
@@ -0,0 +1,90 @@
+#include "Rts.h"
+#include "elf_compat.h"
+#include "elf_plt_riscv64.h"
+#include "rts/Messages.h"
+#include "linker/ElfTypes.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#if defined(riscv64_HOST_ARCH)
+
+#if defined(OBJFORMAT_ELF)
+
+const size_t instSizeRISCV64 = 4;
+const size_t stubSizeRISCV64 = 3 * instSizeRISCV64;
+
+bool needStubForRelRISCV64(Elf_Rel *rel) {
+  switch (ELF64_R_TYPE(rel->r_info)) {
+  case R_RISCV_CALL:
+  case R_RISCV_CALL_PLT:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool needStubForRelaRISCV64(Elf_Rela *rela) {
+  switch (ELF64_R_TYPE(rela->r_info)) {
+  case R_RISCV_CALL:
+  case R_RISCV_CALL_PLT:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// After the global offset table (GOT) has been set up, we can use these three
+// instructions to jump to the target address / function:
+//
+//  1. AUIPC ip, %pcrel_hi(addr)
+//  2. LD ip, %pcrel_lo(addr)(ip)
+//  3. JALR x0, ip, 0
+//
+// We could use the absolute address of the target (because we know it), but
+// that would require loading a 64-bit constant which is a nightmare to do in
+// riscv64 assembly. (See
+// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/5ffe5b5aeedb37b1c1c0c3d94641267d9ad4795a/riscv-elf.adoc#procedure-linkage-table)
+//
+// So far, PC-relative addressing seems to be good enough. If it ever turns out
+// not to be, one could (additionally, for out-of-range cases) encode absolute
+// addressing here.
+bool makeStubRISCV64(Stub *s) {
+  uint32_t *P = (uint32_t *)s->addr;
+  int32_t addr = (uint64_t)s->got_addr - (uint64_t)P;
+
+  uint64_t hi = (addr + 0x800) >> 12;
+  uint64_t lo = addr - (hi << 12);
+
+  IF_DEBUG(
+      linker,
+      debugBelch(
+          "makeStubRISCV64: P = %p, got_addr = %p, target = %p, addr = 0x%x "
+          ", hi = 0x%lx, lo = 0x%lx\n",
+          P, s->got_addr, s->target, addr, hi, lo));
+
+  // AUIPC ip, %pcrel_hi(addr)
+  uint32_t auipcInst = 0b0010111; // opcode
+  auipcInst |= 0x1f << 7;         // rd = ip (x31)
+  auipcInst |= hi << 12;          // imm[31:12]
+
+  // LD ip, %pcrel_lo(addr)(ip)
+  uint32_t ldInst = 0b0000011; // opcode
+  ldInst |= 0x1f << 7;         // rd = ip (x31)
+  ldInst |= 0x1f << 15;        // rs = ip (x31)
+  ldInst |= 0b11 << 12;        // funct3 = 0x3 (LD)
+  ldInst |= lo << 20;          // imm[11:0]
+
+  // JALR x0, ip, 0
+  uint32_t jalrInst = 0b1100111; // opcode
+  jalrInst |= 0x1f << 15;        // rs = ip (x31)
+
+  P[0] = auipcInst;
+  P[1] = ldInst;
+  P[2] = jalrInst;
+
+  return EXIT_SUCCESS;
+}
+
+#endif
+#endif
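
As background for the hi/lo arithmetic used in makeStubRISCV64 above (and in the
setUType/setIType helpers in elf_reloc_riscv64.c further down), a small
illustrative check, not part of the patch: the 32-bit offset is split into a
rounded upper 20-bit part and a signed lower 12-bit part, and AUIPC plus the low
immediate reassemble it exactly.

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      int32_t offsets[] = { 0x12345FFF, -0x7FF, 0x800, -0x12345678 };
      for (unsigned i = 0; i < sizeof offsets / sizeof *offsets; i++) {
        int32_t addr = offsets[i];
        // Adding 0x800 before shifting rounds hi to the nearest 4 KiB page so
        // that lo always fits in a signed 12-bit immediate.
        int64_t hi = (addr + 0x800) >> 12;
        int64_t lo = addr - (hi << 12);
        assert(lo >= -2048 && lo <= 2047);
        assert((hi << 12) + lo == addr);
      }
      return 0;
    }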


=====================================
rts/linker/elf_plt_riscv64.h
=====================================
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "LinkerInternals.h"
+
+#if defined(OBJFORMAT_ELF)
+
+extern const size_t stubSizeRISCV64;
+bool needStubForRelRISCV64(Elf_Rel * rel);
+bool needStubForRelaRISCV64(Elf_Rela * rel);
+bool makeStubRISCV64(Stub * s);
+
+#endif


=====================================
rts/linker/elf_reloc.c
=====================================
@@ -4,13 +4,18 @@
 
 #if defined(OBJFORMAT_ELF)
 
-/* we currently only use this abstraction for elf/aarch64 */
-#if defined(aarch64_HOST_ARCH)
+/* we currently only use this abstraction for elf/aarch64 and elf/riscv64 */
+#if defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
 
 bool
 relocateObjectCode(ObjectCode * oc) {
     return ADD_SUFFIX(relocateObjectCode)(oc);
 }
+
+
+void flushInstructionCache(ObjectCode * oc){
+   return ADD_SUFFIX(flushInstructionCache)(oc);
+}
 #endif
 
 #endif
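
The ADD_SUFFIX calls above dispatch to the architecture-specific implementation
by token pasting with the per-architecture __suffix__ from elf_plt.h. A rough
sketch of the mechanism (the PASTE/XPASTE definitions below are illustrative;
the real ones live in elf_plt.h and may differ in detail):

    #include <stdio.h>

    #define PASTE(x, y) x##y
    #define XPASTE(x, y) PASTE(x, y)      /* extra level so __suffix__ expands */
    #define ADD_SUFFIX(f) XPASTE(f, __suffix__)

    #define __suffix__ RISCV64

    static void flushInstructionCacheRISCV64(void *oc) {
      printf("flushing icache for object %p\n", oc);
    }

    /* Generic entry point: expands to flushInstructionCacheRISCV64(oc). */
    static void flushInstructionCache(void *oc) {
      ADD_SUFFIX(flushInstructionCache)(oc);
    }

    int main(void) { flushInstructionCache((void *)0); return 0; }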


=====================================
rts/linker/elf_reloc.h
=====================================
@@ -5,9 +5,10 @@
 #if defined(OBJFORMAT_ELF)
 
 #include "elf_reloc_aarch64.h"
+#include "elf_reloc_riscv64.h"
 
 bool
 relocateObjectCode(ObjectCode * oc);
 
-
+void flushInstructionCache(ObjectCode *oc);
 #endif /* OBJETFORMAT_ELF */


=====================================
rts/linker/elf_reloc_aarch64.c
=====================================
@@ -240,7 +240,7 @@ computeAddend(Section * section, Elf_Rel * rel,
                 /* check if we already have that stub */
                 if(findStub(section, (void**)&S, 0)) {
                     /* did not find it. Crete a new stub. */
-                    if(makeStub(section, (void**)&S, 0)) {
+                    if(makeStub(section, (void**)&S, NULL, 0)) {
                         abort(/* could not find or make stub */);
                     }
                 }
@@ -339,5 +339,10 @@ relocateObjectCodeAarch64(ObjectCode * oc) {
     return EXIT_SUCCESS;
 }
 
+void flushInstructionCacheAarch64(ObjectCode * oc STG_UNUSED) {
+  // Looks like we don't need this on Aarch64.
+  /* no-op */
+}
+
 #endif /* OBJECTFORMAT_ELF */
 #endif /* aarch64_HOST_ARCH */


=====================================
rts/linker/elf_reloc_aarch64.h
=====================================
@@ -7,4 +7,5 @@
 bool
 relocateObjectCodeAarch64(ObjectCode * oc);
 
+void flushInstructionCacheAarch64(ObjectCode *oc);
 #endif /* OBJETFORMAT_ELF */


=====================================
rts/linker/elf_reloc_riscv64.c
=====================================
@@ -0,0 +1,693 @@
+#include "elf_reloc_riscv64.h"
+#include "LinkerInternals.h"
+#include "Rts.h"
+#include "Stg.h"
+#include "SymbolExtras.h"
+#include "linker/ElfTypes.h"
+#include "elf_plt.h"
+#include "elf_util.h"
+#include "rts/Messages.h"
+#include "util.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#if defined(riscv64_HOST_ARCH)
+
+#if defined(OBJFORMAT_ELF)
+
+typedef uint64_t addr_t;
+
+/* regular instructions are 32bit */
+typedef uint32_t inst_t;
+
+/* compressed instructions are 16bit */
+typedef uint16_t cinst_t;
+
+// TODO: These functions could be static. They are not yet, because we might
+// need their debugging symbols.
+char *relocationTypeToString(Elf64_Xword type);
+int32_t decodeAddendRISCV64(Section *section, Elf_Rel *rel);
+bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend);
+void write8le(uint8_t *p, uint8_t v);
+uint8_t read8le(const uint8_t *P);
+void write16le(cinst_t *p, uint16_t v);
+uint16_t read16le(const cinst_t *P);
+uint32_t read32le(const inst_t *P);
+void write32le(inst_t *p, uint32_t v);
+uint64_t read64le(const uint64_t *P);
+void write64le(uint64_t *p, uint64_t v);
+uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end);
+void setCJType(cinst_t *loc, uint32_t val);
+void setCBType(cinst_t *loc, uint32_t val);
+void setBType(inst_t *loc, uint32_t val);
+void setSType(inst_t *loc, uint32_t val);
+int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol,
+                      int64_t addend, ObjectCode *oc);
+void setJType(inst_t *loc, uint32_t val);
+void setIType(inst_t *loc, int32_t val);
+void checkInt(inst_t *loc, int32_t v, int n);
+void setUType(inst_t *loc, int32_t val);
+
+
+char *relocationTypeToString(Elf64_Xword type) {
+  switch (ELF64_R_TYPE(type)) {
+  case R_RISCV_NONE:
+    return "R_RISCV_NONE";
+  case R_RISCV_32:
+    return "R_RISCV_32";
+  case R_RISCV_64:
+    return "R_RISCV_64";
+  case R_RISCV_RELATIVE:
+    return "R_RISCV_RELATIVE";
+  case R_RISCV_COPY:
+    return "R_RISCV_COPY";
+  case R_RISCV_JUMP_SLOT:
+    return "R_RISCV_JUMP_SLOT";
+  case R_RISCV_TLS_DTPMOD32:
+    return "R_RISCV_TLS_DTPMOD32";
+  case R_RISCV_TLS_DTPMOD64:
+    return "R_RISCV_TLS_DTPMOD64";
+  case R_RISCV_TLS_DTPREL32:
+    return "R_RISCV_TLS_DTPREL32";
+  case R_RISCV_TLS_DTPREL64:
+    return "R_RISCV_TLS_DTPREL64";
+  case R_RISCV_TLS_TPREL32:
+    return "R_RISCV_TLS_TPREL32";
+  case R_RISCV_TLS_TPREL64:
+    return "R_RISCV_TLS_TPREL64";
+  case R_RISCV_BRANCH:
+    return "R_RISCV_BRANCH";
+  case R_RISCV_JAL:
+    return "R_RISCV_JAL";
+  case R_RISCV_CALL:
+    return "R_RISCV_CALL";
+  case R_RISCV_CALL_PLT:
+    return "R_RISCV_CALL_PLT";
+  case R_RISCV_GOT_HI20:
+    return "R_RISCV_GOT_HI20";
+  case R_RISCV_PCREL_HI20:
+    return "R_RISCV_PCREL_HI20";
+  case R_RISCV_LO12_I:
+    return "R_RISCV_LO12_I";
+  case R_RISCV_PCREL_LO12_I:
+    return "R_RISCV_PCREL_LO12_I";
+  case R_RISCV_HI20:
+    return "R_RISCV_HI20";
+  case R_RISCV_LO12_S:
+    return "R_RISCV_LO12_S";
+  case R_RISCV_PCREL_LO12_S:
+    return "R_RISCV_PCREL_LO12_S";
+  case R_RISCV_RELAX:
+    return "R_RISCV_RELAX";
+  case R_RISCV_RVC_BRANCH:
+    return "R_RISCV_RVC_BRANCH";
+  case R_RISCV_RVC_JUMP:
+    return "R_RISCV_RVC_JUMP";
+  default:
+    return "Unknown relocation type";
+  }
+}
+
+STG_NORETURN
+int32_t decodeAddendRISCV64(Section *section STG_UNUSED,
+                            Elf_Rel *rel STG_UNUSED) {
+  barf("decodeAddendRISCV64: Relocations with explicit addend are not supported."
+       " Please open a ticket; providing the causing code/binary.");
+}
+
+// Make sure that V can be represented as an N-bit signed integer.
+void checkInt(inst_t *loc, int32_t v, int n) {
+  if (v != signExtend32(v, n)) {
+    barf("Relocation at 0x%x is out of range. value: 0x%x (%d), "
+               "sign-extended value: 0x%x (%d), max bits 0x%x (%d)\n",
+               *loc, v, v, signExtend32(v, n), signExtend32(v, n), n, n);
+  }
+}
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write8le(uint8_t *p, uint8_t v) { *p = v; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint8_t read8le(const uint8_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write16le(cinst_t *p, uint16_t v) { *p = v; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint16_t read16le(const cinst_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint32_t read32le(const inst_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write32le(inst_t *p, uint32_t v) { *p = v; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint64_t read64le(const uint64_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write64le(uint64_t *p, uint64_t v) { *p = v; }
+
+uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
+  return (v & ((1ULL << (begin + 1)) - 1)) >> end;
+}
+
+// Set immediate val in the instruction at *loc. In U-type instructions the
+// upper 20 bits of the instruction carry the upper 20 bits of the immediate.
+void setUType(inst_t *loc, int32_t val) {
+  const unsigned bits = 32;
+  uint32_t hi = val + 0x800;
+  checkInt(loc, signExtend32(hi, bits) >> 12, 20);
+  IF_DEBUG(linker, debugBelch("setUType: hi 0x%x val 0x%x\n", hi, val));
+
+  uint32_t imm = hi & 0xFFFFF000;
+  write32le(loc, (read32le(loc) & 0xFFF) | imm);
+}
+
+// Set immediate val in the instruction at *loc. In I-type instructions the
+// upper 12 bits of the instruction carry the lower 12 bits of the immediate.
+void setIType(inst_t *loc, int32_t val) {
+  uint64_t hi = (val + 0x800) >> 12;
+  uint64_t lo = val - (hi << 12);
+
+  IF_DEBUG(linker, debugBelch("setIType: hi 0x%lx lo 0x%lx\n", hi, lo));
+  IF_DEBUG(linker, debugBelch("setIType: loc %p  *loc 0x%x  val 0x%x\n", loc,
+                              *loc, val));
+
+  uint32_t imm = lo & 0xfff;
+  uint32_t instr = (read32le(loc) & 0xfffff) | (imm << 20);
+
+  IF_DEBUG(linker, debugBelch("setIType: insn 0x%x\n", instr));
+  write32le(loc, instr);
+  IF_DEBUG(linker, debugBelch("setIType: loc %p  *loc' 0x%x  val 0x%x\n", loc,
+                              *loc, val));
+}
+
+// Set immediate val in the instruction at *loc. In S-type instructions the
+// lower 12 bits of the immediate are at bits 7 to 11 ([0:4]) and 25 to 31
+// ([5:11]).
+void setSType(inst_t *loc, uint32_t val) {
+  uint64_t hi = (val + 0x800) >> 12;
+  uint64_t lo = val - (hi << 12);
+
+  uint32_t imm = lo;
+  uint32_t instr = (read32le(loc) & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) |
+         (extractBits(imm, 4, 0) << 7);
+
+  write32le(loc, instr);
+}
+
+// Set immediate val in the instruction at *loc. In J-type instructions the
+// immediate has 20 bits which are pretty scattered:
+// instr bit -> imm bit
+// 31 -> 20
+// [30:21] -> [10:1]
+// 20 -> 11
+// [19:12] -> [19:12]
+//
+// N.B. bit 0 of the immediate is missing!
+void setJType(inst_t *loc, uint32_t val) {
+  checkInt(loc, val, 21);
+
+  uint32_t insn = read32le(loc) & 0xFFF;
+  uint32_t imm20 = extractBits(val, 20, 20) << 31;
+  uint32_t imm10_1 = extractBits(val, 10, 1) << 21;
+  uint32_t imm11 = extractBits(val, 11, 11) << 20;
+  uint32_t imm19_12 = extractBits(val, 19, 12) << 12;
+  insn |= imm20 | imm10_1 | imm11 | imm19_12;
+
+  write32le(loc, insn);
+}
+
+// Set immediate val in the instruction at *loc. In B-type instructions the
+// immediate has 12 bits which are pretty scattered:
+// instr bit -> imm bit
+// 31 -> 12
+// [30:25] -> [10:5]
+// [11:8] -> [4:1]
+// 7 -> 11
+//
+// N.B. bit 0 of the immediate is missing!
+void setBType(inst_t *loc, uint32_t val) {
+  checkInt(loc, val, 13);
+
+  uint32_t insn = read32le(loc) & 0x1FFF07F;
+  uint32_t imm12 = extractBits(val, 12, 12) << 31;
+  uint32_t imm10_5 = extractBits(val, 10, 5) << 25;
+  uint32_t imm4_1 = extractBits(val, 4, 1) << 8;
+  uint32_t imm11 = extractBits(val, 11, 11) << 7;
+  insn |= imm12 | imm10_5 | imm4_1 | imm11;
+
+  write32le(loc, insn);
+}
+
+
+// Set immediate val in the instruction at *loc. CB-type instructions have a
+// length of 16 bits (a half-word, compared to the usual 32-bit/word instructions).
+// The immediate has 8 bits which are pretty scattered:
+// instr bit -> imm bit
+// 12 -> 8
+// [11:10] -> [4:3]
+// [6:5] -> [7:6]
+// [4:3] -> [2:1]
+// 2 -> 5
+//
+// N.B. bit 0 of the immediate is missing!
+void setCBType(cinst_t *loc, uint32_t val) {
+  checkInt((inst_t *)loc, val, 9);
+  uint16_t insn = read16le(loc) & 0xE383;
+  uint16_t imm8 = extractBits(val, 8, 8) << 12;
+  uint16_t imm4_3 = extractBits(val, 4, 3) << 10;
+  uint16_t imm7_6 = extractBits(val, 7, 6) << 5;
+  uint16_t imm2_1 = extractBits(val, 2, 1) << 3;
+  uint16_t imm5 = extractBits(val, 5, 5) << 2;
+  insn |= imm8 | imm4_3 | imm7_6 | imm2_1 | imm5;
+
+  write16le(loc, insn);
+}
+
+// Set immediate val in the instruction at *loc. CJ-type instructions have a
+// length of 16 bits (a half-word, compared to the usual 32-bit/word instructions).
+// The immediate has 11 bits which are pretty scattered:
+// instr bit -> imm bit
+// 12 -> 11
+// 11 -> 4
+// [10:9] -> [9:8]
+// 8 -> 10
+// 7 -> 6
+// 6 -> 7
+// [5:3] -> [3:1]
+// 2 -> 5
+//
+// N.B. bit 0 of the immediate is missing!
+void setCJType(cinst_t *loc, uint32_t val) {
+  checkInt((inst_t *)loc, val, 12);
+  uint16_t insn = read16le(loc) & 0xE003;
+  uint16_t imm11 = extractBits(val, 11, 11) << 12;
+  uint16_t imm4 = extractBits(val, 4, 4) << 11;
+  uint16_t imm9_8 = extractBits(val, 9, 8) << 9;
+  uint16_t imm10 = extractBits(val, 10, 10) << 8;
+  uint16_t imm6 = extractBits(val, 6, 6) << 7;
+  uint16_t imm7 = extractBits(val, 7, 7) << 6;
+  uint16_t imm3_1 = extractBits(val, 3, 1) << 3;
+  uint16_t imm5 = extractBits(val, 5, 5) << 2;
+  insn |= imm11 | imm4 | imm9_8 | imm10 | imm6 | imm7 | imm3_1 | imm5;
+
+  write16le(loc, insn);
+}
+
+// Encode the addend into the instruction according to the relocation type.
+bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend) {
+  // instruction to rewrite (P: Position of the relocation)
+  addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset);
+  IF_DEBUG(linker,
+           debugBelch(
+               "Relocation type %s 0x%lx (%lu) symbol 0x%lx addend 0x%x (%u / "
+               "%d) P 0x%lx\n",
+               relocationTypeToString(rel->r_info), ELF64_R_TYPE(rel->r_info),
+               ELF64_R_TYPE(rel->r_info), ELF64_R_SYM(rel->r_info), addend,
+               addend, addend, P));
+  switch (ELF64_R_TYPE(rel->r_info)) {
+  case R_RISCV_32_PCREL:
+  case R_RISCV_32:
+    write32le((inst_t *)P, addend);
+    break;
+  case R_RISCV_64:
+    write64le((uint64_t *)P, addend);
+    break;
+  case R_RISCV_GOT_HI20:
+  case R_RISCV_PCREL_HI20:
+  case R_RISCV_HI20: {
+    setUType((inst_t *)P, addend);
+    break;
+  }
+  case R_RISCV_PCREL_LO12_I:
+  case R_RISCV_LO12_I: {
+    setIType((inst_t *)P, addend);
+    break;
+  }
+  case R_RISCV_RVC_JUMP: {
+    setCJType((cinst_t *)P, addend);
+    break;
+  }
+  case R_RISCV_RVC_BRANCH: {
+    setCBType((cinst_t *)P, addend);
+    break;
+  }
+  case R_RISCV_BRANCH: {
+    setBType((inst_t *)P, addend);
+    break;
+  }
+  case R_RISCV_CALL:
+  case R_RISCV_CALL_PLT: {
+    // We could relax more (in some cases), but for now the priority is to
+    // make it work.
+    setUType((inst_t *)P, addend);
+    setIType(((inst_t *)P) + 1, addend);
+    break;
+  }
+  case R_RISCV_JAL: {
+    setJType((inst_t *)P, addend);
+    break;
+  }
+  case R_RISCV_ADD8:
+    write8le((uint8_t *)P, read8le((uint8_t *)P) + addend);
+    break;
+  case R_RISCV_ADD16:
+    write16le((cinst_t *)P, read16le((cinst_t *)P) + addend);
+    break;
+  case R_RISCV_ADD32:
+    write32le((inst_t *)P, read32le((inst_t *)P) + addend);
+    break;
+  case R_RISCV_ADD64:
+    write64le((uint64_t *)P, read64le((uint64_t *)P) + addend);
+    break;
+  case R_RISCV_SUB6: {
+    uint8_t keep = *((uint8_t *)P) & 0xc0;
+    uint8_t imm = (((*(uint8_t *)P) & 0x3f) - addend) & 0x3f;
+
+    write8le((uint8_t *)P, keep | imm);
+    break;
+  }
+  case R_RISCV_SUB8:
+    write8le((uint8_t *)P, read8le((uint8_t *)P) - addend);
+    break;
+  case R_RISCV_SUB16:
+    write16le((cinst_t *)P, read16le((cinst_t *)P) - addend);
+    break;
+  case R_RISCV_SUB32:
+    write32le((inst_t *)P, read32le((inst_t *)P) - addend);
+    break;
+  case R_RISCV_SUB64:
+    write64le((uint64_t *)P, read64le((uint64_t *)P) - addend);
+    break;
+  case R_RISCV_SET6: {
+    uint8_t keep = *((uint8_t *)P) & 0xc0;
+    uint8_t imm = (addend & 0x3f) & 0x3f;
+
+    write8le((uint8_t *)P, keep | imm);
+    break;
+  }
+  case R_RISCV_SET8:
+    write8le((uint8_t *)P, addend);
+    break;
+  case R_RISCV_SET16:
+    write16le((cinst_t *)P, addend);
+    break;
+  case R_RISCV_SET32:
+    write32le((inst_t *)P, addend);
+    break;
+  case R_RISCV_PCREL_LO12_S:
+  case R_RISCV_TPREL_LO12_S:
+  case R_RISCV_LO12_S: {
+    setSType((inst_t *)P, addend);
+    break;
+  }
+  case R_RISCV_RELAX:
+  case R_RISCV_ALIGN:
+    // Relaxations (rewriting instructions to more efficient ones) could be
+    // implemented in the future. As the code is already aligned and we do not
+    // change the instruction sizes, we should get away with not aligning
+    // (though that is cheating). To align or change the instruction count, we
+    // would need machinery to squeeze or extend memory at the current location.
+    break;
+  default:
+    barf("Missing relocation 0x%lx\n", ELF64_R_TYPE(rel->r_info));
+  }
+  return EXIT_SUCCESS;
+}
+
+/**
+ * Compute the *new* addend for a relocation, given a pre-existing addend.
+ * @param relaTab The relocation table the relocation belongs to.
+ * @param relNo   The index of the relocation within relaTab.
+ * @param rel     The Relocation struct.
+ * @param symbol  The target symbol.
+ * @param addend  The existing addend. Either explicit or implicit.
+ * @param oc      The object code being relocated.
+ * @return The new computed addend.
+ */
+int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol,
+                      int64_t addend, ObjectCode *oc) {
+  Section * section = &oc->sections[relaTab->targetSectionIndex];
+
+  // instruction to rewrite (P: Position of the relocation)
+  addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset);
+
+  CHECK(0x0 != P);
+  CHECK((uint64_t)section->start <= P);
+  CHECK(P <= (uint64_t)section->start + section->size);
+  // S: Value of the symbol in the symbol table
+  addr_t S = (addr_t)symbol->addr;
+  /* GOT slot for the symbol (G + GOT) */
+  addr_t GOT_S = (addr_t)symbol->got_addr;
+
+  // A: Addend field in the relocation entry associated with the symbol
+  int64_t A = addend;
+
+  IF_DEBUG(linker, debugBelch("%s: P 0x%lx S 0x%lx %s GOT_S 0x%lx A 0x%lx relNo %u\n",
+                              relocationTypeToString(rel->r_info), P, S,
+                              symbol->name, GOT_S, A, relNo));
+  switch (ELF64_R_TYPE(rel->r_info)) {
+  case R_RISCV_32:
+    return S + A;
+  case R_RISCV_64:
+    return S + A;
+  case R_RISCV_HI20:
+    return S + A;
+  case R_RISCV_JUMP_SLOT:
+    return S;
+  case R_RISCV_JAL:
+    return S + A - P;
+  case R_RISCV_PCREL_HI20:
+    return S + A - P;
+  case R_RISCV_LO12_I:
+    return S + A;
+    // Quoting LLVM docs: For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}),
+    // the symbol actually points to the corresponding R_RISCV_PCREL_HI20
+    // relocation, and the target VA is calculated using PCREL_HI20's symbol.
+  case R_RISCV_PCREL_LO12_S:
+    FALLTHROUGH;
+  case R_RISCV_PCREL_LO12_I: {
+    // Look up the related HI20 relocation and use that value. I'm still confused
+    // why relocations aren't self-contained, but this is how LLVM does it. And
+    // calculating the lower 12 bits without any relationship to the GOT entry's
+    // address makes no sense either.
+      for (int64_t i = relNo; i >= 0 ; i--) {
+        Elf_Rela *rel_prime = &relaTab->relocations[i];
+
+        addr_t P_prime =
+            (addr_t)((uint8_t *)section->start + rel_prime->r_offset);
+
+        if (P_prime != S) {
+          // S points to the P of the corresponding *_HI20 relocation.
+          continue;
+        }
+
+        ElfSymbol *symbol_prime =
+            findSymbol(oc, relaTab->sectionHeader->sh_link,
+                       ELF64_R_SYM((Elf64_Xword)rel_prime->r_info));
+
+        CHECK(0x0 != symbol_prime);
+
+        /* take explicit addend */
+        int64_t addend_prime = rel_prime->r_addend;
+
+        uint64_t type_prime = ELF64_R_TYPE(rel_prime->r_info);
+
+        if (type_prime == R_RISCV_PCREL_HI20 ||
+            type_prime == R_RISCV_GOT_HI20 ||
+            type_prime == R_RISCV_TLS_GD_HI20 ||
+            type_prime == R_RISCV_TLS_GOT_HI20) {
+          IF_DEBUG(linker,
+                   debugBelch(
+                       "Found matching relocation: %s (P: 0x%lx, S: 0x%lx, "
+                       "sym-name: %s) -> %s (P: 0x%lx, S: %p, sym-name: %s, relNo: %ld)",
+                       relocationTypeToString(rel->r_info), P, S, symbol->name,
+                       relocationTypeToString(rel_prime->r_info), P_prime,
+                       symbol_prime->addr, symbol_prime->name, i));
+          int32_t result = computeAddend(relaTab, i, (Elf_Rel *)rel_prime,
+                                         symbol_prime, addend_prime, oc);
+          IF_DEBUG(linker, debugBelch("Result of computeAddend: 0x%x (%d)\n",
+                                      result, result));
+          return result;
+        }
+    }
+    debugBelch("Missing HI relocation for %s: P 0x%lx S 0x%lx %s\n",
+               relocationTypeToString(rel->r_info), P, S, symbol->name);
+    abort();
+  }
+
+  case R_RISCV_RVC_JUMP:
+    return S + A - P;
+  case R_RISCV_RVC_BRANCH:
+    return S + A - P;
+  case R_RISCV_BRANCH:
+    return S + A - P;
+  case R_RISCV_CALL:
+  case R_RISCV_CALL_PLT: {
+    addr_t GOT_Target;
+    if (GOT_S != 0) {
+      // 1. Public symbol with GOT entry.
+      GOT_Target = GOT_S;
+    } else {
+      // 2. Fake GOT entry with symbol extra entry.
+      SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S);
+      addr_t* FAKE_GOT_S = &symbolExtra->addr;
+      IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT w/ SymbolExtra = %p , "
+                                  "entry = %p\n",
+                                  symbolExtra, FAKE_GOT_S));
+      GOT_Target = (addr_t) FAKE_GOT_S;
+    }
+
+    if (findStub(section, (void **)&S, 0)) {
+      /* did not find it. Create a new stub. */
+      if (makeStub(section, (void **)&S, (void *)GOT_Target, 0)) {
+        abort(/* could not find or make stub */);
+      }
+    }
+    IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT: S = 0x%lx A = 0x%lx P = "
+                                "0x%lx (S + A) - P = 0x%lx \n",
+                                S, A, P, (S + A) - P));
+    return (S + A) - P;
+  }
+  case R_RISCV_ADD8:
+    FALLTHROUGH;
+  case R_RISCV_ADD16:
+    FALLTHROUGH;
+  case R_RISCV_ADD32:
+    FALLTHROUGH;
+  case R_RISCV_ADD64:
+    return S + A; // Add V when the value is set
+  case R_RISCV_SUB6:
+    FALLTHROUGH;
+  case R_RISCV_SUB8:
+    FALLTHROUGH;
+  case R_RISCV_SUB16:
+    FALLTHROUGH;
+  case R_RISCV_SUB32:
+    FALLTHROUGH;
+  case R_RISCV_SUB64:
+    return S + A; // Subtract from V when value is set
+  case R_RISCV_SET6:
+    FALLTHROUGH;
+  case R_RISCV_SET8:
+    FALLTHROUGH;
+  case R_RISCV_SET16:
+    FALLTHROUGH;
+  case R_RISCV_SET32:
+    return S + A;
+  case R_RISCV_RELAX:
+    // This "relocation" has no addend.
+    FALLTHROUGH;
+  case R_RISCV_ALIGN:
+    // I guess we don't need to implement this relaxation. Otherwise, this
+    // should return the number of blank bytes to insert via NOPs.
+    return 0;
+  case R_RISCV_32_PCREL:
+    return S + A - P;
+  case R_RISCV_GOT_HI20: {
+    // TODO: Allocating extra memory for every symbol just to play this trick
+    // seems to be a bit obscene. (GOT relocations hitting local symbols
+    // happen, but not very often.) It would be better to allocate only what we
+    // really need.
+
+    // There are two cases here: 1. The symbol is public and has an entry in the
+    // GOT. 2. It's local and has no corresponding GOT entry. The first case is
+    // easy: We simply calculate the addend with the GOT address. In the second
+    // case we create a symbol extra entry and pretend it's the GOT.
+    if (GOT_S != 0) {
+      // 1. Public symbol with GOT entry.
+      return GOT_S + A - P;
+    } else {
+      // 2. Fake GOT entry with symbol extra entry.
+      SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S);
+      addr_t* FAKE_GOT_S = &symbolExtra->addr;
+      addr_t res = (addr_t) FAKE_GOT_S + A - P;
+      IF_DEBUG(linker, debugBelch("R_RISCV_GOT_HI20 w/ SymbolExtra = %p , "
+                                  "entry = %p , reloc-addend = 0x%lu ",
+                                  symbolExtra, FAKE_GOT_S, res));
+      return res;
+    }
+  }
+  default:
+    barf("Unimplemented relocation: 0x%lx\n (%lu)",
+               ELF64_R_TYPE(rel->r_info), ELF64_R_TYPE(rel->r_info));
+  }
+  barf("This should never happen!");
+}
+
+// Iterate over all relocations and perform them.
+bool relocateObjectCodeRISCV64(ObjectCode *oc) {
+  for (ElfRelocationTable *relTab = oc->info->relTable; relTab != NULL;
+       relTab = relTab->next) {
+    /* only relocate interesting sections */
+    if (SECTIONKIND_OTHER == oc->sections[relTab->targetSectionIndex].kind)
+      continue;
+
+    Section *targetSection = &oc->sections[relTab->targetSectionIndex];
+
+    for (unsigned i = 0; i < relTab->n_relocations; i++) {
+      Elf_Rel *rel = &relTab->relocations[i];
+
+      ElfSymbol *symbol = findSymbol(oc, relTab->sectionHeader->sh_link,
+                                     ELF64_R_SYM((Elf64_Xword)rel->r_info));
+
+      CHECK(0x0 != symbol);
+
+      // This always fails, because we don't support Rel relocations yet. Do we
+      // need this case at all? Leaving it in to spot the potential bug when it
+      // appears.
+      /* decode implicit addend */
+      int64_t addend = decodeAddendRISCV64(targetSection, rel);
+
+      addend = computeAddend((ElfRelocationATable*) relTab, i, rel, symbol, addend, oc);
+      encodeAddendRISCV64(targetSection, rel, addend);
+    }
+  }
+  for (ElfRelocationATable *relaTab = oc->info->relaTable; relaTab != NULL;
+       relaTab = relaTab->next) {
+    /* only relocate interesting sections */
+    if (SECTIONKIND_OTHER == oc->sections[relaTab->targetSectionIndex].kind)
+      continue;
+
+    Section *targetSection = &oc->sections[relaTab->targetSectionIndex];
+
+    for (unsigned i = 0; i < relaTab->n_relocations; i++) {
+
+      Elf_Rela *rel = &relaTab->relocations[i];
+
+      ElfSymbol *symbol = findSymbol(oc, relaTab->sectionHeader->sh_link,
+                                     ELF64_R_SYM((Elf64_Xword)rel->r_info));
+
+      CHECK(0x0 != symbol);
+
+      /* take explicit addend */
+      int64_t addend = rel->r_addend;
+
+      addend = computeAddend(relaTab, i, (Elf_Rel *)rel, symbol, addend, oc);
+      encodeAddendRISCV64(targetSection, (Elf_Rel *)rel, addend);
+    }
+  }
+  return EXIT_SUCCESS;
+}
+
+void flushInstructionCacheRISCV64(ObjectCode *oc) {
+  // Synchronize the memory and instruction cache to prevent illegal instruction
+  // exceptions. On Linux the parameters of __builtin___clear_cache are
+  // currently unused. Add them anyway for future compatibility. (That is, the
+  // parameters could not be checked during development.)
+
+  /* The main object code */
+  void *codeBegin = oc->image + oc->misalignment;
+  __builtin___clear_cache(codeBegin, (void*) ((uint64_t*) codeBegin + oc->fileSize));
+
+  /* Jump Islands */
+  __builtin___clear_cache((void *)oc->symbol_extras,
+                          (void *)(oc->symbol_extras + oc->n_symbol_extras));
+
+  // Memory barrier to ensure nothing circumvents the fence.i / cache flushes.
+  SEQ_CST_FENCE();
+}
+
+#endif /* OBJECTFORMAT_ELF */
+#endif /* riscv64_HOST_ARCH */
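
As a small illustrative check of the extractBits helper defined above (copied
here so the snippet is self-contained): it selects the inclusive bit range
[begin:end] of a value, which the set*Type encoders then shift into the
scattered instruction fields.

    #include <assert.h>
    #include <stdint.h>

    static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
      return (v & ((1ULL << (begin + 1)) - 1)) >> end;
    }

    int main(void) {
      assert(extractBits(0xABCD, 15, 12) == 0xA); // top nibble
      assert(extractBits(0xABCD, 11, 8)  == 0xB);
      assert(extractBits(0xABCD, 3, 0)   == 0xD); // bottom nibble
      // J-type example: for an offset of 0x800 (only bit 11 set), only the
      // imm[11] field is non-zero; all other J-type immediate fields are 0.
      uint32_t val = 0x800;
      assert(extractBits(val, 20, 20) == 0);
      assert(extractBits(val, 10, 1)  == 0);
      assert(extractBits(val, 11, 11) == 1);
      assert(extractBits(val, 19, 12) == 0);
      return 0;
    }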


=====================================
rts/linker/elf_reloc_riscv64.h
=====================================
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "LinkerInternals.h"
+
+#if defined(OBJFORMAT_ELF)
+
+bool
+relocateObjectCodeRISCV64(ObjectCode * oc);
+
+void flushInstructionCacheRISCV64(ObjectCode *oc);
+#endif /* OBJETFORMAT_ELF */


=====================================
rts/rts.cabal
=====================================
@@ -468,9 +468,11 @@ library
                  linker/elf_got.c
                  linker/elf_plt.c
                  linker/elf_plt_aarch64.c
+                 linker/elf_plt_riscv64.c
                  linker/elf_plt_arm.c
                  linker/elf_reloc.c
                  linker/elf_reloc_aarch64.c
+                 linker/elf_reloc_riscv64.c
                  linker/elf_tlsgd.c
                  linker/elf_util.c
                  sm/BlockAlloc.c


=====================================
testsuite/tests/codeGen/should_run/CCallConv.hs
=====================================
@@ -0,0 +1,123 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+{-# LANGUAGE GHCForeignImportPrim #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+
+-- | This test ensures that sub-word signed and unsigned parameters are correctly
+-- handed over to C functions, i.e. it asserts the calling convention.
+--
+-- The number of parameters is currently tailored to the RISCV64 calling convention.
+-- You may need to add more parameters to the C functions if your architecture
+-- reserves more registers for parameter passing.
+module Main where
+
+import Data.Word
+import GHC.Exts
+import GHC.Int
+
+foreign import ccall "fun8"
+  fun8 ::
+    Int8# -> -- a0
+    Word8# -> -- a1
+    Int8# -> -- a2
+    Int8# -> -- a3
+    Int8# -> -- a4
+    Int8# -> -- a5
+    Int8# -> -- a6
+    Int8# -> -- a7
+    Word8# -> -- s0
+    Int8# -> -- s1
+    Int64# -- result
+
+foreign import ccall "fun16"
+  fun16 ::
+    Int16# -> -- a0
+    Word16# -> -- a1
+    Int16# -> -- a2
+    Int16# -> -- a3
+    Int16# -> -- a4
+    Int16# -> -- a5
+    Int16# -> -- a6
+    Int16# -> -- a7
+    Word16# -> -- s0
+    Int16# -> -- s1
+    Int64# -- result
+
+foreign import ccall "fun32"
+  fun32 ::
+    Int32# -> -- a0
+    Word32# -> -- a1
+    Int32# -> -- a2
+    Int32# -> -- a3
+    Int32# -> -- a4
+    Int32# -> -- a5
+    Int32# -> -- a6
+    Int32# -> -- a7
+    Word32# -> -- s0
+    Int32# -> -- s1
+    Int64# -- result
+
+foreign import ccall "funFloat"
+  funFloat ::
+    Float# -> -- a0
+    Float# -> -- a1
+    Float# -> -- a2
+    Float# -> -- a3
+    Float# -> -- a4
+    Float# -> -- a5
+    Float# -> -- a6
+    Float# -> -- a7
+    Float# -> -- s0
+    Float# -> -- s1
+    Float# -- result
+
+foreign import ccall "funDouble"
+  funDouble ::
+    Double# -> -- a0
+    Double# -> -- a1
+    Double# -> -- a2
+    Double# -> -- a3
+    Double# -> -- a4
+    Double# -> -- a5
+    Double# -> -- a6
+    Double# -> -- a7
+    Double# -> -- s0
+    Double# -> -- s1
+    Double# -- result
+
+main :: IO ()
+main =
+  -- N.B. the values here aren't chosen by accident: -1 means all bits set in
+  -- two's complement, which is the same as the maximum word value.
+  let i8 :: Int8# = intToInt8# (-1#)
+      w8 :: Word8# = wordToWord8# (255##)
+      res8 :: Int64# = fun8 i8 w8 i8 i8 i8 i8 i8 i8 w8 i8
+      expected_res8 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word8) + 8 * (-1)
+      i16 :: Int16# = intToInt16# (-1#)
+      w16 :: Word16# = wordToWord16# (65535##)
+      res16 :: Int64# = fun16 i16 w16 i16 i16 i16 i16 i16 i16 w16 i16
+      expected_res16 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word16) + 8 * (-1)
+      i32 :: Int32# = intToInt32# (-1#)
+      w32 :: Word32# = wordToWord32# (4294967295##)
+      res32 :: Int64# = fun32 i32 w32 i32 i32 i32 i32 i32 i32 w32 i32
+      expected_res32 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word32) + 8 * (-1)
+      resFloat :: Float = F# (funFloat 1.0# 1.1# 1.2# 1.3# 1.4# 1.5# 1.6# 1.7# 1.8# 1.9#)
+      resDouble :: Double = D# (funDouble 1.0## 1.1## 1.2## 1.3## 1.4## 1.5## 1.6## 1.7## 1.8## 1.9##)
+   in do
+        print $ "fun8 result:" ++ show (I64# res8)
+        assertEqual expected_res8 (I64# res8)
+        print $ "fun16 result:" ++ show (I64# res16)
+        assertEqual expected_res16 (I64# res16)
+        print $ "fun32 result:" ++ show (I64# res32)
+        assertEqual expected_res32 (I64# res32)
+        print $ "funFloat result:" ++ show resFloat
+        assertEqual (14.5 :: Float) resFloat
+        print $ "funDouble result:" ++ show resDouble
+        assertEqual (14.5 :: Double) resDouble
+
+assertEqual :: (Eq a, Show a) => a -> a -> IO ()
+assertEqual a b =
+  if a == b
+    then pure ()
+    else error $ show a ++ " =/= " ++ show b


=====================================
testsuite/tests/codeGen/should_run/CCallConv.stdout
=====================================
@@ -0,0 +1,60 @@
+"fun8 result:502"
+"fun16 result:131062"
+"fun32 result:8589934582"
+"funFloat result:14.5"
+"funDouble result:14.5"
+fun32:
+a0: 0xffffffff -1
+a1: 0xffffffff 4294967295
+a2: 0xffffffff -1
+a3: 0xffffffff -1
+a4: 0xffffffff -1
+a5: 0xffffffff -1
+a6: 0xffffffff -1
+a7: 0xffffffff -1
+s0: 0xffffffff -1
+s1: 0xffffffff 4294967295
+fun16:
+a0: 0xffffffff -1
+a1: 0xffff 65535
+a2: 0xffffffff -1
+a3: 0xffffffff -1
+a4: 0xffffffff -1
+a5: 0xffffffff -1
+a6: 0xffffffff -1
+a7: 0xffffffff -1
+s0: 0xffffffff -1
+s1: 0xffff 65535
+fun8:
+a0: 0xffffffff -1
+a1: 0xff 255
+a2: 0xffffffff -1
+a3: 0xffffffff -1
+a4: 0xffffffff -1
+a5: 0xffffffff -1
+a6: 0xffffffff -1
+a7: 0xffffffff -1
+s0: 0xffffffff -1
+s1: 0xff 255
+funFloat:
+a0: 1.000000
+a1: 1.100000
+a2: 1.200000
+a3: 1.300000
+a4: 1.400000
+a5: 1.500000
+a6: 1.600000
+a7: 1.700000
+s0: 1.800000
+s1: 1.900000
+funDouble:
+a0: 1.000000
+a1: 1.100000
+a2: 1.200000
+a3: 1.300000
+a4: 1.400000
+a5: 1.500000
+a6: 1.600000
+a7: 1.700000
+s0: 1.800000
+s1: 1.900000


=====================================
testsuite/tests/codeGen/should_run/CCallConv_c.c
=====================================
@@ -0,0 +1,91 @@
+#include "stdint.h"
+#include "stdio.h"
+
+int64_t fun8(int8_t a0, uint8_t a1, int8_t a2, int8_t a3, int8_t a4, int8_t a5,
+             int8_t a6, int8_t a7, int8_t s0, uint8_t s1) {
+  printf("fun8:\n");
+  printf("a0: %#x %hhd\n", a0, a0);
+  printf("a1: %#x %hhu\n", a1, a1);
+  printf("a2: %#x %hhd\n", a2, a2);
+  printf("a3: %#x %hhd\n", a3, a3);
+  printf("a4: %#x %hhd\n", a4, a4);
+  printf("a5: %#x %hhd\n", a5, a5);
+  printf("a6: %#x %hhd\n", a6, a6);
+  printf("a7: %#x %hhd\n", a7, a7);
+  printf("s0: %#x %hhd\n", s0, s0);
+  printf("s1: %#x %hhu\n", s1, s1);
+
+  return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
+
+int64_t fun16(int16_t a0, uint16_t a1, int16_t a2, int16_t a3, int16_t a4,
+              int16_t a5, int16_t a6, int16_t a7, int16_t s0, uint16_t s1) {
+  printf("fun16:\n");
+  printf("a0: %#x %hd\n", a0, a0);
+  printf("a1: %#x %hu\n", a1, a1);
+  printf("a2: %#x %hd\n", a2, a2);
+  printf("a3: %#x %hd\n", a3, a3);
+  printf("a4: %#x %hd\n", a4, a4);
+  printf("a5: %#x %hd\n", a5, a5);
+  printf("a6: %#x %hd\n", a6, a6);
+  printf("a7: %#x %hd\n", a7, a7);
+  printf("s0: %#x %hd\n", s0, s0);
+  printf("s1: %#x %hu\n", s1, s1);
+
+  return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
+
+int64_t fun32(int32_t a0, uint32_t a1, int32_t a2, int32_t a3, int32_t a4,
+              int32_t a5, int32_t a6, int32_t a7, int32_t s0, uint32_t s1) {
+  printf("fun32:\n");
+  printf("a0: %#x %d\n", a0, a0);
+  printf("a1: %#x %u\n", a1, a1);
+  printf("a2: %#x %d\n", a2, a2);
+  printf("a3: %#x %d\n", a3, a3);
+  printf("a4: %#x %d\n", a4, a4);
+  printf("a5: %#x %d\n", a5, a5);
+  printf("a6: %#x %d\n", a6, a6);
+  printf("a7: %#x %d\n", a7, a7);
+  printf("s0: %#x %d\n", s0, s0);
+  printf("s1: %#x %u\n", s1, s1);
+
+  // Ensure the addition happens in long int (not just int) precision.
+  // Otherwise, the result is truncated during the operation.
+  int64_t force_int64_precission = 0;
+  return force_int64_precission + a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 +
+         s1;
+}
+
+float funFloat(float a0, float a1, float a2, float a3, float a4, float a5,
+             float a6, float a7, float s0, float s1) {
+  printf("funFloat:\n");
+  printf("a0: %f\n", a0);
+  printf("a1: %f\n", a1);
+  printf("a2: %f\n", a2);
+  printf("a3: %f\n", a3);
+  printf("a4: %f\n", a4);
+  printf("a5: %f\n", a5);
+  printf("a6: %f\n", a6);
+  printf("a7: %f\n", a7);
+  printf("s0: %f\n", s0);
+  printf("s1: %f\n", s1);
+
+  return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
+
+double funDouble(double a0, double a1, double a2, double a3, double a4, double a5,
+             double a6, double a7, double s0, double s1) {
+  printf("funDouble:\n");
+  printf("a0: %f\n", a0);
+  printf("a1: %f\n", a1);
+  printf("a2: %f\n", a2);
+  printf("a3: %f\n", a3);
+  printf("a4: %f\n", a4);
+  printf("a5: %f\n", a5);
+  printf("a6: %f\n", a6);
+  printf("a7: %f\n", a7);
+  printf("s0: %f\n", s0);
+  printf("s1: %f\n", s1);
+
+  return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
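
A note on the expected output above: the 0xffffffff lines for the sub-word -1
arguments come from C's default argument promotions in the variadic printf call
(the int8_t/int16_t values are sign-extended to int), not from the calling
convention itself. An illustrative snippet, not part of the test:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      int8_t a = -1;
      uint8_t b = 255;
      printf("%#x %hhd\n", a, a); // prints 0xffffffff -1
      printf("%#x %hhu\n", b, b); // prints 0xff 255
      return 0;
    }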


=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -231,10 +231,10 @@ test('OrigThunkInfo', normal, compile_and_run, ['-forig-thunk-info'])
 # not implemeted correctly (according to
 # Note [MO_S_MulMayOflo significant width]) and may require fixing/adjustment.
 test('MulMayOflo_full',
-     [ extra_files(['MulMayOflo.hs']),
+     [ extra_files(['MulMayOflo.hs', 'MulMayOflo_full.cmm']),
        when(unregisterised(), skip),
        unless(
-         arch('aarch64') or arch('x86_64') or arch('i386'),
+         arch('aarch64') or arch('x86_64') or arch('i386') or arch('riscv64'),
          expect_broken(23742)
        ),
         ignore_stdout],
@@ -243,3 +243,8 @@ test('MulMayOflo_full',
 test('T24264run', normal, compile_and_run, [''])
 test('T24295a', normal, compile_and_run, ['-O -floopification'])
 test('T24295b', normal, compile_and_run, ['-O -floopification -fpedantic-bottoms'])
+
+test('CCallConv',
+     [],
+     multi_compile_and_run,
+     ['CCallConv', [('CCallConv_c.c', '')], ''])


=====================================
testsuite/tests/rts/all.T
=====================================
@@ -65,6 +65,10 @@ test('divbyzero',
       # each devision. Neither gcc, nor llvm do this as of right now.  Microsoft
       # apparently does so though?
       when(arch('aarch64'), skip),
+      # RISCV64 does not raise an exception for this either. Instead, all bits
+      # of the result are set. As with Aarch64, neither GCC nor LLVM translate
+      # this result into an exception.
+      when(arch('riscv64'), skip),
       # Apparently the output can be different on different
       # Linux setups, so just ignore it. As long as we get
       # the right exit code we're OK.



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1b473a3516a42c3e857eddb4f89e5330aa7e1143...253bd61c29cc6b129f031ee6c44be82b32cda35c


