[Git][ghc/ghc][wip/supersven/ghc-9.10-riscv-ncg] 8 commits: Add RTS linker for RISCV64
Sven Tennie (@supersven)
gitlab at gitlab.haskell.org
Mon Aug 5 06:39:30 UTC 2024
Sven Tennie pushed to branch wip/supersven/ghc-9.10-riscv-ncg at Glasgow Haskell Compiler / GHC
Commits:
2b03e102 by Sven Tennie at 2024-08-05T08:37:32+02:00
Add RTS linker for RISCV64
This architecture wasn't supported before.
Co-authored-by: Moritz Angermann <moritz.angermann at gmail.com>
- - - - -
dc252e4d by Sven Tennie at 2024-08-05T08:37:49+02:00
Ignore divbyzero test for RISCV64
The architecture's behaviour differs from the test's expectations. See
comment in code why this is okay.
- - - - -
d73b06ee by Sven Tennie at 2024-08-05T08:37:49+02:00
Enable MulMayOflo_full test for RISCV64
It works and thus can be tested.
- - - - -
a37e2832 by Sven Tennie at 2024-08-05T08:37:49+02:00
LibffiAdjustor: Ensure code caches are flushed (RISCV64)
RISCV64 needs a specific code flushing sequence (involving fence.i) when
new code is created/loaded.
- - - - -
636a7359 by Sven Tennie at 2024-08-05T08:37:49+02:00
Add additional linker symbols for builtins (RISCV64)
We're relying on some GCC/Clang builtins. These need to be visible to
the linker (and not be stripped away.)
- - - - -
e95177db by Sven Tennie at 2024-08-05T08:37:49+02:00
Add GHCi support for RISCV64
As we got a RTS linker for this architecture now, we can enable GHCi for
it.
- - - - -
d5a3e8ab by Sven Tennie at 2024-08-05T08:37:49+02:00
Set codeowners of the RISCV64 NCG
- - - - -
253bd61c by Sven Tennie at 2024-08-05T08:37:49+02:00
Add test for C calling convention
Ensure that parameters and return values are correctly processed. A
dedicated test (like this) helps to get the subtleties of calling
conventions easily right.
- - - - -
25 changed files:
- CODEOWNERS
- hadrian/bindist/config.mk.in
- rts/LinkerInternals.h
- rts/RtsSymbols.c
- rts/adjustor/LibffiAdjustor.c
- rts/linker/Elf.c
- rts/linker/ElfTypes.h
- rts/linker/SymbolExtras.c
- rts/linker/SymbolExtras.h
- rts/linker/elf_plt.c
- rts/linker/elf_plt.h
- + rts/linker/elf_plt_riscv64.c
- + rts/linker/elf_plt_riscv64.h
- rts/linker/elf_reloc.c
- rts/linker/elf_reloc.h
- rts/linker/elf_reloc_aarch64.c
- rts/linker/elf_reloc_aarch64.h
- + rts/linker/elf_reloc_riscv64.c
- + rts/linker/elf_reloc_riscv64.h
- rts/rts.cabal
- + testsuite/tests/codeGen/should_run/CCallConv.hs
- + testsuite/tests/codeGen/should_run/CCallConv.stdout
- + testsuite/tests/codeGen/should_run/CCallConv_c.c
- testsuite/tests/codeGen/should_run/all.T
- testsuite/tests/rts/all.T
Changes:
=====================================
CODEOWNERS
=====================================
@@ -40,6 +40,7 @@
/compiler/GHC/HsToCore/Foreign/Wasm.hs @TerrorJack
/compiler/GHC/Tc/Deriv/ @RyanGlScott
/compiler/GHC/CmmToAsm/ @simonmar @bgamari @AndreasK
+/compiler/GHC/CmmToAsm/RV64/ @supersven @angerman
/compiler/GHC/CmmToAsm/Wasm/ @TerrorJack
/compiler/GHC/CmmToLlvm/ @angerman
/compiler/GHC/StgToCmm/ @simonmar @osa1
=====================================
hadrian/bindist/config.mk.in
=====================================
@@ -152,7 +152,7 @@ GhcWithSMP := $(strip $(if $(filter YESNO, $(ArchSupportsSMP)$(GhcUnregisterised
# Whether to include GHCi in the compiler. Depends on whether the RTS linker
# has support for this OS/ARCH combination.
OsSupportsGHCi=$(strip $(patsubst $(TargetOS_CPP), YES, $(findstring $(TargetOS_CPP), mingw32 linux solaris2 freebsd dragonfly netbsd openbsd darwin kfreebsdgnu)))
-ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64)))
+ArchSupportsGHCi=$(strip $(patsubst $(TargetArch_CPP), YES, $(findstring $(TargetArch_CPP), i386 x86_64 powerpc powerpc64 powerpc64le sparc sparc64 arm aarch64 riscv64)))
ifeq "$(OsSupportsGHCi)$(ArchSupportsGHCi)" "YESYES"
GhcWithInterpreter=YES
=====================================
rts/LinkerInternals.h
=====================================
@@ -208,7 +208,7 @@ typedef struct _Segment {
int n_sections;
} Segment;
-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
#define NEED_SYMBOL_EXTRAS 1
#endif
@@ -220,8 +220,9 @@ typedef struct _Segment {
#define NEED_M32 1
#endif
-/* Jump Islands are sniplets of machine code required for relative
- * address relocations on the PowerPC, x86_64 and ARM.
+/* Jump Islands are sniplets of machine code required for relative address
+ * relocations on the PowerPC, x86_64 and ARM. On RISCV64 we use symbolextras
+ * like a GOT for locals where SymbolExtra represents one entry.
*/
typedef struct {
#if defined(powerpc_HOST_ARCH)
@@ -237,6 +238,8 @@ typedef struct {
uint8_t jumpIsland[8];
#elif defined(arm_HOST_ARCH)
uint8_t jumpIsland[16];
+#elif defined(riscv64_HOST_ARCH)
+ uint64_t addr;
#endif
} SymbolExtra;
=====================================
rts/RtsSymbols.c
=====================================
@@ -980,6 +980,17 @@ extern char **environ;
#define RTS_LIBGCC_SYMBOLS
#endif
+#if defined(riscv64_HOST_ARCH)
+// See https://gcc.gnu.org/onlinedocs/gccint/Integer-library-routines.html as
+// reference for the following built-ins. __clzdi2 and __ctzdi2 probably relate
+// to __builtin-s in libraries/ghc-prim/cbits/ctz.c.
+#define RTS_ARCH_LIBGCC_SYMBOLS \
+ SymI_NeedsProto(__clzdi2) \
+ SymI_NeedsProto(__ctzdi2)
+#else
+#define RTS_ARCH_LIBGCC_SYMBOLS
+#endif
+
// Symbols defined by libgcc/compiler-rt for AArch64's outline atomics.
#if defined(HAVE_ARM_OUTLINE_ATOMICS)
#include "ARMOutlineAtomicsSymbols.h"
@@ -1032,6 +1043,7 @@ RTS_DARWIN_ONLY_SYMBOLS
RTS_OPENBSD_ONLY_SYMBOLS
RTS_LIBC_SYMBOLS
RTS_LIBGCC_SYMBOLS
+RTS_ARCH_LIBGCC_SYMBOLS
RTS_FINI_ARRAY_SYMBOLS
RTS_LIBFFI_SYMBOLS
RTS_ARM_OUTLINE_ATOMIC_SYMBOLS
@@ -1074,6 +1086,7 @@ RtsSymbolVal rtsSyms[] = {
RTS_DARWIN_ONLY_SYMBOLS
RTS_OPENBSD_ONLY_SYMBOLS
RTS_LIBGCC_SYMBOLS
+ RTS_ARCH_LIBGCC_SYMBOLS
RTS_FINI_ARRAY_SYMBOLS
RTS_LIBFFI_SYMBOLS
RTS_ARM_OUTLINE_ATOMIC_SYMBOLS
=====================================
rts/adjustor/LibffiAdjustor.c
=====================================
@@ -12,6 +12,7 @@
#include "Adjustor.h"
#include "rts/ghc_ffi.h"
+#include <stdint.h>
#include <string.h>
// Note that ffi_alloc_prep_closure is a non-standard libffi closure
@@ -187,5 +188,21 @@ createAdjustor (int cconv,
barf("createAdjustor: failed to allocate memory");
}
- return (void*)code;
+#if defined(riscv64_HOST_ARCH)
+ // Synchronize the memory and instruction cache to prevent illegal
+ // instruction exceptions.
+
+ // We expect two instructions for address loading, one for the jump.
+ int instrCount = 3;
+ // On Linux the parameters of __builtin___clear_cache are currently unused.
+ // Add them anyways for future compatibility. (I.e. the parameters couldn't
+ // be checked during development.)
+ // TODO: Check the upper boundary e.g. with a debugger.
+ __builtin___clear_cache((void *)code,
+ (void *)((uint64_t *) code + instrCount));
+ // Memory barrier to ensure nothing circumvents the fence.i / cache flush.
+ SEQ_CST_FENCE();
+#endif
+
+ return (void *)code;
}
=====================================
rts/linker/Elf.c
=====================================
@@ -103,7 +103,8 @@
#include "elf_got.h"
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH)
+# define NEED_GOT
# define NEED_PLT
# include "elf_plt.h"
# include "elf_reloc.h"
@@ -430,10 +431,7 @@ ocVerifyImage_ELF ( ObjectCode* oc )
case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break;
#endif
#if defined(EM_RISCV)
- case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" ));
- errorBelch("%s: RTS linker not implemented on riscv",
- oc->fileName);
- return 0;
+ case EM_RISCV: IF_DEBUG(linker,debugBelch( "riscv" )); break;
#endif
#if defined(EM_LOONGARCH)
case EM_LOONGARCH: IF_DEBUG(linker,debugBelch( "loongarch64" ));
@@ -1130,9 +1128,10 @@ end:
return result;
}
-// the aarch64 linker uses relocacteObjectCodeAarch64,
-// see elf_reloc_aarch64.{h,c}
-#if !defined(aarch64_HOST_ARCH)
+// the aarch64 and riscv64 linkers use relocateObjectCodeAarch64() and
+// relocateObjectCodeRISCV64() (respectively), see elf_reloc_aarch64.{h,c} and
+// elf_reloc_riscv64.{h,c}
+#if !defined(aarch64_HOST_ARCH) && !defined(riscv64_HOST_ARCH)
/* Do ELF relocations which lack an explicit addend. All x86-linux
and arm-linux relocations appear to be of this form. */
@@ -1359,7 +1358,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
/* try to locate an existing stub for this target */
if(findStub(&oc->sections[target_shndx], (void**)&S, 0)) {
/* didn't find any. Need to create one */
- if(makeStub(&oc->sections[target_shndx], (void**)&S, 0)) {
+ if(makeStub(&oc->sections[target_shndx], (void**)&S, NULL, 0)) {
errorBelch("Unable to create veneer for ARM_CALL\n");
return 0;
}
@@ -1451,7 +1450,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
/* try to locate an existing stub for this target */
if(findStub(&oc->sections[target_shndx], (void**)&S, 1)) {
/* didn't find any. Need to create one */
- if(makeStub(&oc->sections[target_shndx], (void**)&S, 1)) {
+ if(makeStub(&oc->sections[target_shndx], (void**)&S, NULL, 1)) {
errorBelch("Unable to create veneer for ARM_THM_CALL\n");
return 0;
}
@@ -1991,7 +1990,7 @@ ocResolve_ELF ( ObjectCode* oc )
(void) shnum;
(void) shdr;
-#if defined(aarch64_HOST_ARCH)
+#if defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
/* use new relocation design */
if(relocateObjectCode( oc ))
return 0;
@@ -2014,6 +2013,9 @@ ocResolve_ELF ( ObjectCode* oc )
#if defined(powerpc_HOST_ARCH)
ocFlushInstructionCache( oc );
+#elif defined(riscv64_HOST_ARCH)
+ /* New-style pseudo-polymorph (by architecture) call */
+ flushInstructionCache( oc );
#endif
return ocMprotect_Elf(oc);
=====================================
rts/linker/ElfTypes.h
=====================================
@@ -150,6 +150,7 @@ typedef
struct _Stub {
void * addr;
void * target;
+ void* got_addr;
/* flags can hold architecture specific information they are used during
* lookup of stubs as well. Thus two stubs for the same target with
* different flags are considered unequal.
=====================================
rts/linker/SymbolExtras.c
=====================================
@@ -153,7 +153,7 @@ void ocProtectExtras(ObjectCode* oc)
}
-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
unsigned long symbolNumber,
unsigned long target )
@@ -189,9 +189,12 @@ SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
extra->addr = target;
memcpy(extra->jumpIsland, jmp, 8);
#endif /* x86_64_HOST_ARCH */
-
+#if defined(riscv64_HOST_ARCH)
+ // Fake GOT entry (used like GOT, but located in symbol extras)
+ extra->addr = target;
+#endif
return extra;
}
-#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH */
+#endif /* powerpc_HOST_ARCH || x86_64_HOST_ARCH || riscv64_HOST_ARCH */
#endif /* !x86_64_HOST_ARCH) || !mingw32_HOST_OS */
#endif // NEED_SYMBOL_EXTRAS
=====================================
rts/linker/SymbolExtras.h
=====================================
@@ -16,7 +16,7 @@ SymbolExtra* makeArmSymbolExtra( ObjectCode const* oc,
unsigned long target,
bool fromThumb,
bool toThumb );
-#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+#elif defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
unsigned long symbolNumber,
unsigned long target );
=====================================
rts/linker/elf_plt.c
=====================================
@@ -5,7 +5,7 @@
#include <stdint.h>
#include <stdlib.h>
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
#if defined(OBJFORMAT_ELF)
#define STRINGIFY(x) #x
@@ -49,11 +49,13 @@ findStub(Section * section,
bool
makeStub(Section * section,
void* * addr,
+ void* got_addr,
uint8_t flags) {
Stub * s = calloc(1, sizeof(Stub));
ASSERT(s != NULL);
s->target = *addr;
+ s->got_addr = got_addr;
s->flags = flags;
s->next = NULL;
s->addr = (uint8_t *)section->info->stub_offset + 8
=====================================
rts/linker/elf_plt.h
=====================================
@@ -4,8 +4,9 @@
#include "elf_plt_arm.h"
#include "elf_plt_aarch64.h"
+#include "elf_plt_riscv64.h"
-#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined (riscv64_HOST_ARCH)
#if defined(OBJFORMAT_ELF)
@@ -21,6 +22,8 @@
#define __suffix__ Arm
#elif defined(__mips__)
#define __suffix__ Mips
+#elif defined(__riscv)
+#define __suffix__ RISCV64
#else
#error "unknown architecture"
#endif
@@ -34,10 +37,10 @@ unsigned numberOfStubsForSection( ObjectCode *oc, unsigned sectionIndex);
#define STUB_SIZE ADD_SUFFIX(stubSize)
bool findStub(Section * section, void* * addr, uint8_t flags);
-bool makeStub(Section * section, void* * addr, uint8_t flags);
+bool makeStub(Section * section, void* * addr, void* got_addr, uint8_t flags);
void freeStubs(Section * section);
#endif // OBJECTFORMAT_ELF
-#endif // arm/aarch64_HOST_ARCH
+#endif // arm/aarch64_HOST_ARCH/riscv64_HOST_ARCH
=====================================
rts/linker/elf_plt_riscv64.c
=====================================
@@ -0,0 +1,90 @@
+#include "Rts.h"
+#include "elf_compat.h"
+#include "elf_plt_riscv64.h"
+#include "rts/Messages.h"
+#include "linker/ElfTypes.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#if defined(riscv64_HOST_ARCH)
+
+#if defined(OBJFORMAT_ELF)
+
+const size_t instSizeRISCV64 = 4;
+const size_t stubSizeRISCV64 = 3 * instSizeRISCV64;
+
+bool needStubForRelRISCV64(Elf_Rel *rel) {
+ switch (ELF64_R_TYPE(rel->r_info)) {
+ case R_RISCV_CALL:
+ case R_RISCV_CALL_PLT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool needStubForRelaRISCV64(Elf_Rela *rela) {
+ switch (ELF64_R_TYPE(rela->r_info)) {
+ case R_RISCV_CALL:
+ case R_RISCV_CALL_PLT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// After the global offset table (GOT) has been set up, we can use these three
+// instructions to jump to the target address / function:
+//
+// 1. AUIPC ip, %pcrel_hi(addr)
+// 2. LD ip, %pcrel_lo(addr)(ip)
+// 3. JARL x0, ip, 0
+//
+// We could use the absolute address of the target (because we know it), but
+// that would require loading a 64-bit constant which is a nightmare to do in
+// riscv64 assembly. (See
+// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/5ffe5b5aeedb37b1c1c0c3d94641267d9ad4795a/riscv-elf.adoc#procedure-linkage-table)
+//
+// So far, PC-relative addressing seems to be good enough. If it ever turns out
+// to be not, one could (additionally for out-of-range cases?) encode absolute
+// addressing here.
+bool makeStubRISCV64(Stub *s) {
+ uint32_t *P = (uint32_t *)s->addr;
+ int32_t addr = (uint64_t)s->got_addr - (uint64_t)P;
+
+ uint64_t hi = (addr + 0x800) >> 12;
+ uint64_t lo = addr - (hi << 12);
+
+ IF_DEBUG(
+ linker,
+ debugBelch(
+ "makeStubRISCV64: P = %p, got_addr = %p, target = %p, addr = 0x%x "
+ ", hi = 0x%lx, lo = 0x%lx\n",
+ P, s->got_addr, s->target, addr, hi, lo));
+
+ // AUIPC ip, %pcrel_hi(addr)
+ uint32_t auipcInst = 0b0010111; // opcode
+ auipcInst |= 0x1f << 7; // rd = ip (x31)
+ auipcInst |= hi << 12; // imm[31:12]
+
+ // LD ip, %pcrel_lo(addr)(ip)
+ uint32_t ldInst = 0b0000011; // opcode
+ ldInst |= 0x1f << 7; // rd = ip (x31)
+ ldInst |= 0x1f << 15; // rs = ip (x31)
+ ldInst |= 0b11 << 12; // funct3 = 0x3 (LD)
+ ldInst |= lo << 20; // imm[11:0]
+
+ // JARL x0, ip, 0
+ uint32_t jalrInst = 0b1100111; // opcode
+ jalrInst |= 0x1f << 15; // rs = ip (x31)
+
+ P[0] = auipcInst;
+ P[1] = ldInst;
+ P[2] = jalrInst;
+
+ return EXIT_SUCCESS;
+}
+
+#endif
+#endif
=====================================
rts/linker/elf_plt_riscv64.h
=====================================
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "LinkerInternals.h"
+
+#if defined(OBJFORMAT_ELF)
+
+extern const size_t stubSizeRISCV64;
+bool needStubForRelRISCV64(Elf_Rel * rel);
+bool needStubForRelaRISCV64(Elf_Rela * rel);
+bool makeStubRISCV64(Stub * s);
+
+#endif
=====================================
rts/linker/elf_reloc.c
=====================================
@@ -4,13 +4,18 @@
#if defined(OBJFORMAT_ELF)
-/* we currently only use this abstraction for elf/aarch64 */
-#if defined(aarch64_HOST_ARCH)
+/* we currently only use this abstraction for elf/aarch64 and elf/riscv64 */
+#if defined(aarch64_HOST_ARCH) | defined(riscv64_HOST_ARCH)
bool
relocateObjectCode(ObjectCode * oc) {
return ADD_SUFFIX(relocateObjectCode)(oc);
}
+
+
+void flushInstructionCache(ObjectCode * oc){
+ return ADD_SUFFIX(flushInstructionCache)(oc);
+}
#endif
#endif
=====================================
rts/linker/elf_reloc.h
=====================================
@@ -5,9 +5,10 @@
#if defined(OBJFORMAT_ELF)
#include "elf_reloc_aarch64.h"
+#include "elf_reloc_riscv64.h"
bool
relocateObjectCode(ObjectCode * oc);
-
+void flushInstructionCache(ObjectCode *oc);
#endif /* OBJETFORMAT_ELF */
=====================================
rts/linker/elf_reloc_aarch64.c
=====================================
@@ -240,7 +240,7 @@ computeAddend(Section * section, Elf_Rel * rel,
/* check if we already have that stub */
if(findStub(section, (void**)&S, 0)) {
/* did not find it. Crete a new stub. */
- if(makeStub(section, (void**)&S, 0)) {
+ if(makeStub(section, (void**)&S, NULL, 0)) {
abort(/* could not find or make stub */);
}
}
@@ -339,5 +339,10 @@ relocateObjectCodeAarch64(ObjectCode * oc) {
return EXIT_SUCCESS;
}
+void flushInstructionCacheAarch64(ObjectCode * oc STG_UNUSED) {
+ // Looks like we don't need this on Aarch64.
+ /* no-op */
+}
+
#endif /* OBJECTFORMAT_ELF */
#endif /* aarch64_HOST_ARCH */
=====================================
rts/linker/elf_reloc_aarch64.h
=====================================
@@ -7,4 +7,5 @@
bool
relocateObjectCodeAarch64(ObjectCode * oc);
+void flushInstructionCacheAarch64(ObjectCode *oc);
#endif /* OBJETFORMAT_ELF */
=====================================
rts/linker/elf_reloc_riscv64.c
=====================================
@@ -0,0 +1,693 @@
+#include "elf_reloc_riscv64.h"
+#include "LinkerInternals.h"
+#include "Rts.h"
+#include "Stg.h"
+#include "SymbolExtras.h"
+#include "linker/ElfTypes.h"
+#include "elf_plt.h"
+#include "elf_util.h"
+#include "rts/Messages.h"
+#include "util.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#if defined(riscv64_HOST_ARCH)
+
+#if defined(OBJFORMAT_ELF)
+
+typedef uint64_t addr_t;
+
+/* regular instructions are 32bit */
+typedef uint32_t inst_t;
+
+/* compressed instructions are 16bit */
+typedef uint16_t cinst_t;
+
+// TODO: These instances could be static. They are not yet, because we might
+// need their debugging symbols.
+char *relocationTypeToString(Elf64_Xword type);
+int32_t decodeAddendRISCV64(Section *section, Elf_Rel *rel);
+bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend);
+void write8le(uint8_t *p, uint8_t v);
+uint8_t read8le(const uint8_t *P);
+void write16le(cinst_t *p, uint16_t v);
+uint16_t read16le(const cinst_t *P);
+uint32_t read32le(const inst_t *P);
+void write32le(inst_t *p, uint32_t v);
+uint64_t read64le(const uint64_t *P);
+void write64le(uint64_t *p, uint64_t v);
+uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end);
+void setCJType(cinst_t *loc, uint32_t val);
+void setCBType(cinst_t *loc, uint32_t val);
+void setBType(inst_t *loc, uint32_t val);
+void setSType(inst_t *loc, uint32_t val);
+int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol,
+ int64_t addend, ObjectCode *oc);
+void setJType(inst_t *loc, uint32_t val);
+void setIType(inst_t *loc, int32_t val);
+void checkInt(inst_t *loc, int32_t v, int n);
+void setUType(inst_t *loc, int32_t val);
+
+
+char *relocationTypeToString(Elf64_Xword type) {
+ switch (ELF64_R_TYPE(type)) {
+ case R_RISCV_NONE:
+ return "R_RISCV_NONE";
+ case R_RISCV_32:
+ return "R_RISCV_32";
+ case R_RISCV_64:
+ return "R_RISCV_64";
+ case R_RISCV_RELATIVE:
+ return "R_RISCV_RELATIVE";
+ case R_RISCV_COPY:
+ return "R_RISCV_COPY";
+ case R_RISCV_JUMP_SLOT:
+ return "R_RISCV_JUMP_SLOT";
+ case R_RISCV_TLS_DTPMOD32:
+ return "R_RISCV_TLS_DTPMOD32";
+ case R_RISCV_TLS_DTPMOD64:
+ return "R_RISCV_TLS_DTPMOD64";
+ case R_RISCV_TLS_DTPREL32:
+ return "R_RISCV_TLS_DTPREL32";
+ case R_RISCV_TLS_DTPREL64:
+ return "R_RISCV_TLS_DTPREL64";
+ case R_RISCV_TLS_TPREL32:
+ return "R_RISCV_TLS_TPREL32";
+ case R_RISCV_TLS_TPREL64:
+ return "R_RISCV_TLS_TPREL64";
+ case R_RISCV_BRANCH:
+ return "R_RISCV_BRANCH";
+ case R_RISCV_JAL:
+ return "R_RISCV_JAL";
+ case R_RISCV_CALL:
+ return "R_RISCV_CALL";
+ case R_RISCV_CALL_PLT:
+ return "R_RISCV_CALL_PLT";
+ case R_RISCV_GOT_HI20:
+ return "R_RISCV_GOT_HI20";
+ case R_RISCV_PCREL_HI20:
+ return "R_RISCV_PCREL_HI20";
+ case R_RISCV_LO12_I:
+ return "R_RISCV_LO12_I";
+ case R_RISCV_PCREL_LO12_I:
+ return "R_RISCV_PCREL_LO12_I";
+ case R_RISCV_HI20:
+ return "R_RISCV_HI20";
+ case R_RISCV_LO12_S:
+ return "R_RISCV_LO12_S";
+ case R_RISCV_PCREL_LO12_S:
+ return "R_RISCV_PCREL_LO12_S";
+ case R_RISCV_RELAX:
+ return "R_RISCV_RELAX";
+ case R_RISCV_RVC_BRANCH:
+ return "R_RISCV_RVC_BRANCH";
+ case R_RISCV_RVC_JUMP:
+ return "R_RISCV_RVC_JUMP";
+ default:
+ return "Unknown relocation type";
+ }
+}
+
+STG_NORETURN
+int32_t decodeAddendRISCV64(Section *section STG_UNUSED,
+ Elf_Rel *rel STG_UNUSED) {
+ barf("decodeAddendRISCV64: Relocations with explicit addend are not supported."
+ " Please open a ticket; providing the causing code/binary.");
+}
+
+// Make sure that V can be represented as an N bit signed integer.
+void checkInt(inst_t *loc, int32_t v, int n) {
+ if (v != signExtend32(v, n)) {
+ barf("Relocation at 0x%x is out of range. value: 0x%x (%d), "
+ "sign-extended value: 0x%x (%d), max bits 0x%x (%d)\n",
+ *loc, v, v, signExtend32(v, n), signExtend32(v, n), n, n);
+ }
+}
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write8le(uint8_t *p, uint8_t v) { *p = v; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint8_t read8le(const uint8_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write16le(cinst_t *p, uint16_t v) { *p = v; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint16_t read16le(const cinst_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint32_t read32le(const inst_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write32le(inst_t *p, uint32_t v) { *p = v; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+uint64_t read64le(const uint64_t *p) { return *p; }
+
+// RISCV is little-endian by definition: We can rely on (implicit) casts.
+void write64le(uint64_t *p, uint64_t v) { *p = v; }
+
+uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
+ return (v & ((1ULL << (begin + 1)) - 1)) >> end;
+}
+
+// Set immediate val in the instruction at *loc. In U-type instructions the
+// upper 20bits carry the upper 20bits of the immediate.
+void setUType(inst_t *loc, int32_t val) {
+ const unsigned bits = 32;
+ uint32_t hi = val + 0x800;
+ checkInt(loc, signExtend32(hi, bits) >> 12, 20);
+ IF_DEBUG(linker, debugBelch("setUType: hi 0x%x val 0x%x\n", hi, val));
+
+ uint32_t imm = hi & 0xFFFFF000;
+ write32le(loc, (read32le(loc) & 0xFFF) | imm);
+}
+
+// Set immediate val in the instruction at *loc. In I-type instructions the
+// upper 12bits carry the lower 12bit of the immediate.
+void setIType(inst_t *loc, int32_t val) {
+ uint64_t hi = (val + 0x800) >> 12;
+ uint64_t lo = val - (hi << 12);
+
+ IF_DEBUG(linker, debugBelch("setIType: hi 0x%lx lo 0x%lx\n", hi, lo));
+ IF_DEBUG(linker, debugBelch("setIType: loc %p *loc 0x%x val 0x%x\n", loc,
+ *loc, val));
+
+ uint32_t imm = lo & 0xfff;
+ uint32_t instr = (read32le(loc) & 0xfffff) | (imm << 20);
+
+ IF_DEBUG(linker, debugBelch("setIType: insn 0x%x\n", instr));
+ write32le(loc, instr);
+ IF_DEBUG(linker, debugBelch("setIType: loc %p *loc' 0x%x val 0x%x\n", loc,
+ *loc, val));
+}
+
+// Set immediate val in the instruction at *loc. In S-type instructions the
+// lower 12 bits of the immediate are at bits 7 to 11 ([0:4]) and 25 to 31
+// ([5:11]).
+void setSType(inst_t *loc, uint32_t val) {
+ uint64_t hi = (val + 0x800) >> 12;
+ uint64_t lo = val - (hi << 12);
+
+ uint32_t imm = lo;
+ uint32_t instr = (read32le(loc) & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) |
+ (extractBits(imm, 4, 0) << 7);
+
+ write32le(loc, instr);
+}
+
+// Set immediate val in the instruction at *loc. In J-type instructions the
+// immediate has 20bits which are pretty scattered:
+// instr bit -> imm bit
+// 31 -> 20
+// [30:21] -> [10:1]
+// 20 -> 11
+// [19:12] -> [19:12]
+//
+// N.B. bit 0 of the immediate is missing!
+void setJType(inst_t *loc, uint32_t val) {
+ checkInt(loc, val, 21);
+
+ uint32_t insn = read32le(loc) & 0xFFF;
+ uint32_t imm20 = extractBits(val, 20, 20) << 31;
+ uint32_t imm10_1 = extractBits(val, 10, 1) << 21;
+ uint32_t imm11 = extractBits(val, 11, 11) << 20;
+ uint32_t imm19_12 = extractBits(val, 19, 12) << 12;
+ insn |= imm20 | imm10_1 | imm11 | imm19_12;
+
+ write32le(loc, insn);
+}
+
+// Set immediate val in the instruction at *loc. In B-type instructions the
+// immediate has 12bits which are pretty scattered:
+// instr bit -> imm bit
+// 31 -> 12
+// [30:25] -> [10:5]
+// [11:8] -> [4:1]
+// 7 -> 11
+//
+// N.B. bit 0 of the immediate is missing!
+void setBType(inst_t *loc, uint32_t val) {
+ checkInt(loc, val, 13);
+
+ uint32_t insn = read32le(loc) & 0x1FFF07F;
+ uint32_t imm12 = extractBits(val, 12, 12) << 31;
+ uint32_t imm10_5 = extractBits(val, 10, 5) << 25;
+ uint32_t imm4_1 = extractBits(val, 4, 1) << 8;
+ uint32_t imm11 = extractBits(val, 11, 11) << 7;
+ insn |= imm12 | imm10_5 | imm4_1 | imm11;
+
+ write32le(loc, insn);
+}
+
+
+// Set immediate val in the instruction at *loc. CB-type instructions have a
+// lenght of 16 bits (half-word, compared to the usual 32bit/word instructions.)
+// The immediate has 8bits which are pretty scattered:
+// instr bit -> imm bit
+// 12 -> 8
+// [11:10] -> [4:3]
+// [6:5] -> [7:6]
+// [4:3] -> [2:1]
+// 2 -> 5
+//
+// N.B. bit 0 of the immediate is missing!
+void setCBType(cinst_t *loc, uint32_t val) {
+ checkInt((inst_t *)loc, val, 9);
+ uint16_t insn = read16le(loc) & 0xE383;
+ uint16_t imm8 = extractBits(val, 8, 8) << 12;
+ uint16_t imm4_3 = extractBits(val, 4, 3) << 10;
+ uint16_t imm7_6 = extractBits(val, 7, 6) << 5;
+ uint16_t imm2_1 = extractBits(val, 2, 1) << 3;
+ uint16_t imm5 = extractBits(val, 5, 5) << 2;
+ insn |= imm8 | imm4_3 | imm7_6 | imm2_1 | imm5;
+
+ write16le(loc, insn);
+}
+
+// Set immediate val in the instruction at *loc. CJ-type instructions have a
+// lenght of 16 bits (half-word, compared to the usual 32bit/word instructions.)
+// The immediate has 11bits which are pretty scattered:
+// instr bit -> imm bit
+// 12 -> 11
+// 11 -> 4
+// [10:9] ->[9:8]
+// 8 -> 10
+// 7 -> 6
+// 6 -> 7
+// [5:3] -> [3:1]
+// 2 -> 5
+//
+// N.B. bit 0 of the immediate is missing!
+void setCJType(cinst_t *loc, uint32_t val) {
+ checkInt((inst_t *)loc, val, 12);
+ uint16_t insn = read16le(loc) & 0xE003;
+ uint16_t imm11 = extractBits(val, 11, 11) << 12;
+ uint16_t imm4 = extractBits(val, 4, 4) << 11;
+ uint16_t imm9_8 = extractBits(val, 9, 8) << 9;
+ uint16_t imm10 = extractBits(val, 10, 10) << 8;
+ uint16_t imm6 = extractBits(val, 6, 6) << 7;
+ uint16_t imm7 = extractBits(val, 7, 7) << 6;
+ uint16_t imm3_1 = extractBits(val, 3, 1) << 3;
+ uint16_t imm5 = extractBits(val, 5, 5) << 2;
+ insn |= imm11 | imm4 | imm9_8 | imm10 | imm6 | imm7 | imm3_1 | imm5;
+
+ write16le(loc, insn);
+}
+
+// Encode the addend according to the relocaction into the instruction.
+bool encodeAddendRISCV64(Section *section, Elf_Rel *rel, int32_t addend) {
+ // instruction to rewrite (P: Position of the relocation)
+ addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset);
+ IF_DEBUG(linker,
+ debugBelch(
+ "Relocation type %s 0x%lx (%lu) symbol 0x%lx addend 0x%x (%u / "
+ "%d) P 0x%lx\n",
+ relocationTypeToString(rel->r_info), ELF64_R_TYPE(rel->r_info),
+ ELF64_R_TYPE(rel->r_info), ELF64_R_SYM(rel->r_info), addend,
+ addend, addend, P));
+ switch (ELF64_R_TYPE(rel->r_info)) {
+ case R_RISCV_32_PCREL:
+ case R_RISCV_32:
+ write32le((inst_t *)P, addend);
+ break;
+ case R_RISCV_64:
+ write64le((uint64_t *)P, addend);
+ break;
+ case R_RISCV_GOT_HI20:
+ case R_RISCV_PCREL_HI20:
+ case R_RISCV_HI20: {
+ setUType((inst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_PCREL_LO12_I:
+ case R_RISCV_LO12_I: {
+ setIType((inst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_RVC_JUMP: {
+ setCJType((cinst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_RVC_BRANCH: {
+ setCBType((cinst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_BRANCH: {
+ setBType((inst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_CALL:
+ case R_RISCV_CALL_PLT: {
+ // We could relax more (in some cases) but right now most important is to
+ // make it work.
+ setUType((inst_t *)P, addend);
+ setIType(((inst_t *)P) + 1, addend);
+ break;
+ }
+ case R_RISCV_JAL: {
+ setJType((inst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_ADD8:
+ write8le((uint8_t *)P, read8le((uint8_t *)P) + addend);
+ break;
+ case R_RISCV_ADD16:
+ write16le((cinst_t *)P, read16le((cinst_t *)P) + addend);
+ break;
+ case R_RISCV_ADD32:
+ write32le((inst_t *)P, read32le((inst_t *)P) + addend);
+ break;
+ case R_RISCV_ADD64:
+ write64le((uint64_t *)P, read64le((uint64_t *)P) + addend);
+ break;
+ case R_RISCV_SUB6: {
+ uint8_t keep = *((uint8_t *)P) & 0xc0;
+ uint8_t imm = (((*(uint8_t *)P) & 0x3f) - addend) & 0x3f;
+
+ write8le((uint8_t *)P, keep | imm);
+ break;
+ }
+ case R_RISCV_SUB8:
+ write8le((uint8_t *)P, read8le((uint8_t *)P) - addend);
+ break;
+ case R_RISCV_SUB16:
+ write16le((cinst_t *)P, read16le((cinst_t *)P) - addend);
+ break;
+ case R_RISCV_SUB32:
+ write32le((inst_t *)P, read32le((inst_t *)P) - addend);
+ break;
+ case R_RISCV_SUB64:
+ write64le((uint64_t *)P, read64le((uint64_t *)P) - addend);
+ break;
+ case R_RISCV_SET6: {
+ uint8_t keep = *((uint8_t *)P) & 0xc0;
+ uint8_t imm = (addend & 0x3f) & 0x3f;
+
+ write8le((uint8_t *)P, keep | imm);
+ break;
+ }
+ case R_RISCV_SET8:
+ write8le((uint8_t *)P, addend);
+ break;
+ case R_RISCV_SET16:
+ write16le((cinst_t *)P, addend);
+ break;
+ case R_RISCV_SET32:
+ write32le((inst_t *)P, addend);
+ break;
+ case R_RISCV_PCREL_LO12_S:
+ case R_RISCV_TPREL_LO12_S:
+ case R_RISCV_LO12_S: {
+ setSType((inst_t *)P, addend);
+ break;
+ }
+ case R_RISCV_RELAX:
+ case R_RISCV_ALIGN:
+ // Implementing relaxations (rewriting instructions to more efficient ones)
+ // could be implemented in future. As the code already is aligned and we do
+ // not change the instruction sizes, we should get away with not aligning
+ // (though, that is cheating.) To align or change the instruction count, we
+ // would need machinery to squeeze or extend memory at the current location.
+ break;
+ default:
+ barf("Missing relocation 0x%lx\n", ELF64_R_TYPE(rel->r_info));
+ }
+ return EXIT_SUCCESS;
+}
+
+/**
+ * Compute the *new* addend for a relocation, given a pre-existing addend.
+ * @param section The section the relocation is in.
+ * @param rel The Relocation struct.
+ * @param symbol The target symbol.
+ * @param addend The existing addend. Either explicit or implicit.
+ * @return The new computed addend.
+ */
+int32_t computeAddend(ElfRelocationATable * relaTab, unsigned relNo, Elf_Rel *rel, ElfSymbol *symbol,
+ int64_t addend, ObjectCode *oc) {
+ Section * section = &oc->sections[relaTab->targetSectionIndex];
+
+ // instruction to rewrite (P: Position of the relocation)
+ addr_t P = (addr_t)((uint8_t *)section->start + rel->r_offset);
+
+ CHECK(0x0 != P);
+ CHECK((uint64_t)section->start <= P);
+ CHECK(P <= (uint64_t)section->start + section->size);
+ // S: Value of the symbol in the symbol table
+ addr_t S = (addr_t)symbol->addr;
+ /* GOT slot for the symbol (G + GOT) */
+ addr_t GOT_S = (addr_t)symbol->got_addr;
+
+ // A: Addend field in the relocation entry associated with the symbol
+ int64_t A = addend;
+
+ IF_DEBUG(linker, debugBelch("%s: P 0x%lx S 0x%lx %s GOT_S 0x%lx A 0x%lx relNo %u\n",
+ relocationTypeToString(rel->r_info), P, S,
+ symbol->name, GOT_S, A, relNo));
+ switch (ELF64_R_TYPE(rel->r_info)) {
+ case R_RISCV_32:
+ return S + A;
+ case R_RISCV_64:
+ return S + A;
+ case R_RISCV_HI20:
+ return S + A;
+ case R_RISCV_JUMP_SLOT:
+ return S;
+ case R_RISCV_JAL:
+ return S + A - P;
+ case R_RISCV_PCREL_HI20:
+ return S + A - P;
+ case R_RISCV_LO12_I:
+ return S + A;
+ // Quoting LLVM docs: For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}),
+ // the symbol actually points the corresponding R_RISCV_PCREL_HI20
+ // relocation, and the target VA is calculated using PCREL_HI20's symbol.
+ case R_RISCV_PCREL_LO12_S:
+ FALLTHROUGH;
+ case R_RISCV_PCREL_LO12_I: {
+ // Lookup related HI20 relocation and use that value. I'm still confused why
+ // relocations aren't self-contained, but this is how LLVM does it. And,
+ // calculating the lower 12 bit without any relationship to the GOT entry's
+ // address makes no sense either.
+ for (int64_t i = relNo; i >= 0 ; i--) {
+ Elf_Rela *rel_prime = &relaTab->relocations[i];
+
+ addr_t P_prime =
+ (addr_t)((uint8_t *)section->start + rel_prime->r_offset);
+
+ if (P_prime != S) {
+ // S points to the P of the corresponding *_HI20 relocation.
+ continue;
+ }
+
+ ElfSymbol *symbol_prime =
+ findSymbol(oc, relaTab->sectionHeader->sh_link,
+ ELF64_R_SYM((Elf64_Xword)rel_prime->r_info));
+
+ CHECK(0x0 != symbol_prime);
+
+ /* take explicit addend */
+ int64_t addend_prime = rel_prime->r_addend;
+
+ uint64_t type_prime = ELF64_R_TYPE(rel_prime->r_info);
+
+ if (type_prime == R_RISCV_PCREL_HI20 ||
+ type_prime == R_RISCV_GOT_HI20 ||
+ type_prime == R_RISCV_TLS_GD_HI20 ||
+ type_prime == R_RISCV_TLS_GOT_HI20) {
+ IF_DEBUG(linker,
+ debugBelch(
+ "Found matching relocation: %s (P: 0x%lx, S: 0x%lx, "
+ "sym-name: %s) -> %s (P: 0x%lx, S: %p, sym-name: %s, relNo: %ld)",
+ relocationTypeToString(rel->r_info), P, S, symbol->name,
+ relocationTypeToString(rel_prime->r_info), P_prime,
+ symbol_prime->addr, symbol_prime->name, i));
+ int32_t result = computeAddend(relaTab, i, (Elf_Rel *)rel_prime,
+ symbol_prime, addend_prime, oc);
+ IF_DEBUG(linker, debugBelch("Result of computeAddend: 0x%x (%d)\n",
+ result, result));
+ return result;
+ }
+ }
+ debugBelch("Missing HI relocation for %s: P 0x%lx S 0x%lx %s\n",
+ relocationTypeToString(rel->r_info), P, S, symbol->name);
+ abort();
+ }
+
+ case R_RISCV_RVC_JUMP:
+ return S + A - P;
+ case R_RISCV_RVC_BRANCH:
+ return S + A - P;
+ case R_RISCV_BRANCH:
+ return S + A - P;
+ case R_RISCV_CALL:
+ case R_RISCV_CALL_PLT: {
+ addr_t GOT_Target;
+ if (GOT_S != 0) {
+ // 1. Public symbol with GOT entry.
+ GOT_Target = GOT_S;
+ } else {
+ // 2. Fake GOT entry with symbol extra entry.
+ SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S);
+ addr_t* FAKE_GOT_S = &symbolExtra->addr;
+ IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT w/ SymbolExtra = %p , "
+ "entry = %p\n",
+ symbolExtra, FAKE_GOT_S));
+ GOT_Target = (addr_t) FAKE_GOT_S;
+ }
+
+ if (findStub(section, (void **)&S, 0)) {
+ /* did not find it. Crete a new stub. */
+ if (makeStub(section, (void **)&S, (void *)GOT_Target, 0)) {
+ abort(/* could not find or make stub */);
+ }
+ }
+ IF_DEBUG(linker, debugBelch("R_RISCV_CALL_PLT: S = 0x%lx A = 0x%lx P = "
+ "0x%lx (S + A) - P = 0x%lx \n",
+ S, A, P, (S + A) - P));
+ return (S + A) - P;
+ }
+ case R_RISCV_ADD8:
+ FALLTHROUGH;
+ case R_RISCV_ADD16:
+ FALLTHROUGH;
+ case R_RISCV_ADD32:
+ FALLTHROUGH;
+ case R_RISCV_ADD64:
+ return S + A; // Add V when the value is set
+ case R_RISCV_SUB6:
+ FALLTHROUGH;
+ case R_RISCV_SUB8:
+ FALLTHROUGH;
+ case R_RISCV_SUB16:
+ FALLTHROUGH;
+ case R_RISCV_SUB32:
+ FALLTHROUGH;
+ case R_RISCV_SUB64:
+ return S + A; // Subtract from V when value is set
+ case R_RISCV_SET6:
+ FALLTHROUGH;
+ case R_RISCV_SET8:
+ FALLTHROUGH;
+ case R_RISCV_SET16:
+ FALLTHROUGH;
+ case R_RISCV_SET32:
+ return S + A;
+ case R_RISCV_RELAX:
+ // This "relocation" has no addend.
+ FALLTHROUGH;
+ case R_RISCV_ALIGN:
+ // I guess we don't need to implement this relaxation. Otherwise, this
+ // should return the number of blank bytes to insert via NOPs.
+ return 0;
+ case R_RISCV_32_PCREL:
+ return S + A - P;
+ case R_RISCV_GOT_HI20: {
+ // TODO: Allocating extra memory for every symbol just to play this trick
+ // seems to be a bit obscene. (GOT relocations hitting local symbols
+ // happens, but not very often.) It would be better to allocate only what we
+ // really need.
+
+ // There are two cases here: 1. The symbol is public and has an entry in the
+ // GOT. 2. It's local and has no corresponding GOT entry. The first case is
+ // easy: We simply calculate the addend with the GOT address. In the second
+ // case we create a symbol extra entry and pretend it's the GOT.
+ if (GOT_S != 0) {
+ // 1. Public symbol with GOT entry.
+ return GOT_S + A - P;
+ } else {
+ // 2. Fake GOT entry with symbol extra entry.
+ SymbolExtra *symbolExtra = makeSymbolExtra(oc, ELF_R_SYM(rel->r_info), S);
+ addr_t* FAKE_GOT_S = &symbolExtra->addr;
+ addr_t res = (addr_t) FAKE_GOT_S + A - P;
+ IF_DEBUG(linker, debugBelch("R_RISCV_GOT_HI20 w/ SymbolExtra = %p , "
+ "entry = %p , reloc-addend = 0x%lu ",
+ symbolExtra, FAKE_GOT_S, res));
+ return res;
+ }
+ }
+ default:
+ barf("Unimplemented relocation: 0x%lx\n (%lu)",
+ ELF64_R_TYPE(rel->r_info), ELF64_R_TYPE(rel->r_info));
+ }
+ barf("This should never happen!");
+}
+
+// Iterate over all relocations and perform them.
+bool relocateObjectCodeRISCV64(ObjectCode *oc) {
+ for (ElfRelocationTable *relTab = oc->info->relTable; relTab != NULL;
+ relTab = relTab->next) {
+ /* only relocate interesting sections */
+ if (SECTIONKIND_OTHER == oc->sections[relTab->targetSectionIndex].kind)
+ continue;
+
+ Section *targetSection = &oc->sections[relTab->targetSectionIndex];
+
+ for (unsigned i = 0; i < relTab->n_relocations; i++) {
+ Elf_Rel *rel = &relTab->relocations[i];
+
+ ElfSymbol *symbol = findSymbol(oc, relTab->sectionHeader->sh_link,
+ ELF64_R_SYM((Elf64_Xword)rel->r_info));
+
+ CHECK(0x0 != symbol);
+
+ // This always fails, because we don't support Rel locations, yet: Do we
+ // need this case? Leaving it in to spot the potential bug when it
+ // appears.
+ /* decode implicit addend */
+ int64_t addend = decodeAddendRISCV64(targetSection, rel);
+
+ addend = computeAddend((ElfRelocationATable*) relTab, i, rel, symbol, addend, oc);
+ encodeAddendRISCV64(targetSection, rel, addend);
+ }
+ }
+ for (ElfRelocationATable *relaTab = oc->info->relaTable; relaTab != NULL;
+ relaTab = relaTab->next) {
+ /* only relocate interesting sections */
+ if (SECTIONKIND_OTHER == oc->sections[relaTab->targetSectionIndex].kind)
+ continue;
+
+ Section *targetSection = &oc->sections[relaTab->targetSectionIndex];
+
+ for (unsigned i = 0; i < relaTab->n_relocations; i++) {
+
+ Elf_Rela *rel = &relaTab->relocations[i];
+
+ ElfSymbol *symbol = findSymbol(oc, relaTab->sectionHeader->sh_link,
+ ELF64_R_SYM((Elf64_Xword)rel->r_info));
+
+ CHECK(0x0 != symbol);
+
+ /* take explicit addend */
+ int64_t addend = rel->r_addend;
+
+ addend = computeAddend(relaTab, i, (Elf_Rel *)rel, symbol, addend, oc);
+ encodeAddendRISCV64(targetSection, (Elf_Rel *)rel, addend);
+ }
+ }
+ return EXIT_SUCCESS;
+}
+
+void flushInstructionCacheRISCV64(ObjectCode *oc) {
+ // Synchronize the memory and instruction cache to prevent illegal instruction
+ // exceptions. On Linux the parameters of __builtin___clear_cache are
+ // currently unused. Add them anyways for future compatibility. (I.e. the
+ // parameters couldn't be checked during development.)
+
+ /* The main object code */
+ void *codeBegin = oc->image + oc->misalignment;
+ __builtin___clear_cache(codeBegin, (void*) ((uint64_t*) codeBegin + oc->fileSize));
+
+ /* Jump Islands */
+ __builtin___clear_cache((void *)oc->symbol_extras,
+ (void *)(oc->symbol_extras + oc->n_symbol_extras));
+
+ // Memory barrier to ensure nothing circumvents the fence.i / cache flushes.
+ SEQ_CST_FENCE();
+}
+
+#endif /* OBJECTFORMAT_ELF */
+#endif /* riscv64_HOST_ARCH */
=====================================
rts/linker/elf_reloc_riscv64.h
=====================================
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "LinkerInternals.h"
+
+#if defined(OBJFORMAT_ELF)
+
+bool
+relocateObjectCodeRISCV64(ObjectCode * oc);
+
+void flushInstructionCacheRISCV64(ObjectCode *oc);
+#endif /* OBJETFORMAT_ELF */
=====================================
rts/rts.cabal
=====================================
@@ -468,9 +468,11 @@ library
linker/elf_got.c
linker/elf_plt.c
linker/elf_plt_aarch64.c
+ linker/elf_plt_riscv64.c
linker/elf_plt_arm.c
linker/elf_reloc.c
linker/elf_reloc_aarch64.c
+ linker/elf_reloc_riscv64.c
linker/elf_tlsgd.c
linker/elf_util.c
sm/BlockAlloc.c
=====================================
testsuite/tests/codeGen/should_run/CCallConv.hs
=====================================
@@ -0,0 +1,123 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+{-# LANGUAGE GHCForeignImportPrim #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+
+-- | This test ensures that sub-word signed and unsigned parameters are correctly
+-- handed over to C functions. I.e. it asserts the calling-convention.
+--
+-- The number of parameters is currently shaped for the RISCV64 calling-convention.
+-- You may need to add more parameters to the C functions in case there are more
+-- registers reserved for parameters in your architecture.
+module Main where
+
+import Data.Word
+import GHC.Exts
+import GHC.Int
+
+foreign import ccall "fun8"
+ fun8 ::
+ Int8# -> -- a0
+ Word8# -> -- a1
+ Int8# -> -- a2
+ Int8# -> -- a3
+ Int8# -> -- a4
+ Int8# -> -- a5
+ Int8# -> -- a6
+ Int8# -> -- a7
+ Word8# -> -- s0
+ Int8# -> -- s1
+ Int64# -- result
+
+foreign import ccall "fun16"
+ fun16 ::
+ Int16# -> -- a0
+ Word16# -> -- a1
+ Int16# -> -- a2
+ Int16# -> -- a3
+ Int16# -> -- a4
+ Int16# -> -- a5
+ Int16# -> -- a6
+ Int16# -> -- a7
+ Word16# -> -- s0
+ Int16# -> -- s1
+ Int64# -- result
+
+foreign import ccall "fun32"
+ fun32 ::
+ Int32# -> -- a0
+ Word32# -> -- a1
+ Int32# -> -- a2
+ Int32# -> -- a3
+ Int32# -> -- a4
+ Int32# -> -- a5
+ Int32# -> -- a6
+ Int32# -> -- a7
+ Word32# -> -- s0
+ Int32# -> -- s1
+ Int64# -- result
+
+foreign import ccall "funFloat"
+ funFloat ::
+ Float# -> -- a0
+ Float# -> -- a1
+ Float# -> -- a2
+ Float# -> -- a3
+ Float# -> -- a4
+ Float# -> -- a5
+ Float# -> -- a6
+ Float# -> -- a7
+ Float# -> -- s0
+ Float# -> -- s1
+ Float# -- result
+
+foreign import ccall "funDouble"
+ funDouble ::
+ Double# -> -- a0
+ Double# -> -- a1
+ Double# -> -- a2
+ Double# -> -- a3
+ Double# -> -- a4
+ Double# -> -- a5
+ Double# -> -- a6
+ Double# -> -- a7
+ Double# -> -- s0
+ Double# -> -- s1
+ Double# -- result
+
+main :: IO ()
+main =
+ -- N.B. the values here aren't choosen by accident: -1 means all bits one in
+ -- twos-complement, which is the same as the max word value.
+ let i8 :: Int8# = intToInt8# (-1#)
+ w8 :: Word8# = wordToWord8# (255##)
+ res8 :: Int64# = fun8 i8 w8 i8 i8 i8 i8 i8 i8 w8 i8
+ expected_res8 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word8) + 8 * (-1)
+ i16 :: Int16# = intToInt16# (-1#)
+ w16 :: Word16# = wordToWord16# (65535##)
+ res16 :: Int64# = fun16 i16 w16 i16 i16 i16 i16 i16 i16 w16 i16
+ expected_res16 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word16) + 8 * (-1)
+ i32 :: Int32# = intToInt32# (-1#)
+ w32 :: Word32# = wordToWord32# (4294967295##)
+ res32 :: Int64# = fun32 i32 w32 i32 i32 i32 i32 i32 i32 w32 i32
+ expected_res32 :: Int64 = 2 * (fromInteger . fromIntegral) (maxBound :: Word32) + 8 * (-1)
+ resFloat :: Float = F# (funFloat 1.0# 1.1# 1.2# 1.3# 1.4# 1.5# 1.6# 1.7# 1.8# 1.9#)
+ resDouble :: Double = D# (funDouble 1.0## 1.1## 1.2## 1.3## 1.4## 1.5## 1.6## 1.7## 1.8## 1.9##)
+ in do
+ print $ "fun8 result:" ++ show (I64# res8)
+ assertEqual expected_res8 (I64# res8)
+ print $ "fun16 result:" ++ show (I64# res16)
+ assertEqual expected_res16 (I64# res16)
+ print $ "fun32 result:" ++ show (I64# res32)
+ assertEqual expected_res32 (I64# res32)
+ print $ "funFloat result:" ++ show resFloat
+ assertEqual (14.5 :: Float) resFloat
+ print $ "funDouble result:" ++ show resDouble
+ assertEqual (14.5 :: Double) resDouble
+
+assertEqual :: (Eq a, Show a) => a -> a -> IO ()
+assertEqual a b =
+ if a == b
+ then pure ()
+ else error $ show a ++ " =/= " ++ show b
=====================================
testsuite/tests/codeGen/should_run/CCallConv.stdout
=====================================
@@ -0,0 +1,60 @@
+"fun8 result:502"
+"fun16 result:131062"
+"fun32 result:8589934582"
+"funFloat result:14.5"
+"funDouble result:14.5"
+fun32:
+a0: 0xffffffff -1
+a1: 0xffffffff 4294967295
+a2: 0xffffffff -1
+a3: 0xffffffff -1
+a4: 0xffffffff -1
+a5: 0xffffffff -1
+a6: 0xffffffff -1
+a7: 0xffffffff -1
+s0: 0xffffffff -1
+s1: 0xffffffff 4294967295
+fun16:
+a0: 0xffffffff -1
+a1: 0xffff 65535
+a2: 0xffffffff -1
+a3: 0xffffffff -1
+a4: 0xffffffff -1
+a5: 0xffffffff -1
+a6: 0xffffffff -1
+a7: 0xffffffff -1
+s0: 0xffffffff -1
+s1: 0xffff 65535
+fun8:
+a0: 0xffffffff -1
+a1: 0xff 255
+a2: 0xffffffff -1
+a3: 0xffffffff -1
+a4: 0xffffffff -1
+a5: 0xffffffff -1
+a6: 0xffffffff -1
+a7: 0xffffffff -1
+s0: 0xffffffff -1
+s1: 0xff 255
+funFloat:
+a0: 1.000000
+a1: 1.100000
+a2: 1.200000
+a3: 1.300000
+a4: 1.400000
+a5: 1.500000
+a6: 1.600000
+a7: 1.700000
+s0: 1.800000
+s1: 1.900000
+funDouble:
+a0: 1.000000
+a1: 1.100000
+a2: 1.200000
+a3: 1.300000
+a4: 1.400000
+a5: 1.500000
+a6: 1.600000
+a7: 1.700000
+s0: 1.800000
+s1: 1.900000
=====================================
testsuite/tests/codeGen/should_run/CCallConv_c.c
=====================================
@@ -0,0 +1,91 @@
+#include "stdint.h"
+#include "stdio.h"
+
+int64_t fun8(int8_t a0, uint8_t a1, int8_t a2, int8_t a3, int8_t a4, int8_t a5,
+ int8_t a6, int8_t a7, int8_t s0, uint8_t s1) {
+ printf("fun8:\n");
+ printf("a0: %#x %hhd\n", a0, a0);
+ printf("a1: %#x %hhu\n", a1, a1);
+ printf("a2: %#x %hhd\n", a2, a2);
+ printf("a3: %#x %hhd\n", a3, a3);
+ printf("a4: %#x %hhd\n", a4, a4);
+ printf("a5: %#x %hhd\n", a5, a5);
+ printf("a6: %#x %hhd\n", a6, a6);
+ printf("a7: %#x %hhd\n", a7, a7);
+ printf("s0: %#x %hhd\n", s0, s0);
+ printf("s1: %#x %hhu\n", s1, s1);
+
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
+
+int64_t fun16(int16_t a0, uint16_t a1, int16_t a2, int16_t a3, int16_t a4,
+ int16_t a5, int16_t a6, int16_t a7, int16_t s0, uint16_t s1) {
+ printf("fun16:\n");
+ printf("a0: %#x %hd\n", a0, a0);
+ printf("a1: %#x %hu\n", a1, a1);
+ printf("a2: %#x %hd\n", a2, a2);
+ printf("a3: %#x %hd\n", a3, a3);
+ printf("a4: %#x %hd\n", a4, a4);
+ printf("a5: %#x %hd\n", a5, a5);
+ printf("a6: %#x %hd\n", a6, a6);
+ printf("a7: %#x %hd\n", a7, a7);
+ printf("s0: %#x %hd\n", s0, s0);
+ printf("s1: %#x %hu\n", s1, s1);
+
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
+
+int64_t fun32(int32_t a0, uint32_t a1, int32_t a2, int32_t a3, int32_t a4,
+ int32_t a5, int32_t a6, int32_t a7, int32_t s0, uint32_t s1) {
+ printf("fun32:\n");
+ printf("a0: %#x %d\n", a0, a0);
+ printf("a1: %#x %u\n", a1, a1);
+ printf("a2: %#x %d\n", a2, a2);
+ printf("a3: %#x %d\n", a3, a3);
+ printf("a4: %#x %d\n", a4, a4);
+ printf("a5: %#x %d\n", a5, a5);
+ printf("a6: %#x %d\n", a6, a6);
+ printf("a7: %#x %d\n", a7, a7);
+ printf("s0: %#x %d\n", s0, s0);
+ printf("s1: %#x %u\n", s1, s1);
+
+ // Ensure the addition happens in long int (not just int) precission.
+ // Otherwise, the result is truncated during the operation.
+ int64_t force_int64_precission = 0;
+ return force_int64_precission + a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 +
+ s1;
+}
+
+float funFloat(float a0, float a1, float a2, float a3, float a4, float a5,
+ float a6, float a7, float s0, float s1) {
+ printf("funFloat:\n");
+ printf("a0: %f\n", a0);
+ printf("a1: %f\n", a1);
+ printf("a2: %f\n", a2);
+ printf("a3: %f\n", a3);
+ printf("a4: %f\n", a4);
+ printf("a5: %f\n", a5);
+ printf("a6: %f\n", a6);
+ printf("a7: %f\n", a7);
+ printf("s0: %f\n", s0);
+ printf("s1: %f\n", s1);
+
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
+
+double funDouble(double a0, double a1, double a2, double a3, double a4, double a5,
+ double a6, double a7, double s0, double s1) {
+ printf("funDouble:\n");
+ printf("a0: %f\n", a0);
+ printf("a1: %f\n", a1);
+ printf("a2: %f\n", a2);
+ printf("a3: %f\n", a3);
+ printf("a4: %f\n", a4);
+ printf("a5: %f\n", a5);
+ printf("a6: %f\n", a6);
+ printf("a7: %f\n", a7);
+ printf("s0: %f\n", s0);
+ printf("s1: %f\n", s1);
+
+ return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + s0 + s1;
+}
=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -231,10 +231,10 @@ test('OrigThunkInfo', normal, compile_and_run, ['-forig-thunk-info'])
# not implemeted correctly (according to
# Note [MO_S_MulMayOflo significant width]) and may require fixing/adjustment.
test('MulMayOflo_full',
- [ extra_files(['MulMayOflo.hs']),
+ [ extra_files(['MulMayOflo.hs', 'MulMayOflo_full.cmm']),
when(unregisterised(), skip),
unless(
- arch('aarch64') or arch('x86_64') or arch('i386'),
+ arch('aarch64') or arch('x86_64') or arch('i386') or arch('riscv64'),
expect_broken(23742)
),
ignore_stdout],
@@ -243,3 +243,8 @@ test('MulMayOflo_full',
test('T24264run', normal, compile_and_run, [''])
test('T24295a', normal, compile_and_run, ['-O -floopification'])
test('T24295b', normal, compile_and_run, ['-O -floopification -fpedantic-bottoms'])
+
+test('CCallConv',
+ [],
+ multi_compile_and_run,
+ ['CCallConv', [('CCallConv_c.c', '')], ''])
=====================================
testsuite/tests/rts/all.T
=====================================
@@ -65,6 +65,10 @@ test('divbyzero',
# each devision. Neither gcc, nor llvm do this as of right now. Microsoft
# apparently does so though?
when(arch('aarch64'), skip),
+ # RISCV64 has no exception for this, too. Instead all bits of the result
+ # are set. As with Aarch64 neither GCC nor LLVM translate this result to
+ # an exception.
+ when(arch('riscv64'), skip),
# Apparently the output can be different on different
# Linux setups, so just ignore it. As long as we get
# the right exit code we're OK.
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1b473a3516a42c3e857eddb4f89e5330aa7e1143...253bd61c29cc6b129f031ee6c44be82b32cda35c
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1b473a3516a42c3e857eddb4f89e5330aa7e1143...253bd61c29cc6b129f031ee6c44be82b32cda35c
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240805/e17cb31e/attachment-0001.html>
More information about the ghc-commits
mailing list