[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 7 commits: rts: Split up rts/include/stg/MachRegs.h by arch

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Sun Oct 8 16:16:36 UTC 2023



Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC


Commits:
390443b7 by Andreas Klebinger at 2023-10-07T10:00:20-04:00
rts: Split up rts/include/stg/MachRegs.h by arch

- - - - -
3685942f by Bryan Richter at 2023-10-07T10:00:56-04:00
Actually set hackage index state

Or at least, use a version of the cabal command that *claims* to set the
index state.

Time will tell.

- - - - -
46a0e5be by Bryan Richter at 2023-10-07T10:00:56-04:00
Update hackage index state

- - - - -
d4b037de by Bryan Richter at 2023-10-07T10:00:56-04:00
Ensure hadrian uses CI's hackage index state

- - - - -
3a82c4b5 by Andrew Lelechenko at 2023-10-08T12:16:02-04:00
Do not use O_NONBLOCK on regular files or block devices

CLC proposal https://github.com/haskell/core-libraries-committee/issues/166

- - - - -
92df8027 by David Binder at 2023-10-08T12:16:05-04:00
Update hpc-bin submodule to 0.69

- - - - -
36ef6720 by David Binder at 2023-10-08T12:16:05-04:00
Update Hadrian with correct path to happy file for hpc-bin

- - - - -


22 changed files:

- .gitignore
- .gitlab/ci.sh
- compiler/CodeGen.Platform.h
- compiler/GHC/CmmToAsm.hs
- compiler/ghc.cabal.in
- configure.ac
- hadrian/cabal.project
- hadrian/src/Rules/SourceDist.hs
- libraries/base/GHC/IO/FD.hs
- libraries/base/changelog.md
- rts/include/stg/MachRegs.h
- + rts/include/stg/MachRegs/arm32.h
- + rts/include/stg/MachRegs/arm64.h
- + rts/include/stg/MachRegs/loongarch64.h
- + rts/include/stg/MachRegs/ppc.h
- + rts/include/stg/MachRegs/riscv64.h
- + rts/include/stg/MachRegs/s390x.h
- + rts/include/stg/MachRegs/wasm32.h
- + rts/include/stg/MachRegs/x86.h
- rts/rts.cabal.in
- testsuite/tests/hpc/T17073.stdout
- utils/hpc


Changes:

=====================================
.gitignore
=====================================
@@ -111,6 +111,7 @@ _darcs/
 /compiler/ClosureTypes.h
 /compiler/FunTypes.h
 /compiler/MachRegs.h
+/compiler/MachRegs
 /compiler/ghc-llvm-version.h
 /compiler/ghc.cabal
 /compiler/ghc.cabal.old


=====================================
.gitlab/ci.sh
=====================================
@@ -7,7 +7,7 @@
 set -Eeuo pipefail
 
 # Configuration:
-HACKAGE_INDEX_STATE="2020-12-21T14:48:20Z"
+HACKAGE_INDEX_STATE="2023-10-05T11:38:51Z"
 MIN_HAPPY_VERSION="1.20"
 MIN_ALEX_VERSION="3.2.6"
 
@@ -230,7 +230,7 @@ function set_toolchain_paths() {
 function cabal_update() {
   # In principle -w shouldn't be necessary here but with
   # cabal-install 3.8.1.0 it is, due to cabal#8447.
-  run "$CABAL" update -w "$GHC" --index="$HACKAGE_INDEX_STATE"
+  run "$CABAL" update -w "$GHC" "hackage.haskell.org,${HACKAGE_INDEX_STATE}"
 }
 
 
@@ -480,6 +480,9 @@ function build_hadrian() {
 
   check_release_build
 
+  # Just to be sure, use the same hackage index state when building Hadrian.
+  echo "index-state: $HACKAGE_INDEX_STATE" > hadrian/cabal.project.local
+
   # We can safely enable parallel compression for x64. By the time
   # hadrian calls tar/xz to produce bindist, there's no other build
   # work taking place.


=====================================
compiler/CodeGen.Platform.h
=====================================
@@ -480,6 +480,7 @@ import GHC.Platform.Reg
 
 #endif
 
+-- See also Note [Caller saves and callee-saves regs.]
 callerSaves :: GlobalReg -> Bool
 #if defined(CALLER_SAVES_Base)
 callerSaves BaseReg           = True


=====================================
compiler/GHC/CmmToAsm.hs
=====================================
@@ -15,7 +15,8 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE UnboxedTuples #-}
 
--- | Native code generator
+-- | Note [Native code generator]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 --
 -- The native-code generator has machine-independent and
 -- machine-dependent modules.
@@ -23,45 +24,39 @@
 -- This module ("GHC.CmmToAsm") is the top-level machine-independent
 -- module.  Before entering machine-dependent land, we do some
 -- machine-independent optimisations (defined below) on the
--- 'CmmStmts's.
+-- 'CmmStmts's. (Which ideally would be folded into CmmOpt ...)
 --
 -- We convert to the machine-specific 'Instr' datatype with
 -- 'cmmCodeGen', assuming an infinite supply of registers.  We then use
--- a machine-independent register allocator ('regAlloc') to rejoin
+-- a (mostly) machine-independent register allocator to rejoin
 -- reality.  Obviously, 'regAlloc' has machine-specific helper
--- functions (see about "RegAllocInfo" below).
+-- functions (see the used register allocator for details).
 --
 -- Finally, we order the basic blocks of the function so as to minimise
 -- the number of jumps between blocks, by utilising fallthrough wherever
 -- possible.
 --
--- The machine-dependent bits break down as follows:
+-- The machine-dependent bits are generally contained under
+--  GHC/CmmToAsm/<Arch>/* and generally break down as follows:
 --
---   * ["MachRegs"]  Everything about the target platform's machine
+--   * "Regs": Everything about the target platform's machine
 --     registers (and immediate operands, and addresses, which tend to
 --     intermingle/interact with registers).
 --
---   * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
---     have a module of its own), plus a miscellany of other things
+--   * "Instr":  Includes the 'Instr' datatype plus a miscellany of other things
 --     (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
 --
---   * ["MachCodeGen"]  is where 'Cmm' stuff turns into
+--   * "CodeGen":  is where 'Cmm' stuff turns into
 --     machine instructions.
 --
---   * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
+--   * "Ppr": 'pprInstr' turns an 'Instr' into text (well, really
 --     a 'SDoc').
 --
---   * ["RegAllocInfo"] In the register allocator, we manipulate
---     'MRegsState's, which are 'BitSet's, one bit per machine register.
---     When we want to say something about a specific machine register
---     (e.g., ``it gets clobbered by this instruction''), we set/unset
---     its bit.  Obviously, we do this 'BitSet' thing for efficiency
---     reasons.
+-- The register allocators live under GHC.CmmToAsm.Reg.*; there is both a Linear and a
+-- Graph based register allocator, each of which has its own notes describing it. They
+-- are mostly platform independent but there are some platform specific files
+-- encoding architecture details under Reg/<Allocator>/<Arch>.hs
 --
---     The 'RegAllocInfo' module collects together the machine-specific
---     info needed to do register allocation.
---
---    * ["RegisterAlloc"] The (machine-independent) register allocator.
 -- -}
 --
 module GHC.CmmToAsm


=====================================
compiler/ghc.cabal.in
=====================================
@@ -34,6 +34,14 @@ extra-source-files:
     ClosureTypes.h
     FunTypes.h
     MachRegs.h
+    MachRegs/arm32.h
+    MachRegs/arm64.h
+    MachRegs/loongarch64.h
+    MachRegs/ppc.h
+    MachRegs/riscv64.h
+    MachRegs/s390x.h
+    MachRegs/wasm32.h
+    MachRegs/x86.h
     ghc-llvm-version.h
 
 


=====================================
configure.ac
=====================================
@@ -578,6 +578,15 @@ ln -f rts/include/rts/Bytecodes.h compiler/
 ln -f rts/include/rts/storage/ClosureTypes.h compiler/
 ln -f rts/include/rts/storage/FunTypes.h compiler/
 ln -f rts/include/stg/MachRegs.h compiler/
+mkdir -p compiler/MachRegs
+ln -f rts/include/stg/MachRegs/arm32.h compiler/MachRegs/arm32.h
+ln -f rts/include/stg/MachRegs/arm64.h compiler/MachRegs/arm64.h
+ln -f rts/include/stg/MachRegs/loongarch64.h compiler/MachRegs/loongarch64.h
+ln -f rts/include/stg/MachRegs/ppc.h compiler/MachRegs/ppc.h
+ln -f rts/include/stg/MachRegs/riscv64.h compiler/MachRegs/riscv64.h
+ln -f rts/include/stg/MachRegs/s390x.h compiler/MachRegs/s390x.h
+ln -f rts/include/stg/MachRegs/wasm32.h compiler/MachRegs/wasm32.h
+ln -f rts/include/stg/MachRegs/x86.h compiler/MachRegs/x86.h
 AC_MSG_NOTICE([done.])
 
 dnl ** Copy the files from the "fs" utility into the right folders.


=====================================
hadrian/cabal.project
=====================================
@@ -3,7 +3,8 @@ packages: ./
           ../libraries/ghc-platform/
 
 -- This essentially freezes the build plan for hadrian
-index-state: 2023-09-18T18:43:12Z
+-- It would be wise to keep this up to date with the state set in ci.sh
+index-state: 2023-10-05T11:38:51Z
 
 -- N.B. Compile with -O0 since this is not a performance-critical executable
 -- and the Cabal takes nearly twice as long to build with -O1. See #16817.


=====================================
hadrian/src/Rules/SourceDist.hs
=====================================
@@ -184,7 +184,7 @@ prepareTree dest = do
         , (stage0InTree , compiler,      "GHC/Parser.y",       "GHC/Parser.hs")
         , (stage0InTree , compiler,      "GHC/Parser/Lexer.x", "GHC/Parser/Lexer.hs")
         , (stage0InTree , compiler,      "GHC/Parser/HaddockLex.x", "GHC/Parser/HaddockLex.hs")
-        , (stage0InTree , hpcBin,        "src/HpcParser.y",    "src/HpcParser.hs")
+        , (stage0InTree , hpcBin,        "src/Trace/Hpc/Parser.y", "src/Trace/Hpc/Parser.hs")
         , (stage0InTree , genprimopcode, "Parser.y",           "Parser.hs")
         , (stage0InTree , genprimopcode, "Lexer.x",            "Lexer.hs")
         , (stage0InTree , cabalSyntax  , "src/Distribution/Fields/Lexer.x",  "src/Distribution/Fields/Lexer.hs")


=====================================
libraries/base/GHC/IO/FD.hs
=====================================
@@ -82,13 +82,19 @@ clampReadSize  = min 0x7ffff000
 data FD = FD {
   fdFD :: {-# UNPACK #-} !CInt,
 #if defined(mingw32_HOST_OS)
-  -- On Windows, a socket file descriptor needs to be read and written
+  -- | On Windows, a socket file descriptor needs to be read and written
   -- using different functions (send/recv).
   fdIsSocket_ :: {-# UNPACK #-} !Int
 #else
-  -- On Unix we need to know whether this FD has O_NONBLOCK set.
-  -- If it has, then we can use more efficient routines to read/write to it.
-  -- It is always safe for this to be off.
+  -- | On Unix we need to know whether this 'FD' has @O_NONBLOCK@ set.
+  -- If it has, then we can use more efficient routines (namely, unsafe FFI)
+  -- to read/write to it. Otherwise safe FFI is used.
+  --
+  -- @O_NONBLOCK@ has no effect on regular files and block devices at the moment,
+  -- thus this flag should be off for them. While reading from a file cannot
+  -- block indefinitely (as opposed to reading from a socket or a pipe), it can block
+  -- the entire runtime for a "brief" moment of time: you cannot read a file from
+  -- a floppy drive or network share without delay.
   fdIsNonBlocking :: {-# UNPACK #-} !Int
 #endif
  }
@@ -197,6 +203,9 @@ openFileWith
   :: FilePath -- ^ file to open
   -> IOMode   -- ^ mode in which to open the file
   -> Bool     -- ^ open the file in non-blocking mode?
+              --   This has no effect on regular files and block devices:
+              --   they are always opened in blocking mode.
+              --   See 'fdIsNonBlocking' for more discussion.
   -> (FD -> IODeviceType -> IO r) -- ^ @act1@: An action to perform
                     -- on the file descriptor with the masking state
                     -- restored and an exception handler that closes
@@ -332,7 +341,11 @@ mkFD fd iomode mb_stat is_socket is_nonblock = do
 
     return (FD{ fdFD = fd,
 #if !defined(mingw32_HOST_OS)
-                fdIsNonBlocking = fromEnum is_nonblock
+                -- As https://man7.org/linux/man-pages/man2/open.2.html explains,
+                -- O_NONBLOCK has no effect on regular files and block devices;
+                -- utilities inspecting fdIsNonBlocking (such as readRawBufferPtr)
+                -- should not be tricked into thinking otherwise.
+                fdIsNonBlocking = fromEnum (is_nonblock && fd_type /= RegularFile && fd_type /= RawDevice)
 #else
                 fdIsSocket_ = fromEnum is_socket
 #endif
@@ -452,11 +465,19 @@ dup2 fd fdto = do
 
 setNonBlockingMode :: FD -> Bool -> IO FD
 setNonBlockingMode fd set = do
-  setNonBlockingFD (fdFD fd) set
+  -- This mirrors the behaviour of mkFD:
+  -- O_NONBLOCK has no effect on regular files and block devices;
+  -- utilities inspecting fdIsNonBlocking (such as readRawBufferPtr)
+  -- should not be tricked into thinking otherwise.
+  is_nonblock <- if set then do
+    (fd_type, _, _) <- fdStat (fdFD fd)
+    pure $ fd_type /= RegularFile && fd_type /= RawDevice
+    else pure False
+  setNonBlockingFD (fdFD fd) is_nonblock
 #if defined(mingw32_HOST_OS)
   return fd
 #else
-  return fd{ fdIsNonBlocking = fromEnum set }
+  return fd{ fdIsNonBlocking = fromEnum is_nonblock }
 #endif
 
 ready :: FD -> Bool -> Int -> IO Bool


=====================================
libraries/base/changelog.md
=====================================
@@ -5,6 +5,9 @@
   * Add a `RULE` to `Prelude.lookup`, allowing it to participate in list fusion ([CLC proposal #174](https://github.com/haskell/core-libraries-committee/issues/175))
   * The `Enum Int64` and `Enum Word64` instances now use native operations on 32-bit platforms, increasing performance by up to 1.5x on i386 and up to 5.6x with the JavaScript backend. ([CLC proposal #187](https://github.com/haskell/core-libraries-committee/issues/187))
   * Update to [Unicode 15.1.0](https://www.unicode.org/versions/Unicode15.1.0/).
+  * Fix `fdIsNonBlocking` to always be `0` for regular files and block devices on Unix, regardless of `O_NONBLOCK`.
+  * Always use a `safe` call to `read` for regular files and block devices on Unix if the RTS is multi-threaded, regardless of `O_NONBLOCK`.
+    ([CLC proposal #166](https://github.com/haskell/core-libraries-committee/issues/166))
 
 ## 4.19.0.0 *TBA*
   * Add `{-# WARNING in "x-partial" #-}` to `Data.List.{head,tail}`.


=====================================
rts/include/stg/MachRegs.h
=====================================
@@ -51,637 +51,54 @@
 #elif MACHREGS_NO_REGS == 0
 
 /* ----------------------------------------------------------------------------
-   Caller saves and callee-saves regs.
-
+   Note [Caller saves and callee-saves regs.]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Caller-saves regs have to be saved around C-calls made from STG
    land, so this file defines CALLER_SAVES_<reg> for each <reg> that
    is designated caller-saves in that machine's C calling convention.
+   NB: Caller-saved registers not mapped to a STG register don't
+       require a CALLER_SAVES_ define.
 
    As it stands, the only registers that are ever marked caller saves
-   are the RX, FX, DX and USER registers; as a result, if you
+   are the RX, FX, DX, XMM and USER registers; as a result, if you
    decide to caller save a system register (e.g. SP, HP, etc), note that
    this code path is completely untested! -- EZY
 
    See Note [Register parameter passing] for details.
    -------------------------------------------------------------------------- */
 
-/* -----------------------------------------------------------------------------
-   The x86 register mapping
-
-   Ok, we've only got 6 general purpose registers, a frame pointer and a
-   stack pointer.  \tr{%eax} and \tr{%edx} are return values from C functions,
-   hence they get trashed across ccalls and are caller saves. \tr{%ebx},
-   \tr{%esi}, \tr{%edi}, \tr{%ebp} are all callee-saves.
-
-   Reg     STG-Reg
-   ---------------
-   ebx     Base
-   ebp     Sp
-   esi     R1
-   edi     Hp
-
-   Leaving SpLim out of the picture.
-   -------------------------------------------------------------------------- */
-
-#if defined(MACHREGS_i386)
-
-#define REG(x) __asm__("%" #x)
-
-#if !defined(not_doing_dynamic_linking)
-#define REG_Base    ebx
-#endif
-#define REG_Sp      ebp
-
-#if !defined(STOLEN_X86_REGS)
-#define STOLEN_X86_REGS 4
-#endif
-
-#if STOLEN_X86_REGS >= 3
-# define REG_R1     esi
-#endif
-
-#if STOLEN_X86_REGS >= 4
-# define REG_Hp     edi
-#endif
-#define REG_MachSp  esp
-
-#define REG_XMM1    xmm0
-#define REG_XMM2    xmm1
-#define REG_XMM3    xmm2
-#define REG_XMM4    xmm3
-
-#define REG_YMM1    ymm0
-#define REG_YMM2    ymm1
-#define REG_YMM3    ymm2
-#define REG_YMM4    ymm3
-
-#define REG_ZMM1    zmm0
-#define REG_ZMM2    zmm1
-#define REG_ZMM3    zmm2
-#define REG_ZMM4    zmm3
-
-#define MAX_REAL_VANILLA_REG 1  /* always, since it defines the entry conv */
-#define MAX_REAL_FLOAT_REG   0
-#define MAX_REAL_DOUBLE_REG  0
-#define MAX_REAL_LONG_REG    0
-#define MAX_REAL_XMM_REG     4
-#define MAX_REAL_YMM_REG     4
-#define MAX_REAL_ZMM_REG     4
-
-/* -----------------------------------------------------------------------------
-  The x86-64 register mapping
-
-  %rax          caller-saves, don't steal this one
-  %rbx          YES
-  %rcx          arg reg, caller-saves
-  %rdx          arg reg, caller-saves
-  %rsi          arg reg, caller-saves
-  %rdi          arg reg, caller-saves
-  %rbp          YES (our *prime* register)
-  %rsp          (unavailable - stack pointer)
-  %r8           arg reg, caller-saves
-  %r9           arg reg, caller-saves
-  %r10          caller-saves
-  %r11          caller-saves
-  %r12          YES
-  %r13          YES
-  %r14          YES
-  %r15          YES
-
-  %xmm0-7       arg regs, caller-saves
-  %xmm8-15      caller-saves
-
-  Use the caller-saves regs for Rn, because we don't always have to
-  save those (as opposed to Sp/Hp/SpLim etc. which always have to be
-  saved).
-
-  --------------------------------------------------------------------------- */
-
-#elif defined(MACHREGS_x86_64)
-
-#define REG(x) __asm__("%" #x)
-
-#define REG_Base  r13
-#define REG_Sp    rbp
-#define REG_Hp    r12
-#define REG_R1    rbx
-#define REG_R2    r14
-#define REG_R3    rsi
-#define REG_R4    rdi
-#define REG_R5    r8
-#define REG_R6    r9
-#define REG_SpLim r15
-#define REG_MachSp  rsp
-
-/*
-Map both Fn and Dn to register xmmn so that we can pass a function any
-combination of up to six Float# or Double# arguments without touching
-the stack. See Note [Overlapping global registers] for implications.
-*/
-
-#define REG_F1    xmm1
-#define REG_F2    xmm2
-#define REG_F3    xmm3
-#define REG_F4    xmm4
-#define REG_F5    xmm5
-#define REG_F6    xmm6
-
-#define REG_D1    xmm1
-#define REG_D2    xmm2
-#define REG_D3    xmm3
-#define REG_D4    xmm4
-#define REG_D5    xmm5
-#define REG_D6    xmm6
-
-#define REG_XMM1    xmm1
-#define REG_XMM2    xmm2
-#define REG_XMM3    xmm3
-#define REG_XMM4    xmm4
-#define REG_XMM5    xmm5
-#define REG_XMM6    xmm6
-
-#define REG_YMM1    ymm1
-#define REG_YMM2    ymm2
-#define REG_YMM3    ymm3
-#define REG_YMM4    ymm4
-#define REG_YMM5    ymm5
-#define REG_YMM6    ymm6
-
-#define REG_ZMM1    zmm1
-#define REG_ZMM2    zmm2
-#define REG_ZMM3    zmm3
-#define REG_ZMM4    zmm4
-#define REG_ZMM5    zmm5
-#define REG_ZMM6    zmm6
-
-#if !defined(mingw32_HOST_OS)
-#define CALLER_SAVES_R3
-#define CALLER_SAVES_R4
-#endif
-#define CALLER_SAVES_R5
-#define CALLER_SAVES_R6
-
-#define CALLER_SAVES_F1
-#define CALLER_SAVES_F2
-#define CALLER_SAVES_F3
-#define CALLER_SAVES_F4
-#define CALLER_SAVES_F5
-#if !defined(mingw32_HOST_OS)
-#define CALLER_SAVES_F6
-#endif
-
-#define CALLER_SAVES_D1
-#define CALLER_SAVES_D2
-#define CALLER_SAVES_D3
-#define CALLER_SAVES_D4
-#define CALLER_SAVES_D5
-#if !defined(mingw32_HOST_OS)
-#define CALLER_SAVES_D6
-#endif
-
-#define CALLER_SAVES_XMM1
-#define CALLER_SAVES_XMM2
-#define CALLER_SAVES_XMM3
-#define CALLER_SAVES_XMM4
-#define CALLER_SAVES_XMM5
-#if !defined(mingw32_HOST_OS)
-#define CALLER_SAVES_XMM6
-#endif
-
-#define CALLER_SAVES_YMM1
-#define CALLER_SAVES_YMM2
-#define CALLER_SAVES_YMM3
-#define CALLER_SAVES_YMM4
-#define CALLER_SAVES_YMM5
-#if !defined(mingw32_HOST_OS)
-#define CALLER_SAVES_YMM6
-#endif
-
-#define CALLER_SAVES_ZMM1
-#define CALLER_SAVES_ZMM2
-#define CALLER_SAVES_ZMM3
-#define CALLER_SAVES_ZMM4
-#define CALLER_SAVES_ZMM5
-#if !defined(mingw32_HOST_OS)
-#define CALLER_SAVES_ZMM6
-#endif
-
-#define MAX_REAL_VANILLA_REG 6
-#define MAX_REAL_FLOAT_REG   6
-#define MAX_REAL_DOUBLE_REG  6
-#define MAX_REAL_LONG_REG    0
-#define MAX_REAL_XMM_REG     6
-#define MAX_REAL_YMM_REG     6
-#define MAX_REAL_ZMM_REG     6
-
-/* -----------------------------------------------------------------------------
-   The PowerPC register mapping
-
-   0            system glue?    (caller-save, volatile)
-   1            SP              (callee-save, non-volatile)
-   2            AIX, powerpc64-linux:
-                    RTOC        (a strange special case)
-                powerpc32-linux:
-                                reserved for use by system
+/* Define STG <-> machine register mappings. */
+#if defined(MACHREGS_i386) || defined(MACHREGS_x86_64)
 
-   3-10         args/return     (caller-save, volatile)
-   11,12        system glue?    (caller-save, volatile)
-   13           on 64-bit:      reserved for thread state pointer
-                on 32-bit:      (callee-save, non-volatile)
-   14-31                        (callee-save, non-volatile)
-
-   f0                           (caller-save, volatile)
-   f1-f13       args/return     (caller-save, volatile)
-   f14-f31                      (callee-save, non-volatile)
-
-   \tr{14}--\tr{31} are wonderful callee-save registers on all ppc OSes.
-   \tr{0}--\tr{12} are caller-save registers.
-
-   \tr{%f14}--\tr{%f31} are callee-save floating-point registers.
-
-   We can do the Whole Business with callee-save registers only!
-   -------------------------------------------------------------------------- */
+#include "MachRegs/x86.h"
 
 #elif defined(MACHREGS_powerpc)
 
-#define REG(x) __asm__(#x)
-
-#define REG_R1          r14
-#define REG_R2          r15
-#define REG_R3          r16
-#define REG_R4          r17
-#define REG_R5          r18
-#define REG_R6          r19
-#define REG_R7          r20
-#define REG_R8          r21
-#define REG_R9          r22
-#define REG_R10         r23
-
-#define REG_F1          fr14
-#define REG_F2          fr15
-#define REG_F3          fr16
-#define REG_F4          fr17
-#define REG_F5          fr18
-#define REG_F6          fr19
-
-#define REG_D1          fr20
-#define REG_D2          fr21
-#define REG_D3          fr22
-#define REG_D4          fr23
-#define REG_D5          fr24
-#define REG_D6          fr25
-
-#define REG_Sp          r24
-#define REG_SpLim       r25
-#define REG_Hp          r26
-#define REG_Base        r27
-
-#define MAX_REAL_FLOAT_REG   6
-#define MAX_REAL_DOUBLE_REG  6
-
-/* -----------------------------------------------------------------------------
-   The ARM EABI register mapping
-
-   Here we consider ARM mode (i.e. 32bit isns)
-   and also CPU with full VFPv3 implementation
-
-   ARM registers (see Chapter 5.1 in ARM IHI 0042D and
-   Section 9.2.2 in ARM Software Development Toolkit Reference Guide)
-
-   r15  PC         The Program Counter.
-   r14  LR         The Link Register.
-   r13  SP         The Stack Pointer.
-   r12  IP         The Intra-Procedure-call scratch register.
-   r11  v8/fp      Variable-register 8.
-   r10  v7/sl      Variable-register 7.
-   r9   v6/SB/TR   Platform register. The meaning of this register is
-                   defined by the platform standard.
-   r8   v5         Variable-register 5.
-   r7   v4         Variable register 4.
-   r6   v3         Variable register 3.
-   r5   v2         Variable register 2.
-   r4   v1         Variable register 1.
-   r3   a4         Argument / scratch register 4.
-   r2   a3         Argument / scratch register 3.
-   r1   a2         Argument / result / scratch register 2.
-   r0   a1         Argument / result / scratch register 1.
-
-   VFPv2/VFPv3/NEON registers
-   s0-s15/d0-d7/q0-q3    Argument / result/ scratch registers
-   s16-s31/d8-d15/q4-q7  callee-saved registers (must be preserved across
-                         subroutine calls)
-
-   VFPv3/NEON registers (added to the VFPv2 registers set)
-   d16-d31/q8-q15        Argument / result/ scratch registers
-   ----------------------------------------------------------------------------- */
+#include "MachRegs/ppc.h"
 
 #elif defined(MACHREGS_arm)
 
-#define REG(x) __asm__(#x)
-
-#define REG_Base        r4
-#define REG_Sp          r5
-#define REG_Hp          r6
-#define REG_R1          r7
-#define REG_R2          r8
-#define REG_R3          r9
-#define REG_R4          r10
-#define REG_SpLim       r11
-
-#if !defined(arm_HOST_ARCH_PRE_ARMv6)
-/* d8 */
-#define REG_F1    s16
-#define REG_F2    s17
-/* d9 */
-#define REG_F3    s18
-#define REG_F4    s19
-
-#define REG_D1    d10
-#define REG_D2    d11
-#endif
-
-/* -----------------------------------------------------------------------------
-   The ARMv8/AArch64 ABI register mapping
-
-   The AArch64 provides 31 64-bit general purpose registers
-   and 32 128-bit SIMD/floating point registers.
-
-   General purpose registers (see Chapter 5.1.1 in ARM IHI 0055B)
-
-   Register | Special | Role in the procedure call standard
-   ---------+---------+------------------------------------
-     SP     |         | The Stack Pointer
-     r30    |  LR     | The Link Register
-     r29    |  FP     | The Frame Pointer
-   r19-r28  |         | Callee-saved registers
-     r18    |         | The Platform Register, if needed;
-            |         | or temporary register
-     r17    |  IP1    | The second intra-procedure-call temporary register
-     r16    |  IP0    | The first intra-procedure-call scratch register
-    r9-r15  |         | Temporary registers
-     r8     |         | Indirect result location register
-    r0-r7   |         | Parameter/result registers
-
-
-   FPU/SIMD registers
-
-   s/d/q/v0-v7    Argument / result/ scratch registers
-   s/d/q/v8-v15   callee-saved registers (must be preserved across subroutine calls,
-                  but only bottom 64-bit value needs to be preserved)
-   s/d/q/v16-v31  temporary registers
-
-   ----------------------------------------------------------------------------- */
+#include "MachRegs/arm32.h"
 
 #elif defined(MACHREGS_aarch64)
 
-#define REG(x) __asm__(#x)
-
-#define REG_Base        r19
-#define REG_Sp          r20
-#define REG_Hp          r21
-#define REG_R1          r22
-#define REG_R2          r23
-#define REG_R3          r24
-#define REG_R4          r25
-#define REG_R5          r26
-#define REG_R6          r27
-#define REG_SpLim       r28
-
-#define REG_F1          s8
-#define REG_F2          s9
-#define REG_F3          s10
-#define REG_F4          s11
-
-#define REG_D1          d12
-#define REG_D2          d13
-#define REG_D3          d14
-#define REG_D4          d15
-
-#define REG_XMM1        q4
-#define REG_XMM2        q5
-
-#define CALLER_SAVES_XMM1
-#define CALLER_SAVES_XMM2
-
-/* -----------------------------------------------------------------------------
-   The s390x register mapping
-
-   Register    | Role(s)                                 | Call effect
-   ------------+-------------------------------------+-----------------
-   r0,r1       | -                                       | caller-saved
-   r2          | Argument / return value                 | caller-saved
-   r3,r4,r5    | Arguments                               | caller-saved
-   r6          | Argument                                | callee-saved
-   r7...r11    | -                                       | callee-saved
-   r12         | (Commonly used as GOT pointer)          | callee-saved
-   r13         | (Commonly used as literal pool pointer) | callee-saved
-   r14         | Return address                          | caller-saved
-   r15         | Stack pointer                           | callee-saved
-   f0          | Argument / return value                 | caller-saved
-   f2,f4,f6    | Arguments                               | caller-saved
-   f1,f3,f5,f7 | -                                       | caller-saved
-   f8...f15    | -                                       | callee-saved
-   v0...v31    | -                                       | caller-saved
-
-   Each general purpose register r0 through r15 as well as each floating-point
-   register f0 through f15 is 64 bits wide. Each vector register v0 through v31
-   is 128 bits wide.
-
-   Note, the vector registers v0 through v15 overlap with the floating-point
-   registers f0 through f15.
-
-   -------------------------------------------------------------------------- */
+#include "MachRegs/arm64.h"
 
 #elif defined(MACHREGS_s390x)
 
-#define REG(x) __asm__("%" #x)
-
-#define REG_Base        r7
-#define REG_Sp          r8
-#define REG_Hp          r10
-#define REG_R1          r11
-#define REG_R2          r12
-#define REG_R3          r13
-#define REG_R4          r6
-#define REG_R5          r2
-#define REG_R6          r3
-#define REG_R7          r4
-#define REG_R8          r5
-#define REG_SpLim       r9
-#define REG_MachSp      r15
-
-#define REG_F1          f8
-#define REG_F2          f9
-#define REG_F3          f10
-#define REG_F4          f11
-#define REG_F5          f0
-#define REG_F6          f1
-
-#define REG_D1          f12
-#define REG_D2          f13
-#define REG_D3          f14
-#define REG_D4          f15
-#define REG_D5          f2
-#define REG_D6          f3
-
-#define CALLER_SAVES_R5
-#define CALLER_SAVES_R6
-#define CALLER_SAVES_R7
-#define CALLER_SAVES_R8
-
-#define CALLER_SAVES_F5
-#define CALLER_SAVES_F6
-
-#define CALLER_SAVES_D5
-#define CALLER_SAVES_D6
-
-/* -----------------------------------------------------------------------------
-   The riscv64 register mapping
-
-   Register    | Role(s)                                 | Call effect
-   ------------+-----------------------------------------+-------------
-   zero        | Hard-wired zero                         | -
-   ra          | Return address                          | caller-saved
-   sp          | Stack pointer                           | callee-saved
-   gp          | Global pointer                          | callee-saved
-   tp          | Thread pointer                          | callee-saved
-   t0,t1,t2    | -                                       | caller-saved
-   s0          | Frame pointer                           | callee-saved
-   s1          | -                                       | callee-saved
-   a0,a1       | Arguments / return values               | caller-saved
-   a2..a7      | Arguments                               | caller-saved
-   s2..s11     | -                                       | callee-saved
-   t3..t6      | -                                       | caller-saved
-   ft0..ft7    | -                                       | caller-saved
-   fs0,fs1     | -                                       | callee-saved
-   fa0,fa1     | Arguments / return values               | caller-saved
-   fa2..fa7    | Arguments                               | caller-saved
-   fs2..fs11   | -                                       | callee-saved
-   ft8..ft11   | -                                       | caller-saved
-
-   Each general purpose register as well as each floating-point
-   register is 64 bits wide.
-
-   -------------------------------------------------------------------------- */
+#include "MachRegs/s390x.h"
 
 #elif defined(MACHREGS_riscv64)
 
-#define REG(x) __asm__(#x)
-
-#define REG_Base        s1
-#define REG_Sp          s2
-#define REG_Hp          s3
-#define REG_R1          s4
-#define REG_R2          s5
-#define REG_R3          s6
-#define REG_R4          s7
-#define REG_R5          s8
-#define REG_R6          s9
-#define REG_R7          s10
-#define REG_SpLim       s11
-
-#define REG_F1          fs0
-#define REG_F2          fs1
-#define REG_F3          fs2
-#define REG_F4          fs3
-#define REG_F5          fs4
-#define REG_F6          fs5
-
-#define REG_D1          fs6
-#define REG_D2          fs7
-#define REG_D3          fs8
-#define REG_D4          fs9
-#define REG_D5          fs10
-#define REG_D6          fs11
-
-#define MAX_REAL_FLOAT_REG   6
-#define MAX_REAL_DOUBLE_REG  6
+#include "MachRegs/riscv64.h"
 
 #elif defined(MACHREGS_wasm32)
 
-#define REG_R1             1
-#define REG_R2             2
-#define REG_R3             3
-#define REG_R4             4
-#define REG_R5             5
-#define REG_R6             6
-#define REG_R7             7
-#define REG_R8             8
-#define REG_R9             9
-#define REG_R10            10
-
-#define REG_F1             11
-#define REG_F2             12
-#define REG_F3             13
-#define REG_F4             14
-#define REG_F5             15
-#define REG_F6             16
-
-#define REG_D1             17
-#define REG_D2             18
-#define REG_D3             19
-#define REG_D4             20
-#define REG_D5             21
-#define REG_D6             22
-
-#define REG_L1             23
-
-#define REG_Sp             24
-#define REG_SpLim          25
-#define REG_Hp             26
-#define REG_HpLim          27
-#define REG_CCCS           28
-
-/* -----------------------------------------------------------------------------
-   The loongarch64 register mapping
-
-   Register    | Role(s)                                 | Call effect
-   ------------+-----------------------------------------+-------------
-   zero        | Hard-wired zero                         | -
-   ra          | Return address                          | caller-saved
-   tp          | Thread pointer                          | -
-   sp          | Stack pointer                           | callee-saved
-   a0,a1       | Arguments / return values               | caller-saved
-   a2..a7      | Arguments                               | caller-saved
-   t0..t8      | -                                       | caller-saved
-   u0          | Reserve                                 | -
-   fp          | Frame pointer                           | callee-saved
-   s0..s8      | -                                       | callee-saved
-   fa0,fa1     | Arguments / return values               | caller-saved
-   fa2..fa7    | Arguments                               | caller-saved
-   ft0..ft15   | -                                       | caller-saved
-   fs0..fs7    | -                                       | callee-saved
-
-   Each general purpose register as well as each floating-point
-   register is 64 bits wide, also, the u0 register is called r21 in some cases.
+#include "MachRegs/wasm32.h"
 
-   -------------------------------------------------------------------------- */
 #elif defined(MACHREGS_loongarch64)
 
-#define REG(x) __asm__("$" #x)
-
-#define REG_Base        s0
-#define REG_Sp          s1
-#define REG_Hp          s2
-#define REG_R1          s3
-#define REG_R2          s4
-#define REG_R3          s5
-#define REG_R4          s6
-#define REG_R5          s7
-#define REG_SpLim       s8
-
-#define REG_F1          fs0
-#define REG_F2          fs1
-#define REG_F3          fs2
-#define REG_F4          fs3
-
-#define REG_D1          fs4
-#define REG_D2          fs5
-#define REG_D3          fs6
-#define REG_D4          fs7
-
-#define MAX_REAL_FLOAT_REG   4
-#define MAX_REAL_DOUBLE_REG  4
+#include "MachRegs/loongarch64.h"
 
 #else
 


=====================================
rts/include/stg/MachRegs/arm32.h
=====================================
@@ -0,0 +1,60 @@
+#pragma once
+
+/* -----------------------------------------------------------------------------
+   The ARM EABI register mapping
+
+   Here we consider ARM mode (i.e. 32-bit instructions)
+   and also a CPU with a full VFPv3 implementation
+
+   ARM registers (see Chapter 5.1 in ARM IHI 0042D and
+   Section 9.2.2 in ARM Software Development Toolkit Reference Guide)
+
+   r15  PC         The Program Counter.
+   r14  LR         The Link Register.
+   r13  SP         The Stack Pointer.
+   r12  IP         The Intra-Procedure-call scratch register.
+   r11  v8/fp      Variable-register 8.
+   r10  v7/sl      Variable-register 7.
+   r9   v6/SB/TR   Platform register. The meaning of this register is
+                   defined by the platform standard.
+   r8   v5         Variable-register 5.
+   r7   v4         Variable-register 4.
+   r6   v3         Variable-register 3.
+   r5   v2         Variable-register 2.
+   r4   v1         Variable-register 1.
+   r3   a4         Argument / scratch register 4.
+   r2   a3         Argument / scratch register 3.
+   r1   a2         Argument / result / scratch register 2.
+   r0   a1         Argument / result / scratch register 1.
+
+   VFPv2/VFPv3/NEON registers
+   s0-s15/d0-d7/q0-q3    Argument / result / scratch registers
+   s16-s31/d8-d15/q4-q7  callee-saved registers (must be preserved across
+                         subroutine calls)
+
+   VFPv3/NEON registers (added to the VFPv2 registers set)
+   d16-d31/q8-q15        Argument / result / scratch registers
+   ----------------------------------------------------------------------------- */
+
+#define REG(x) __asm__(#x)
+
+#define REG_Base        r4
+#define REG_Sp          r5
+#define REG_Hp          r6
+#define REG_R1          r7
+#define REG_R2          r8
+#define REG_R3          r9
+#define REG_R4          r10
+#define REG_SpLim       r11
+
+#if !defined(arm_HOST_ARCH_PRE_ARMv6)
+/* d8 */
+#define REG_F1    s16
+#define REG_F2    s17
+/* d9 */
+#define REG_F3    s18
+#define REG_F4    s19
+
+#define REG_D1    d10
+#define REG_D2    d11
+#endif
\ No newline at end of file


=====================================
rts/include/stg/MachRegs/arm64.h
=====================================
@@ -0,0 +1,64 @@
+#pragma once
+
+
+/* -----------------------------------------------------------------------------
+   The ARMv8/AArch64 ABI register mapping
+
+   The AArch64 provides 31 64-bit general purpose registers
+   and 32 128-bit SIMD/floating point registers.
+
+   General purpose registers (see Chapter 5.1.1 in ARM IHI 0055B)
+
+   Register | Special | Role in the procedure call standard
+   ---------+---------+------------------------------------
+     SP     |         | The Stack Pointer
+     r30    |  LR     | The Link Register
+     r29    |  FP     | The Frame Pointer
+   r19-r28  |         | Callee-saved registers
+     r18    |         | The Platform Register, if needed;
+            |         | or temporary register
+     r17    |  IP1    | The second intra-procedure-call temporary register
+     r16    |  IP0    | The first intra-procedure-call scratch register
+    r9-r15  |         | Temporary registers
+     r8     |         | Indirect result location register
+    r0-r7   |         | Parameter/result registers
+
+
+   FPU/SIMD registers
+
+   s/d/q/v0-v7    Argument / result / scratch registers
+   s/d/q/v8-v15   callee-saved registers (must be preserved across subroutine calls,
+                  but only bottom 64-bit value needs to be preserved)
+   s/d/q/v16-v31  temporary registers
+
+   ----------------------------------------------------------------------------- */
+
+#define REG(x) __asm__(#x)
+
+#define REG_Base        r19
+#define REG_Sp          r20
+#define REG_Hp          r21
+#define REG_R1          r22
+#define REG_R2          r23
+#define REG_R3          r24
+#define REG_R4          r25
+#define REG_R5          r26
+#define REG_R6          r27
+#define REG_SpLim       r28
+
+#define REG_F1          s8
+#define REG_F2          s9
+#define REG_F3          s10
+#define REG_F4          s11
+
+#define REG_D1          d12
+#define REG_D2          d13
+#define REG_D3          d14
+#define REG_D4          d15
+
+#define REG_XMM1        q4
+#define REG_XMM2        q5
+
+#define CALLER_SAVES_XMM1
+#define CALLER_SAVES_XMM2
+


=====================================
rts/include/stg/MachRegs/loongarch64.h
=====================================
@@ -0,0 +1,51 @@
+#pragma once
+
+/* -----------------------------------------------------------------------------
+   The loongarch64 register mapping
+
+   Register    | Role(s)                                 | Call effect
+   ------------+-----------------------------------------+-------------
+   zero        | Hard-wired zero                         | -
+   ra          | Return address                          | caller-saved
+   tp          | Thread pointer                          | -
+   sp          | Stack pointer                           | callee-saved
+   a0,a1       | Arguments / return values               | caller-saved
+   a2..a7      | Arguments                               | caller-saved
+   t0..t8      | -                                       | caller-saved
+   u0          | Reserve                                 | -
+   fp          | Frame pointer                           | callee-saved
+   s0..s8      | -                                       | callee-saved
+   fa0,fa1     | Arguments / return values               | caller-saved
+   fa2..fa7    | Arguments                               | caller-saved
+   ft0..ft15   | -                                       | caller-saved
+   fs0..fs7    | -                                       | callee-saved
+
+   Each general purpose register as well as each floating-point
+   register is 64 bits wide. Also, the u0 register is called r21 in some cases.
+
+   -------------------------------------------------------------------------- */
+
+#define REG(x) __asm__("$" #x)
+
+#define REG_Base        s0
+#define REG_Sp          s1
+#define REG_Hp          s2
+#define REG_R1          s3
+#define REG_R2          s4
+#define REG_R3          s5
+#define REG_R4          s6
+#define REG_R5          s7
+#define REG_SpLim       s8
+
+#define REG_F1          fs0
+#define REG_F2          fs1
+#define REG_F3          fs2
+#define REG_F4          fs3
+
+#define REG_D1          fs4
+#define REG_D2          fs5
+#define REG_D3          fs6
+#define REG_D4          fs7
+
+#define MAX_REAL_FLOAT_REG   4
+#define MAX_REAL_DOUBLE_REG  4


=====================================
rts/include/stg/MachRegs/ppc.h
=====================================
@@ -0,0 +1,65 @@
+#pragma once
+
+/* -----------------------------------------------------------------------------
+   The PowerPC register mapping
+
+   0            system glue?    (caller-save, volatile)
+   1            SP              (callee-save, non-volatile)
+   2            AIX, powerpc64-linux:
+                    RTOC        (a strange special case)
+                powerpc32-linux:
+                                reserved for use by system
+
+   3-10         args/return     (caller-save, volatile)
+   11,12        system glue?    (caller-save, volatile)
+   13           on 64-bit:      reserved for thread state pointer
+                on 32-bit:      (callee-save, non-volatile)
+   14-31                        (callee-save, non-volatile)
+
+   f0                           (caller-save, volatile)
+   f1-f13       args/return     (caller-save, volatile)
+   f14-f31                      (callee-save, non-volatile)
+
+   Registers 14--31 are wonderful callee-save registers on all ppc OSes.
+   Registers 0--12 are caller-save registers.
+
+   %f14--%f31 are callee-save floating-point registers.
+
+   We can do the Whole Business with callee-save registers only!
+   -------------------------------------------------------------------------- */
+
+
+#define REG(x) __asm__(#x)
+
+#define REG_R1          r14
+#define REG_R2          r15
+#define REG_R3          r16
+#define REG_R4          r17
+#define REG_R5          r18
+#define REG_R6          r19
+#define REG_R7          r20
+#define REG_R8          r21
+#define REG_R9          r22
+#define REG_R10         r23
+
+#define REG_F1          fr14
+#define REG_F2          fr15
+#define REG_F3          fr16
+#define REG_F4          fr17
+#define REG_F5          fr18
+#define REG_F6          fr19
+
+#define REG_D1          fr20
+#define REG_D2          fr21
+#define REG_D3          fr22
+#define REG_D4          fr23
+#define REG_D5          fr24
+#define REG_D6          fr25
+
+#define REG_Sp          r24
+#define REG_SpLim       r25
+#define REG_Hp          r26
+#define REG_Base        r27
+
+#define MAX_REAL_FLOAT_REG   6
+#define MAX_REAL_DOUBLE_REG  6
\ No newline at end of file


=====================================
rts/include/stg/MachRegs/riscv64.h
=====================================
@@ -0,0 +1,61 @@
+#pragma once
+
+/* -----------------------------------------------------------------------------
+   The riscv64 register mapping
+
+   Register    | Role(s)                                 | Call effect
+   ------------+-----------------------------------------+-------------
+   zero        | Hard-wired zero                         | -
+   ra          | Return address                          | caller-saved
+   sp          | Stack pointer                           | callee-saved
+   gp          | Global pointer                          | callee-saved
+   tp          | Thread pointer                          | callee-saved
+   t0,t1,t2    | -                                       | caller-saved
+   s0          | Frame pointer                           | callee-saved
+   s1          | -                                       | callee-saved
+   a0,a1       | Arguments / return values               | caller-saved
+   a2..a7      | Arguments                               | caller-saved
+   s2..s11     | -                                       | callee-saved
+   t3..t6      | -                                       | caller-saved
+   ft0..ft7    | -                                       | caller-saved
+   fs0,fs1     | -                                       | callee-saved
+   fa0,fa1     | Arguments / return values               | caller-saved
+   fa2..fa7    | Arguments                               | caller-saved
+   fs2..fs11   | -                                       | callee-saved
+   ft8..ft11   | -                                       | caller-saved
+
+   Each general purpose register as well as each floating-point
+   register is 64 bits wide.
+
+   -------------------------------------------------------------------------- */
+
+#define REG(x) __asm__(#x)
+
+#define REG_Base        s1
+#define REG_Sp          s2
+#define REG_Hp          s3
+#define REG_R1          s4
+#define REG_R2          s5
+#define REG_R3          s6
+#define REG_R4          s7
+#define REG_R5          s8
+#define REG_R6          s9
+#define REG_R7          s10
+#define REG_SpLim       s11
+
+#define REG_F1          fs0
+#define REG_F2          fs1
+#define REG_F3          fs2
+#define REG_F4          fs3
+#define REG_F5          fs4
+#define REG_F6          fs5
+
+#define REG_D1          fs6
+#define REG_D2          fs7
+#define REG_D3          fs8
+#define REG_D4          fs9
+#define REG_D5          fs10
+#define REG_D6          fs11
+
+#define MAX_REAL_FLOAT_REG   6
+#define MAX_REAL_DOUBLE_REG  6
\ No newline at end of file


=====================================
rts/include/stg/MachRegs/s390x.h
=====================================
@@ -0,0 +1,72 @@
+#pragma once
+
+/* -----------------------------------------------------------------------------
+   The s390x register mapping
+
+   Register    | Role(s)                                 | Call effect
+   ------------+-----------------------------------------+-------------
+   r0,r1       | -                                       | caller-saved
+   r2          | Argument / return value                 | caller-saved
+   r3,r4,r5    | Arguments                               | caller-saved
+   r6          | Argument                                | callee-saved
+   r7...r11    | -                                       | callee-saved
+   r12         | (Commonly used as GOT pointer)          | callee-saved
+   r13         | (Commonly used as literal pool pointer) | callee-saved
+   r14         | Return address                          | caller-saved
+   r15         | Stack pointer                           | callee-saved
+   f0          | Argument / return value                 | caller-saved
+   f2,f4,f6    | Arguments                               | caller-saved
+   f1,f3,f5,f7 | -                                       | caller-saved
+   f8...f15    | -                                       | callee-saved
+   v0...v31    | -                                       | caller-saved
+
+   Each general purpose register r0 through r15 as well as each floating-point
+   register f0 through f15 is 64 bits wide. Each vector register v0 through v31
+   is 128 bits wide.
+
+   Note, the vector registers v0 through v15 overlap with the floating-point
+   registers f0 through f15.
+
+   -------------------------------------------------------------------------- */
+
+
+#define REG(x) __asm__("%" #x)
+
+#define REG_Base        r7
+#define REG_Sp          r8
+#define REG_Hp          r10
+#define REG_R1          r11
+#define REG_R2          r12
+#define REG_R3          r13
+#define REG_R4          r6
+#define REG_R5          r2
+#define REG_R6          r3
+#define REG_R7          r4
+#define REG_R8          r5
+#define REG_SpLim       r9
+#define REG_MachSp      r15
+
+#define REG_F1          f8
+#define REG_F2          f9
+#define REG_F3          f10
+#define REG_F4          f11
+#define REG_F5          f0
+#define REG_F6          f1
+
+#define REG_D1          f12
+#define REG_D2          f13
+#define REG_D3          f14
+#define REG_D4          f15
+#define REG_D5          f2
+#define REG_D6          f3
+
+#define CALLER_SAVES_R5
+#define CALLER_SAVES_R6
+#define CALLER_SAVES_R7
+#define CALLER_SAVES_R8
+
+#define CALLER_SAVES_F5
+#define CALLER_SAVES_F6
+
+#define CALLER_SAVES_D5
+#define CALLER_SAVES_D6
\ No newline at end of file


=====================================
rts/include/stg/MachRegs/wasm32.h
=====================================


=====================================
rts/include/stg/MachRegs/x86.h
=====================================
@@ -0,0 +1,210 @@
+/* -----------------------------------------------------------------------------
+   The x86 register mapping
+
+   Ok, we've only got 6 general purpose registers, a frame pointer and a
+   stack pointer.  %eax and %edx are return values from C functions,
+   hence they get trashed across ccalls and are caller saves. %ebx,
+   %esi, %edi, %ebp are all callee-saves.
+
+   Reg     STG-Reg
+   ---------------
+   ebx     Base
+   ebp     Sp
+   esi     R1
+   edi     Hp
+
+   Leaving SpLim out of the picture.
+   -------------------------------------------------------------------------- */
+
+#if defined(MACHREGS_i386)
+
+#define REG(x) __asm__("%" #x)
+
+#if !defined(not_doing_dynamic_linking)
+#define REG_Base    ebx
+#endif
+#define REG_Sp      ebp
+
+#if !defined(STOLEN_X86_REGS)
+#define STOLEN_X86_REGS 4
+#endif
+
+#if STOLEN_X86_REGS >= 3
+# define REG_R1     esi
+#endif
+
+#if STOLEN_X86_REGS >= 4
+# define REG_Hp     edi
+#endif
+#define REG_MachSp  esp
+
+#define REG_XMM1    xmm0
+#define REG_XMM2    xmm1
+#define REG_XMM3    xmm2
+#define REG_XMM4    xmm3
+
+#define REG_YMM1    ymm0
+#define REG_YMM2    ymm1
+#define REG_YMM3    ymm2
+#define REG_YMM4    ymm3
+
+#define REG_ZMM1    zmm0
+#define REG_ZMM2    zmm1
+#define REG_ZMM3    zmm2
+#define REG_ZMM4    zmm3
+
+#define MAX_REAL_VANILLA_REG 1  /* always, since it defines the entry conv */
+#define MAX_REAL_FLOAT_REG   0
+#define MAX_REAL_DOUBLE_REG  0
+#define MAX_REAL_LONG_REG    0
+#define MAX_REAL_XMM_REG     4
+#define MAX_REAL_YMM_REG     4
+#define MAX_REAL_ZMM_REG     4
+
+/* -----------------------------------------------------------------------------
+  The x86-64 register mapping
+
+  %rax          caller-saves, don't steal this one
+  %rbx          YES
+  %rcx          arg reg, caller-saves
+  %rdx          arg reg, caller-saves
+  %rsi          arg reg, caller-saves
+  %rdi          arg reg, caller-saves
+  %rbp          YES (our *prime* register)
+  %rsp          (unavailable - stack pointer)
+  %r8           arg reg, caller-saves
+  %r9           arg reg, caller-saves
+  %r10          caller-saves
+  %r11          caller-saves
+  %r12          YES
+  %r13          YES
+  %r14          YES
+  %r15          YES
+
+  %xmm0-7       arg regs, caller-saves
+  %xmm8-15      caller-saves
+
+  Use the caller-saves regs for Rn, because we don't always have to
+  save those (as opposed to Sp/Hp/SpLim etc. which always have to be
+  saved).
+
+  --------------------------------------------------------------------------- */
+
+#elif defined(MACHREGS_x86_64)
+
+#define REG(x) __asm__("%" #x)
+
+#define REG_Base  r13
+#define REG_Sp    rbp
+#define REG_Hp    r12
+#define REG_R1    rbx
+#define REG_R2    r14
+#define REG_R3    rsi
+#define REG_R4    rdi
+#define REG_R5    r8
+#define REG_R6    r9
+#define REG_SpLim r15
+#define REG_MachSp  rsp
+
+/*
+Map both Fn and Dn to register xmmn so that we can pass a function any
+combination of up to six Float# or Double# arguments without touching
+the stack. See Note [Overlapping global registers] for implications.
+*/
+
+#define REG_F1    xmm1
+#define REG_F2    xmm2
+#define REG_F3    xmm3
+#define REG_F4    xmm4
+#define REG_F5    xmm5
+#define REG_F6    xmm6
+
+#define REG_D1    xmm1
+#define REG_D2    xmm2
+#define REG_D3    xmm3
+#define REG_D4    xmm4
+#define REG_D5    xmm5
+#define REG_D6    xmm6
+
+#define REG_XMM1    xmm1
+#define REG_XMM2    xmm2
+#define REG_XMM3    xmm3
+#define REG_XMM4    xmm4
+#define REG_XMM5    xmm5
+#define REG_XMM6    xmm6
+
+#define REG_YMM1    ymm1
+#define REG_YMM2    ymm2
+#define REG_YMM3    ymm3
+#define REG_YMM4    ymm4
+#define REG_YMM5    ymm5
+#define REG_YMM6    ymm6
+
+#define REG_ZMM1    zmm1
+#define REG_ZMM2    zmm2
+#define REG_ZMM3    zmm3
+#define REG_ZMM4    zmm4
+#define REG_ZMM5    zmm5
+#define REG_ZMM6    zmm6
+
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_R3
+#define CALLER_SAVES_R4
+#endif
+#define CALLER_SAVES_R5
+#define CALLER_SAVES_R6
+
+#define CALLER_SAVES_F1
+#define CALLER_SAVES_F2
+#define CALLER_SAVES_F3
+#define CALLER_SAVES_F4
+#define CALLER_SAVES_F5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_F6
+#endif
+
+#define CALLER_SAVES_D1
+#define CALLER_SAVES_D2
+#define CALLER_SAVES_D3
+#define CALLER_SAVES_D4
+#define CALLER_SAVES_D5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_D6
+#endif
+
+#define CALLER_SAVES_XMM1
+#define CALLER_SAVES_XMM2
+#define CALLER_SAVES_XMM3
+#define CALLER_SAVES_XMM4
+#define CALLER_SAVES_XMM5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_XMM6
+#endif
+
+#define CALLER_SAVES_YMM1
+#define CALLER_SAVES_YMM2
+#define CALLER_SAVES_YMM3
+#define CALLER_SAVES_YMM4
+#define CALLER_SAVES_YMM5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_YMM6
+#endif
+
+#define CALLER_SAVES_ZMM1
+#define CALLER_SAVES_ZMM2
+#define CALLER_SAVES_ZMM3
+#define CALLER_SAVES_ZMM4
+#define CALLER_SAVES_ZMM5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_ZMM6
+#endif
+
+#define MAX_REAL_VANILLA_REG 6
+#define MAX_REAL_FLOAT_REG   6
+#define MAX_REAL_DOUBLE_REG  6
+#define MAX_REAL_LONG_REG    0
+#define MAX_REAL_XMM_REG     6
+#define MAX_REAL_YMM_REG     6
+#define MAX_REAL_ZMM_REG     6
+
+#endif  /* MACHREGS_i386 || MACHREGS_x86_64 */
\ No newline at end of file


=====================================
rts/rts.cabal.in
=====================================
@@ -126,6 +126,14 @@ library
                         ghcautoconf.h ghcconfig.h ghcplatform.h ghcversion.h
                         DerivedConstants.h
                         stg/MachRegs.h
+                        stg/MachRegs/arm32.h
+                        stg/MachRegs/arm64.h
+                        stg/MachRegs/loongarch64.h
+                        stg/MachRegs/ppc.h
+                        stg/MachRegs/riscv64.h
+                        stg/MachRegs/s390x.h
+                        stg/MachRegs/wasm32.h
+                        stg/MachRegs/x86.h
                         stg/MachRegsForHost.h
                         stg/Types.h
 
@@ -296,6 +304,14 @@ library
                         rts/storage/TSO.h
                         stg/DLL.h
                         stg/MachRegs.h
+                        stg/MachRegs/arm32.h
+                        stg/MachRegs/arm64.h
+                        stg/MachRegs/loongarch64.h
+                        stg/MachRegs/ppc.h
+                        stg/MachRegs/riscv64.h
+                        stg/MachRegs/s390x.h
+                        stg/MachRegs/wasm32.h
+                        stg/MachRegs/x86.h
                         stg/MachRegsForHost.h
                         stg/MiscClosures.h
                         stg/Prim.h


=====================================
testsuite/tests/hpc/T17073.stdout
=====================================
@@ -7,7 +7,7 @@
 100% alternatives used (0/0)
 100% local declarations used (0/0)
 100% top-level declarations used (1/1)
-hpc tools, version 0.68
+hpc tools, version 0.69
 Writing: Main.hs.html
 Writing: hpc_index.html
 Writing: hpc_index_fun.html


=====================================
utils/hpc
=====================================
@@ -1 +1 @@
-Subproject commit 2d75eb33d4c179b1c21000d32c2906ad273de0de
+Subproject commit 4b46380a06c16e38a5b9d623ab85538ee4b2319d



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f828f48ffc74d9eaaac6db61a7a8675cd57185a5...36ef67208607261d445263e9579355b4fc57ca58

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/f828f48ffc74d9eaaac6db61a7a8675cd57185a5...36ef67208607261d445263e9579355b4fc57ca58
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231008/292376df/attachment-0001.html>


More information about the ghc-commits mailing list