[Git][ghc/ghc][wip/ncg-simd] X86_64: xmm registers start at 0

sheaf (@sheaf) gitlab at gitlab.haskell.org
Wed Aug 14 10:53:34 UTC 2024



sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
a133c338 by sheaf at 2024-08-14T12:53:27+02:00
X86_64: xmm registers start at 0

The Cmm XmmReg/YmmReg/ZmmReg registers start at 1, but on X86 the vector
machine registers start at 0.
This was accounted for on X86_32 in the MachRegs/x86.h file, but not for
X86_64 where REG_XMM1 was incorrectly defined as xmm1 instead of xmm0.

Fixing this means that one can correctly use "foreign import prim"
with code that expects vector arguments in xmm0, xmm1, etc.

Fixes #25156

- - - - -


3 changed files:

- docs/users_guide/9.12.1-notes.rst
- rts/include/stg/MachRegs/x86.h
- testsuite/tests/codeGen/should_run/CallConv_x86_64.s


Changes:

=====================================
docs/users_guide/9.12.1-notes.rst
=====================================
@@ -96,6 +96,10 @@ Compiler
   operations. Other operations still require the LLVM backend. Contributors
   welcome!
 
+- On X86_64, GHC will now use register `xmm0` to pass the first floating-point
+  or vector argument in a `prim` call. This should only affect users of
+  `foreign import prim` on X86_64.
+
 - i386 Windows support is now completely removed amid massive cleanup
   of legacy code to pave way for Arm64 Windows support (`#24883
   <https://gitlab.haskell.org/ghc/ghc/-/issues/24883>`_). Rest


=====================================
rts/include/stg/MachRegs/x86.h
=====================================
@@ -112,40 +112,40 @@ combination of up to six Float# or Double# arguments without touching
 the stack. See Note [Overlapping global registers] for implications.
 */
 
-#define REG_F1    xmm1
-#define REG_F2    xmm2
-#define REG_F3    xmm3
-#define REG_F4    xmm4
-#define REG_F5    xmm5
-#define REG_F6    xmm6
-
-#define REG_D1    xmm1
-#define REG_D2    xmm2
-#define REG_D3    xmm3
-#define REG_D4    xmm4
-#define REG_D5    xmm5
-#define REG_D6    xmm6
-
-#define REG_XMM1    xmm1
-#define REG_XMM2    xmm2
-#define REG_XMM3    xmm3
-#define REG_XMM4    xmm4
-#define REG_XMM5    xmm5
-#define REG_XMM6    xmm6
-
-#define REG_YMM1    ymm1
-#define REG_YMM2    ymm2
-#define REG_YMM3    ymm3
-#define REG_YMM4    ymm4
-#define REG_YMM5    ymm5
-#define REG_YMM6    ymm6
-
-#define REG_ZMM1    zmm1
-#define REG_ZMM2    zmm2
-#define REG_ZMM3    zmm3
-#define REG_ZMM4    zmm4
-#define REG_ZMM5    zmm5
-#define REG_ZMM6    zmm6
+#define REG_F1    xmm0
+#define REG_F2    xmm1
+#define REG_F3    xmm2
+#define REG_F4    xmm3
+#define REG_F5    xmm4
+#define REG_F6    xmm5
+
+#define REG_D1    xmm0
+#define REG_D2    xmm1
+#define REG_D3    xmm2
+#define REG_D4    xmm3
+#define REG_D5    xmm4
+#define REG_D6    xmm5
+
+#define REG_XMM1    xmm0
+#define REG_XMM2    xmm1
+#define REG_XMM3    xmm2
+#define REG_XMM4    xmm3
+#define REG_XMM5    xmm4
+#define REG_XMM6    xmm5
+
+#define REG_YMM1    ymm0
+#define REG_YMM2    ymm1
+#define REG_YMM3    ymm2
+#define REG_YMM4    ymm3
+#define REG_YMM5    ymm4
+#define REG_YMM6    ymm5
+
+#define REG_ZMM1    zmm0
+#define REG_ZMM2    zmm1
+#define REG_ZMM3    zmm2
+#define REG_ZMM4    zmm3
+#define REG_ZMM5    zmm4
+#define REG_ZMM6    zmm5
 
 #if !defined(mingw32_HOST_OS)
 #define CALLER_SAVES_R3


=====================================
testsuite/tests/codeGen/should_run/CallConv_x86_64.s
=====================================
@@ -2,26 +2,25 @@
 _someFuncF:
 	.globl someFuncF
 someFuncF:
-	movss %xmm1,%xmm0
-	subss %xmm2,%xmm0
-	addss %xmm2,%xmm1
-	movss %xmm0,%xmm2
-	movss %xmm3,%xmm0
-	divss %xmm4,%xmm0
-	mulss %xmm4,%xmm3
-	movss %xmm0,%xmm4
+	movss %xmm1, %xmm4
+	movss %xmm0, %xmm1
+	subss %xmm4, %xmm1
+	addss %xmm4, %xmm0
+	movss %xmm3, %xmm4
+	movss %xmm2, %xmm3
+	divss %xmm4, %xmm3
+	mulss %xmm4, %xmm2
 	jmp *(%rbp)
-
 	.globl _someFuncD
 _someFuncD:
 	.globl someFuncD
 someFuncD:
-	movsd %xmm1,%xmm0
-	subsd %xmm2,%xmm0
-	addsd %xmm2,%xmm1
-	movsd %xmm0,%xmm2
-	movsd %xmm3,%xmm0
-	divsd %xmm4,%xmm0
-	mulsd %xmm4,%xmm3
-	movsd %xmm0,%xmm4
+	movsd %xmm1, %xmm4
+	movsd %xmm0, %xmm1
+	subsd %xmm4, %xmm1
+	addsd %xmm4, %xmm0
+	movsd %xmm3, %xmm4
+	movsd %xmm2, %xmm3
+	divsd %xmm4, %xmm3
+	mulsd %xmm4, %xmm2
 	jmp *(%rbp)



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/a133c338ed935dc321e81e46f772f3966a518ada

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/a133c338ed935dc321e81e46f772f3966a518ada
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240814/1c56a987/attachment-0001.html>


More information about the ghc-commits mailing list