[commit: ghc] ghc-8.4: Mark xmm6 as caller saved in the register allocator for windows. (fe485f2)
git at git.haskell.org
git at git.haskell.org
Thu Feb 1 04:52:01 UTC 2018
Repository : ssh://git@git.haskell.org/ghc
On branch : ghc-8.4
Link : http://ghc.haskell.org/trac/ghc/changeset/fe485f2961324d3b798d4dc8b1ccd27e887fa213/ghc
>---------------------------------------------------------------
commit fe485f2961324d3b798d4dc8b1ccd27e887fa213
Author: klebinger.andreas at gmx.at <klebinger.andreas at gmx.at>
Date: Wed Jan 31 21:39:19 2018 -0500
Mark xmm6 as caller saved in the register allocator for windows.
This prevents the register being picked up as a scratch register.
Otherwise the allocator would be free to use it before a call. This
fixes #14619.
Test Plan: ci, repro case on #14619
Reviewers: bgamari, Phyx, erikd, simonmar, RyanGlScott, simonpj
Reviewed By: Phyx, RyanGlScott, simonpj
Subscribers: simonpj, RyanGlScott, Phyx, rwbarton, thomie, carter
GHC Trac Issues: #14619
Differential Revision: https://phabricator.haskell.org/D4348
(cherry picked from commit add4e1f11b88cd603f6c01bc135eb576e1922a8e)
>---------------------------------------------------------------
fe485f2961324d3b798d4dc8b1ccd27e887fa213
compiler/nativeGen/X86/Regs.hs | 6 ++--
includes/rts/Constants.h | 8 +++--
rts/StgCRun.c | 33 ++++++++++++++---
testsuite/tests/codeGen/should_run/T14619.hs | 46 ++++++++++++++++++++++++
testsuite/tests/codeGen/should_run/T14619.stdout | 1 +
testsuite/tests/codeGen/should_run/all.T | 1 +
6 files changed, 86 insertions(+), 9 deletions(-)
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 169d402..d6983b7 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -238,7 +238,6 @@ xmmregnos platform = [firstxmm .. lastxmm platform]
floatregnos :: Platform -> [RegNo]
floatregnos platform = fakeregnos ++ xmmregnos platform
-
-- argRegs is the set of regs which are read for an n-argument call to C.
-- For archs which pass all args on the stack (x86), is empty.
-- Sparc passes up to the first 6 args in regs.
@@ -408,7 +407,10 @@ callClobberedRegs platform
| target32Bit platform = [eax,ecx,edx] ++ map regSingle (floatregnos platform)
| platformOS platform == OSMinGW32
= [rax,rcx,rdx,r8,r9,r10,r11]
- ++ map regSingle (floatregnos platform)
+ -- Only xmm0-5 are caller-saves registers on 64bit windows.
+ -- ( https://docs.microsoft.com/en-us/cpp/build/register-usage )
+ -- For details check the Win64 ABI.
+ ++ map regSingle fakeregnos ++ map xmm [0 .. 5]
| otherwise
-- all xmm regs are caller-saves
-- caller-saves registers
diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h
index 27097bf..5774bd7 100644
--- a/includes/rts/Constants.h
+++ b/includes/rts/Constants.h
@@ -113,11 +113,15 @@
/* -----------------------------------------------------------------------------
How large is the stack frame saved by StgRun?
world. Used in StgCRun.c.
+
+ The size has to be enough to save the registers (see StgCRun)
+ plus padding if the result is not 16 byte aligned.
+ See the Note [Stack Alignment on X86] in StgCRun.c for details.
+
-------------------------------------------------------------------------- */
#if defined(x86_64_HOST_ARCH)
# if defined(mingw32_HOST_OS)
-/* 8 larger than necessary to make the alignment right*/
-# define STG_RUN_STACK_FRAME_SIZE 80
+# define STG_RUN_STACK_FRAME_SIZE 144
# else
# define STG_RUN_STACK_FRAME_SIZE 48
# endif
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index 4ce0c44..010af44 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -236,7 +236,7 @@ StgRunIsImplementedInAssembler(void)
);
}
-#endif
+#endif // defined(i386_HOST_ARCH)
/* ----------------------------------------------------------------------------
x86-64 is almost the same as plain x86.
@@ -279,9 +279,23 @@ StgRunIsImplementedInAssembler(void)
"movq %%r14,32(%%rax)\n\t"
"movq %%r15,40(%%rax)\n\t"
#if defined(mingw32_HOST_OS)
+ /*
+ * Additional callee saved registers on Win64. This must match
+ * callClobberedRegisters in compiler/nativeGen/X86/Regs.hs as
+ * both represent the Win64 calling convention.
+ */
"movq %%rdi,48(%%rax)\n\t"
"movq %%rsi,56(%%rax)\n\t"
- "movq %%xmm6,64(%%rax)\n\t"
+ "movq %%xmm6, 64(%%rax)\n\t"
+ "movq %%xmm7, 72(%%rax)\n\t"
+ "movq %%xmm8, 80(%%rax)\n\t"
+ "movq %%xmm9, 88(%%rax)\n\t"
+ "movq %%xmm10, 96(%%rax)\n\t"
+ "movq %%xmm11,104(%%rax)\n\t"
+ "movq %%xmm12,112(%%rax)\n\t"
+ "movq %%xmm13,120(%%rax)\n\t"
+ "movq %%xmm14,128(%%rax)\n\t"
+ "movq %%xmm15,136(%%rax)\n\t"
#endif
/*
* Set BaseReg
@@ -317,9 +331,18 @@ StgRunIsImplementedInAssembler(void)
"movq 32(%%rsp),%%r14\n\t"
"movq 40(%%rsp),%%r15\n\t"
#if defined(mingw32_HOST_OS)
- "movq 48(%%rsp),%%rdi\n\t"
- "movq 56(%%rsp),%%rsi\n\t"
- "movq 64(%%rsp),%%xmm6\n\t"
+ "movq 48(%%rsp),%%rdi\n\t"
+ "movq 56(%%rsp),%%rsi\n\t"
+ "movq 64(%%rsp),%%xmm6\n\t"
+ "movq 72(%%rax),%%xmm7\n\t"
+ "movq 80(%%rax),%%xmm8\n\t"
+ "movq 88(%%rax),%%xmm9\n\t"
+ "movq 96(%%rax),%%xmm10\n\t"
+ "movq 104(%%rax),%%xmm11\n\t"
+ "movq 112(%%rax),%%xmm12\n\t"
+ "movq 120(%%rax),%%xmm13\n\t"
+ "movq 128(%%rax),%%xmm14\n\t"
+ "movq 136(%%rax),%%xmm15\n\t"
#endif
"addq %1, %%rsp\n\t"
"retq"
diff --git a/testsuite/tests/codeGen/should_run/T14619.hs b/testsuite/tests/codeGen/should_run/T14619.hs
new file mode 100644
index 0000000..7af16df
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T14619.hs
@@ -0,0 +1,46 @@
+{-# OPTIONS_GHC -O1 #-}
+
+{-
+ On windows some xmm registers are callee saved. This means
+ they can't be used as scratch registers before a call to C.
+
+ In #14619 this wasn't respected which lead to a wrong value
+ ending up in xmm6 and being returned in the final result.
+
+ This code compiles to a non trivial fp computation followed
+ by a call to sqrt at O1+. If xmm6 isn't properly handled it
+ will be used as a scratch register failing the test.
+
+ The original code used regular sqrt which on 8.2 generated
+ a C call in the backend. To imitate this behaviour on 8.4+
+ we force a call to a C function instead.
+-}
+
+module Main (main) where
+
+
+
+import Prelude hiding((*>), (<*))
+import Foreign.C
+import Unsafe.Coerce
+
+foreign import ccall unsafe "sqrt" call_sqrt :: CDouble -> CDouble
+
+type V3 = (Double, Double, Double)
+
+absf :: V3 -> V3 -> Double
+absf (x, y, z) (x', y', z') = x*x' +y*y'+z*z'
+
+
+{-# NOINLINE sphereIntersection #-}
+sphereIntersection :: V3 -> V3 -> (V3)
+sphereIntersection orig dir@(_, _, dirz)
+ | b < 0 = undefined
+ | t1 > 0 = dir
+ | t1 < 0 = orig
+ | otherwise = undefined
+ where b = orig `absf` dir
+ sqrtDisc = realToFrac . call_sqrt $ CDouble b
+ t1 = b - sqrtDisc
+
+main = print $ sphereIntersection (11, 22, 33) (44, 55, 66)
diff --git a/testsuite/tests/codeGen/should_run/T14619.stdout b/testsuite/tests/codeGen/should_run/T14619.stdout
new file mode 100644
index 0000000..a11c04d
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T14619.stdout
@@ -0,0 +1 @@
+(44.0,55.0,66.0)
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index 42d8a2f..145365e 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -165,3 +165,4 @@ test('T13825-unit',
extra_run_opts('"' + config.libdir + '"'),
compile_and_run,
['-package ghc'])
+test('T14619', normal, compile_and_run, [''])
More information about the ghc-commits
mailing list