[commit: ghc] master: Mark xmm6 as caller saved in the register allocator for windows. (add4e1f)

git at git.haskell.org git at git.haskell.org
Thu Feb 1 04:30:14 UTC 2018


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/add4e1f11b88cd603f6c01bc135eb576e1922a8e/ghc

>---------------------------------------------------------------

commit add4e1f11b88cd603f6c01bc135eb576e1922a8e
Author: klebinger.andreas at gmx.at <klebinger.andreas at gmx.at>
Date:   Wed Jan 31 21:39:19 2018 -0500

    Mark xmm6 as caller saved in the register allocator for windows.
    
    This prevents the register being picked up as a scratch register.
    Otherwise the allocator would be free to use it before a call. This
    fixes #14619.
    
    Test Plan: ci, repro case on #14619
    
    Reviewers: bgamari, Phyx, erikd, simonmar, RyanGlScott, simonpj
    
    Reviewed By: Phyx, RyanGlScott, simonpj
    
    Subscribers: simonpj, RyanGlScott, Phyx, rwbarton, thomie, carter
    
    GHC Trac Issues: #14619
    
    Differential Revision: https://phabricator.haskell.org/D4348


>---------------------------------------------------------------

add4e1f11b88cd603f6c01bc135eb576e1922a8e
 compiler/nativeGen/X86/Regs.hs                   |  6 ++--
 includes/rts/Constants.h                         |  8 +++--
 rts/StgCRun.c                                    | 33 ++++++++++++++---
 testsuite/tests/codeGen/should_run/T14619.hs     | 46 ++++++++++++++++++++++++
 testsuite/tests/codeGen/should_run/T14619.stdout |  1 +
 testsuite/tests/codeGen/should_run/all.T         |  1 +
 6 files changed, 86 insertions(+), 9 deletions(-)

diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 169d402..d6983b7 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -238,7 +238,6 @@ xmmregnos platform = [firstxmm  .. lastxmm platform]
 floatregnos :: Platform -> [RegNo]
 floatregnos platform = fakeregnos ++ xmmregnos platform
 
-
 -- argRegs is the set of regs which are read for an n-argument call to C.
 -- For archs which pass all args on the stack (x86), is empty.
 -- Sparc passes up to the first 6 args in regs.
@@ -408,7 +407,10 @@ callClobberedRegs platform
  | target32Bit platform = [eax,ecx,edx] ++ map regSingle (floatregnos platform)
  | platformOS platform == OSMinGW32
    = [rax,rcx,rdx,r8,r9,r10,r11]
-   ++ map regSingle (floatregnos platform)
+   -- Only xmm0-5 are caller-saves registers on 64bit windows.
+   -- ( https://docs.microsoft.com/en-us/cpp/build/register-usage )
+   -- For details check the Win64 ABI.
+   ++ map regSingle fakeregnos ++ map xmm [0  .. 5]
  | otherwise
     -- all xmm regs are caller-saves
     -- caller-saves registers
diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h
index 27097bf..5774bd7 100644
--- a/includes/rts/Constants.h
+++ b/includes/rts/Constants.h
@@ -113,11 +113,15 @@
 /* -----------------------------------------------------------------------------
    How large is the stack frame saved by StgRun?
    world.  Used in StgCRun.c.
+
+   The size has to be enough to save the registers (see StgCRun)
+   plus padding if the result is not 16 byte aligned.
+   See the Note [Stack Alignment on X86] in StgCRun.c for details.
+
    -------------------------------------------------------------------------- */
 #if defined(x86_64_HOST_ARCH)
 #  if defined(mingw32_HOST_OS)
-/* 8 larger than necessary to make the alignment right*/
-#    define STG_RUN_STACK_FRAME_SIZE 80
+#    define STG_RUN_STACK_FRAME_SIZE 144
 #  else
 #    define STG_RUN_STACK_FRAME_SIZE 48
 #  endif
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index 5460598..ab66c64 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -236,7 +236,7 @@ StgRunIsImplementedInAssembler(void)
     );
 }
 
-#endif
+#endif // defined(i386_HOST_ARCH)
 
 /* ----------------------------------------------------------------------------
    x86-64 is almost the same as plain x86.
@@ -279,9 +279,23 @@ StgRunIsImplementedInAssembler(void)
         "movq %%r14,32(%%rax)\n\t"
         "movq %%r15,40(%%rax)\n\t"
 #if defined(mingw32_HOST_OS)
+        /*
+         * Additional callee saved registers on Win64. This must match
+         * callClobberedRegisters in compiler/nativeGen/X86/Regs.hs as
+         * both represent the Win64 calling convention.
+         */
         "movq %%rdi,48(%%rax)\n\t"
         "movq %%rsi,56(%%rax)\n\t"
-        "movq %%xmm6,64(%%rax)\n\t"
+        "movq %%xmm6,  64(%%rax)\n\t"
+        "movq %%xmm7,  72(%%rax)\n\t"
+        "movq %%xmm8,  80(%%rax)\n\t"
+        "movq %%xmm9,  88(%%rax)\n\t"
+        "movq %%xmm10, 96(%%rax)\n\t"
+        "movq %%xmm11,104(%%rax)\n\t"
+        "movq %%xmm12,112(%%rax)\n\t"
+        "movq %%xmm13,120(%%rax)\n\t"
+        "movq %%xmm14,128(%%rax)\n\t"
+        "movq %%xmm15,136(%%rax)\n\t"
 #endif
         /*
          * Set BaseReg
@@ -317,9 +331,18 @@ StgRunIsImplementedInAssembler(void)
         "movq 32(%%rsp),%%r14\n\t"
         "movq 40(%%rsp),%%r15\n\t"
 #if defined(mingw32_HOST_OS)
-        "movq 48(%%rsp),%%rdi\n\t"
-        "movq 56(%%rsp),%%rsi\n\t"
-        "movq 64(%%rsp),%%xmm6\n\t"
+        "movq  48(%%rsp),%%rdi\n\t"
+        "movq  56(%%rsp),%%rsi\n\t"
+        "movq  64(%%rsp),%%xmm6\n\t"
+        "movq  72(%%rax),%%xmm7\n\t"
+        "movq  80(%%rax),%%xmm8\n\t"
+        "movq  88(%%rax),%%xmm9\n\t"
+        "movq  96(%%rax),%%xmm10\n\t"
+        "movq 104(%%rax),%%xmm11\n\t"
+        "movq 112(%%rax),%%xmm12\n\t"
+        "movq 120(%%rax),%%xmm13\n\t"
+        "movq 128(%%rax),%%xmm14\n\t"
+        "movq 136(%%rax),%%xmm15\n\t"
 #endif
         "addq %1, %%rsp\n\t"
         "retq"
diff --git a/testsuite/tests/codeGen/should_run/T14619.hs b/testsuite/tests/codeGen/should_run/T14619.hs
new file mode 100644
index 0000000..7af16df
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T14619.hs
@@ -0,0 +1,46 @@
+{-# OPTIONS_GHC -O1 #-}
+
+{-
+  On windows some xmm registers are callee saved. This means
+  they can't be used as scratch registers before a call to C.
+
+  In #14619 this wasn't respected which lead to a wrong value
+  ending up in xmm6 and being returned in the final result.
+
+  This code compiles to a non trivial fp computation followed
+  by a call to sqrt at O1+. If xmm6 isn't properly handled it
+  will be used as a scratch register failing the test.
+
+  The original code used regular sqrt which on 8.2 generated
+  a C call in the backend. To imitate this behaviour on 8.4+
+  we force a call to a C function instead.
+-}
+
+module Main (main) where
+
+
+
+import Prelude hiding((*>), (<*))
+import Foreign.C
+import Unsafe.Coerce
+
+foreign import ccall unsafe "sqrt" call_sqrt :: CDouble -> CDouble
+
+type V3 = (Double, Double, Double)
+
+absf :: V3 -> V3 -> Double
+absf (x, y, z) (x', y', z') = x*x' +y*y'+z*z'
+
+
+{-# NOINLINE sphereIntersection #-}
+sphereIntersection :: V3 -> V3 -> (V3)
+sphereIntersection orig dir@(_, _, dirz)
+  | b < 0  = undefined
+  | t1   > 0  = dir
+  | t1   < 0  = orig
+  | otherwise = undefined
+    where b  = orig `absf` dir
+          sqrtDisc = realToFrac . call_sqrt $ CDouble b
+          t1 = b - sqrtDisc
+
+main = print $ sphereIntersection (11, 22, 33) (44, 55, 66)
diff --git a/testsuite/tests/codeGen/should_run/T14619.stdout b/testsuite/tests/codeGen/should_run/T14619.stdout
new file mode 100644
index 0000000..a11c04d
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T14619.stdout
@@ -0,0 +1 @@
+(44.0,55.0,66.0)
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index 42d8a2f..145365e 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -165,3 +165,4 @@ test('T13825-unit',
      extra_run_opts('"' + config.libdir + '"'),
      compile_and_run,
      ['-package ghc'])
+test('T14619', normal, compile_and_run, [''])



More information about the ghc-commits mailing list