[commit: ghc] master: PPC NCG: Fix float parameter passing on 64-bit. (2897be7)

Sat Jun 18 22:23:34 UTC 2016

Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/2897be77123bf31cad1c60dd5560eba7f2f021ce/ghc

>---------------------------------------------------------------

commit 2897be77123bf31cad1c60dd5560eba7f2f021ce
Author: Peter Trommler <ptrommler at acm.org>
Date:   Sat Jun 18 12:28:41 2016 +0200

    PPC NCG: Fix float parameter passing on 64-bit.
    
    On Linux 64-bit PowerPC the first 13 floating point parameters are
    passed in registers. We only passed the first 8 floating point params.
    
    The alignment of a floating point single precision value in ELF v1.9 is
    the second word of a doubleword. For ELF v2 we support only little
    endian and the least significant word of a doubleword is the first word,
    so no special handling is required.
    
    Add a regression test.
    
    Test Plan: validate on powerpc Linux and AIX
    
    Reviewers: erikd, hvr, austin, simonmar, bgamari
    
    Reviewed By: simonmar
    
    Subscribers: thomie
    
    Differential Revision: https://phabricator.haskell.org/D2327
    
    GHC Trac Issues: #12134


>---------------------------------------------------------------

2897be77123bf31cad1c60dd5560eba7f2f021ce
 compiler/nativeGen/PPC/CodeGen.hs            | 24 ++++++++++++++++++------
 compiler/nativeGen/PPC/Regs.hs               |  5 ++++-
 testsuite/tests/ffi/should_run/T12134.hs     |  8 ++++++++
 testsuite/tests/ffi/should_run/T12134.stdout | 15 +++++++++++++++
 testsuite/tests/ffi/should_run/T12134_c.c    |  8 ++++++++
 testsuite/tests/ffi/should_run/all.T         |  6 ++++++
 6 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 12d5d88..86903e4 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -1260,10 +1260,12 @@ genCCall' dflags gcp target dest_regs args
                                 GCPLinux -> roundTo 16 finalStack
                                 GCPLinux64ELF 1 ->
                                     roundTo 16 $ (48 +) $ max 64 $ sum $
-                                    map (widthInBytes . typeWidth) argReps
+                                    map (roundTo 8 . widthInBytes . typeWidth)
+                                        argReps
                                 GCPLinux64ELF 2 ->
                                     roundTo 16 $ (32 +) $ max 64 $ sum $
-                                    map (widthInBytes . typeWidth) argReps
+                                    map (roundTo 8 . widthInBytes . typeWidth)
+                                        argReps
                                 _ -> panic "genCall': unknown calling conv."
 
         argReps = map (cmmExprType dflags) args
@@ -1414,11 +1416,21 @@ genCCall' dflags gcp target dest_regs args
                                 | otherwise ->
                                    stackOffset
                                GCPLinux64ELF _ ->
-                                   -- everything on the stack is 8-byte
-                                   -- aligned on a 64 bit system
-                                   -- (except vector status, not used now)
+                                   -- Everything on the stack is mapped to
+                                   -- 8-byte aligned doublewords
                                    stackOffset
-                stackSlot = AddrRegImm sp (ImmInt stackOffset')
+                stackOffset''
+                     | isFloatType rep && typeWidth rep == W32 =
+                         case gcp of
+                         -- The ELF v1 ABI Section 3.2.3 requires:
+                         -- "Single precision floating point values
+                         -- are mapped to the second word in a single
+                         -- doubleword"
+                         GCPLinux64ELF 1 -> stackOffset' + 4
+                         _               -> stackOffset'
+                     | otherwise = stackOffset'
+
+                stackSlot = AddrRegImm sp (ImmInt stackOffset'')
                 (nGprs, nFprs, stackBytes, regs)
                     = case gcp of
                       GCPAIX ->
diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs
index 780aecc..a1befc7 100644
--- a/compiler/nativeGen/PPC/Regs.hs
+++ b/compiler/nativeGen/PPC/Regs.hs
@@ -263,7 +263,10 @@ allFPArgRegs platform
     = case platformOS platform of
       OSAIX    -> map (regSingle . fReg) [1..13]
       OSDarwin -> map (regSingle . fReg) [1..13]
-      OSLinux  -> map (regSingle . fReg) [1..8]
+      OSLinux  -> case platformArch platform of
+        ArchPPC      -> map (regSingle . fReg) [1..8]
+        ArchPPC_64 _ -> map (regSingle . fReg) [1..13]
+        _            -> panic "PPC.Regs.allFPArgRegs: unknown PPC Linux"
       _        -> panic "PPC.Regs.allFPArgRegs: not defined for this architecture"
 
 fits16Bits :: Integral a => a -> Bool
diff --git a/testsuite/tests/ffi/should_run/T12134.hs b/testsuite/tests/ffi/should_run/T12134.hs
new file mode 100644
index 0000000..f07d892
--- /dev/null
+++ b/testsuite/tests/ffi/should_run/T12134.hs
@@ -0,0 +1,8 @@
+import Foreign.C.Types
+
+foreign import ccall "many_floats" many :: CFloat -> CFloat ->
+     CFloat -> CFloat -> CFloat -> CFloat -> CFloat -> CFloat ->
+     CFloat -> CFloat -> CFloat -> CFloat -> CFloat -> CFloat ->
+     CDouble -> IO ()
+
+main = many 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 10.5 11.5 12.5 13.5 14.5 15.5
diff --git a/testsuite/tests/ffi/should_run/T12134.stdout b/testsuite/tests/ffi/should_run/T12134.stdout
new file mode 100644
index 0000000..798f1a2
--- /dev/null
+++ b/testsuite/tests/ffi/should_run/T12134.stdout
@@ -0,0 +1,15 @@
+1.500000
+2.500000
+3.500000
+4.500000
+5.500000
+6.500000
+7.500000
+8.500000
+9.500000
+10.500000
+11.500000
+12.500000
+13.500000
+14.500000
+15.500000
diff --git a/testsuite/tests/ffi/should_run/T12134_c.c b/testsuite/tests/ffi/should_run/T12134_c.c
new file mode 100644
index 0000000..0e61670
--- /dev/null
+++ b/testsuite/tests/ffi/should_run/T12134_c.c
@@ -0,0 +1,8 @@
+#include <stdio.h>
+
+void many_floats(float f1, float f2, float f3, float f4, float f5,
+                 float f6, float f7, float f8, float f9, float f10,
+                 float f11, float f12, float f13, float f14, double f15) {
+  printf("%f\n%f\n%f\n%f\n%f\n%f\n%f\n%f\n%f\n%f\n%f\n%f\n%f\n%f\n%f\n",
+         f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15);
+}
diff --git a/testsuite/tests/ffi/should_run/all.T b/testsuite/tests/ffi/should_run/all.T
index eb2c152..efb6969 100644
--- a/testsuite/tests/ffi/should_run/all.T
+++ b/testsuite/tests/ffi/should_run/all.T
@@ -217,3 +217,9 @@ test('ffi023', [ omit_ways(['ghci']),
                 # ffi023_stub.h before compiling ffi023_c.c, which
                 # needs it.
               compile_and_run, ['ffi023_c.c'])
+
+test('T12134',
+     [omit_ways(['ghci']), extra_clean(['T12134_c.o'])],
+     compile_and_run,
+     ['T12134_c.c'])
+