[commit: ghc] wip/simd: By default, only pass 128-bit SIMD vectors in registers on X86-64. (52e04e5)

Mon Sep 16 07:05:40 CEST 2013

Repository : ssh://git@git.haskell.org/ghc

On branch  : wip/simd
Link       : http://ghc.haskell.org/trac/ghc/changeset/52e04e53d64882602a4324b6f0508d129d4b7fa7/ghc

>---------------------------------------------------------------

commit 52e04e53d64882602a4324b6f0508d129d4b7fa7
Author: Geoffrey Mainland <gmainlan at microsoft.com>
Date:   Sun Sep 15 23:43:29 2013 -0400

    By default, only pass 128-bit SIMD vectors in registers on X86-64.
    
    LLVM's GHC calling convention only allows 128-bit SIMD vectors to be passed in
    machine registers on X86-64. This may change in LLVM 3.4; the hidden flag
    -fllvm-pass-vectors-in-regs causes all SIMD vector widths to be passed in
    registers on both X86-64 and on X86-32.


>---------------------------------------------------------------

52e04e53d64882602a4324b6f0508d129d4b7fa7
 compiler/cmm/CmmCallConv.hs |   20 +++++++++++++++++---
 compiler/main/DynFlags.hs   |    2 ++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 6a93166..155ba76 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -66,9 +66,12 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
                                     | isFloatType ty = float
                                     | otherwise      = int
         where vec = case (w, regs) of
-                      (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
-                      (W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
-                      (W512, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
+                      (W128, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+                      (W256, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
+                      (W512, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
                       _ -> (assts, (r:rs))
               float = case (w, regs) of
                         (W32, (vs, fs, ds, ls, s:ss))
@@ -100,6 +103,17 @@ passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
                               ArchX86_64 -> True
                               _          -> False
 
+-- On X86_64, we always pass 128-bit-wide vectors in registers. On 32-bit X86
+-- and for all larger vector sizes on X86_64, LLVM's GHC calling convention
+-- doesn't currently passing vectors in registers. This may change with LLVM
+-- 3.4, and the patch is small and well-contained, so we support passing these
+-- vectors in registers with a hidden GHC flag.
+passVectorInReg :: Width -> DynFlags -> Bool
+passVectorInReg W128 dflags = case platformArch (targetPlatform dflags) of
+                                ArchX86_64 -> True
+                                _          -> gopt Opt_LlvmPassVectorsInRegisters dflags
+passVectorInReg _    dflags = gopt Opt_LlvmPassVectorsInRegisters dflags
+
 assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a]
             -> (
                  ByteOff              -- bytes of stack args
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 0e9193d..1d43b7e 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -308,6 +308,7 @@ data GeneralFlag
    | Opt_RegsIterative                  -- do iterative coalescing graph coloring register allocation
    | Opt_PedanticBottoms                -- Be picky about how we treat bottom
    | Opt_LlvmTBAA                       -- Use LLVM TBAA infastructure for improving AA (hidden flag)
+   | Opt_LlvmPassVectorsInRegisters     -- Pass SIMD vectors in registers (requires a patched LLVM) (hidden flag)
    | Opt_IrrefutableTuples
    | Opt_CmmSink
    | Opt_CmmElimCommonBlocks
@@ -2605,6 +2606,7 @@ fFlags = [
   ( "regs-graph",                       Opt_RegsGraph, nop ),
   ( "regs-iterative",                   Opt_RegsIterative, nop ),
   ( "llvm-tbaa",                        Opt_LlvmTBAA, nop), -- hidden flag
+  ( "llvm-pass-vectors-in-regs",        Opt_LlvmPassVectorsInRegisters, nop), -- hidden flag
   ( "irrefutable-tuples",               Opt_IrrefutableTuples, nop ),
   ( "cmm-sink",                         Opt_CmmSink, nop ),
   ( "cmm-elim-common-blocks",           Opt_CmmElimCommonBlocks, nop ),