[commit: ghc] wip/simd: By default, only pass 128-bit SIMD vectors in registers on X86-64. (d2b9526)

Mon Sep 23 06:12:43 CEST 2013

Repository : ssh://git@git.haskell.org/ghc

On branch  : wip/simd
Link       : http://ghc.haskell.org/trac/ghc/changeset/d2b95264c97b3d7786b359fbf04fb297a160daa3/ghc

>---------------------------------------------------------------

commit d2b95264c97b3d7786b359fbf04fb297a160daa3
Author: Geoffrey Mainland <gmainlan at microsoft.com>
Date:   Sun Sep 15 23:43:29 2013 -0400

    By default, only pass 128-bit SIMD vectors in registers on X86-64.
    
    LLVM's GHC calling convention only allows 128-bit SIMD vectors to be passed in
    machine registers on X86-64. This may change in LLVM 3.4; the hidden flag
    -fllvm-pass-vectors-in-regs causes all SIMD vector widths to be passed in
    registers on both X86-64 and on X86-32.


>---------------------------------------------------------------

d2b95264c97b3d7786b359fbf04fb297a160daa3
 compiler/cmm/CmmCallConv.hs |   23 ++++++++++++++++++++---
 compiler/main/DynFlags.hs   |    2 ++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 6a93166..60e2c8c 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -66,9 +66,12 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
                                     | isFloatType ty = float
                                     | otherwise      = int
         where vec = case (w, regs) of
-                      (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
-                      (W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
-                      (W512, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
+                      (W128, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+                      (W256, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
+                      (W512, (vs, fs, ds, ls, s:ss))
+                          | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
                       _ -> (assts, (r:rs))
               float = case (w, regs) of
                         (W32, (vs, fs, ds, ls, s:ss))
@@ -100,6 +103,20 @@ passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
                               ArchX86_64 -> True
                               _          -> False
 
+-- On X86_64, we always pass 128-bit-wide vectors in registers. On 32-bit X86
+-- and for all larger vector sizes on X86_64, LLVM's GHC calling convention
+-- doesn't currently support passing vectors in registers. The patch to update the GHC
+-- calling convention to support passing SIMD vectors in registers is small and
+-- well-contained, so it may make it into LLVM 3.4. The hidden
+-- -fllvm-pass-vectors-in-regs flag will generate LLVM code that attempts to
+-- pass vectors in registers, but it must only be used with a version of LLVM
+-- that has an updated GHC calling convention.
+passVectorInReg :: Width -> DynFlags -> Bool
+passVectorInReg W128 dflags = case platformArch (targetPlatform dflags) of
+                                ArchX86_64 -> True
+                                _          -> gopt Opt_LlvmPassVectorsInRegisters dflags
+passVectorInReg _    dflags = gopt Opt_LlvmPassVectorsInRegisters dflags
+
 assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a]
             -> (
                  ByteOff              -- bytes of stack args
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 74241ba..37f35e6 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -309,6 +309,7 @@ data GeneralFlag
    | Opt_RegsIterative                  -- do iterative coalescing graph coloring register allocation
    | Opt_PedanticBottoms                -- Be picky about how we treat bottom
    | Opt_LlvmTBAA                       -- Use LLVM TBAA infastructure for improving AA (hidden flag)
+   | Opt_LlvmPassVectorsInRegisters     -- Pass SIMD vectors in registers (requires a patched LLVM) (hidden flag)
    | Opt_IrrefutableTuples
    | Opt_CmmSink
    | Opt_CmmElimCommonBlocks
@@ -2611,6 +2612,7 @@ fFlags = [
   ( "regs-graph",                       Opt_RegsGraph, nop ),
   ( "regs-iterative",                   Opt_RegsIterative, nop ),
   ( "llvm-tbaa",                        Opt_LlvmTBAA, nop), -- hidden flag
+  ( "llvm-pass-vectors-in-regs",        Opt_LlvmPassVectorsInRegisters, nop), -- hidden flag
   ( "irrefutable-tuples",               Opt_IrrefutableTuples, nop ),
   ( "cmm-sink",                         Opt_CmmSink, nop ),
   ( "cmm-elim-common-blocks",           Opt_CmmElimCommonBlocks, nop ),