[commit: ghc] wip/simd: By default, only pass 128-bit SIMD vectors in registers on X86-64. (52e04e5)
git at git.haskell.org
git at git.haskell.org
Mon Sep 16 07:05:40 CEST 2013
Repository : ssh://git@git.haskell.org/ghc
On branch : wip/simd
Link : http://ghc.haskell.org/trac/ghc/changeset/52e04e53d64882602a4324b6f0508d129d4b7fa7/ghc
>---------------------------------------------------------------
commit 52e04e53d64882602a4324b6f0508d129d4b7fa7
Author: Geoffrey Mainland <gmainlan at microsoft.com>
Date: Sun Sep 15 23:43:29 2013 -0400
By default, only pass 128-bit SIMD vectors in registers on X86-64.
LLVM's GHC calling convention only allows 128-bit SIMD vectors to be passed in
machine registers on X86-64. This may change in LLVM 3.4; the hidden flag
-fllvm-pass-vectors-in-regs causes all SIMD vector widths to be passed in
registers on both X86-64 and on X86-32.
>---------------------------------------------------------------
52e04e53d64882602a4324b6f0508d129d4b7fa7
compiler/cmm/CmmCallConv.hs | 20 +++++++++++++++++---
compiler/main/DynFlags.hs | 2 ++
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 6a93166..155ba76 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -66,9 +66,12 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
| isFloatType ty = float
| otherwise = int
where vec = case (w, regs) of
- (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
- (W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
- (W512, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
+ (W128, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+ (W256, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
+ (W512, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss))
@@ -100,6 +103,17 @@ passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
ArchX86_64 -> True
_ -> False
+-- On X86_64, we always pass 128-bit-wide vectors in registers. On 32-bit X86
+-- and for all larger vector sizes on X86_64, LLVM's GHC calling convention
+-- doesn't currently passing vectors in registers. This may change with LLVM
+-- 3.4, and the patch is small and well-contained, so we support passing these
+-- vectors in registers with a hidden GHC flag.
+passVectorInReg :: Width -> DynFlags -> Bool
+passVectorInReg W128 dflags = case platformArch (targetPlatform dflags) of
+ ArchX86_64 -> True
+ _ -> gopt Opt_LlvmPassVectorsInRegisters dflags
+passVectorInReg _ dflags = gopt Opt_LlvmPassVectorsInRegisters dflags
+
assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a]
-> (
ByteOff -- bytes of stack args
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 0e9193d..1d43b7e 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -308,6 +308,7 @@ data GeneralFlag
| Opt_RegsIterative -- do iterative coalescing graph coloring register allocation
| Opt_PedanticBottoms -- Be picky about how we treat bottom
| Opt_LlvmTBAA -- Use LLVM TBAA infastructure for improving AA (hidden flag)
+ | Opt_LlvmPassVectorsInRegisters -- Pass SIMD vectors in registers (requires a patched LLVM) (hidden flag)
| Opt_IrrefutableTuples
| Opt_CmmSink
| Opt_CmmElimCommonBlocks
@@ -2605,6 +2606,7 @@ fFlags = [
( "regs-graph", Opt_RegsGraph, nop ),
( "regs-iterative", Opt_RegsIterative, nop ),
( "llvm-tbaa", Opt_LlvmTBAA, nop), -- hidden flag
+ ( "llvm-pass-vectors-in-regs", Opt_LlvmPassVectorsInRegisters, nop), -- hidden flag
( "irrefutable-tuples", Opt_IrrefutableTuples, nop ),
( "cmm-sink", Opt_CmmSink, nop ),
( "cmm-elim-common-blocks", Opt_CmmElimCommonBlocks, nop ),
More information about the ghc-commits
mailing list