[commit: ghc] master: nativeGen: Use SSE2 SQRT instruction (9ac2218)
git at git.haskell.org
git at git.haskell.org
Fri Apr 28 19:01:47 UTC 2017
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/9ac22183e405773ea7147728e593edd78f30a025/ghc
>---------------------------------------------------------------
commit 9ac22183e405773ea7147728e593edd78f30a025
Author: Ben Gamari <bgamari.foss at gmail.com>
Date: Fri Apr 28 14:24:53 2017 -0400
nativeGen: Use SSE2 SQRT instruction
Reviewers: austin, dfeuer
Subscribers: dfeuer, rwbarton, thomie
GHC Trac Issues: #13629
Differential Revision: https://phabricator.haskell.org/D3508
>---------------------------------------------------------------
9ac22183e405773ea7147728e593edd78f30a025
compiler/nativeGen/X86/CodeGen.hs | 17 +++++++++++------
compiler/nativeGen/X86/Instr.hs | 4 +++-
compiler/nativeGen/X86/Ppr.hs | 1 +
libraries/base/tests/Numeric/num009.hs | 5 +++++
4 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 562303c..baa5c8f 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -2057,13 +2057,15 @@ genCCall _ is32Bit target dest_regs args = do
MO_F64_Fabs -> case args of
[x] -> sse2FabsCode W64 x
_ -> panic "genCCall: Wrong number of arguments for fabs"
+
+ MO_F32_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF32 args
+ MO_F64_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF64 args
_other_op -> outOfLineCmmOp op (Just r) args
| otherwise -> do
l1 <- getNewLabelNat
l2 <- getNewLabelNat
if sse2
- then
- outOfLineCmmOp op (Just r) args
+ then outOfLineCmmOp op (Just r) args
else case op of
MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
@@ -2080,13 +2082,16 @@ genCCall _ is32Bit target dest_regs args = do
_other_op -> outOfLineCmmOp op (Just r) args
where
- actuallyInlineFloatOp instr format [x]
+ actuallyInlineFloatOp = actuallyInlineFloatOp' False
+ actuallyInlineSSE2Op = actuallyInlineFloatOp' True
+
+ actuallyInlineFloatOp' usesSSE instr format [x]
= do res <- trivialUFCode format (instr format) x
any <- anyReg res
- return (any (getRegisterReg platform False (CmmLocal r)))
+ return (any (getRegisterReg platform usesSSE (CmmLocal r)))
- actuallyInlineFloatOp _ _ args
- = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
+ actuallyInlineFloatOp' _ _ _ args
+ = panic $ "genCCall.actuallyInlineFloatOp': bad number of arguments! ("
++ show (length args) ++ ")"
sse2FabsCode :: Width -> CmmExpr -> NatM InstrBlock
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index f4ac55c..16e08f3 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -289,7 +289,7 @@ data Instr
| CVTSI2SS Format Operand Reg -- I32/I64 to F32
| CVTSI2SD Format Operand Reg -- I32/I64 to F64
- -- use ADD & SUB for arithmetic. In both cases, operands
+ -- use ADD, SUB, and SQRT for arithmetic. In both cases, operands
-- are Operand Reg.
-- SSE2 floating-point division:
@@ -447,6 +447,7 @@ x86_regUsageOfInstr platform instr
CVTSI2SS _ src dst -> mkRU (use_R src []) [dst]
CVTSI2SD _ src dst -> mkRU (use_R src []) [dst]
FDIV _ src dst -> usageRM src dst
+ SQRT _ src dst -> mkRU (use_R src []) [dst]
FETCHGOT reg -> mkRU [] [reg]
FETCHPC reg -> mkRU [] [reg]
@@ -617,6 +618,7 @@ x86_patchRegsOfInstr instr env
CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst)
CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst)
FDIV fmt src dst -> FDIV fmt (patchOp src) (patchOp dst)
+ SQRT fmt src dst -> SQRT fmt (patchOp src) (env dst)
CALL (Left _) _ -> instr
CALL (Right reg) p -> CALL (Right (env reg)) p
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 5044c83..bd957b4 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -724,6 +724,7 @@ pprInstr (MUL format op1 op2) = pprFormatOpOp (sLit "mul") format op1 op2
pprInstr (MUL2 format op) = pprFormatOp (sLit "mul") format op
pprInstr (FDIV format op1 op2) = pprFormatOpOp (sLit "div") format op1 op2
+pprInstr (SQRT format op1 op2) = pprFormatOpReg (sLit "sqrt") format op1 op2
pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
diff --git a/libraries/base/tests/Numeric/num009.hs b/libraries/base/tests/Numeric/num009.hs
index c0dec43..e405ddf 100644
--- a/libraries/base/tests/Numeric/num009.hs
+++ b/libraries/base/tests/Numeric/num009.hs
@@ -17,6 +17,9 @@ main = do let d = [0, pi, pi/2, pi/3, 1e10, 1e20] :: [Double]
mapM_ (test "cosf" cosf cos) f
mapM_ (test "tand" tand tan) d
mapM_ (test "tanf" tanf tan) f
+ -- added to test #13629
+ mapM_ (test "sqrtd" sqrtd sqrt) f
+ mapM_ (test "sqrtf" sqrtf sqrt) f
putStrLn "Done"
test :: (RealFloat a, Floating a, RealFloat b, Floating b, Show b)
@@ -39,3 +42,5 @@ foreign import ccall "math.h cosf" cosf :: CFloat -> CFloat
foreign import ccall "math.h tan" tand :: CDouble -> CDouble
foreign import ccall "math.h tanf" tanf :: CFloat -> CFloat
+foreign import ccall "math.h sqrt" sqrtd :: CDouble -> CDouble
+foreign import ccall "math.h sqrtf" sqrtf :: CFloat -> CFloat
More information about the ghc-commits
mailing list