[Git][ghc/ghc][wip/ncg-simd] SIMD: add vector FMA primops
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Fri Jun 14 11:54:22 UTC 2024
sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC
Commits:
cd3c0b64 by sheaf at 2024-06-14T13:54:06+02:00
SIMD: add vector FMA primops
- - - - -
16 changed files:
- compiler/GHC/Builtin/primops.txt.pp
- compiler/GHC/Cmm/MachOp.hs
- compiler/GHC/Cmm/Parser.y
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/Format.hs
- compiler/GHC/CmmToAsm/PPC/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToC.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/StgToCmm/Prim.hs
- compiler/GHC/StgToJS/Prim.hs
- libraries/base/src/GHC/Exts.hs
- testsuite/tests/simd/should_run/all.T
- + testsuite/tests/simd/should_run/simd011.hs
- + testsuite/tests/simd/should_run/simd011.stdout
Changes:
=====================================
compiler/GHC/Builtin/primops.txt.pp
=====================================
@@ -4190,6 +4190,31 @@ primop VecWriteScalarOffAddrOp "writeOffAddrAs#" GenPrimOp
llvm_only = True
vector = ALL_VECTOR_TYPES
+primop VecFMAdd "fmadd#" GenPrimOp
+ VECTOR -> VECTOR -> VECTOR -> VECTOR
+ {Fused multiply-add operation @x*y+z@. See "GHC.Prim#fma".}
+ with
+ llvm_only = True
+ vector = FLOAT_VECTOR_TYPES
+primop VecFMSub "fmsub#" GenPrimOp
+ VECTOR -> VECTOR -> VECTOR -> VECTOR
+ {Fused multiply-subtract operation @x*y-z@. See "GHC.Prim#fma".}
+ with
+ llvm_only = True
+ vector = FLOAT_VECTOR_TYPES
+primop VecFNMAdd "fnmadd#" GenPrimOp
+ VECTOR -> VECTOR -> VECTOR -> VECTOR
+ {Fused negate-multiply-add operation @-x*y+z@. See "GHC.Prim#fma".}
+ with
+ llvm_only = True
+ vector = FLOAT_VECTOR_TYPES
+primop VecFNMSub "fnmsub#" GenPrimOp
+ VECTOR -> VECTOR -> VECTOR -> VECTOR
+ {Fused negate-multiply-subtract operation @-x*y-z@. See "GHC.Prim#fma".}
+ with
+ llvm_only = True
+ vector = FLOAT_VECTOR_TYPES
+
primop VecShuffleOp "shuffle#" GenPrimOp
VECTOR -> VECTOR -> INTVECTUPLE -> VECTOR
{ Shuffle elements of the concatenation of the input two vectors
=====================================
compiler/GHC/Cmm/MachOp.hs
=====================================
@@ -116,7 +116,7 @@ data MachOp
-- Floating-point fused multiply-add operations
-- | Fused multiply-add, see 'FMASign'.
- | MO_FMA FMASign Width
+ | MO_FMA FMASign Length Width
-- Floating point comparison
| MO_F_Eq Width
@@ -465,7 +465,7 @@ machOpResultType platform mop tys =
MO_F_Quot r -> cmmFloat r
MO_F_Neg r -> cmmFloat r
- MO_FMA _ r -> cmmFloat r
+ MO_FMA _ l r -> if l == 1 then cmmFloat r else cmmVec l (cmmFloat r)
MO_F_Eq {} -> comparisonResultRep platform
MO_F_Ne {} -> comparisonResultRep platform
@@ -567,7 +567,7 @@ machOpArgReps platform op =
MO_F_Quot r -> [r,r]
MO_F_Neg r -> [r]
- MO_FMA _ r -> [r,r,r]
+ MO_FMA _ l r -> [vecwidth l r, vecwidth l r, vecwidth l r]
MO_F_Eq r -> [r,r]
MO_F_Ne r -> [r,r]
=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -1051,10 +1051,10 @@ machOps = listToUFM $
( "fmul", MO_F_Mul ),
( "fquot", MO_F_Quot ),
- ( "fmadd" , MO_FMA FMAdd ),
- ( "fmsub" , MO_FMA FMSub ),
- ( "fnmadd", MO_FMA FNMAdd ),
- ( "fnmsub", MO_FMA FNMSub ),
+ ( "fmadd" , MO_FMA FMAdd 1),
+ ( "fmsub" , MO_FMA FMSub 1),
+ ( "fnmadd", MO_FMA FNMAdd 1),
+ ( "fnmsub", MO_FMA FNMSub 1),
( "feq", MO_F_Eq ),
( "fne", MO_F_Ne ),
=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -1218,11 +1218,15 @@ getRegister' config plat expr
-- x86 fnmadd - x * y + z <=> AArch64 fmsub : d = - r1 * r2 + r3
-- x86 fnmsub - x * y - z <=> AArch64 fnmadd: d = - r1 * r2 - r3
- MO_FMA var w -> case var of
- FMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMAdd d n m a)
- FMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMSub d n m a)
- FNMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMSub d n m a)
- FNMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMAdd d n m a)
+ MO_FMA var l w
+ | l == 1
+ -> case var of
+ FMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMAdd d n m a)
+ FMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMSub d n m a)
+ FNMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMSub d n m a)
+ FNMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMAdd d n m a)
+ | otherwise
+ -> vectorsNeedLlvm
MO_V_Insert {} -> vectorsNeedLlvm
MO_VF_Insert {} -> vectorsNeedLlvm
=====================================
compiler/GHC/CmmToAsm/Format.hs
=====================================
@@ -19,6 +19,7 @@ module GHC.CmmToAsm.Format (
floatFormat,
isIntFormat,
isFloatFormat,
+ vecFormat,
isVecFormat,
cmmTypeFormat,
formatToWidth,
=====================================
compiler/GHC/CmmToAsm/PPC/CodeGen.hs
=====================================
@@ -687,12 +687,14 @@ getRegister' _ _ (CmmMachOp mop [x, y, z]) -- ternary PrimOps
-- x86 fnmadd - x * y + z ~~ PPC fnmsub rt = -(ra * rc - rb)
-- x86 fnmsub - x * y - z ~~ PPC fnmadd rt = -(ra * rc + rb)
- MO_FMA variant w ->
+ MO_FMA variant l w | l == 1 ->
case variant of
FMAdd -> fma_code w (FMADD FMAdd) x y z
FMSub -> fma_code w (FMADD FMSub) x y z
FNMAdd -> fma_code w (FMADD FNMAdd) x y z
FNMSub -> fma_code w (FMADD FNMSub) x y z
+ | otherwise
+ -> vectorsNeedLlvm
MO_V_Insert {} -> vectorsNeedLlvm
MO_VF_Insert {} -> vectorsNeedLlvm
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1851,18 +1851,22 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
sse <- sseEnabled
case mop of
-- Floating point fused multiply-add operations @ ± x*y ± z@
- MO_FMA var w -> genFMA3Code w var x y z
+ MO_FMA var l w
+ | l * widthInBits w > 256
+ -> sorry "Please use -fllvm for wide vector FMA support"
+ | otherwise
+ -> genFMA3Code l w var x y z
-- Ternary vector operations
MO_VF_Insert l W32 | sse4_1 && sse -> vector_float_insert l W32 x y z
| otherwise
- -> sorry "Please enable the -msse4 and -msse flag"
+ -> sorry "Please enable the -msse4 and -msse flags"
MO_VF_Insert l W64 | sse2 && sse -> vector_float_insert l W64 x y z
| otherwise
- -> sorry "Please enable the -msse2 and -msse flag"
+ -> sorry "Please enable the -msse2 and -msse flags"
MO_V_Insert l W64 | sse2 && sse -> vector_int_insert_sse l W64 x y z
| otherwise
- -> sorry "Please enable the -msse2 and -msse flag"
+ -> sorry "Please enable the -msse2 and -msse flags"
_other -> pprPanic "getRegister(x86) - ternary CmmMachOp (1)"
(pprMachOp mop)
@@ -4029,10 +4033,12 @@ _ `regClashesWithOp` _ = False
-- | Generate code for a fused multiply-add operation, of the form @± x * y ± z@,
-- with 3 operands (FMA3 instruction set).
-genFMA3Code :: Width
+genFMA3Code :: Length
+ -> Width
-> FMASign
-> CmmExpr -> CmmExpr -> CmmExpr -> NatM Register
-genFMA3Code w signs x y z = do
+genFMA3Code l w signs x y z = do
+ platform <- getPlatform
-- For the FMA instruction, we want to compute x * y + z
--
-- There are three possible instructions we could emit:
@@ -4059,7 +4065,11 @@ genFMA3Code w signs x y z = do
-- only possible if the other arguments don't use the destination register.
-- We check for this and if there is a conflict we move the result only after
-- the computation. See #24496 how this went wrong in the past.
- let rep = floatFormat w
+ let rep
+ | l == 1
+ = floatFormat w
+ | otherwise
+ = vecFormat (cmmVec l $ cmmFloat w)
(y_reg, y_code) <- getNonClobberedReg y
(z_op, z_code) <- getNonClobberedOperand z
x_code <- getAnyReg x
@@ -4069,17 +4079,17 @@ genFMA3Code w signs x y z = do
code, code_direct, code_mov :: Reg -> InstrBlock
-- Ideal: Compute the result directly into dst
- code_direct dst = x_code dst `snocOL`
+ code_direct dst = x_code dst `snocOL`
fma213 z_op y_reg dst
-- Fallback: Compute the result into a tmp reg and then move it.
code_mov dst = x_code x_tmp `snocOL`
fma213 z_op y_reg x_tmp `snocOL`
- MOV rep (OpReg x_tmp) (OpReg dst)
+ mkRegRegMoveInstr platform rep x_tmp dst
code dst =
- y_code `appOL`
- z_code `appOL`
- ( if arg_regs_conflict then code_mov dst else code_direct dst )
+ y_code `appOL`
+ z_code `appOL`
+ ( if arg_regs_conflict then code_mov dst else code_direct dst )
where
=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -285,7 +285,7 @@ data Instr
-- | FMA3 fused multiply-add operations.
| FMA3 Format FMASign FMAPermutation Operand Reg Reg
-- src3 (r/m), src2 (r), dst/src1 (r)
- -- The is exactly reversed from how intel lists the arguments.
+ -- This is exactly reversed from how intel lists the arguments.
-- use ADD, SUB, and SQRT for arithmetic. In both cases, operands
-- are Operand Reg.
=====================================
compiler/GHC/CmmToC.hs
=====================================
@@ -727,7 +727,7 @@ pprMachOp_for_C platform mop = case mop of
MO_F_Quot _ -> char '/'
-- Floating-point fused multiply-add operations
- MO_FMA FMAdd w ->
+ MO_FMA FMAdd 1 w ->
case w of
W32 -> text "fmaf"
W64 -> text "fma"
@@ -736,10 +736,15 @@ pprMachOp_for_C platform mop = case mop of
(text "FMAdd")
(panic $ "PprC.pprMachOp_for_C: FMAdd unsupported"
++ "at width " ++ show w)
- MO_FMA var _width ->
- pprTrace "offending mop:"
- (text $ "FMA " ++ show var)
- (panic $ "PprC.pprMachOp_for_C: should have been handled earlier!")
+ MO_FMA var l width
+ | l == 1
+ -> pprTrace "offending mop:"
+ (text $ "FMA " ++ show var)
+ (panic $ "PprC.pprMachOp_for_C: should have been handled earlier!")
+ | otherwise
+ -> pprTrace "offending mop:"
+ (text $ "FMA " ++ show var ++ " " ++ show l ++ " " ++ show width)
+ (panic $ "PprC.pprMachOp_for_C: unsupported vector operation")
-- Signed comparisons
MO_S_Ge _ -> text ">="
=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -1491,7 +1491,7 @@ genMachOp _ op [x] = case op of
MO_F_Mul _ -> panicOp
MO_F_Quot _ -> panicOp
- MO_FMA _ _ -> panicOp
+ MO_FMA _ _ _ -> panicOp
MO_F_Eq _ -> panicOp
MO_F_Ne _ -> panicOp
@@ -1681,7 +1681,7 @@ genMachOp_slow opt op [x, y] = case op of
MO_F_Mul _ -> genBinMach LM_MO_FMul
MO_F_Quot _ -> genBinMach LM_MO_FDiv
- MO_FMA _ _ -> panicOp
+ MO_FMA _ _ _ -> panicOp
MO_And _ -> genBinMach LM_MO_And
MO_Or _ -> genBinMach LM_MO_Or
@@ -1822,13 +1822,11 @@ genMachOp_slow opt op [x, y] = case op of
++ "with two arguments! (" ++ show op ++ ")"
genMachOp_slow _opt op [x, y, z] = do
- platform <- getPlatform
let
- neg x = CmmMachOp (MO_F_Neg (cmmExprWidth platform x)) [x]
panicOp = panic $ "LLVM.CodeGen.genMachOp_slow: non-ternary op encountered"
++ "with three arguments! (" ++ show op ++ ")"
case op of
- MO_FMA var _ ->
+ MO_FMA var lg width ->
case var of
-- LLVM only has the fmadd variant.
FMAdd -> genFmaOp x y z
@@ -1837,6 +1835,12 @@ genMachOp_slow _opt op [x, y, z] = do
FMSub -> genFmaOp x y (neg z)
FNMAdd -> genFmaOp (neg x) y z
FNMSub -> genFmaOp (neg x) y (neg z)
+ where
+ neg x
+ | lg == 1
+ = CmmMachOp (MO_F_Neg width) [x]
+ | otherwise
+ = CmmMachOp (MO_VF_Neg lg width) [x]
_ -> panicOp
-- More than three expressions, invalid!
@@ -1873,7 +1877,13 @@ genFmaOp x y z = runExprData $ do
let fname = case tx of
LMFloat -> fsLit "llvm.fma.f32"
LMDouble -> fsLit "llvm.fma.f64"
- _ -> pprPanic "fma: type not LMFloat or LMDouble" (ppLlvmType tx)
+ LMVector 4 LMFloat -> fsLit "llvm.fma.v4f32"
+ LMVector 8 LMFloat -> fsLit "llvm.fma.v8f32"
+ LMVector 16 LMFloat -> fsLit "llvm.fma.v16f32"
+ LMVector 2 LMDouble -> fsLit "llvm.fma.v2f64"
+ LMVector 4 LMDouble -> fsLit "llvm.fma.v4f64"
+ LMVector 8 LMDouble -> fsLit "llvm.fma.v8f64"
+ _ -> pprPanic "CmmToLlvm.genFmaOp: unsupported type" (ppLlvmType tx)
fptr <- liftExprData $ getInstrinct fname ty [tx, ty, tz]
doExprW tx $ Call StdCall fptr [vx, vy, vz] [ReadNone, NoUnwind]
=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -1503,10 +1503,10 @@ emitPrimOp cfg primop =
DoubleDivOp -> opTranslate (MO_F_Quot W64)
DoubleNegOp -> opTranslate (MO_F_Neg W64)
- DoubleFMAdd -> fmaOp FMAdd W64
- DoubleFMSub -> fmaOp FMSub W64
- DoubleFNMAdd -> fmaOp FNMAdd W64
- DoubleFNMSub -> fmaOp FNMSub W64
+ DoubleFMAdd -> fmaOp FMAdd 1 W64
+ DoubleFMSub -> fmaOp FMSub 1 W64
+ DoubleFNMAdd -> fmaOp FNMAdd 1 W64
+ DoubleFNMSub -> fmaOp FNMSub 1 W64
-- Float ops
@@ -1523,10 +1523,10 @@ emitPrimOp cfg primop =
FloatDivOp -> opTranslate (MO_F_Quot W32)
FloatNegOp -> opTranslate (MO_F_Neg W32)
- FloatFMAdd -> fmaOp FMAdd W32
- FloatFMSub -> fmaOp FMSub W32
- FloatFNMAdd -> fmaOp FNMAdd W32
- FloatFNMSub -> fmaOp FNMSub W32
+ FloatFMAdd -> fmaOp FMAdd 1 W32
+ FloatFMSub -> fmaOp FMSub 1 W32
+ FloatFNMAdd -> fmaOp FNMAdd 1 W32
+ FloatFNMSub -> fmaOp FNMSub 1 W32
-- Vector ops
@@ -1554,6 +1554,12 @@ emitPrimOp cfg primop =
(VecRemOp WordVec n w) -> opTranslate (MO_VU_Rem n w)
(VecNegOp WordVec _ _) -> \_ -> panic "unsupported primop"
+ -- Vector FMA instructions
+ VecFMAdd _ n w -> fmaOp FMAdd n w
+ VecFMSub _ n w -> fmaOp FMSub n w
+ VecFNMAdd _ n w -> fmaOp FNMAdd n w
+ VecFNMSub _ n w -> fmaOp FNMSub n w
+
-- Conversions
IntToDoubleOp -> opTranslate (MO_SF_Round (wordWidth platform) W64)
@@ -1851,10 +1857,11 @@ emitPrimOp cfg primop =
allowFMA = stgToCmmAllowFMAInstr cfg
- fmaOp :: FMASign -> Width -> [CmmActual] -> PrimopCmmEmit
- fmaOp signs w args@[arg_x, arg_y, arg_z]
- | allowFMA signs
- = opTranslate (MO_FMA signs w) args
+ fmaOp :: FMASign -> Length -> Width -> [CmmActual] -> PrimopCmmEmit
+ fmaOp signs l w args@[arg_x, arg_y, arg_z]
+ | allowFMA signs
+ || l > 1 -- (always use the MachOp for vector FMA)
+ = opTranslate (MO_FMA signs l w) args
| otherwise
= case signs of
@@ -1863,12 +1870,16 @@ emitPrimOp cfg primop =
-- Other fused multiply-add operations are implemented in terms of fmadd
-- This is sound: it does not lose any precision.
- FMSub -> fmaOp FMAdd w [arg_x, arg_y, neg arg_z]
- FNMAdd -> fmaOp FMAdd w [neg arg_x, arg_y, arg_z]
- FNMSub -> fmaOp FMAdd w [neg arg_x, arg_y, neg arg_z]
+ FMSub -> fmaOp FMAdd l w [arg_x, arg_y, neg arg_z]
+ FNMAdd -> fmaOp FMAdd l w [neg arg_x, arg_y, arg_z]
+ FNMSub -> fmaOp FMAdd l w [neg arg_x, arg_y, neg arg_z]
where
- neg x = CmmMachOp (MO_F_Neg w) [x]
- fmaOp _ _ _ = panic "fmaOp: wrong number of arguments (expected 3)"
+ neg x
+ | l == 1
+ = CmmMachOp (MO_F_Neg w) [x]
+ | otherwise
+ = CmmMachOp (MO_VF_Neg l w) [x]
+ fmaOp _ _ _ _ = panic "fmaOp: wrong number of arguments (expected 3)"
data PrimopCmmEmit
-- | Out of line fake primop that's actually just a foreign call to other
=====================================
compiler/GHC/StgToJS/Prim.hs
=====================================
@@ -1192,6 +1192,11 @@ genPrim prof bound ty op = case op of
VecReadOffAddrOp _ _ _ -> unhandledPrimop op
VecWriteOffAddrOp _ _ _ -> unhandledPrimop op
+ VecFMAdd {} -> unhandledPrimop op
+ VecFMSub {} -> unhandledPrimop op
+ VecFNMAdd {} -> unhandledPrimop op
+ VecFNMSub {} -> unhandledPrimop op
+
VecIndexScalarByteArrayOp _ _ _ -> unhandledPrimop op
VecReadScalarByteArrayOp _ _ _ -> unhandledPrimop op
VecWriteScalarByteArrayOp _ _ _ -> unhandledPrimop op
=====================================
libraries/base/src/GHC/Exts.hs
=====================================
@@ -120,7 +120,32 @@ import GHC.Prim hiding
, dataToTagSmall#, dataToTagLarge#
-- whereFrom# is similarly internal.
, whereFrom#
- -- Don't re-export SIMD shuffle primops (to avoid changing GHC.Exts)
+ -- Don't re-export vector FMA instructions
+ , fmaddFloatX4#
+ , fmsubFloatX4#
+ , fnmaddFloatX4#
+ , fnmsubFloatX4#
+ , fmaddFloatX8#
+ , fmsubFloatX8#
+ , fnmaddFloatX8#
+ , fnmsubFloatX8#
+ , fmaddFloatX16#
+ , fmsubFloatX16#
+ , fnmaddFloatX16#
+ , fnmsubFloatX16#
+ , fmaddDoubleX2#
+ , fmsubDoubleX2#
+ , fnmaddDoubleX2#
+ , fnmsubDoubleX2#
+ , fmaddDoubleX4#
+ , fmsubDoubleX4#
+ , fnmaddDoubleX4#
+ , fnmsubDoubleX4#
+ , fmaddDoubleX8#
+ , fmsubDoubleX8#
+ , fnmaddDoubleX8#
+ , fnmsubDoubleX8#
+ -- Don't re-export SIMD shuffle primops
, shuffleDoubleX2#
, shuffleDoubleX4#
, shuffleDoubleX8#
=====================================
testsuite/tests/simd/should_run/all.T
=====================================
@@ -15,3 +15,4 @@ test('simd007', [], compile_and_run, [''])
test('simd008', [], compile_and_run, [''])
test('simd009', [req_th, extra_files(['Simd009b.hs', 'Simd009c.hs'])], multimod_compile_and_run, ['simd009', ''])
test('simd010', [], compile_and_run, [''])
+test('simd011', [when(have_cpu_feature('fma'), extra_hc_opts('-mfma'))], compile_and_run, [''])
=====================================
testsuite/tests/simd/should_run/simd011.hs
=====================================
@@ -0,0 +1,43 @@
+{-# OPTIONS_GHC -O2 #-}
+{-# OPTIONS_GHC -msse2 #-}
+{-# OPTIONS_GHC -msse4 #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- tests for vector FMA instructions
+
+import GHC.Exts
+import GHC.Prim
+
+
+main :: IO ()
+main = do
+
+ -- FloatX4#
+ let
+ !f1 = packFloatX4# (# 1.1#, 2.2#, 3.3#, 4.4# #)
+ !f2 = packFloatX4# (# 10.1#, 20.2#, 30.3#, 40.4# #)
+ !f3 = packFloatX4# (# 1000.0#, 2000.0#, 3000.0#, 4000.0# #)
+
+ case unpackFloatX4# (fmaddFloatX4# f1 f2 f3) of
+ (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+ case unpackFloatX4# (fmsubFloatX4# f1 f2 f3) of
+ (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+ case unpackFloatX4# (fnmaddFloatX4# f1 f2 f3) of
+ (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+ case unpackFloatX4# (fnmsubFloatX4# f1 f2 f3) of
+ (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d)
+
+ -- DoubleX2#
+ let
+ !d1 = packDoubleX2# (# 1.1##, 2.2## #)
+ !d2 = packDoubleX2# (# 10.1##, 20.2## #)
+ !d3 = packDoubleX2# (# 1000.0##, 2000.0## #)
+
+ case unpackDoubleX2# (fmaddDoubleX2# d1 d2 d3) of
+ (# a, b #) -> print (D# a, D# b)
+ case unpackDoubleX2# (fmsubDoubleX2# d1 d2 d3) of
+ (# a, b #) -> print (D# a, D# b)
+ case unpackDoubleX2# (fnmaddDoubleX2# d1 d2 d3) of
+ (# a, b #) -> print (D# a, D# b)
+ case unpackDoubleX2# (fnmsubDoubleX2# d1 d2 d3) of
+ (# a, b #) -> print (D# a, D# b)
=====================================
testsuite/tests/simd/should_run/simd011.stdout
=====================================
@@ -0,0 +1,8 @@
+(1011.11,2044.4401,3099.99,4177.7603)
+(-988.89,-1955.5599,-2900.01,-3822.24)
+(988.89,1955.5599,2900.01,3822.24)
+(-1011.11,-2044.4401,-3099.99,-4177.7603)
+(1011.11,2044.44)
+(-988.89,-1955.56)
+(988.89,1955.56)
+(-1011.11,-2044.44)
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/cd3c0b64c180bb6f50f2ed63d2565e00a1888ecd
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/cd3c0b64c180bb6f50f2ed63d2565e00a1888ecd
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240614/28328336/attachment-0001.html>
More information about the ghc-commits
mailing list