[Git][ghc/ghc][wip/ncg-simd] 4 commits: Add vector fused multiply-add operations
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Wed Jul 3 11:59:49 UTC 2024
sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC
Commits:
ffc1e910 by sheaf at 2024-07-03T13:59:20+02:00
Add vector fused multiply-add operations
This commit adds fused multiply-add operations such as `fmaddDoubleX2#`.
These are handled in both the X86 NCG and LLVM backends.
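As a usage sketch (not part of this patch): the new primop computes a fused
x*y+z in each lane. The example below is hypothetical; it imports
`fmaddDoubleX2#` from GHC.Prim because this patch hides the vector FMA primops
from GHC.Exts, and it assumes a backend with SIMD support (e.g. -fllvm, or
this branch's X86 NCG with -mfma).
```haskell
{-# LANGUAGE MagicHash, UnboxedTuples #-}
import GHC.Exts (Double (D#), packDoubleX2#, unpackDoubleX2#)
import GHC.Prim (fmaddDoubleX2#)  -- new primop; not re-exported from GHC.Exts

-- Per-lane fused multiply-add: (1*3+5, 2*4+6) = (8.0, 14.0).
fmaExample :: (Double, Double)
fmaExample =
  case unpackDoubleX2#
         (fmaddDoubleX2# (packDoubleX2# (# 1.0##, 2.0## #))
                         (packDoubleX2# (# 3.0##, 4.0## #))
                         (packDoubleX2# (# 5.0##, 6.0## #))) of
    (# a, b #) -> (D# a, D# b)
```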
- - - - -
60d3c203 by sheaf at 2024-07-03T13:59:20+02:00
Add vector shuffle primops
This adds vector shuffle primops, such as
```
shuffleFloatX4# :: FloatX4# -> FloatX4# -> (# Int#, Int#, Int#, Int# #) -> FloatX4#
```
which shuffle the components of the two input vectors into the output vector.
NB: the indices must be compile-time literals, to match the X86 SHUFPD
instruction immediate and the LLVM shufflevector instruction.
These are handled in the X86 NCG and the LLVM backend.
Tested in simd009.
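A hypothetical usage sketch (not part of this patch): indices 0-3 select lanes
from the first vector and 4-7 from the second, and they must appear literally
at the call site. `swapHalves` is an invented name; the import from GHC.Prim
reflects the GHC.Exts hiding list added below.
```haskell
{-# LANGUAGE MagicHash, UnboxedTuples #-}
import GHC.Prim (FloatX4#, shuffleFloatX4#)  -- new primop; hidden from GHC.Exts

-- Build a vector from the upper half of u and the lower half of v.
swapHalves :: FloatX4# -> FloatX4# -> FloatX4#
swapHalves u v = shuffleFloatX4# u v (# 2#, 3#, 4#, 5# #)
```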
- - - - -
751c6143 by sheaf at 2024-07-03T13:59:21+02:00
Add Broadcast MachOps
This adds proper MachOps for broadcast instructions, allowing us to
produce better code for broadcasting a value than simply packing that
value (doing many vector insertions in a row).
These are lowered in the X86 NCG and LLVM backends. In the LLVM backend,
the lowering uses the previously introduced shuffle instructions.
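For reference, a minimal source-level sketch (not part of this patch) of what
now lowers through the dedicated broadcast MachOps; the function name is
invented and the lane count is illustrative.
```haskell
{-# LANGUAGE MagicHash, UnboxedTuples #-}
import GHC.Exts (Double (D#), broadcastDoubleX2#, unpackDoubleX2#)

-- Splat one scalar into both lanes; with MO_VF_Broadcast this becomes a
-- single broadcast instead of a chain of vector insertions.
splat :: Double -> (Double, Double)
splat (D# d) =
  case unpackDoubleX2# (broadcastDoubleX2# d) of
    (# a, b #) -> (D# a, D# b)
```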
- - - - -
f47ef2d5 by sheaf at 2024-07-03T13:59:21+02:00
Fix treatment of signed zero in vector negation
This commit fixes the handling of signed zero in floating-point vector
negation.
A slight hack was introduced to work around the fact that Cmm doesn't
currently have a notion of signed floating-point literals
(see get_float_broadcast_value_reg). This can be removed once CmmFloat
can express the value -0.0.
The simd006 test has been updated to use a stricter notion of equality
of floating-point values, which ensures the validity of this change.
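For context, a small illustration (ordinary Haskell, not part of this patch) of
why bit-level comparison is needed: IEEE equality identifies -0.0 with 0.0, so
only comparing the bit patterns can detect a negation that drops the sign of
zero.
```haskell
import GHC.Float (castDoubleToWord64)

-- (-0.0) == 0.0 under IEEE equality, yet the bit patterns differ;
-- comparing the Word64 representations tells the two apart.
signedZeroVisible :: Bool
signedZeroVisible =
  ((-0.0 :: Double) == 0.0)
    && castDoubleToWord64 (-0.0) /= castDoubleToWord64 0.0
```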
- - - - -
27 changed files:
- compiler/GHC/Builtin/primops.txt.pp
- compiler/GHC/Cmm/MachOp.hs
- compiler/GHC/Cmm/Opt.hs
- compiler/GHC/Cmm/Parser.y
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/Format.hs
- compiler/GHC/CmmToAsm/PPC/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Ppr.hs
- compiler/GHC/CmmToC.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/Llvm/Ppr.hs
- compiler/GHC/Llvm/Syntax.hs
- compiler/GHC/StgToCmm/Prim.hs
- compiler/GHC/StgToJS/Prim.hs
- docs/users_guide/9.12.1-notes.rst
- libraries/base/src/GHC/Base.hs
- libraries/base/src/GHC/Exts.hs
- testsuite/tests/simd/should_run/all.T
- testsuite/tests/simd/should_run/simd006.hs
- + testsuite/tests/simd/should_run/simd011.stdout
- utils/genprimopcode/Lexer.x
- utils/genprimopcode/Main.hs
- utils/genprimopcode/Parser.y
- utils/genprimopcode/ParserM.hs
- utils/genprimopcode/Syntax.hs
Changes:
=====================================
compiler/GHC/Builtin/primops.txt.pp
=====================================
@@ -4175,6 +4175,33 @@ primop VecWriteScalarOffAddrOp "writeOffAddrAs#" GenPrimOp
can_fail_warning = YesWarnCanFail
vector = ALL_VECTOR_TYPES
+primop VecFMAdd "fmadd#" GenPrimOp
+ VECTOR -> VECTOR -> VECTOR -> VECTOR
+ {Fused multiply-add operation @x*y+z at . See "GHC.Prim#fma".}
+ with
+ vector = FLOAT_VECTOR_TYPES
+primop VecFMSub "fmsub#" GenPrimOp
+ VECTOR -> VECTOR -> VECTOR -> VECTOR
+ {Fused multiply-subtract operation @x*y-z at . See "GHC.Prim#fma".}
+ with
+ vector = FLOAT_VECTOR_TYPES
+primop VecFNMAdd "fnmadd#" GenPrimOp
+ VECTOR -> VECTOR -> VECTOR -> VECTOR
+ {Fused negate-multiply-add operation @-x*y+z at . See "GHC.Prim#fma".}
+ with
+ vector = FLOAT_VECTOR_TYPES
+primop VecFNMSub "fnmsub#" GenPrimOp
+ VECTOR -> VECTOR -> VECTOR -> VECTOR
+ {Fused negate-multiply-subtract operation @-x*y-z at . See "GHC.Prim#fma".}
+ with
+ vector = FLOAT_VECTOR_TYPES
+
+primop VecShuffleOp "shuffle#" GenPrimOp
+ VECTOR -> VECTOR -> INTVECTUPLE -> VECTOR
+  {Shuffle elements of the concatenation of the two input vectors
+   into the result vector.}
+ with vector = ALL_VECTOR_TYPES
+
------------------------------------------------------------------------
section "Prefetch"
=====================================
compiler/GHC/Cmm/MachOp.hs
=====================================
@@ -116,7 +116,7 @@ data MachOp
-- Floating-point fused multiply-add operations
-- | Fused multiply-add, see 'FMASign'.
- | MO_FMA FMASign Width
+ | MO_FMA FMASign Length Width
-- Floating point comparison
| MO_F_Eq Width
@@ -158,6 +158,7 @@ data MachOp
| MO_FW_Bitcast Width -- Float/Double -> Word32/Word64
-- Vector element insertion and extraction operations
+ | MO_V_Broadcast Length Width -- Broadcast a scalar into a vector
| MO_V_Insert Length Width -- Insert scalar into vector
| MO_V_Extract Length Width -- Extract scalar from vector
@@ -175,7 +176,12 @@ data MachOp
| MO_VU_Quot Length Width
| MO_VU_Rem Length Width
+ -- Vector shuffles
+ | MO_V_Shuffle Length Width [Int]
+ | MO_VF_Shuffle Length Width [Int]
+
-- Floating point vector element insertion and extraction operations
+ | MO_VF_Broadcast Length Width -- Broadcast a scalar into a vector
| MO_VF_Insert Length Width -- Insert scalar into vector
| MO_VF_Extract Length Width -- Extract scalar from vector
@@ -459,7 +465,7 @@ machOpResultType platform mop tys =
MO_F_Quot r -> cmmFloat r
MO_F_Neg r -> cmmFloat r
- MO_FMA _ r -> cmmFloat r
+ MO_FMA _ l r -> if l == 1 then cmmFloat r else cmmVec l (cmmFloat r)
MO_F_Eq {} -> comparisonResultRep platform
MO_F_Ne {} -> comparisonResultRep platform
@@ -485,6 +491,7 @@ machOpResultType platform mop tys =
MO_WF_Bitcast w -> cmmFloat w
MO_FW_Bitcast w -> cmmBits w
+ MO_V_Broadcast l w -> cmmVec l (cmmBits w)
MO_V_Insert l w -> cmmVec l (cmmBits w)
MO_V_Extract _ w -> cmmBits w
@@ -499,6 +506,10 @@ machOpResultType platform mop tys =
MO_VU_Quot l w -> cmmVec l (cmmBits w)
MO_VU_Rem l w -> cmmVec l (cmmBits w)
+ MO_V_Shuffle l w _ -> cmmVec l (cmmBits w)
+ MO_VF_Shuffle l w _ -> cmmVec l (cmmFloat w)
+
+ MO_VF_Broadcast l w -> cmmVec l (cmmFloat w)
MO_VF_Insert l w -> cmmVec l (cmmFloat w)
MO_VF_Extract _ w -> cmmFloat w
@@ -556,7 +567,7 @@ machOpArgReps platform op =
MO_F_Quot r -> [r,r]
MO_F_Neg r -> [r]
- MO_FMA _ r -> [r,r,r]
+ MO_FMA _ l r -> [vecwidth l r, vecwidth l r, vecwidth l r]
MO_F_Eq r -> [r,r]
MO_F_Ne r -> [r,r]
@@ -582,8 +593,13 @@ machOpArgReps platform op =
MO_WF_Bitcast w -> [w]
MO_FW_Bitcast w -> [w]
+ MO_V_Shuffle l r _ -> [vecwidth l r, vecwidth l r]
+ MO_VF_Shuffle l r _ -> [vecwidth l r, vecwidth l r]
+
+ MO_V_Broadcast _ r -> [r]
MO_V_Insert l r -> [vecwidth l r, r, W32]
MO_V_Extract l r -> [vecwidth l r, W32]
+ MO_VF_Broadcast _ r -> [r]
MO_VF_Insert l r -> [vecwidth l r, r, W32]
MO_VF_Extract l r -> [vecwidth l r, W32]
-- SIMD vector indices are always 32 bit
=====================================
compiler/GHC/Cmm/Opt.hs
=====================================
@@ -79,6 +79,11 @@ cmmMachOpFoldM
-> MachOp
-> [CmmExpr]
-> Maybe CmmExpr
+cmmMachOpFoldM _ (MO_V_Broadcast {}) _ = Nothing
+cmmMachOpFoldM _ (MO_VF_Broadcast {}) _ = Nothing
+ -- SIMD NCG TODO: supporting constant folding for vector operations
+ -- would require augmenting getRegister' to handle them.
+ -- See the code for "getRegister' platform _ (CmmLit lit)".
cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
= Just $! case op of
MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -1053,10 +1053,10 @@ machOps = listToUFM $
( "fmul", MO_F_Mul ),
( "fquot", MO_F_Quot ),
- ( "fmadd" , MO_FMA FMAdd ),
- ( "fmsub" , MO_FMA FMSub ),
- ( "fnmadd", MO_FMA FNMAdd ),
- ( "fnmsub", MO_FMA FNMSub ),
+ ( "fmadd" , MO_FMA FMAdd 1 ),
+ ( "fmsub" , MO_FMA FMSub 1 ),
+ ( "fnmadd", MO_FMA FNMAdd 1 ),
+ ( "fnmsub", MO_FMA FNMSub 1 ),
( "feq", MO_F_Eq ),
( "fne", MO_F_Ne ),
=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -813,6 +813,8 @@ getRegister' config plat expr
MO_VS_Neg {} -> notUnary
MO_VU_Quot {} -> notUnary
MO_VU_Rem {} -> notUnary
+ MO_V_Shuffle {} -> notUnary
+ MO_VF_Shuffle {} -> notUnary
MO_VF_Insert {} -> notUnary
MO_VF_Extract {} -> notUnary
MO_VF_Add {} -> notUnary
@@ -825,6 +827,8 @@ getRegister' config plat expr
MO_AlignmentCheck {} ->
pprPanic "getRegister' (monadic CmmMachOp):" (pdoc plat expr)
+ MO_V_Broadcast {} -> vectorsNeedLlvm
+ MO_VF_Broadcast {} -> vectorsNeedLlvm
MO_VF_Neg {} -> vectorsNeedLlvm
where
notUnary = pprPanic "getRegister' (non-unary CmmMachOp with 1 argument):" (pdoc plat expr)
@@ -1170,6 +1174,8 @@ getRegister' config plat expr
MO_FF_Conv {} -> notDyadic
MO_WF_Bitcast {} -> notDyadic
MO_FW_Bitcast {} -> notDyadic
+ MO_V_Broadcast {} -> notDyadic
+ MO_VF_Broadcast {} -> notDyadic
MO_V_Insert {} -> notDyadic
MO_VF_Insert {} -> notDyadic
MO_AlignmentCheck {} -> notDyadic
@@ -1191,6 +1197,8 @@ getRegister' config plat expr
MO_VF_Neg {} -> vectorsNeedLlvm
MO_VF_Mul {} -> vectorsNeedLlvm
MO_VF_Quot {} -> vectorsNeedLlvm
+ MO_V_Shuffle {} -> vectorsNeedLlvm
+ MO_VF_Shuffle {} -> vectorsNeedLlvm
where
notDyadic =
pprPanic "getRegister' (non-dyadic CmmMachOp with 2 arguments): " $
@@ -1210,11 +1218,15 @@ getRegister' config plat expr
-- x86 fnmadd - x * y + z <=> AArch64 fmsub : d = - r1 * r2 + r3
-- x86 fnmsub - x * y - z <=> AArch64 fnmadd: d = - r1 * r2 - r3
- MO_FMA var w -> case var of
- FMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMAdd d n m a)
- FMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMSub d n m a)
- FNMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMSub d n m a)
- FNMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMAdd d n m a)
+ MO_FMA var l w
+ | l == 1
+ -> case var of
+ FMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMAdd d n m a)
+ FMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMSub d n m a)
+ FNMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMSub d n m a)
+ FNMSub -> float3Op w (\d n m a -> unitOL $ FMA FNMAdd d n m a)
+ | otherwise
+ -> vectorsNeedLlvm
MO_V_Insert {} -> vectorsNeedLlvm
MO_VF_Insert {} -> vectorsNeedLlvm
=====================================
compiler/GHC/CmmToAsm/Format.hs
=====================================
@@ -24,6 +24,7 @@ module GHC.CmmToAsm.Format (
isVecFormat,
cmmTypeFormat,
formatToWidth,
+ scalarWidth,
formatInBytes,
isFloatScalarFormat,
scalarFormatFormat,
=====================================
compiler/GHC/CmmToAsm/PPC/CodeGen.hs
=====================================
@@ -538,6 +538,8 @@ getRegister' config platform (CmmMachOp mop [x]) -- unary MachOps
MO_XX_Conv _ to -> conversionNop (intFormat to) x
+ MO_V_Broadcast {} -> vectorsNeedLlvm
+ MO_VF_Broadcast {} -> vectorsNeedLlvm
MO_VF_Neg {} -> vectorsNeedLlvm
_ -> panic "PPC.CodeGen.getRegister: no match"
@@ -667,6 +669,8 @@ getRegister' _ _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
MO_VF_Neg {} -> vectorsNeedLlvm
MO_VF_Mul {} -> vectorsNeedLlvm
MO_VF_Quot {} -> vectorsNeedLlvm
+ MO_V_Shuffle {} -> vectorsNeedLlvm
+ MO_VF_Shuffle {} -> vectorsNeedLlvm
_ -> panic "PPC.CodeGen.getRegister: no match"
@@ -692,12 +696,14 @@ getRegister' _ _ (CmmMachOp mop [x, y, z]) -- ternary PrimOps
-- x86 fnmadd - x * y + z ~~ PPC fnmsub rt = -(ra * rc - rb)
-- x86 fnmsub - x * y - z ~~ PPC fnmadd rt = -(ra * rc + rb)
- MO_FMA variant w ->
+ MO_FMA variant l w | l == 1 ->
case variant of
FMAdd -> fma_code w (FMADD FMAdd) x y z
FMSub -> fma_code w (FMADD FMSub) x y z
FNMAdd -> fma_code w (FMADD FNMAdd) x y z
FNMSub -> fma_code w (FMADD FNMSub) x y z
+ | otherwise
+ -> vectorsNeedLlvm
MO_V_Insert {} -> vectorsNeedLlvm
MO_VF_Insert {} -> vectorsNeedLlvm
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -944,6 +944,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
float_const_sse2 where
float_const_sse2
| f == 0.0 = do
+ -- TODO: this mishandles negative zero floating point literals.
let
format = floatFormat w
code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
@@ -951,9 +952,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
-- They all appear to do the same thing --SDM
return (Any format code)
- | otherwise = do
- Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
- loadFloatAmode w addr code
+ | otherwise = getFloatLitRegister lit
-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _ _]) = do
@@ -1010,9 +1009,10 @@ getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal (GlobalRegUse
LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
- sse2 <- sse2Enabled
- sse <- sseEnabled
- avx <- avxEnabled
+ sse4_1 <- sse4_1Enabled
+ sse2 <- sse2Enabled
+ sse <- sseEnabled
+ avx <- avxEnabled
case mop of
MO_F_Neg w -> sse2NegCode w x
@@ -1099,13 +1099,27 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_FS_Truncate from to -> coerceFP2Int from to x
MO_SF_Round from to -> coerceInt2FP from to x
- MO_VF_Neg l w | avx -> vector_float_negate_avx l w x
- | sse && sse2 -> vector_float_negate_sse l w x
+ MO_VF_Neg l w | avx -> vector_float_negate_avx l w x
+ | sse && w == W32 -> vector_float_negate_sse l w x
+ | sse2 && w == W64 -> vector_float_negate_sse l w x
| otherwise
- -> sorry "Please enable the -mavx or -msse, -msse2 flag"
+ -> sorry "Please enable the -mavx or -msse, -msse2 flag"
-- SIMD NCG TODO: add integer negation
MO_VS_Neg {} -> needLlvm mop
+ MO_VF_Broadcast l W32 | avx -> vector_float_broadcast_avx l W32 x
+ | sse4_1 -> vector_float_broadcast_sse l W32 x
+ | otherwise
+ -> sorry "Please enable the -mavx or -msse4 flag"
+ MO_VF_Broadcast l W64 | sse2 -> vector_float_broadcast_sse l W64 x
+ | otherwise -> sorry "Please enable the -msse2 flag"
+ MO_VF_Broadcast {} -> incorrectOperands
+
+ MO_V_Broadcast l W64 | sse2 -> vector_int_broadcast l W64 x
+ | otherwise -> sorry "Please enable the -msse2 flag"
+ -- SIMD NCG TODO: W32, W16, W8
+ MO_V_Broadcast {} -> needLlvm mop
+
-- Binary MachOps
MO_Add {} -> incorrectOperands
MO_Sub {} -> incorrectOperands
@@ -1150,6 +1164,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_VS_Rem {} -> incorrectOperands
MO_VU_Quot {} -> incorrectOperands
MO_VU_Rem {} -> incorrectOperands
+ MO_V_Shuffle {} -> incorrectOperands
+ MO_VF_Shuffle {} -> incorrectOperands
MO_VF_Extract {} -> incorrectOperands
MO_VF_Add {} -> incorrectOperands
@@ -1206,37 +1222,117 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
vector_float_negate_avx :: Length -> Width -> CmmExpr -> NatM Register
vector_float_negate_avx l w expr = do
- tmp <- getNewRegNat (VecFormat l FmtFloat)
- (reg, exp) <- getSomeReg expr
- Amode addr addr_code <- memConstant (mkAlignment $ widthInBytes W32) (CmmFloat 0.0 W32)
- let format = case w of
- W32 -> VecFormat l FmtFloat
- W64 -> VecFormat l FmtDouble
- _ -> pprPanic "Cannot negate vector of width" (ppr w)
- code dst = case w of
- W32 -> exp `appOL` addr_code `snocOL`
- (VBROADCAST format addr tmp) `snocOL`
- (VSUB format (OpReg reg) tmp dst)
- W64 -> exp `appOL` addr_code `snocOL`
- (MOVL format (OpAddr addr) (OpReg tmp)) `snocOL`
- (MOVH format (OpAddr addr) (OpReg tmp)) `snocOL`
- (VSUB format (OpReg reg) tmp dst)
- _ -> pprPanic "Cannot negate vector of width" (ppr w)
- return (Any format code)
+ let fmt :: Format
+ mask :: CmmLit
+ (fmt, mask) = case w of
+ W32 -> (VecFormat l FmtFloat , CmmInt (bit 31) w) -- TODO: these should be negative 0 floating point literals,
+ W64 -> (VecFormat l FmtDouble, CmmInt (bit 63) w) -- but we don't currently have those in Cmm.
+ _ -> panic "AVX floating-point negation: elements must be FF32 or FF64"
+ (maskReg, maskCode) <- getSomeReg (CmmMachOp (MO_VF_Broadcast l w) [CmmLit mask])
+ (reg, exp) <- getSomeReg expr
+ let code dst = maskCode `appOL`
+ exp `snocOL`
+ (VMOVU fmt (OpReg reg) (OpReg dst)) `snocOL`
+ (VXOR fmt (OpReg maskReg) dst dst)
+ return (Any fmt code)
vector_float_negate_sse :: Length -> Width -> CmmExpr -> NatM Register
vector_float_negate_sse l w expr = do
- tmp <- getNewRegNat (VecFormat l FmtFloat)
- (reg, exp) <- getSomeReg expr
- let format = case w of
- W32 -> VecFormat l FmtFloat
- W64 -> VecFormat l FmtDouble
- _ -> pprPanic "Cannot negate vector of width" (ppr w)
+ let fmt :: Format
+ mask :: CmmLit
+ (fmt, mask) = case w of
+ W32 -> (VecFormat l FmtFloat , CmmInt (bit 31) w) -- Same comment as for vector_float_negate_avx,
+ W64 -> (VecFormat l FmtDouble, CmmInt (bit 63) w) -- these should be -0.0 CmmFloat values.
+ _ -> panic "SSE floating-point negation: elements must be FF32 or FF64"
+ (maskReg, maskCode) <- getSomeReg (CmmMachOp (MO_VF_Broadcast l w) [CmmLit mask])
+ (reg, exp) <- getSomeReg expr
+ let code dst = maskCode `appOL`
+ exp `snocOL`
+ (MOVU fmt (OpReg reg) (OpReg dst)) `snocOL`
+ (XOR fmt (OpReg maskReg) (OpReg dst))
+ return (Any fmt code)
+
+ -----------------------
+
+ -- Like 'getSomeReg', but with special handling for int literals
+ -- used as floating point values, to work around the fact that we don't
+ -- have negative zero floating point literals in Cmm yet.
+ --
+ -- This should get removed once we have negative zero in CmmFloat.
+ get_float_broadcast_value_reg expr = case expr of
+ CmmLit lit -> do
+ r <- getFloatLitRegister lit
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed _ reg code ->
+ return (reg, code)
+ _ -> getSomeReg expr
+
+ vector_float_broadcast_avx :: Length
+ -> Width
+ -> CmmExpr
+ -> NatM Register
+ vector_float_broadcast_avx len W32 expr
+ = do
+ (reg, exp) <- get_float_broadcast_value_reg expr
+ let f = VecFormat len FmtFloat
+ addr = spRel platform 0
+ in return $ Any f (\dst -> exp `snocOL`
+ (VMOVU f (OpReg reg) (OpAddr addr)) `snocOL`
+ (VBROADCAST f addr dst))
+ vector_float_broadcast_avx l w _
+ -- NB: for some reason, VBROADCASTSD does not support xmm, only ymm.
+ = pprPanic "vector_float_broadcast_avx" (text "l" <+> ppr l $$ text "w" <+> ppr w)
+
+ vector_float_broadcast_sse :: Length
+ -> Width
+ -> CmmExpr
+ -> NatM Register
+ vector_float_broadcast_sse len W32 expr
+ = do
+ (reg, exp) <- get_float_broadcast_value_reg expr
+ let f = VecFormat len FmtFloat
+ addr = spRel platform 0
code dst = exp `snocOL`
- (XOR format (OpReg tmp) (OpReg tmp)) `snocOL`
- (MOVU format (OpReg tmp) (OpReg dst)) `snocOL`
- (SUB format (OpReg reg) (OpReg dst))
- return (Any format code)
+ (MOVU f (OpReg reg) (OpAddr addr)) `snocOL`
+ (insertps $ 0b1110) `snocOL`
+ (insertps $ 16) `snocOL`
+ (insertps $ 32) `snocOL`
+ (insertps $ 48)
+ where
+ insertps imm =
+ INSERTPS f (ImmInt imm) (OpAddr addr) dst
+
+ in return $ Any f code
+ vector_float_broadcast_sse len W64 expr
+ = do
+ (reg, exp) <- get_float_broadcast_value_reg expr
+ let f = VecFormat len FmtDouble
+ addr = spRel platform 0
+ in return $ Any f (\dst -> exp `snocOL`
+ (MOVU f (OpReg reg) (OpAddr addr)) `snocOL`
+ (MOVL f (OpAddr addr) (OpReg dst)) `snocOL`
+ (MOVH f (OpAddr addr) (OpReg dst)))
+ vector_float_broadcast_sse _ _ c
+ = pprPanic "Broadcast not supported for : " (pdoc platform c)
+
+ vector_int_broadcast :: Length
+ -> Width
+ -> CmmExpr
+ -> NatM Register
+ vector_int_broadcast len W64 expr
+ = do
+ (reg, exp) <- getSomeReg expr
+ let fmt = VecFormat len FmtInt64
+ return $ Any fmt (\dst -> exp `snocOL`
+ (MOVD II64 (OpReg reg) (OpReg dst)) `snocOL`
+ (PUNPCKLQDQ fmt (OpReg dst) dst)
+ )
+ vector_int_broadcast _ _ c
+ = pprPanic "Broadcast not supported for : " (pdoc platform c)
+
getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
sse2 <- sse2Enabled
@@ -1294,6 +1390,16 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_U_Shr rep -> shift_code rep SHR x y {-False-}
MO_S_Shr rep -> shift_code rep SAR x y {-False-}
+ MO_VF_Shuffle l w is
+ | l * widthInBits w == 128
+ -> if
+ | avx
+ -> vector_shuffle_float l w x y is
+ | otherwise
+ -> sorry "Please enable the -mavx flag"
+ | otherwise
+ -> sorry "Please use -fllvm for wide shuffle instructions"
+
MO_VF_Extract l W32 | avx -> vector_float_extract l W32 x y
| sse -> vector_float_extract_sse l W32 x y
| otherwise
@@ -1332,6 +1438,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
-> sorry "Please enable the -mavx or -msse flag"
-- SIMD NCG TODO: integer vector operations
+ MO_V_Shuffle {} -> needLlvm mop
MO_V_Add {} -> needLlvm mop
MO_V_Sub {} -> needLlvm mop
MO_V_Mul {} -> needLlvm mop
@@ -1356,6 +1463,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_AlignmentCheck {} -> incorrectOperands
MO_VS_Neg {} -> incorrectOperands
MO_VF_Neg {} -> incorrectOperands
+ MO_V_Broadcast {} -> incorrectOperands
+ MO_VF_Broadcast {} -> incorrectOperands
-- Ternary MachOps
MO_FMA {} -> incorrectOperands
@@ -1674,13 +1783,95 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
vector_int_extract_sse _ w c e
= pprPanic "Unsupported SSE floating-point vector extract" (pdoc platform c $$ pdoc platform e $$ ppr w)
+ vector_shuffle_float :: Length -> Width -> CmmExpr -> CmmExpr -> [Int] -> NatM Register
+ vector_shuffle_float l w v1 v2 is = do
+ (r1, exp1) <- getSomeReg v1
+ (r2, exp2) <- getSomeReg v2
+ let fmt = VecFormat l (if w == W32 then FmtFloat else FmtDouble)
+ code dst
+ = exp1 `appOL` (exp2 `appOL` shuffleInstructions fmt r1 r2 is dst)
+ return (Any fmt code)
+
+ shuffleInstructions :: Format -> Reg -> Reg -> [Int] -> Reg -> OrdList Instr
+ shuffleInstructions fmt v1 v2 is dst =
+ case fmt of
+ VecFormat 2 FmtDouble ->
+ case is of
+ [i1, i2] -> case (i1, i2) of
+ (0,0) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v1) v1 dst)
+ (1,1) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v1) v1 dst)
+ (2,2) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v2) v2 dst)
+ (3,3) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v2) v2 dst)
+ (0,1) -> unitOL (VMOVU fmt (OpReg v1) (OpReg dst))
+ (2,3) -> unitOL (VMOVU fmt (OpReg v2) (OpReg dst))
+ (1,0) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v1) v1 dst)
+ (3,2) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v2) v2 dst)
+ (0,2) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v2) v1 dst)
+ (2,0) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v1) v2 dst)
+ (0,3) -> unitOL (VSHUFPD fmt (ImmInt 0b10) (OpReg v2) v1 dst)
+ (3,0) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v1) v2 dst)
+ (1,2) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v2) v1 dst)
+ (2,1) -> unitOL (VSHUFPD fmt (ImmInt 0b10) (OpReg v1) v2 dst)
+ (1,3) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v2) v1 dst)
+ (3,1) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v1) v2 dst)
+ _ -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 3" (ppr is)
+ _ -> pprPanic "vector shuffle: wrong number of indices (expected 2)" (ppr is)
+ VecFormat 4 FmtFloat
+ -- indices 0 <= i <= 7
+ | all ( (>= 0) <&&> (<= 7) ) is ->
+ case is of
+ [i1, i2, i3, i4]
+ | all ( <= 3 ) is
+ , let imm = i1 + i2 `shiftL` 2 + i3 `shiftL` 4 + i4 `shiftL` 6
+ -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v1) v1 dst)
+ | all ( >= 4 ) is
+ , let [j1, j2, j3, j4] = map ( subtract 4 ) is
+ imm = j1 + j2 `shiftL` 2 + j3 `shiftL` 4 + j4 `shiftL` 6
+ -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v2) v2 dst)
+ | i1 <= 3, i2 <= 3
+ , i3 >= 4, i4 >= 4
+ , let imm = i1 + i2 `shiftL` 2 + (i3 - 4) `shiftL` 4 + (i4 - 4) `shiftL` 6
+ -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v2) v1 dst)
+ | i1 >= 4, i2 >= 4
+ , i3 <= 3, i4 <= 3
+ , let imm = (i1 - 4) + (i2 - 4) `shiftL` 2 + i3 `shiftL` 4 + i4 `shiftL` 6
+ -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v1) v2 dst)
+ | otherwise
+ ->
+ -- Fall-back code with 4 INSERTPS operations.
+ -- SIMD NCG TODO: handle more cases with better lowering.
+ let -- bits: ss_dd_zzzz
+ -- ss: pick source location
+ -- dd: pick destination location
+ -- zzzz: pick locations to be zeroed
+ insertImm src dst = shiftL ( src `mod` 4 ) 6
+ .|. shiftL dst 4
+ vec src = if src >= 4 then v2 else v1
+ in unitOL
+ (INSERTPS fmt (ImmInt $ insertImm i1 0 .|. 0b1110) (OpReg $ vec i1) dst)
+ `snocOL`
+ (INSERTPS fmt (ImmInt $ insertImm i2 1) (OpReg $ vec i2) dst)
+ `snocOL`
+ (INSERTPS fmt (ImmInt $ insertImm i3 2) (OpReg $ vec i3) dst)
+ `snocOL`
+ (INSERTPS fmt (ImmInt $ insertImm i4 3) (OpReg $ vec i4) dst)
+ _ -> pprPanic "vector shuffle: wrong number of indices (expected 4)" (ppr is)
+ | otherwise
+ -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 7" (ppr is)
+ _ ->
+ pprPanic "vector shuffle: unsupported format" (ppr fmt)
+
getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
sse4_1 <- sse4_1Enabled
sse2 <- sse2Enabled
sse <- sseEnabled
case mop of
-- Floating point fused multiply-add operations @ ± x*y ± z@
- MO_FMA var w -> genFMA3Code w var x y z
+ MO_FMA var l w
+ | l * widthInBits w > 256
+ -> sorry "Please use -fllvm for wide vector FMA support"
+ | otherwise
+ -> genFMA3Code l w var x y z
-- Ternary vector operations
MO_VF_Insert l W32 | sse4_1 && sse -> vector_float_insert_sse l W32 x y z
@@ -1875,33 +2066,48 @@ getRegister' platform is32Bit (CmmLit lit)
-- note2: all labels are small, because we're assuming the
-- small memory model. See Note [%rip-relative addressing on x86-64].
-getRegister' platform _ (CmmLit lit)
- | isVecType cmmtype = vectorRegister cmmtype
- | otherwise = standardRegister cmmtype
- where
- cmmtype = cmmLitType platform lit
- vectorRegister ctype
- | case lit of { CmmVec fs -> all (\case { CmmInt i _ -> i == 0; CmmFloat f _ -> f == 0; _ -> False }) fs; _ -> False }
- = -- NOTE:
- -- This operation is only used to zero a register. For loading a
- -- vector literal there are pack and broadcast operations
- let format = cmmTypeFormat ctype
- code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
- in return (Any format code)
+getRegister' platform _ (CmmLit lit) =
+ case fmt of
+ VecFormat l sFmt
+ | case lit of { CmmVec fs -> all is_zero fs; _ -> False }
+ -> let code dst = unitOL (XOR fmt (OpReg dst) (OpReg dst))
+ in return (Any fmt code)
+ | Just f <- case lit of { CmmVec (f:fs) | all (== f) fs -> Just f; _ -> Nothing }
+ -> do config <- getConfig
+ let w = scalarWidth sFmt
+ broadcast = if isFloatScalarFormat sFmt
+ then MO_VF_Broadcast l w
+ else MO_V_Broadcast l w
+ (valReg, valCode) <- getSomeReg (CmmMachOp broadcast [CmmLit f])
+ let code dst =
+ valCode `snocOL`
+ (mkRegRegMoveInstr config fmt valReg dst)
+ return $ Any fmt code
| otherwise
- = pprPanic "getRegister': no support for (nonzero) vector literals" $
- vcat [ text "lit:" <+> ppr lit ]
- -- SIMD NCG TODO: can we do better here?
- standardRegister ctype
- = do
- let format = cmmTypeFormat ctype
- imm = litToImm lit
- code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
- return (Any format code)
+ -- SIMD NCG TODO: handle this case as well.
+ -> pprPanic "getRegister': non-constant vector literals not supported"
+ (ppr lit)
+ where
+ is_zero (CmmInt i _) = i == 0
+ is_zero (CmmFloat f _) = f == 0 -- TODO: mishandles negative zero
+ is_zero _ = False
+
+ _ -> let imm = litToImm lit
+ code dst = unitOL (MOV fmt (OpImm imm) (OpReg dst))
+ in return (Any fmt code)
+ where
+ cmmTy = cmmLitType platform lit
+ fmt = cmmTypeFormat cmmTy
getRegister' platform _ other
= pprPanic "getRegister(x86)" (pdoc platform other)
+getFloatLitRegister :: CmmLit -> NatM Register
+getFloatLitRegister lit = do
+ let w :: Width
+      w = case lit of { CmmInt _ w -> w; CmmFloat _ w -> w; _ -> pprPanic "getFloatLitRegister" (ppr lit) }
+ Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
+ loadFloatAmode w addr code
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
-> NatM (Reg -> InstrBlock)
@@ -3852,10 +4058,11 @@ _ `regClashesWithOp` _ = False
-- | Generate code for a fused multiply-add operation, of the form @± x * y ± z@,
-- with 3 operands (FMA3 instruction set).
-genFMA3Code :: Width
+genFMA3Code :: Length
+ -> Width
-> FMASign
-> CmmExpr -> CmmExpr -> CmmExpr -> NatM Register
-genFMA3Code w signs x y z = do
+genFMA3Code l w signs x y z = do
config <- getConfig
-- For the FMA instruction, we want to compute x * y + z
--
@@ -3883,7 +4090,11 @@ genFMA3Code w signs x y z = do
-- only possible if the other arguments don't use the destination register.
-- We check for this and if there is a conflict we move the result only after
-- the computation. See #24496 how this went wrong in the past.
- let rep = floatFormat w
+ let rep
+ | l == 1
+ = floatFormat w
+ | otherwise
+ = vecFormat (cmmVec l $ cmmFloat w)
(y_reg, y_code) <- getNonClobberedReg y
(z_op, z_code) <- getNonClobberedOperand z
x_code <- getAnyReg x
=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -259,6 +259,8 @@ data Instr
| AND Format Operand Operand
| OR Format Operand Operand
| XOR Format Operand Operand
+ -- | AVX bitwise logical XOR operation
+ | VXOR Format Operand Reg Reg
| NOT Format Operand
| NEGI Format Operand -- NEG instruction (name clash with Cond)
| BSWAP Format Reg
@@ -295,8 +297,9 @@ data Instr
-- | FMA3 fused multiply-add operations.
| FMA3 Format FMASign FMAPermutation Operand Reg Reg
- -- src3 (r/m), src2 (r), dst/src1 (r)
- -- This is exactly reversed from how intel lists the arguments.
+ -- For the FMA213 permutation (the only one we use currently),
+ -- this is: src3 (r/m), src2 (r), dst/src1 (r)
+     -- (NB: this is exactly reversed from how Intel lists the arguments.)
-- use ADD, SUB, and SQRT for arithmetic. In both cases, operands
-- are Operand Reg.
@@ -476,9 +479,16 @@ regUsageOfInstr platform instr
OR fmt src dst -> usageRM fmt src dst
XOR fmt (OpReg src) (OpReg dst)
- | src == dst -> mkRU [] [mk fmt dst]
+ | src == dst
+ -> mkRU [] [mk fmt dst]
+ XOR fmt src dst
+ -> usageRM fmt src dst
+ VXOR fmt (OpReg src1) src2 dst
+ | src1 == src2, src1 == dst
+ -> mkRU [] [mk fmt dst]
+ VXOR fmt src1 src2 dst
+ -> mkRU (use_R fmt src1 [mk fmt src2]) [mk fmt dst]
- XOR fmt src dst -> usageRM fmt src dst
NOT fmt op -> usageM fmt op
BSWAP fmt reg -> mkRU [mk fmt reg] [mk fmt reg]
NEGI fmt op -> usageM fmt op
@@ -721,6 +731,7 @@ patchRegsOfInstr platform instr env
AND fmt src dst -> patch2 (AND fmt) src dst
OR fmt src dst -> patch2 (OR fmt) src dst
XOR fmt src dst -> patch2 (XOR fmt) src dst
+ VXOR fmt src1 src2 dst -> VXOR fmt (patchOp src1) (env src2) (env dst)
NOT fmt op -> patch1 (NOT fmt) op
BSWAP fmt reg -> BSWAP fmt (env reg)
NEGI fmt op -> patch1 (NEGI fmt) op
@@ -763,6 +774,8 @@ patchRegsOfInstr platform instr env
LOCATION {} -> instr
UNWIND {} -> instr
DELTA _ -> instr
+ LDATA {} -> instr
+ NEWBLOCK {} -> instr
JXX _ _ -> instr
JXX_GBL _ _ -> instr
@@ -829,8 +842,6 @@ patchRegsOfInstr platform instr env
PUNPCKLQDQ fmt src dst
-> PUNPCKLQDQ fmt (patchOp src) (env dst)
- _other -> panic "patchRegs: unrecognised instr"
-
where
patch1 :: (Operand -> a) -> Operand -> a
patch1 insn op = insn $! patchOp op
=====================================
compiler/GHC/CmmToAsm/X86/Ppr.hs
=====================================
@@ -752,11 +752,14 @@ pprInstr platform i = case i of
XOR format src dst
-> pprFormatOpOp (text "xor") format src dst
+ VXOR fmt src1 src2 dst
+ -> pprVxor fmt src1 src2 dst
+
POPCNT format src dst
-> pprOpOp (text "popcnt") format src (OpReg dst)
LZCNT format src dst
- -> pprOpOp (text "lzcnt") format src (OpReg dst)
+ -> pprOpOp (text "lzcnt") format src (OpReg dst)
TZCNT format src dst
-> pprOpOp (text "tzcnt") format src (OpReg dst)
@@ -1304,6 +1307,23 @@ pprInstr platform i = case i of
pprReg platform format reg3
]
+ pprVxor :: Format -> Operand -> Reg -> Reg -> doc
+ pprVxor fmt src1 src2 dst
+ = line $ hcat [
+ pprGenMnemonic mem fmt,
+ pprOperand platform fmt src1,
+ comma,
+ pprReg platform fmt src2,
+ comma,
+ pprReg platform fmt dst
+ ]
+ where
+ mem = case fmt of
+ VecFormat _ FmtFloat -> text "vxorps"
+ VecFormat _ FmtDouble -> text "vxorpd"
+          _ -> pprPanic "GHC.CmmToAsm.X86.Ppr.pprVxor: element type must be Float or Double"
+ (ppr fmt)
+
pprInsert :: Line doc -> Format -> Imm -> Operand -> Reg -> doc
pprInsert name format off src dst
= line $ hcat [
=====================================
compiler/GHC/CmmToC.hs
=====================================
@@ -727,7 +727,7 @@ pprMachOp_for_C platform mop = case mop of
MO_F_Quot _ -> char '/'
-- Floating-point fused multiply-add operations
- MO_FMA FMAdd w ->
+ MO_FMA FMAdd 1 w ->
case w of
W32 -> text "fmaf"
W64 -> text "fma"
@@ -736,10 +736,15 @@ pprMachOp_for_C platform mop = case mop of
(text "FMAdd")
(panic $ "PprC.pprMachOp_for_C: FMAdd unsupported"
++ "at width " ++ show w)
- MO_FMA var _ ->
- pprTrace "offending mop:"
+ MO_FMA var l width
+ | l == 1
+ -> pprTrace "offending mop:"
(text $ "FMA " ++ show var)
(panic $ "PprC.pprMachOp_for_C: should have been handled earlier!")
+ | otherwise
+ -> pprTrace "offending mop:"
+ (text $ "FMA " ++ show var ++ " " ++ show l ++ " " ++ show width)
+ (panic $ "PprC.pprMachOp_for_C: unsupported vector operation")
-- Signed comparisons
MO_S_Ge _ -> text ">="
@@ -828,6 +833,14 @@ pprMachOp_for_C platform mop = case mop of
MO_AlignmentCheck {} -> panic "-falignment-sanitisation not supported by unregisterised backend"
-- SIMD vector instructions: currently unsupported
+ MO_V_Shuffle {} -> pprTrace "offending mop:"
+ (text "MO_V_Shuffle")
+ (panic $ "PprC.pprMachOp_for_C: MO_V_Shuffle"
+ ++ "unsupported by the unregisterised backend")
+ MO_VF_Shuffle {} -> pprTrace "offending mop:"
+ (text "MO_VF_Shuffle")
+ (panic $ "PprC.pprMachOp_for_C: MO_VF_Shuffle"
+ ++ "unsupported by the unregisterised backend")
MO_V_Insert {} -> pprTrace "offending mop:"
(text "MO_V_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_V_Insert"
@@ -868,6 +881,14 @@ pprMachOp_for_C platform mop = case mop of
(text "MO_VU_Rem")
(panic $ "PprC.pprMachOp_for_C: MO_VU_Rem"
++ "unsupported by the unregisterised backend")
+ MO_V_Broadcast {} -> pprTrace "offending mop:"
+ (text "MO_V_Broadcast")
+ (panic $ "PprC.pprMachOp_for_C: MO_V_Broadcast"
+ ++ "unsupported by the unregisterised backend")
+ MO_VF_Broadcast {} -> pprTrace "offending mop:"
+ (text "MO_VF_Broadcast")
+ (panic $ "PprC.pprMachOp_for_C: MO_VF_Broadcast"
+ ++ "unsupported by the unregisterised backend")
MO_VF_Insert {} -> pprTrace "offending mop:"
(text "MO_VF_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -1460,6 +1460,9 @@ genMachOp _ op [x] = case op of
all0s = LMLitVar $ LMVectorLit (replicate len all0)
in negateVec vecty all0s LM_MO_FSub
+ MO_V_Broadcast l w -> genBroadcastOp l w x
+ MO_VF_Broadcast l w -> genBroadcastOp l w x
+
MO_RelaxedRead w -> exprToVar (CmmLoad x (cmmBits w) NaturallyAligned)
MO_AlignmentCheck _ _ -> panic "-falignment-sanitisation is not supported by -fllvm"
@@ -1491,7 +1494,7 @@ genMachOp _ op [x] = case op of
MO_F_Mul _ -> panicOp
MO_F_Quot _ -> panicOp
- MO_FMA _ _ -> panicOp
+ MO_FMA _ _ _ -> panicOp
MO_F_Eq _ -> panicOp
MO_F_Ne _ -> panicOp
@@ -1523,6 +1526,9 @@ genMachOp _ op [x] = case op of
MO_VF_Insert _ _ -> panicOp
MO_VF_Extract _ _ -> panicOp
+ MO_V_Shuffle {} -> panicOp
+ MO_VF_Shuffle {} -> panicOp
+
MO_VF_Add _ _ -> panicOp
MO_VF_Sub _ _ -> panicOp
MO_VF_Mul _ _ -> panicOp
@@ -1676,7 +1682,7 @@ genMachOp_slow opt op [x, y] = case op of
MO_F_Mul _ -> genBinMach LM_MO_FMul
MO_F_Quot _ -> genBinMach LM_MO_FDiv
- MO_FMA _ _ -> panicOp
+ MO_FMA _ _ _ -> panicOp
MO_And _ -> genBinMach LM_MO_And
MO_Or _ -> genBinMach LM_MO_Or
@@ -1716,9 +1722,14 @@ genMachOp_slow opt op [x, y] = case op of
MO_VS_Neg {} -> panicOp
+ MO_VF_Broadcast {} -> panicOp
+ MO_V_Broadcast {} -> panicOp
MO_V_Insert {} -> panicOp
MO_VF_Insert {} -> panicOp
+ MO_V_Shuffle _ _ is -> genShuffleOp is x y
+ MO_VF_Shuffle _ _ is -> genShuffleOp is x y
+
MO_VF_Neg {} -> panicOp
MO_RelaxedRead {} -> panicOp
@@ -1815,7 +1826,7 @@ genMachOp_slow _opt op [x, y, z] = do
panicOp = panic $ "LLVM.CodeGen.genMachOp_slow: non-ternary op encountered "
++ "with three arguments! (" ++ show op ++ ")"
case op of
- MO_FMA var width ->
+ MO_FMA var lg width ->
case var of
-- LLVM only has the fmadd variant.
FMAdd -> genFmaOp x y z
@@ -1825,12 +1836,43 @@ genMachOp_slow _opt op [x, y, z] = do
FNMAdd -> genFmaOp (neg x) y z
FNMSub -> genFmaOp (neg x) y (neg z)
where
- neg x = CmmMachOp (MO_F_Neg width) [x]
+ neg x
+ | lg == 1
+ = CmmMachOp (MO_F_Neg width) [x]
+ | otherwise
+ = CmmMachOp (MO_VF_Neg lg width) [x]
_ -> panicOp
-- More than three expressions, invalid!
genMachOp_slow _ _ _ = panic "genMachOp_slow: More than 3 expressions in MachOp!"
+genBroadcastOp :: Int -> Width -> CmmExpr -> LlvmM ExprData
+genBroadcastOp lg _width x = runExprData $ do
+ -- To broadcast a scalar x as a vector v:
+ -- 1. insert x at the 0 position of the zero vector
+ -- 2. shuffle x into all positions
+ var_x <- exprToVarW x
+ let tx = getVarType var_x
+ tv = LMVector lg tx
+ z = if isFloat tx
+ then LMFloatLit 0 tx
+ else LMIntLit 0 tx
+ zs = LMLitVar $ LMVectorLit $ replicate lg z
+ w <- doExprW tv $ Insert zs var_x (LMLitVar $ LMIntLit 0 (LMInt 32))
+ doExprW tv $ Shuffle w w (replicate lg 0)
+
+genShuffleOp :: [Int] -> CmmExpr -> CmmExpr -> LlvmM ExprData
+genShuffleOp is x y = runExprData $ do
+ vx <- exprToVarW x
+ vy <- exprToVarW y
+ let tx = getVarType vx
+ ty = getVarType vy
+ Panic.massertPpr
+ (tx == ty)
+ (vcat [ text "shuffle: mismatched arg types"
+ , ppLlvmType tx, ppLlvmType ty ])
+ doExprW tx $ Shuffle vx vy is
+
-- | Generate code for a fused multiply-add operation.
genFmaOp :: CmmExpr -> CmmExpr -> CmmExpr -> LlvmM ExprData
genFmaOp x y z = runExprData $ do
@@ -1847,6 +1889,12 @@ genFmaOp x y z = runExprData $ do
let fname = case tx of
LMFloat -> fsLit "llvm.fma.f32"
LMDouble -> fsLit "llvm.fma.f64"
+ LMVector 4 LMFloat -> fsLit "llvm.fma.v4f32"
+ LMVector 8 LMFloat -> fsLit "llvm.fma.v8f32"
+ LMVector 16 LMFloat -> fsLit "llvm.fma.v16f32"
+ LMVector 2 LMDouble -> fsLit "llvm.fma.v2f64"
+ LMVector 4 LMDouble -> fsLit "llvm.fma.v4f64"
+ LMVector 8 LMDouble -> fsLit "llvm.fma.v8f64"
_ -> pprPanic "CmmToLlvm.genFmaOp: unsupported type" (ppLlvmType tx)
fptr <- liftExprData $ getInstrinct fname ty [tx, ty, tz]
doExprW tx $ Call StdCall fptr [vx, vy, vz] [ReadNone, NoUnwind]
=====================================
compiler/GHC/Llvm/Ppr.hs
=====================================
@@ -281,6 +281,7 @@ ppLlvmExpression opts expr
Extract vec idx -> ppExtract opts vec idx
ExtractV struct idx -> ppExtractV opts struct idx
Insert vec elt idx -> ppInsert opts vec elt idx
+ Shuffle v1 v2 idxs -> ppShuffle opts v1 v2 idxs
GetElemPtr inb ptr indexes -> ppGetElementPtr opts inb ptr indexes
Load ptr align -> ppLoad opts ptr align
ALoad ord st ptr -> ppALoad opts ord st ptr
@@ -577,6 +578,15 @@ ppInsert opts vec elt idx =
{-# SPECIALIZE ppInsert :: LlvmCgConfig -> LlvmVar -> LlvmVar -> LlvmVar -> SDoc #-}
{-# SPECIALIZE ppInsert :: LlvmCgConfig -> LlvmVar -> LlvmVar -> LlvmVar -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
+ppShuffle :: IsLine doc => LlvmCgConfig -> LlvmVar -> LlvmVar -> [Int] -> doc
+ppShuffle opts v1 v2 idxs =
+ text "shufflevector"
+ <+> ppLlvmType (getVarType v1) <+> ppName opts v1 <> comma
+ <+> ppLlvmType (getVarType v2) <+> ppName opts v2 <> comma
+ <+> ppLlvmType (LMVector (length idxs) (LMInt 32)) <+> ppLit opts (LMVectorLit $ map ((`LMIntLit` (LMInt 32)) . fromIntegral) idxs)
+{-# SPECIALIZE ppShuffle :: LlvmCgConfig -> LlvmVar -> LlvmVar -> [Int] -> SDoc #-}
+{-# SPECIALIZE ppShuffle :: LlvmCgConfig -> LlvmVar -> LlvmVar -> [Int] -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
+
ppMetaAnnotExpr :: IsLine doc => LlvmCgConfig -> [MetaAnnot] -> LlvmExpression -> doc
ppMetaAnnotExpr opts meta expr =
ppLlvmExpression opts expr <> ppMetaAnnots opts meta
=====================================
compiler/GHC/Llvm/Syntax.hs
=====================================
@@ -237,6 +237,10 @@ data LlvmExpression
-}
| Insert LlvmVar LlvmVar LlvmVar
+ {- | Shuffle two vectors into a destination vector using given indices
+ -}
+ | Shuffle LlvmVar LlvmVar [Int]
+
{- |
Allocate amount * sizeof(tp) bytes on the heap
* tp: LlvmType to reserve room for
=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -949,16 +949,8 @@ emitPrimOp cfg primop =
-- SIMD primops
(VecBroadcastOp vcat n w) -> \[e] -> opIntoRegs $ \[res] -> do
checkVecCompatibility cfg vcat n w
- doVecPackOp ty zeros (replicate n e) res
+ doVecBroadcastOp ty e res
where
- zeros :: CmmExpr
- zeros = CmmLit $ CmmVec (replicate n zero)
-
- zero :: CmmLit
- zero = case vcat of
- IntVec -> CmmInt 0 w
- WordVec -> CmmInt 0 w
- FloatVec -> CmmFloat 0 w
ty :: CmmType
ty = vecVmmType vcat n w
@@ -1085,6 +1077,10 @@ emitPrimOp cfg primop =
ty :: CmmType
ty = vecCmmCat vcat w
+ VecShuffleOp vcat n w -> \ args -> opIntoRegs $ \ [res] -> do
+ checkVecCompatibility cfg vcat n w
+ doShuffleOp (vecVmmType vcat n w) args res
+
-- Prefetch
PrefetchByteArrayOp3 -> \args -> opIntoRegs $ \[] ->
doPrefetchByteArrayOp 3 args
@@ -1491,10 +1487,10 @@ emitPrimOp cfg primop =
DoubleDivOp -> opTranslate (MO_F_Quot W64)
DoubleNegOp -> opTranslate (MO_F_Neg W64)
- DoubleFMAdd -> fmaOp FMAdd W64
- DoubleFMSub -> fmaOp FMSub W64
- DoubleFNMAdd -> fmaOp FNMAdd W64
- DoubleFNMSub -> fmaOp FNMSub W64
+ DoubleFMAdd -> fmaOp FMAdd 1 W64
+ DoubleFMSub -> fmaOp FMSub 1 W64
+ DoubleFNMAdd -> fmaOp FNMAdd 1 W64
+ DoubleFNMSub -> fmaOp FNMSub 1 W64
-- Float ops
@@ -1511,10 +1507,10 @@ emitPrimOp cfg primop =
FloatDivOp -> opTranslate (MO_F_Quot W32)
FloatNegOp -> opTranslate (MO_F_Neg W32)
- FloatFMAdd -> fmaOp FMAdd W32
- FloatFMSub -> fmaOp FMSub W32
- FloatFNMAdd -> fmaOp FNMAdd W32
- FloatFNMSub -> fmaOp FNMSub W32
+ FloatFMAdd -> fmaOp FMAdd 1 W32
+ FloatFMSub -> fmaOp FMSub 1 W32
+ FloatFNMAdd -> fmaOp FNMAdd 1 W32
+ FloatFNMSub -> fmaOp FNMSub 1 W32
-- Vector ops
@@ -1542,6 +1538,12 @@ emitPrimOp cfg primop =
(VecRemOp WordVec n w) -> opTranslate (MO_VU_Rem n w)
(VecNegOp WordVec _ _) -> \_ -> panic "unsupported primop"
+ -- Vector FMA instructions
+ VecFMAdd _ n w -> fmaOp FMAdd n w
+ VecFMSub _ n w -> fmaOp FMSub n w
+ VecFNMAdd _ n w -> fmaOp FNMAdd n w
+ VecFNMSub _ n w -> fmaOp FNMSub n w
+
-- Conversions
IntToDoubleOp -> opTranslate (MO_SF_Round (wordWidth platform) W64)
@@ -1839,10 +1841,11 @@ emitPrimOp cfg primop =
allowFMA = stgToCmmAllowFMAInstr cfg
- fmaOp :: FMASign -> Width -> [CmmActual] -> PrimopCmmEmit
- fmaOp signs w args@[arg_x, arg_y, arg_z]
- | allowFMA signs
- = opTranslate (MO_FMA signs w) args
+ fmaOp :: FMASign -> Length -> Width -> [CmmActual] -> PrimopCmmEmit
+ fmaOp signs l w args@[arg_x, arg_y, arg_z]
+ | allowFMA signs
+ || l > 1 -- (always use the MachOp for vector FMA)
+ = opTranslate (MO_FMA signs l w) args
| otherwise
= case signs of
@@ -1851,12 +1854,16 @@ emitPrimOp cfg primop =
-- Other fused multiply-add operations are implemented in terms of fmadd
-- This is sound: it does not lose any precision.
- FMSub -> fmaOp FMAdd w [arg_x, arg_y, neg arg_z]
- FNMAdd -> fmaOp FMAdd w [neg arg_x, arg_y, arg_z]
- FNMSub -> fmaOp FMAdd w [neg arg_x, arg_y, neg arg_z]
+ FMSub -> fmaOp FMAdd l w [arg_x, arg_y, neg arg_z]
+ FNMAdd -> fmaOp FMAdd l w [neg arg_x, arg_y, arg_z]
+ FNMSub -> fmaOp FMAdd l w [neg arg_x, arg_y, neg arg_z]
where
- neg x = CmmMachOp (MO_F_Neg w) [x]
- fmaOp _ _ _ = panic "fmaOp: wrong number of arguments (expected 3)"
+ neg x
+ | l == 1
+ = CmmMachOp (MO_F_Neg w) [x]
+ | otherwise
+ = CmmMachOp (MO_VF_Neg l w) [x]
+ fmaOp _ _ _ _ = panic "fmaOp: wrong number of arguments (expected 3)"
data PrimopCmmEmit
-- | Out of line fake primop that's actually just a foreign call to other
@@ -2588,6 +2595,21 @@ checkVecCompatibility cfg vcat l w =
------------------------------------------------------------------------------
-- Helpers for translating vector packing and unpacking.
+doVecBroadcastOp :: CmmType -- Type of vector
+ -> CmmExpr -- Element
+ -> CmmFormal -- Destination for result
+ -> FCode ()
+doVecBroadcastOp ty e dst
+ | isFloatType (vecElemType ty)
+ = emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Broadcast len wid) [e])
+ | otherwise
+ = emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Broadcast len wid) [e])
+ where
+ len :: Length
+ len = vecLength ty
+ wid :: Width
+ wid = typeWidth (vecElemType ty)
+
doVecPackOp :: CmmType -- Type of vector
-> [CmmExpr] -- Elements
-> CmmFormal -- Destination for result
@@ -2666,6 +2688,36 @@ doVecInsertOp ty src e idx res = do
wid :: Width
wid = typeWidth (vecElemType ty)
+------------------------------------------------------------------------------
+-- Shuffles
+
+doShuffleOp :: CmmType -> [CmmExpr] -> LocalReg -> FCode ()
+doShuffleOp ty (v1:v2:idxs) res
+ | isVecType ty
+ = case mapMaybe idx_maybe idxs of
+ is
+ | length is == len
+ -> emitAssign (CmmLocal res) (CmmMachOp (mo is) [v1,v2])
+ | otherwise
+ -> pprPanic "doShuffleOp" $
+ vcat [ text "shuffle indices must be literals, 0 <= i <" <+> ppr len ]
+ | otherwise
+ = pprPanic "doShuffleOp" $
+ vcat [ text "non-vector argument type:" <+> ppr ty ]
+ where
+ len = vecLength ty
+ wid = typeWidth $ vecElemType ty
+ mo = if isFloatType (vecElemType ty)
+ then MO_VF_Shuffle len wid
+ else MO_V_Shuffle len wid
+ idx_maybe (CmmLit (CmmInt i _))
+ | let j :: Int; j = fromInteger i
+ , j >= 0, j < 2 * len
+ = Just j
+ idx_maybe _ = Nothing
+doShuffleOp _ _ _ =
+ panic "doShuffleOp: wrong number of arguments"
+
------------------------------------------------------------------------------
-- Helpers for translating prefetching.
=====================================
compiler/GHC/StgToJS/Prim.hs
=====================================
@@ -1192,12 +1192,18 @@ genPrim prof bound ty op = case op of
VecReadOffAddrOp _ _ _ -> unhandledPrimop op
VecWriteOffAddrOp _ _ _ -> unhandledPrimop op
+ VecFMAdd {} -> unhandledPrimop op
+ VecFMSub {} -> unhandledPrimop op
+ VecFNMAdd {} -> unhandledPrimop op
+ VecFNMSub {} -> unhandledPrimop op
+
VecIndexScalarByteArrayOp _ _ _ -> unhandledPrimop op
VecReadScalarByteArrayOp _ _ _ -> unhandledPrimop op
VecWriteScalarByteArrayOp _ _ _ -> unhandledPrimop op
VecIndexScalarOffAddrOp _ _ _ -> unhandledPrimop op
VecReadScalarOffAddrOp _ _ _ -> unhandledPrimop op
VecWriteScalarOffAddrOp _ _ _ -> unhandledPrimop op
+ VecShuffleOp _ _ _ -> unhandledPrimop op
PrefetchByteArrayOp3 -> noOp
PrefetchMutableByteArrayOp3 -> noOp
=====================================
docs/users_guide/9.12.1-notes.rst
=====================================
@@ -111,6 +111,19 @@ Runtime system
- Usage of deprecated primops is now correctly reported (#19629).
+- New fused multiply-add instructions for vectors of floating-point values,
+ such as ``fmaddFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# -> FloatX4#`` and
+ ``fnmsubDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# -> DoubleX2#``.
+ These follow the same semantics as ``fmadd``/``fmsub``/``fnmadd``/``fnmsub``,
+ operating in parallel on vectors of floating-point values.
+
+- New vector shuffle instructions, such as ``shuffleFloatX4# :: FloatX4# -> FloatX4# -> (# Int#, Int#, Int#, Int# #) -> FloatX4#``.
+ These instructions take two input vectors and a collection of indices (which must
+  be compile-time literal integers), and construct a result vector by extracting
+  the values at those indices. For instance, ``shuffleFloatX4#`` on input vectors with
+ components ``(# 0.1#, 11.1#, 22.1#, 33.1# #)`` and ``(# 44.1#, 55.1#, 66.1#, 77.1# #)``,
+ and indices ``(# 4#, 3#, 6#, 1# #)``, will return a vector with components
+ ``(# 44.1#, 33.1#, 66.1#, 11.1# #)``.
``ghc`` library
~~~~~~~~~~~~~~~
=====================================
libraries/base/src/GHC/Base.hs
=====================================
@@ -147,6 +147,62 @@ import GHC.Prim hiding
dataToTagSmall#, dataToTagLarge#
-- whereFrom# is similarly internal.
, whereFrom#
+ -- Don't re-export vector FMA instructions
+ , fmaddFloatX4#
+ , fmsubFloatX4#
+ , fnmaddFloatX4#
+ , fnmsubFloatX4#
+ , fmaddFloatX8#
+ , fmsubFloatX8#
+ , fnmaddFloatX8#
+ , fnmsubFloatX8#
+ , fmaddFloatX16#
+ , fmsubFloatX16#
+ , fnmaddFloatX16#
+ , fnmsubFloatX16#
+ , fmaddDoubleX2#
+ , fmsubDoubleX2#
+ , fnmaddDoubleX2#
+ , fnmsubDoubleX2#
+ , fmaddDoubleX4#
+ , fmsubDoubleX4#
+ , fnmaddDoubleX4#
+ , fnmsubDoubleX4#
+ , fmaddDoubleX8#
+ , fmsubDoubleX8#
+ , fnmaddDoubleX8#
+ , fnmsubDoubleX8#
+ -- Don't re-export SIMD shuffle primops
+ , shuffleDoubleX2#
+ , shuffleDoubleX4#
+ , shuffleDoubleX8#
+ , shuffleFloatX16#
+ , shuffleFloatX4#
+ , shuffleFloatX8#
+ , shuffleInt16X16#
+ , shuffleInt16X32#
+ , shuffleInt16X8#
+ , shuffleInt32X16#
+ , shuffleInt32X4#
+ , shuffleInt32X8#
+ , shuffleInt64X2#
+ , shuffleInt64X4#
+ , shuffleInt64X8#
+ , shuffleInt8X16#
+ , shuffleInt8X32#
+ , shuffleInt8X64#
+ , shuffleWord16X16#
+ , shuffleWord16X32#
+ , shuffleWord16X8#
+ , shuffleWord32X16#
+ , shuffleWord32X4#
+ , shuffleWord32X8#
+ , shuffleWord64X2#
+ , shuffleWord64X4#
+ , shuffleWord64X8#
+ , shuffleWord8X16#
+ , shuffleWord8X32#
+ , shuffleWord8X64#
)
import GHC.Prim.Ext
=====================================
libraries/base/src/GHC/Exts.hs
=====================================
@@ -120,6 +120,62 @@ import GHC.Prim hiding
, dataToTagSmall#, dataToTagLarge#
-- whereFrom# is similarly internal.
, whereFrom#
+ -- Don't re-export vector FMA instructions
+ , fmaddFloatX4#
+ , fmsubFloatX4#
+ , fnmaddFloatX4#
+ , fnmsubFloatX4#
+ , fmaddFloatX8#
+ , fmsubFloatX8#
+ , fnmaddFloatX8#
+ , fnmsubFloatX8#
+ , fmaddFloatX16#
+ , fmsubFloatX16#
+ , fnmaddFloatX16#
+ , fnmsubFloatX16#
+ , fmaddDoubleX2#
+ , fmsubDoubleX2#
+ , fnmaddDoubleX2#
+ , fnmsubDoubleX2#
+ , fmaddDoubleX4#
+ , fmsubDoubleX4#
+ , fnmaddDoubleX4#
+ , fnmsubDoubleX4#
+ , fmaddDoubleX8#
+ , fmsubDoubleX8#
+ , fnmaddDoubleX8#
+ , fnmsubDoubleX8#
+ -- Don't re-export SIMD shuffle primops
+ , shuffleDoubleX2#
+ , shuffleDoubleX4#
+ , shuffleDoubleX8#
+ , shuffleFloatX16#
+ , shuffleFloatX4#
+ , shuffleFloatX8#
+ , shuffleInt16X16#
+ , shuffleInt16X32#
+ , shuffleInt16X8#
+ , shuffleInt32X16#
+ , shuffleInt32X4#
+ , shuffleInt32X8#
+ , shuffleInt64X2#
+ , shuffleInt64X4#
+ , shuffleInt64X8#
+ , shuffleInt8X16#
+ , shuffleInt8X32#
+ , shuffleInt8X64#
+ , shuffleWord16X16#
+ , shuffleWord16X32#
+ , shuffleWord16X8#
+ , shuffleWord32X16#
+ , shuffleWord32X4#
+ , shuffleWord32X8#
+ , shuffleWord64X2#
+ , shuffleWord64X4#
+ , shuffleWord64X8#
+ , shuffleWord8X16#
+ , shuffleWord8X32#
+ , shuffleWord8X64#
)
import GHC.Prim.Ext
=====================================
testsuite/tests/simd/should_run/all.T
=====================================
@@ -22,4 +22,5 @@ test('simd009', [ req_th
]
, multimod_compile_and_run, ['simd009', ''])
test('simd010', [], compile_and_run, [''])
+test('simd011', [when(have_cpu_feature('fma'), extra_hc_opts('-mfma'))], compile_and_run, [''])
test('simd012', [], compile_and_run, [''])
=====================================
testsuite/tests/simd/should_run/simd006.hs
=====================================
@@ -123,17 +123,15 @@ instance Arbitrary Word32 where
newtype FloatNT = FloatNT Float
deriving newtype (Show, Num)
instance Eq FloatNT where
- FloatNT f1 == FloatNT f2 = f1 == f2
- -- TODO: tests fail with this equality due to signed zeros
- -- castFloatToWord32 f1 == castFloatToWord32 f2
+ FloatNT f1 == FloatNT f2 =
+ castFloatToWord32 f1 == castFloatToWord32 f2
instance Arbitrary FloatNT where
arbitrary = FloatNT . castWord32ToFloat <$> arbitrary
newtype DoubleNT = DoubleNT Double
deriving newtype (Show, Num)
instance Eq DoubleNT where
- DoubleNT d1 == DoubleNT d2 = d1 == d2
- -- TODO: tests fail with this equality due to signed zeros
- -- castDoubleToWord64 d1 == castDoubleToWord64 d2
+ DoubleNT d1 == DoubleNT d2 =
+ castDoubleToWord64 d1 == castDoubleToWord64 d2
instance Arbitrary DoubleNT where
arbitrary = DoubleNT . castWord64ToDouble <$> arbitrary
=====================================
testsuite/tests/simd/should_run/simd011.stdout
=====================================
@@ -0,0 +1,8 @@
+(1011.11,2044.4401,3099.99,4177.7603)
+(-988.89,-1955.5599,-2900.01,-3822.24)
+(988.89,1955.5599,2900.01,3822.24)
+(-1011.11,-2044.4401,-3099.99,-4177.7603)
+(1011.11,2044.44)
+(-988.89,-1955.56)
+(988.89,1955.56)
+(-1011.11,-2044.44)
=====================================
utils/genprimopcode/Lexer.x
=====================================
@@ -67,6 +67,7 @@ words :-
<0> "SCALAR" { mkT TSCALAR }
<0> "VECTOR" { mkT TVECTOR }
<0> "VECTUPLE" { mkT TVECTUPLE }
+ <0> "INTVECTUPLE" { mkT TINTVECTUPLE }
<0> [a-z][a-zA-Z0-9\#_]* { mkTv TLowerName }
<0> [A-Z][a-zA-Z0-9\#_]* { mkTv TUpperName }
<0> \-? [0-9][0-9]* { mkTv (TInteger . read) }
=====================================
utils/genprimopcode/Main.hs
=====================================
@@ -79,6 +79,8 @@ desugarVectorSpec i = case vecOptions i of
desugarTy (TyApp SCALAR []) = TyApp (TyCon repCon) []
desugarTy (TyApp VECTOR []) = TyApp (VecTyCon vecCons vecTyName) []
desugarTy (TyApp VECTUPLE []) = TyUTup (replicate n (TyApp (TyCon repCon) []))
+ desugarTy (TyApp INTVECTUPLE [])
+ = TyUTup (replicate n (TyApp (TyCon "Int#") []) )
desugarTy (TyApp tycon ts) = TyApp tycon (map desugarTy ts)
desugarTy t@(TyVar {}) = t
desugarTy (TyUTup ts) = TyUTup (map desugarTy ts)
=====================================
utils/genprimopcode/Parser.y
=====================================
@@ -58,6 +58,7 @@ import AccessOps
SCALAR { TSCALAR }
VECTOR { TVECTOR }
VECTUPLE { TVECTUPLE }
+ INTVECTUPLE { TINTVECTUPLE }
bytearray_access_ops { TByteArrayAccessOps }
addr_access_ops { TAddrAccessOps }
thats_all_folks { TThatsAllFolks }
@@ -215,6 +216,7 @@ pTycon : upperName { TyCon $1 }
| SCALAR { SCALAR }
| VECTOR { VECTOR }
| VECTUPLE { VECTUPLE }
+ | INTVECTUPLE { INTVECTUPLE }
{
parse :: String -> Either String Info
=====================================
utils/genprimopcode/ParserM.hs
=====================================
@@ -124,6 +124,7 @@ data Token = TEOF
| TSCALAR
| TVECTOR
| TVECTUPLE
+ | TINTVECTUPLE
deriving Show
-- Actions
=====================================
utils/genprimopcode/Syntax.hs
=====================================
@@ -87,6 +87,7 @@ data TyCon = TyCon String
| SCALAR
| VECTOR
| VECTUPLE
+ | INTVECTUPLE
| VecTyCon String String
deriving (Eq, Ord)
@@ -95,6 +96,7 @@ instance Show TyCon where
show SCALAR = "SCALAR"
show VECTOR = "VECTOR"
show VECTUPLE = "VECTUPLE"
+ show INTVECTUPLE = "INTVECTUPLE"
show (VecTyCon tc _) = tc
-- Follow definitions of Fixity and FixityDirection in GHC
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e2b57729572cd502b27b91635e0f37376306d583...f47ef2d5f999e43af1dcc333142368d91ee5d3af
--
This project does not include diff previews in email notifications.
You're receiving this email because of your account on gitlab.haskell.org.