[Git][ghc/ghc][wip/ncg-simd] WIP: start adding vector shuffle primops
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Sun Jun 9 12:18:57 UTC 2024
sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC
Commits:
d46d56d4 by sheaf at 2024-06-09T14:18:00+02:00
WIP: start adding vector shuffle primops
- - - - -
12 changed files:
- compiler/GHC/Builtin/primops.txt.pp
- compiler/GHC/Cmm/MachOp.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToC.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/StgToCmm/Prim.hs
- compiler/GHC/StgToJS/Prim.hs
- utils/genprimopcode/Lexer.x
- utils/genprimopcode/Main.hs
- utils/genprimopcode/Parser.y
- utils/genprimopcode/ParserM.hs
- utils/genprimopcode/Syntax.hs
Changes:
=====================================
compiler/GHC/Builtin/primops.txt.pp
=====================================
@@ -4190,6 +4190,13 @@ primop VecWriteScalarOffAddrOp "writeOffAddrAs#" GenPrimOp
llvm_only = True
vector = ALL_VECTOR_TYPES
+primop VecShuffleOp "shuffle#" GenPrimOp
+ VECTOR -> VECTOR -> INTVECTUPLE -> VECTOR
+ { Shuffle elements of the concatenation of the two input vectors
+ into the result vector.}
+ with llvm_only = True
+ vector = ALL_VECTOR_TYPES
+
------------------------------------------------------------------------
section "Prefetch"
=====================================
compiler/GHC/Cmm/MachOp.hs
=====================================
@@ -171,6 +171,10 @@ data MachOp
| MO_VU_Quot Length Width
| MO_VU_Rem Length Width
+ -- Vector shuffles
+ | MO_V_Shuffle Length Width [Int]
+ | MO_VF_Shuffle Length Width [Int]
+
-- Floating point vector element insertion and extraction operations
| MO_VF_Broadcast Length Width -- Broadcast a scalar into a vector
| MO_VF_Insert Length Width -- Insert scalar into vector
@@ -494,6 +498,9 @@ machOpResultType platform mop tys =
MO_VU_Quot l w -> cmmVec l (cmmBits w)
MO_VU_Rem l w -> cmmVec l (cmmBits w)
+ MO_V_Shuffle l w _ -> cmmVec l (cmmBits w)
+ MO_VF_Shuffle l w _ -> cmmVec l (cmmFloat w)
+
MO_VF_Broadcast l w -> cmmVec l (cmmFloat w)
MO_VF_Insert l w -> cmmVec l (cmmFloat w)
MO_VF_Extract _ w -> cmmFloat w
@@ -576,6 +583,9 @@ machOpArgReps platform op =
MO_FS_Conv from _ -> [from]
MO_FF_Conv from _ -> [from]
+ MO_V_Shuffle l r _ -> [vecwidth l r, vecwidth l r]
+ MO_VF_Shuffle l r _ -> [vecwidth l r, vecwidth l r]
+
MO_V_Insert l r -> [vecwidth l r, r, W32]
MO_V_Extract l r -> [vecwidth l r, W32]
MO_VF_Broadcast l r -> [vecwidth l r, r]
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1104,6 +1104,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_VS_Neg {} -> needLlvm mop
MO_VU_Quot {} -> needLlvm mop
MO_VU_Rem {} -> needLlvm mop
+ MO_V_Shuffle {} -> incorrectOperands
+ MO_VF_Shuffle {} -> incorrectOperands
MO_VF_Broadcast {} -> incorrectOperands
MO_VF_Insert {} -> incorrectOperands
MO_VF_Extract {} -> incorrectOperands
@@ -1246,6 +1248,18 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_U_Shr rep -> shift_code rep SHR x y {-False-}
MO_S_Shr rep -> shift_code rep SAR x y {-False-}
+ MO_V_Shuffle l w is
+ | avx
+ -> vector_shuffle_int l w x y is
+ | otherwise
+ -> sorry "Please enable the -mavx flag"
+
+ MO_VF_Shuffle l w is
+ | avx
+ -> vector_shuffle_float l w x y is
+ | otherwise
+ -> sorry "Please enable the -mavx flag"
+
MO_VF_Broadcast l W32 | avx -> vector_float_broadcast_avx l W32 x y
| sse4_1 -> vector_float_broadcast_sse l W32 x y
| otherwise
@@ -1635,6 +1649,22 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
vector_float_broadcast_sse _ _ c _
= pprPanic "Broadcast not supported for : " (pdoc platform c)
+ -- Shuffle two integer vectors using the compile-time indices given in the
+ -- last argument. Not implemented yet: applying it raises an error with a
+ -- descriptive message (rather than a bare 'undefined'), in the same style
+ -- as the 'shuffleInstructions' stub.
+ vector_shuffle_int :: Length -> Width -> CmmExpr -> CmmExpr -> [Int] -> NatM Register
+ vector_shuffle_int _l _w _v1 _v2 _is =
+   error "SIMD NCG TODO: integer vector shuffles are not yet implemented"
+
+ -- Shuffle two floating-point vectors using compile-time indices.
+ --
+ -- Both operands are first evaluated into registers with 'getSomeReg'; the
+ -- resulting code runs the code for v1, then the code for v2, then whatever
+ -- 'shuffleInstructions' produces for the chosen destination register.
+ -- A width of W32 selects FmtFloat; every other width selects FmtDouble.
+ vector_shuffle_float :: Length -> Width -> CmmExpr -> CmmExpr -> [Int] -> NatM Register
+ vector_shuffle_float l w v1 v2 is = do
+   (reg1, code1) <- getSomeReg v1
+   (reg2, code2) <- getSomeReg v2
+   let scalarFmt
+         | w == W32  = FmtFloat
+         | otherwise = FmtDouble
+       vecFmt = VecFormat l scalarFmt w
+       mkCode dst =
+         code1 `appOL` (code2 `appOL` shuffleInstructions vecFmt reg1 reg2 is dst)
+   return $ Any vecFmt mkCode
+
+ -- Produce the instruction sequence for a shuffle of two vector registers
+ -- into a destination register. This is a placeholder: all arguments are
+ -- ignored and forcing the result raises an error.
+ shuffleInstructions :: Format -> Reg -> Reg -> [Int] -> Reg -> OrdList Instr
+ shuffleInstructions _ _ _ _ _
+   = error "SIMD NCG TODO: lower to shuffle instructions (e.g. VSHUFPD)"
+
getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
sse4_1 <- sse4_1Enabled
sse2 <- sse2Enabled
=====================================
compiler/GHC/CmmToC.hs
=====================================
@@ -800,6 +800,9 @@ pprMachOp_for_C platform mop = case mop of
MO_SF_Conv _from to -> parens (machRep_F_CType to)
MO_FS_Conv _from to -> parens (machRep_S_CType platform to)
+ MO_V_Shuffle {} -> text "__builtin_shufflevector"
+ MO_VF_Shuffle {} -> text "__builtin_shufflevector"
+
MO_RelaxedRead _ -> pprTrace "offending mop:"
(text "MO_RelaxedRead")
(panic $ "PprC.pprMachOp_for_C: MO_S_MulMayOflo"
=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -1524,6 +1524,9 @@ genMachOp _ op [x] = case op of
MO_VF_Insert _ _ -> panicOp
MO_VF_Extract _ _ -> panicOp
+ MO_V_Shuffle {} -> panicOp
+ MO_VF_Shuffle {} -> panicOp
+
MO_VF_Add _ _ -> panicOp
MO_VF_Sub _ _ -> panicOp
MO_VF_Mul _ _ -> panicOp
@@ -1719,6 +1722,9 @@ genMachOp_slow opt op [x, y] = case op of
MO_VF_Broadcast {} -> panicOp
MO_VF_Insert {} -> panicOp
+ MO_V_Shuffle _ _ is -> genShuffleOp is x y
+ MO_VF_Shuffle _ _ is -> genShuffleOp is x y
+
MO_VF_Neg {} -> panicOp
MO_RelaxedRead {} -> panicOp
@@ -1831,6 +1837,21 @@ genMachOp_slow _opt op [x, y, z] = do
-- More than three expressions, invalid!
genMachOp_slow _ _ _ = panic "genMachOp_slow: More than 3 expressions in MachOp!"
+-- | Generate code for a shuffle of the two vectors @x@ and @y@ with the
+-- given compile-time indices.
+--
+-- Both operands are converted to LLVM variables and asserted to have the
+-- same LLVM type. The lowering itself is not implemented yet: after the
+-- assertion, the computation raises an error.
+--
+-- NOTE(review): @__builtin_shufflevector@ is a Clang builtin rather than an
+-- LLVM intrinsic; the final lowering probably wants LLVM's @shufflevector@
+-- instruction instead of a call -- confirm before implementing.
+genShuffleOp :: [Int] -> CmmExpr -> CmmExpr -> LlvmM ExprData
+genShuffleOp _is x y = runExprData $ do
+    vx <- exprToVarW x
+    vy <- exprToVarW y
+    let tx = getVarType vx
+        ty = getVarType vy
+    Panic.massertPpr
+      (tx == ty)
+      (vcat [ text "shuffle: mismatched arg types"
+            , ppLlvmType tx, ppLlvmType ty ])
+    -- Sketch of the intended lowering (does not typecheck yet); the index
+    -- list is unused until this is filled in, hence the underscore on '_is':
+    --   let fname = fsLit "__builtin_shufflevector"
+    --   fptr <- liftExprData $ getInstrinct fname ty [tx, ty]
+    --   doExprW tx $ Call StdCall fptr (vx: vy: map ?? _is) [ReadNone, NoUnwind]
+    error "SIMD NCG TODO: generate a call to __builtin_shufflevector"
+
-- | Generate code for a fused multiply-add operation.
genFmaOp :: CmmExpr -> CmmExpr -> CmmExpr -> LlvmM ExprData
genFmaOp x y z = runExprData $ do
=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -1,4 +1,5 @@
{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MultiWayIf #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
@@ -1092,6 +1093,13 @@ emitPrimOp cfg primop =
ty :: CmmType
ty = vecCmmCat vcat w
+ VecShuffleOp vcat n w -> \ args -> opIntoRegs $ \ [res] -> do
+ checkVecCompatibility cfg vcat n w
+ doShuffleOp ty args res
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
-- Prefetch
PrefetchByteArrayOp3 -> \args -> opIntoRegs $ \[] ->
doPrefetchByteArrayOp 3 args
@@ -2587,6 +2595,36 @@ doVecInsertOp ty src e idx res = do
wid :: Width
wid = typeWidth (vecElemType ty)
+------------------------------------------------------------------------------
+-- Shuffles
+
+-- | Emit a vector shuffle into @res@.
+--
+-- The argument list holds the two input vectors followed by the shuffle
+-- indices. Every index must be an integer literal selecting an element of
+-- the concatenation of the two inputs, i.e. @0 <= i < 2 * len@, and there
+-- must be exactly @len@ of them, where @len@ is the vector length of @ty@.
+--
+-- Panics if @ty@ is not a vector type, if fewer than two arguments are
+-- supplied, or if the indices do not satisfy the conditions above.
+doShuffleOp :: CmmType -> [CmmExpr] -> LocalReg -> FCode ()
+doShuffleOp ty (v1:v2:idxs) res
+  | not (isVecType ty)
+  = pprPanic "doShuffleOp" $
+      vcat [ text "non-vector argument type:" <+> ppr ty ]
+  -- 'traverse' rejects the whole list as soon as one index is invalid, so a
+  -- bad index can no longer be silently dropped and masked by a surplus one.
+  | Just is <- traverse idx_maybe idxs
+  , length is == len
+  = emitAssign (CmmLocal res) (CmmMachOp (mo is) [v1,v2])
+  | otherwise
+  = pprPanic "doShuffleOp" $
+      vcat [ text "expected" <+> ppr len
+               <+> text "shuffle indices, each a literal with 0 <= i <"
+               <+> ppr (2 * len) ]
+  where
+    len = vecLength ty
+    wid = typeWidth $ vecElemType ty
+    mo = if isFloatType (vecElemType ty)
+         then MO_VF_Shuffle len wid
+         else MO_V_Shuffle len wid
+    -- Range-check the 'Integer' before narrowing to 'Int', so that a huge
+    -- literal cannot wrap around into the valid range.
+    idx_maybe (CmmLit (CmmInt i _))
+      | i >= 0, i < 2 * toInteger len
+      = Just (fromInteger i)
+    idx_maybe _ = Nothing
+doShuffleOp _ _ _ =
+  panic "doShuffleOp: wrong number of arguments"
+
------------------------------------------------------------------------------
-- Helpers for translating prefetching.
=====================================
compiler/GHC/StgToJS/Prim.hs
=====================================
@@ -1197,6 +1197,7 @@ genPrim prof bound ty op = case op of
VecIndexScalarOffAddrOp _ _ _ -> unhandledPrimop op
VecReadScalarOffAddrOp _ _ _ -> unhandledPrimop op
VecWriteScalarOffAddrOp _ _ _ -> unhandledPrimop op
+ VecShuffleOp _ _ _ -> unhandledPrimop op
PrefetchByteArrayOp3 -> noOp
PrefetchMutableByteArrayOp3 -> noOp
=====================================
utils/genprimopcode/Lexer.x
=====================================
@@ -67,6 +67,7 @@ words :-
<0> "SCALAR" { mkT TSCALAR }
<0> "VECTOR" { mkT TVECTOR }
<0> "VECTUPLE" { mkT TVECTUPLE }
+ <0> "INTVECTUPLE" { mkT TINTVECTUPLE }
<0> [a-z][a-zA-Z0-9\#_]* { mkTv TLowerName }
<0> [A-Z][a-zA-Z0-9\#_]* { mkTv TUpperName }
<0> \-? [0-9][0-9]* { mkTv (TInteger . read) }
=====================================
utils/genprimopcode/Main.hs
=====================================
@@ -79,6 +79,8 @@ desugarVectorSpec i = case vecOptions i of
desugarTy (TyApp SCALAR []) = TyApp (TyCon repCon) []
desugarTy (TyApp VECTOR []) = TyApp (VecTyCon vecCons vecTyName) []
desugarTy (TyApp VECTUPLE []) = TyUTup (replicate n (TyApp (TyCon repCon) []))
+ desugarTy (TyApp INTVECTUPLE [])
+ = TyUTup (replicate n (TyApp (TyCon "Int#") []) )
desugarTy (TyApp tycon ts) = TyApp tycon (map desugarTy ts)
desugarTy t@(TyVar {}) = t
desugarTy (TyUTup ts) = TyUTup (map desugarTy ts)
=====================================
utils/genprimopcode/Parser.y
=====================================
@@ -58,6 +58,7 @@ import AccessOps
SCALAR { TSCALAR }
VECTOR { TVECTOR }
VECTUPLE { TVECTUPLE }
+ INTVECTUPLE { TINTVECTUPLE }
bytearray_access_ops { TByteArrayAccessOps }
addr_access_ops { TAddrAccessOps }
thats_all_folks { TThatsAllFolks }
@@ -215,6 +216,7 @@ pTycon : upperName { TyCon $1 }
| SCALAR { SCALAR }
| VECTOR { VECTOR }
| VECTUPLE { VECTUPLE }
+ | INTVECTUPLE { INTVECTUPLE }
{
parse :: String -> Either String Info
=====================================
utils/genprimopcode/ParserM.hs
=====================================
@@ -124,6 +124,7 @@ data Token = TEOF
| TSCALAR
| TVECTOR
| TVECTUPLE
+ | TINTVECTUPLE
deriving Show
-- Actions
=====================================
utils/genprimopcode/Syntax.hs
=====================================
@@ -87,6 +87,7 @@ data TyCon = TyCon String
| SCALAR
| VECTOR
| VECTUPLE
+ | INTVECTUPLE
| VecTyCon String String
deriving (Eq, Ord)
@@ -95,6 +96,7 @@ instance Show TyCon where
show SCALAR = "SCALAR"
show VECTOR = "VECTOR"
show VECTUPLE = "VECTUPLE"
+ show INTVECTUPLE = "INTVECTUPLE"
show (VecTyCon tc _) = tc
-- Follow definitions of Fixity and FixityDirection in GHC
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d46d56d4f417517eb1245c45c338378f59bceb4f
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d46d56d4f417517eb1245c45c338378f59bceb4f
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240609/992b86da/attachment-0001.html>
More information about the ghc-commits
mailing list