[Git][ghc/ghc][wip/ncg-simd] WIP: start adding vector shuffle primops

Sun Jun 9 12:18:57 UTC 2024


sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
d46d56d4 by sheaf at 2024-06-09T14:18:00+02:00
WIP: start adding vector shuffle primops

- - - - -


12 changed files:

- compiler/GHC/Builtin/primops.txt.pp
- compiler/GHC/Cmm/MachOp.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToC.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/StgToCmm/Prim.hs
- compiler/GHC/StgToJS/Prim.hs
- utils/genprimopcode/Lexer.x
- utils/genprimopcode/Main.hs
- utils/genprimopcode/Parser.y
- utils/genprimopcode/ParserM.hs
- utils/genprimopcode/Syntax.hs


Changes:

=====================================
compiler/GHC/Builtin/primops.txt.pp
=====================================
@@ -4190,6 +4190,13 @@ primop VecWriteScalarOffAddrOp "writeOffAddrAs#" GenPrimOp
         llvm_only = True
         vector = ALL_VECTOR_TYPES
 
+primop VecShuffleOp "shuffle#" GenPrimOp
+  VECTOR -> VECTOR -> INTVECTUPLE -> VECTOR
+  { Shuffle elements of the concatenation of the two input vectors
+  into the result vector.}
+   with llvm_only = True
+        vector = ALL_VECTOR_TYPES
+
 ------------------------------------------------------------------------
 
 section "Prefetch"


=====================================
compiler/GHC/Cmm/MachOp.hs
=====================================
@@ -171,6 +171,10 @@ data MachOp
   | MO_VU_Quot Length Width
   | MO_VU_Rem  Length Width
 
+  -- Vector shuffles
+  | MO_V_Shuffle  Length Width [Int]
+  | MO_VF_Shuffle Length Width [Int]
+
   -- Floating point vector element insertion and extraction operations
   | MO_VF_Broadcast Length Width   -- Broadcast a scalar into a vector
   | MO_VF_Insert    Length Width   -- Insert scalar into vector
@@ -494,6 +498,9 @@ machOpResultType platform mop tys =
     MO_VU_Quot l w      -> cmmVec l (cmmBits w)
     MO_VU_Rem  l w      -> cmmVec l (cmmBits w)
 
+    MO_V_Shuffle  l w _ -> cmmVec l (cmmBits w)
+    MO_VF_Shuffle l w _ -> cmmVec l (cmmFloat w)
+
     MO_VF_Broadcast l w -> cmmVec l (cmmFloat w)
     MO_VF_Insert  l w   -> cmmVec l (cmmFloat w)
     MO_VF_Extract _ w   -> cmmFloat w
@@ -576,6 +583,9 @@ machOpArgReps platform op =
     MO_FS_Conv from _   -> [from]
     MO_FF_Conv from _   -> [from]
 
+    MO_V_Shuffle  l r _ -> [vecwidth l r, vecwidth l r]
+    MO_VF_Shuffle l r _ -> [vecwidth l r, vecwidth l r]
+
     MO_V_Insert   l r   -> [vecwidth l r, r, W32]
     MO_V_Extract  l r   -> [vecwidth l r, W32]
     MO_VF_Broadcast l r -> [vecwidth l r, r]


=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1104,6 +1104,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
       MO_VS_Neg {}        -> needLlvm mop
       MO_VU_Quot {}       -> needLlvm mop
       MO_VU_Rem {}        -> needLlvm mop
+      MO_V_Shuffle {}  -> incorrectOperands
+      MO_VF_Shuffle {}  -> incorrectOperands
       MO_VF_Broadcast {}  -> incorrectOperands
       MO_VF_Insert {}     -> incorrectOperands
       MO_VF_Extract {}    -> incorrectOperands
@@ -1246,6 +1248,18 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
       MO_U_Shr rep -> shift_code rep SHR x y {-False-}
       MO_S_Shr rep -> shift_code rep SAR x y {-False-}
 
+      MO_V_Shuffle l w is
+        | avx
+        -> vector_shuffle_int l w x y is
+        | otherwise
+        -> sorry "Please enable the -mavx flag"
+
+      MO_VF_Shuffle l w is
+        | avx
+        -> vector_shuffle_float l w x y is
+        | otherwise
+        -> sorry "Please enable the -mavx flag"
+
       MO_VF_Broadcast l W32 | avx       -> vector_float_broadcast_avx l W32 x y
                             | sse4_1    -> vector_float_broadcast_sse l W32 x y
                             | otherwise
@@ -1635,6 +1649,22 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
     vector_float_broadcast_sse _ _ c _
       = pprPanic "Broadcast not supported for : " (pdoc platform c)
 
+    -- Integer vector shuffles are not lowered by the NCG yet: fail with a clear message.
+    vector_shuffle_int :: Length -> Width -> CmmExpr -> CmmExpr -> [Int] -> NatM Register
+    vector_shuffle_int _ _ _ _ _ = sorry "SIMD NCG TODO: integer vector shuffles"
+
+    -- Shuffle two float vectors: evaluate both operands into registers, then let
+    -- 'shuffleInstructions' fill the destination (sketch: VSHUFPD format imm (OpReg r1) r2).
+    vector_shuffle_float :: Length -> Width -> CmmExpr -> CmmExpr -> [Int] -> NatM Register
+    vector_shuffle_float l w v1 v2 is = do
+      (reg1, code1) <- getSomeReg v1
+      (reg2, code2) <- getSomeReg v2
+      let fmt = VecFormat l (if w == W32 then FmtFloat else FmtDouble) w
+      return $ Any fmt $ \dst -> code1 `appOL` (code2 `appOL` shuffleInstructions fmt reg1 reg2 is dst)
+
+    -- Placeholder: no shuffle instructions are emitted yet, every argument is ignored.
+    shuffleInstructions :: Format -> Reg -> Reg -> [Int] -> Reg -> OrdList Instr
+    shuffleInstructions _ _ _ _ _ = error "SIMD NCG TODO: lower to shuffle instructions (e.g. VSHUFPD)"
+
 getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
   sse4_1 <- sse4_1Enabled
   sse2   <- sse2Enabled


=====================================
compiler/GHC/CmmToC.hs
=====================================
@@ -800,6 +800,9 @@ pprMachOp_for_C platform mop = case mop of
         MO_SF_Conv _from to -> parens (machRep_F_CType to)
         MO_FS_Conv _from to -> parens (machRep_S_CType platform to)
 
+        MO_V_Shuffle {} -> text "__builtin_shufflevector"
+        MO_VF_Shuffle {} -> text "__builtin_shufflevector"
+
         MO_RelaxedRead _ -> pprTrace "offending mop:"
                                 (text "MO_RelaxedRead")
                                 (panic $ "PprC.pprMachOp_for_C: MO_S_MulMayOflo"


=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -1524,6 +1524,9 @@ genMachOp _ op [x] = case op of
     MO_VF_Insert  _ _ -> panicOp
     MO_VF_Extract _ _ -> panicOp
 
+    MO_V_Shuffle {} -> panicOp
+    MO_VF_Shuffle {} -> panicOp
+
     MO_VF_Add     _ _ -> panicOp
     MO_VF_Sub     _ _ -> panicOp
     MO_VF_Mul     _ _ -> panicOp
@@ -1719,6 +1722,9 @@ genMachOp_slow opt op [x, y] = case op of
     MO_VF_Broadcast  {} -> panicOp
     MO_VF_Insert  {} -> panicOp
 
+    MO_V_Shuffle _ _ is -> genShuffleOp is x y
+    MO_VF_Shuffle _ _ is -> genShuffleOp is x y
+
     MO_VF_Neg {} -> panicOp
 
     MO_RelaxedRead {} -> panicOp
@@ -1831,6 +1837,21 @@ genMachOp_slow _opt op [x, y, z] = do
 -- More than three expressions, invalid!
 genMachOp_slow _ _ _ = panic "genMachOp_slow: More than 3 expressions in MachOp!"
 
+-- | Vector shuffle of @x@ and @y@. WIP: checks the operand types match, then bails out.
+genShuffleOp :: [Int] -> CmmExpr -> CmmExpr -> LlvmM ExprData
+genShuffleOp _is x y = runExprData $ do
+  vx <- exprToVarW x
+  vy <- exprToVarW y
+  let tx = getVarType vx
+      ty = getVarType vy
+  Panic.massertPpr
+    (tx == ty)
+    (vcat [ text "shuffle: mismatched arg types"
+          , ppLlvmType tx, ppLlvmType ty ])
+  error "SIMD NCG TODO: generate a call to __builtin_shufflevector"
+  --fptr <- liftExprData $ getInstrinct (fsLit "__builtin_shufflevector") ty [tx, ty]
+  --doExprW tx $ Call StdCall fptr (vx: vy: map ?? _is) [ReadNone, NoUnwind]
+
 -- | Generate code for a fused multiply-add operation.
 genFmaOp :: CmmExpr -> CmmExpr -> CmmExpr -> LlvmM ExprData
 genFmaOp x y z = runExprData $ do


=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -1,4 +1,5 @@
 {-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MultiWayIf #-}
 
 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
 
@@ -1092,6 +1093,13 @@ emitPrimOp cfg primop =
     ty :: CmmType
     ty = vecCmmCat vcat w
 
+  VecShuffleOp vcat n w -> \ args -> opIntoRegs $ \ [res] -> do
+    checkVecCompatibility cfg vcat n w
+    doShuffleOp ty args res
+   where
+    ty :: CmmType  -- the whole vector type: 'doShuffleOp' rejects a bare element type
+    ty = vecVmmType vcat n w
+
 -- Prefetch
   PrefetchByteArrayOp3         -> \args -> opIntoRegs $ \[] ->
     doPrefetchByteArrayOp 3  args
@@ -2587,6 +2595,36 @@ doVecInsertOp ty src e idx res = do
     wid :: Width
     wid = typeWidth (vecElemType ty)
 
+------------------------------------------------------------------------------
+-- Shuffles
+
+-- | Assign to @res@ the shuffle of the two vectors heading the argument list.
+-- The remaining arguments are the indices: each must be an integer literal @i@
+-- with @0 <= i < 2 * len@. Panics on a non-vector type or invalid indices.
+doShuffleOp :: CmmType -> [CmmExpr] -> LocalReg -> FCode ()
+doShuffleOp ty (v1:v2:idxs) res
+  | not (isVecType ty)
+  = pprPanic "doShuffleOp" $
+      vcat [ text "non-vector argument type:" <+> ppr ty ]
+  | Just is <- traverse idx_maybe idxs
+  , length is == len
+  = emitAssign (CmmLocal res) (CmmMachOp (mo is) [v1,v2])
+  | otherwise
+  = pprPanic "doShuffleOp" $
+      vcat [ text "shuffle indices must be literals, 0 <= i <" <+> ppr (2 * len) ]
+  where
+    len = vecLength ty
+    wid = typeWidth $ vecElemType ty
+    mo | isFloatType (vecElemType ty) = MO_VF_Shuffle len wid
+       | otherwise                    = MO_V_Shuffle  len wid
+    -- Range-check the 'Integer' before narrowing it, so that an out-of-range
+    -- literal cannot wrap around into an 'Int' that looks valid.
+    idx_maybe (CmmLit (CmmInt i _))
+      | i >= 0, i < 2 * toInteger len = Just (fromInteger i)
+    idx_maybe _ = Nothing
+doShuffleOp _ _ _ =
+  panic "doShuffleOp: wrong number of arguments"
+
 ------------------------------------------------------------------------------
 -- Helpers for translating prefetching.
 


=====================================
compiler/GHC/StgToJS/Prim.hs
=====================================
@@ -1197,6 +1197,7 @@ genPrim prof bound ty op = case op of
   VecIndexScalarOffAddrOp _ _ _     -> unhandledPrimop op
   VecReadScalarOffAddrOp _ _ _      -> unhandledPrimop op
   VecWriteScalarOffAddrOp _ _ _     -> unhandledPrimop op
+  VecShuffleOp _ _ _                -> unhandledPrimop op
 
   PrefetchByteArrayOp3              -> noOp
   PrefetchMutableByteArrayOp3       -> noOp


=====================================
utils/genprimopcode/Lexer.x
=====================================
@@ -67,6 +67,7 @@ words :-
     <0>         "SCALAR"            { mkT TSCALAR }
     <0>         "VECTOR"            { mkT TVECTOR }
     <0>         "VECTUPLE"          { mkT TVECTUPLE }
+    <0>         "INTVECTUPLE"       { mkT TINTVECTUPLE }
     <0>         [a-z][a-zA-Z0-9\#_]* { mkTv TLowerName }
     <0>         [A-Z][a-zA-Z0-9\#_]* { mkTv TUpperName }
     <0>         \-? [0-9][0-9]*     { mkTv (TInteger . read) }


=====================================
utils/genprimopcode/Main.hs
=====================================
@@ -79,6 +79,8 @@ desugarVectorSpec i              = case vecOptions i of
         desugarTy (TyApp SCALAR [])   = TyApp (TyCon repCon) []
         desugarTy (TyApp VECTOR [])   = TyApp (VecTyCon vecCons vecTyName) []
         desugarTy (TyApp VECTUPLE []) = TyUTup (replicate n (TyApp (TyCon repCon) []))
+        desugarTy (TyApp INTVECTUPLE [])
+                                      = TyUTup (replicate n (TyApp (TyCon "Int#") []) )
         desugarTy (TyApp tycon ts)    = TyApp tycon (map desugarTy ts)
         desugarTy t@(TyVar {})        = t
         desugarTy (TyUTup ts)         = TyUTup (map desugarTy ts)


=====================================
utils/genprimopcode/Parser.y
=====================================
@@ -58,6 +58,7 @@ import AccessOps
     SCALAR          { TSCALAR }
     VECTOR          { TVECTOR }
     VECTUPLE        { TVECTUPLE }
+    INTVECTUPLE     { TINTVECTUPLE }
     bytearray_access_ops { TByteArrayAccessOps }
     addr_access_ops { TAddrAccessOps }
     thats_all_folks { TThatsAllFolks }
@@ -215,6 +216,7 @@ pTycon : upperName { TyCon $1 }
        | SCALAR       { SCALAR }
        | VECTOR       { VECTOR }
        | VECTUPLE     { VECTUPLE }
+       | INTVECTUPLE  { INTVECTUPLE }
 
 {
 parse :: String -> Either String Info


=====================================
utils/genprimopcode/ParserM.hs
=====================================
@@ -124,6 +124,7 @@ data Token = TEOF
            | TSCALAR
            | TVECTOR
            | TVECTUPLE
+           | TINTVECTUPLE
     deriving Show
 
 -- Actions


=====================================
utils/genprimopcode/Syntax.hs
=====================================
@@ -87,6 +87,7 @@ data TyCon = TyCon String
            | SCALAR
            | VECTOR
            | VECTUPLE
+           | INTVECTUPLE
            | VecTyCon String String
   deriving (Eq, Ord)
 
@@ -95,6 +96,7 @@ instance Show TyCon where
     show SCALAR          = "SCALAR"
     show VECTOR          = "VECTOR"
     show VECTUPLE        = "VECTUPLE"
+    show INTVECTUPLE     = "INTVECTUPLE"
     show (VecTyCon tc _) = tc
 
 -- Follow definitions of Fixity and FixityDirection in GHC



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d46d56d4f417517eb1245c45c338378f59bceb4f

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d46d56d4f417517eb1245c45c338378f59bceb4f
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240609/992b86da/attachment-0001.html>