[Git][ghc/ghc][wip/ncg-simd] WIP: lower vector shuffle instruction on X86
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Mon Jun 10 16:09:53 UTC 2024
sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC
Commits:
be86b5a0 by sheaf at 2024-06-10T18:09:34+02:00
WIP: lower vector shuffle instruction on X86
- - - - -
6 changed files:
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Ppr.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/StgToCmm/Prim.hs
- testsuite/tests/codeGen/should_run/all.T
Changes:
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1659,12 +1659,75 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
(r2, exp2) <- getSomeReg v2
let fmt = VecFormat l (if w == W32 then FmtFloat else FmtDouble) w
code dst
- = exp1 `appOL` (exp2 `appOL` shuffleInstructions fmt r1 r2 is dst) --VSHUFPD format imm (OpReg r1) r2)
+ = exp1 `appOL` (exp2 `appOL` shuffleInstructions fmt r1 r2 is dst)
return (Any fmt code)
shuffleInstructions :: Format -> Reg -> Reg -> [Int] -> Reg -> OrdList Instr
- shuffleInstructions _fmt _v1 _v2 _is _dst =
- error "SIMD NCG TODO: lower to shuffle instructions (e.g. VSHUFPD)"
+ shuffleInstructions fmt v1 v2 is dst =
+ case fmt of
+ VecFormat 2 FmtDouble _ ->
+ case is of
+ [i1, i2] -> case (i1, i2) of
+ (i, j) | i == j
+ -> let v = if i < 2 then v1 else v2
+ mov = if i == 0 || i == 2 then MOVL else MOVH
+ in unitOL (mov fmt (OpReg v) (OpReg dst)) `snocOL`
+ SHUFPD fmt (ImmInt 0b00) (OpReg dst) dst
+ (0,1) -> unitOL (MOVU fmt (OpReg v1) (OpReg dst))
+ (2,3) -> unitOL (MOVU fmt (OpReg v2) (OpReg dst))
+ (1,0) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v1) v1 dst)
+ (3,2) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v2) v2 dst)
+ (0,2) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v2) v1 dst)
+ (2,0) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v1) v2 dst)
+ (0,3) -> unitOL (VSHUFPD fmt (ImmInt 0b10) (OpReg v2) v1 dst)
+ (3,0) -> unitOL (VSHUFPD fmt (ImmInt 0b10) (OpReg v1) v2 dst)
+ (1,2) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v2) v1 dst)
+ (2,1) -> unitOL (VSHUFPD fmt (ImmInt 0b01) (OpReg v1) v2 dst)
+ (1,3) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v2) v1 dst)
+ (3,1) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v1) v2 dst)
+ _ -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 3" (ppr is)
+ _ -> pprPanic "vector shuffle: wrong number of indices (expected 2)" (ppr is)
+ VecFormat 4 FmtFloat _ ->
+ case is of
+ -- indices 0 <= i <= 7
+ [i1, i2, i3, i4]
+ | all ( <= 3 ) is
+ , let imm = i1 + i2 `shiftL` 2 + i3 `shiftL` 4 + i4 `shiftL` 6
+ -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v1) v1 dst)
+ | all ( >= 4 ) is
+ , let [j1, j2, j3, j4] = map ( subtract 4 ) is
+ imm = j1 + j2 `shiftL` 2 + j3 `shiftL` 4 + j4 `shiftL` 6
+ -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v2) v2 dst)
+ | i1 <= 3, i2 <= 3
+ , i3 >= 4, i4 >= 4
+ , let imm = i1 + i2 `shiftL` 2 + (i3 - 4) `shiftL` 4 + (i4 - 4) `shiftL` 6
+ -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v1) v2 dst)
+ | i1 >= 4, i2 >= 4
+ , i3 <= 3, i4 <= 3
+ , let imm = i3 + i4 `shiftL` 2 + (i1 - 4) `shiftL` 4 + (i2 - 4) `shiftL` 6
+ -> unitOL (VSHUFPS fmt (ImmInt imm) (OpReg v2) v1 dst)
+ | otherwise
+ ->
+ -- Fall-back code with 4 INSERTPS operations.
+ -- SIMD NCG TODO: handle more cases with better lowering.
+ let -- bits: ss_dd_zzzz
+ -- ss: pick source location
+ -- dd: pick destination location
+ -- zzzz: pick locations to be zeroed
+ insertImm src dst = shiftL ( src `mod` 4 ) 6
+ .|. shiftL dst 4
+ vec src = if src >= 4 then v2 else v1
+ in unitOL
+ (INSERTPS fmt (OpImm $ ImmInt $ insertImm i1 0 .|. 0b1110) (OpReg $ vec i1) dst)
+ `snocOL`
+ (INSERTPS fmt (OpImm $ ImmInt $ insertImm i2 1) (OpReg $ vec i2) dst)
+ `snocOL`
+ (INSERTPS fmt (OpImm $ ImmInt $ insertImm i3 2) (OpReg $ vec i3) dst)
+ `snocOL`
+ (INSERTPS fmt (OpImm $ ImmInt $ insertImm i4 3) (OpReg $ vec i4) dst)
+ _ -> pprPanic "vector shuffle: wrong number of indices (expected 4)" (ppr is)
+ _ ->
+ pprPanic "vector shuffle: unsupported format" (ppr fmt)
getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
sse4_1 <- sse4_1Enabled
=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -378,9 +378,9 @@ data Instr
| VPSHUFD Format Imm Operand Reg
| PSHUFD Format Imm Operand Reg
| SHUFPS Format Imm Operand Reg
- | VSHUFPS Format Imm Operand Reg
+ | VSHUFPS Format Imm Operand Reg Reg
| SHUFPD Format Imm Operand Reg
- | VSHUFPD Format Imm Operand Reg
+ | VSHUFPD Format Imm Operand Reg Reg
-- SIMD NCG TODO: don't store the Format (or only what we need)
-- in order to emit these instructions.
@@ -522,10 +522,10 @@ regUsageOfInstr platform instr
-> mkRU fmt (use_R src [dst]) [dst]
SHUFPS fmt _off src dst
-> mkRU fmt (use_R src [dst]) [dst]
- VSHUFPD fmt _off src dst
- -> mkRU fmt (use_R src [dst]) [dst]
- VSHUFPS fmt _off src dst
- -> mkRU fmt (use_R src [dst]) [dst]
+ VSHUFPD fmt _off src1 src2 dst
+ -> mkRU fmt (use_R src1 [src2]) [dst]
+ VSHUFPS fmt _off src1 src2 dst
+ -> mkRU fmt (use_R src1 [src2]) [dst]
PSLLDQ fmt off dst -> mkRU fmt (use_R off []) [dst]
@@ -729,10 +729,10 @@ patchRegsOfInstr instr env
-> SHUFPS fmt off (patchOp src) (env dst)
SHUFPD fmt off src dst
-> SHUFPD fmt off (patchOp src) (env dst)
- VSHUFPS fmt off src dst
- -> VSHUFPS fmt off (patchOp src) (env dst)
- VSHUFPD fmt off src dst
- -> VSHUFPD fmt off (patchOp src) (env dst)
+ VSHUFPS fmt off src1 src2 dst
+ -> VSHUFPS fmt off (patchOp src1) (env src2) (env dst)
+ VSHUFPD fmt off src1 src2 dst
+ -> VSHUFPD fmt off (patchOp src1) (env src2) (env dst)
PSLLDQ fmt off dst
-> PSLLDQ fmt (patchOp off) (env dst)
=====================================
compiler/GHC/CmmToAsm/X86/Ppr.hs
=====================================
@@ -973,10 +973,10 @@ pprInstr platform i = case i of
-> pprShuf (text "shufps") format offset src dst
SHUFPD format offset src dst
-> pprShuf (text "shufpd") format offset src dst
- VSHUFPS format offset src dst
- -> pprShuf (text "vshufps") format offset src dst
- VSHUFPD format offset src dst
- -> pprShuf (text "vshufpd") format offset src dst
+ VSHUFPS format offset src1 src2 dst
+ -> pprVShuf (text "vshufps") format offset src1 src2 dst
+ VSHUFPD format offset src1 src2 dst
+ -> pprVShuf (text "vshufpd") format offset src1 src2 dst
PSLLDQ format offset dst
-> pprShiftLeft (text "pslldq") format offset dst
PSRLDQ format offset dst
@@ -1248,6 +1248,19 @@ pprInstr platform i = case i of
pprReg platform format reg3
]
+ pprVShuf :: Line doc -> Format -> Imm -> Operand -> Reg -> Reg -> doc
+ pprVShuf name format imm1 op2 reg3 reg4
+ = line $ hcat [
+ pprGenMnemonic name format,
+ pprDollImm imm1,
+ comma,
+ pprOperand platform format op2,
+ comma,
+ pprReg platform format reg3,
+ comma,
+ pprReg platform format reg4
+ ]
+
pprShiftLeft :: Line doc -> Format -> Operand -> Reg -> doc
pprShiftLeft name format off reg
= line $ hcat [
=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -1841,16 +1841,16 @@ genShuffleOp :: [Int] -> CmmExpr -> CmmExpr -> LlvmM ExprData
genShuffleOp is x y = runExprData $ do
vx <- exprToVarW x
vy <- exprToVarW y
+ mask <- exprToVarW $ CmmLit $ CmmVec $ map ((`CmmInt` W32) . fromIntegral) is
let tx = getVarType vx
ty = getVarType vy
Panic.massertPpr
(tx == ty)
(vcat [ text "shuffle: mismatched arg types"
, ppLlvmType tx, ppLlvmType ty ])
- let fname = fsLit "__builtin_shufflevector"
- error "SIMD NCG TODO: generate a call to __builtin_shufflevector"
- --fptr <- liftExprData $ getInstrinct fname ty [tx, ty]
- --doExprW tx $ Call StdCall fptr (vx: vy: map ?? is) [ReadNone, NoUnwind]
+ let fname = fsLit "shufflevector"
+ fptr <- liftExprData $ getInstrinct fname ty [tx, ty]
+ doExprW tx $ Call StdCall fptr [vx, vy, mask] [ReadNone, NoUnwind]
-- | Generate code for a fused multiply-add operation.
genFmaOp :: CmmExpr -> CmmExpr -> CmmExpr -> LlvmM ExprData
=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -1095,10 +1095,7 @@ emitPrimOp cfg primop =
VecShuffleOp vcat n w -> \ args -> opIntoRegs $ \ [res] -> do
checkVecCompatibility cfg vcat n w
- doShuffleOp ty args res
- where
- ty :: CmmType
- ty = vecCmmCat vcat w
+ doShuffleOp (vecVmmType vcat n w) args res
-- Prefetch
PrefetchByteArrayOp3 -> \args -> opIntoRegs $ \[] ->
=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -101,6 +101,8 @@ test('simd004', when(unregisterised(), skip), compile_and_run, [''])
test('simd005', when(unregisterised(), skip), compile_and_run, [''])
test('simd006', when(unregisterised(), skip), compile_and_run, [''])
test('simd007', when(unregisterised(), skip), compile_and_run, [''])
+test('simd009', [when(unregisterised(), skip), extra_files(['Simd009b.hs', 'Simd009c.hs'])]
+ ,multimod_compile_and_run, ['simd009', ''])
test('T1852', normal, compile_and_run, [''])
test('T1861', extra_run_opts('0'), compile_and_run, [''])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/be86b5a0e500a8654bde98ef347d14b69a27cca1
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/be86b5a0e500a8654bde98ef347d14b69a27cca1
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240610/29adc99f/attachment-0001.html>
More information about the ghc-commits
mailing list