[Git][ghc/ghc][wip/ncg-simd] 3 commits: TODO: MOV stuff
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Thu Jun 13 15:10:13 UTC 2024
sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC
Commits:
64611d0d by sheaf at 2024-06-13T13:38:09+02:00
TODO: MOV stuff
- - - - -
f5032fc9 by sheaf at 2024-06-13T17:09:40+02:00
X86 NCG SIMD: refactoring
- - - - -
68d896f4 by sheaf at 2024-06-13T17:09:54+02:00
SIMD tests: fixup
- - - - -
11 changed files:
- compiler/GHC/CmmToAsm/Format.hs
- compiler/GHC/CmmToAsm/PPC/Ppr.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Ppr.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/Platform/Reg/Class.hs
- compiler/GHC/StgToCmm/Prim.hs
- testsuite/tests/simd/should_run/Simd009c.hs
- testsuite/tests/simd/should_run/all.T
Changes:
=====================================
compiler/GHC/CmmToAsm/Format.hs
=====================================
@@ -23,6 +23,7 @@ module GHC.CmmToAsm.Format (
cmmTypeFormat,
formatToWidth,
formatInBytes,
+ scalarWidth,
isIntScalarFormat,
)
@@ -79,7 +80,6 @@ data Format
| FF64
| VecFormat !Length -- ^ number of elements
!ScalarFormat -- ^ format of each element
- !Width -- ^ size of each element
deriving (Show, Eq, Ord)
pattern IntegerFormat :: Format
@@ -98,13 +98,14 @@ isIntegerFormat = \case
instance Outputable Format where
ppr fmt = text (show fmt)
-data ScalarFormat = FmtInt8
- | FmtInt16
- | FmtInt32
- | FmtInt64
- | FmtFloat
- | FmtDouble
- deriving (Show, Eq, Ord)
+data ScalarFormat
+ = FmtInt8
+ | FmtInt16
+ | FmtInt32
+ | FmtInt64
+ | FmtFloat
+ | FmtDouble
+ deriving (Show, Eq, Ord)
isIntScalarFormat :: ScalarFormat -> Bool
isIntScalarFormat FmtInt8 = True
@@ -165,27 +166,37 @@ vecFormat ty =
elemTy = vecElemType ty
in if isFloatType elemTy
then case typeWidth elemTy of
- W32 -> VecFormat l FmtFloat W32
- W64 -> VecFormat l FmtDouble W64
+ W32 -> VecFormat l FmtFloat
+ W64 -> VecFormat l FmtDouble
_ -> pprPanic "Incorrect vector element width" (ppr elemTy)
else case typeWidth elemTy of
- W8 -> VecFormat l FmtInt8 W8
- W16 -> VecFormat l FmtInt16 W16
- W32 -> VecFormat l FmtInt32 W32
- W64 -> VecFormat l FmtInt64 W64
+ W8 -> VecFormat l FmtInt8
+ W16 -> VecFormat l FmtInt16
+ W32 -> VecFormat l FmtInt32
+ W64 -> VecFormat l FmtInt64
_ -> pprPanic "Incorrect vector element width" (ppr elemTy)
-- | Get the Width of a Format.
formatToWidth :: Format -> Width
formatToWidth format
= case format of
- II8 -> W8
- II16 -> W16
- II32 -> W32
- II64 -> W64
- FF32 -> W32
- FF64 -> W64
- VecFormat l _ w -> widthFromBytes (l*widthInBytes w)
+ II8 -> W8
+ II16 -> W16
+ II32 -> W32
+ II64 -> W64
+ FF32 -> W32
+ FF64 -> W64
+ VecFormat l s ->
+ widthFromBytes (l * widthInBytes (scalarWidth s))
+
+scalarWidth :: ScalarFormat -> Width
+scalarWidth = \case
+ FmtInt8 -> W8
+ FmtInt16 -> W16
+ FmtInt32 -> W32
+ FmtInt64 -> W64
+ FmtFloat -> W32
+ FmtDouble -> W64
formatInBytes :: Format -> Int
formatInBytes = widthInBytes . formatToWidth
=====================================
compiler/GHC/CmmToAsm/PPC/Ppr.hs
=====================================
@@ -221,7 +221,7 @@ pprFormat x
II64 -> text "d"
FF32 -> text "fs"
FF64 -> text "fd"
- VecFormat _ _ _ -> panic "PPC pprFormat: VecFormat"
+ VecFormat {} -> panic "PPC pprFormat: VecFormat"
pprCond :: IsLine doc => Cond -> doc
pprCond c
@@ -384,7 +384,7 @@ pprInstr platform instr = case instr of
II64 -> text "d"
FF32 -> text "fs"
FF64 -> text "fd"
- VecFormat _ _ _ -> panic "PPC pprInstr: VecFormat"
+ VecFormat {} -> panic "PPC pprInstr: VecFormat"
),
case addr of AddrRegImm _ _ -> empty
AddrRegReg _ _ -> char 'x',
@@ -427,7 +427,7 @@ pprInstr platform instr = case instr of
II64 -> text "d"
FF32 -> text "fs"
FF64 -> text "fd"
- VecFormat _ _ _ -> panic "PPC pprInstr: VecFormat"
+ VecFormat {} -> panic "PPC pprInstr: VecFormat"
),
case addr of AddrRegImm _ _ -> empty
AddrRegReg _ _ -> char 'x',
=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -399,17 +399,6 @@ linearRA block_live block_id = go [] []
(accInstr', new_fixups) <- raInsn block_live accInstr block_id instr
go accInstr' (new_fixups ++ accFixups) instrs
--- TODO: move to proper place
-classOfReg :: Platform -> Reg -> RegClass
-classOfReg platform (RegReal x) = targetClassOfRealReg platform x
-classOfReg _ (RegVirtual x) = classOfVirtualReg x
-
--- TODO: move to proper place
--- TODO: on some platforms we can be more lenient,
--- e.g. on X86 Float/Double/Vec are compatible
-compatibleReg :: Platform -> Reg -> Reg -> Bool
-compatibleReg platform r1 r2 = classOfReg platform r1 == classOfReg platform r2
-
-- | Do allocation for a single instruction.
raInsn
:: OutputableRegConstraint freeRegs instr
@@ -434,7 +423,6 @@ raInsn _ new_instrs _ (LiveInstr ii@(Instr i) Nothing)
raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
= do
assig <- getAssigR :: RegM freeRegs (UniqFM Reg Loc)
- platform <- getPlatform
-- If we have a reg->reg move between virtual registers, where the
-- src register is not live after this instruction, and the dst
@@ -447,8 +435,7 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
Just (src,dst) | Just (_, fmt) <- lookupUFM (liveDieRead live) src,
isVirtualReg dst,
not (dst `elemUFM` assig),
- isRealReg src || isInReg src assig,
- compatibleReg platform src dst -> do
+ isRealReg src || isInReg src assig -> do
case src of
RegReal rr -> setAssigR (addToUFM assig dst (InReg $ RealRegUsage rr fmt))
-- if src is a fixed reg, then we just map dest to this
@@ -888,7 +875,9 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
-- trimmed down datatype that only keeps track of e.g.
-- how many stack slots something uses up.
vr_fmt = case r of
- VirtualRegVec {} -> VecFormat 2 FmtDouble W64
+ VirtualRegVec {} -> VecFormat 2 FmtDouble
+ -- SIMD NCG TODO: handle 256 and 512 by adding
+ -- new virtual register constructors.
_ -> II64
-- Can we put the variable into a register it already was?
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1046,6 +1046,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_WF_Bitcast W32 -> bitcast II32 FF32 x
MO_FW_Bitcast W64 -> bitcast FF64 II64 x
MO_WF_Bitcast W64 -> bitcast II64 FF64 x
+ MO_WF_Bitcast {} -> incorrectOperands
+ MO_FW_Bitcast {} -> incorrectOperands
-- widenings
MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
@@ -1084,42 +1086,84 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x
MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
-
-
MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
+ MO_FF_Conv {} -> incorrectOperands
+ MO_UU_Conv {} -> incorrectOperands
+ MO_SS_Conv {} -> incorrectOperands
+ MO_XX_Conv {} -> incorrectOperands
+
MO_FS_Truncate from to -> coerceFP2Int from to x
MO_SF_Round from to -> coerceInt2FP from to x
- -- SIMD NCG TODO (optional): implement these vector operations as well
- -- (they are integer vector operations).
- MO_V_Insert {} -> needLlvm mop
- MO_V_Extract {} -> needLlvm mop
- MO_V_Add {} -> needLlvm mop
- MO_V_Sub {} -> needLlvm mop
- MO_V_Mul {} -> needLlvm mop
- MO_VS_Quot {} -> needLlvm mop
- MO_VS_Rem {} -> needLlvm mop
- MO_VS_Neg {} -> needLlvm mop
- MO_VU_Quot {} -> needLlvm mop
- MO_VU_Rem {} -> needLlvm mop
- MO_V_Shuffle {} -> incorrectOperands
- MO_VF_Shuffle {} -> incorrectOperands
- MO_VF_Broadcast {} -> incorrectOperands
- MO_VF_Insert {} -> incorrectOperands
+ MO_VF_Neg l w | avx -> vector_float_negate_avx l w x
+ | sse && sse2 -> vector_float_negate_sse l w x
+ | otherwise
+ -> sorry "Please enable the -mavx or -msse, -msse2 flag"
+ -- SIMD NCG TODO
+ MO_VS_Neg {} -> needLlvm mop
+
+ -- Binary MachOps
+ MO_Add {} -> incorrectOperands
+ MO_Sub {} -> incorrectOperands
+ MO_Eq {} -> incorrectOperands
+ MO_Ne {} -> incorrectOperands
+ MO_Mul {} -> incorrectOperands
+ MO_S_MulMayOflo {} -> incorrectOperands
+ MO_S_Quot {} -> incorrectOperands
+ MO_S_Rem {} -> incorrectOperands
+ MO_U_Quot {} -> incorrectOperands
+ MO_U_Rem {} -> incorrectOperands
+ MO_S_Ge {} -> incorrectOperands
+ MO_S_Le {} -> incorrectOperands
+ MO_S_Gt {} -> incorrectOperands
+ MO_S_Lt {} -> incorrectOperands
+ MO_U_Ge {} -> incorrectOperands
+ MO_U_Le {} -> incorrectOperands
+ MO_U_Gt {} -> incorrectOperands
+ MO_U_Lt {} -> incorrectOperands
+ MO_F_Add {} -> incorrectOperands
+ MO_F_Sub {} -> incorrectOperands
+ MO_F_Mul {} -> incorrectOperands
+ MO_F_Quot {} -> incorrectOperands
+ MO_F_Eq {} -> incorrectOperands
+ MO_F_Ne {} -> incorrectOperands
+ MO_F_Ge {} -> incorrectOperands
+ MO_F_Le {} -> incorrectOperands
+ MO_F_Gt {} -> incorrectOperands
+ MO_F_Lt {} -> incorrectOperands
+ MO_And {} -> incorrectOperands
+ MO_Or {} -> incorrectOperands
+ MO_Xor {} -> incorrectOperands
+ MO_Shl {} -> incorrectOperands
+ MO_U_Shr {} -> incorrectOperands
+ MO_S_Shr {} -> incorrectOperands
+
+ MO_V_Extract {} -> incorrectOperands
+ MO_V_Add {} -> incorrectOperands
+ MO_V_Sub {} -> incorrectOperands
+ MO_V_Mul {} -> incorrectOperands
+ MO_VS_Quot {} -> incorrectOperands
+ MO_VS_Rem {} -> incorrectOperands
+ MO_VU_Quot {} -> incorrectOperands
+ MO_VU_Rem {} -> incorrectOperands
+ MO_V_Shuffle {} -> incorrectOperands
+ MO_VF_Shuffle {} -> incorrectOperands
+
MO_VF_Extract {} -> incorrectOperands
MO_VF_Add {} -> incorrectOperands
MO_VF_Sub {} -> incorrectOperands
MO_VF_Mul {} -> incorrectOperands
MO_VF_Quot {} -> incorrectOperands
+ MO_V_Broadcast {} -> incorrectOperands
+ MO_VF_Broadcast {} -> incorrectOperands
- MO_VF_Neg l w | avx -> vector_float_negate_avx l w x
- | sse && sse2 -> vector_float_negate_sse l w x
- | otherwise
- -> sorry "Please enable the -mavx or -msse, -msse2 flag"
-
+ -- Ternary MachOps
+ MO_FMA {} -> incorrectOperands
+ MO_VF_Insert {} -> incorrectOperands
+ MO_V_Insert {} -> incorrectOperands
- _other -> pprPanic "getRegister" (pprMachOp mop)
+ --_other -> pprPanic "getRegister" (pprMachOp mop)
where
triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
triv_ucode instr format = trivialUCode format (instr format) x
@@ -1163,12 +1207,12 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
vector_float_negate_avx :: Length -> Width -> CmmExpr -> NatM Register
vector_float_negate_avx l w expr = do
- tmp <- getNewRegNat (VecFormat l FmtFloat w)
+ tmp <- getNewRegNat (VecFormat l FmtFloat)
(reg, exp) <- getSomeReg expr
Amode addr addr_code <- memConstant (mkAlignment $ widthInBytes W32) (CmmFloat 0.0 W32)
let format = case w of
- W32 -> VecFormat l FmtFloat w
- W64 -> VecFormat l FmtDouble w
+ W32 -> VecFormat l FmtFloat
+ W64 -> VecFormat l FmtDouble
_ -> pprPanic "Cannot negate vector of width" (ppr w)
code dst = case w of
W32 -> exp `appOL` addr_code `snocOL`
@@ -1183,11 +1227,11 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
vector_float_negate_sse :: Length -> Width -> CmmExpr -> NatM Register
vector_float_negate_sse l w expr = do
- tmp <- getNewRegNat (VecFormat l FmtFloat w)
+ tmp <- getNewRegNat (VecFormat l FmtFloat)
(reg, exp) <- getSomeReg expr
let format = case w of
- W32 -> VecFormat l FmtFloat w
- W64 -> VecFormat l FmtDouble w
+ W32 -> VecFormat l FmtFloat
+ W64 -> VecFormat l FmtDouble
_ -> pprPanic "Cannot negate vector of width" (ppr w)
code dst = exp `snocOL`
(XOR format (OpReg tmp) (OpReg tmp)) `snocOL`
@@ -1253,12 +1297,6 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_U_Shr rep -> shift_code rep SHR x y {-False-}
MO_S_Shr rep -> shift_code rep SAR x y {-False-}
- MO_V_Shuffle l w is
- | avx
- -> vector_shuffle_int l w x y is
- | otherwise
- -> sorry "Please enable the -mavx flag"
-
MO_VF_Shuffle l w is
| avx
-> vector_shuffle_float l w x y is
@@ -1269,23 +1307,27 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
| sse4_1 -> vector_float_broadcast_sse l W32 x y
| otherwise
-> sorry "Please enable the -mavx or -msse4 flag"
-
MO_VF_Broadcast l W64 | sse2 -> vector_float_broadcast_avx l W64 x y
| otherwise -> sorry "Please enable the -msse2 flag"
-
+ MO_VF_Broadcast {} -> incorrectOperands
+
MO_V_Broadcast l W64 | sse2 -> vector_int_broadcast l W64 x y
| otherwise -> sorry "Please enable the -msse2 flag"
+ -- SIMD NCG TODO: W32, W16, W8
+ MO_V_Broadcast {} -> needLlvm mop
MO_VF_Extract l W32 | avx -> vector_float_unpack l W32 x y
| sse -> vector_float_unpack_sse l W32 x y
| otherwise
-> sorry "Please enable the -mavx or -msse flag"
-
MO_VF_Extract l W64 | sse2 -> vector_float_unpack l W64 x y
| otherwise -> sorry "Please enable the -msse2 flag"
-
+ MO_VF_Extract {} -> incorrectOperands
+
MO_V_Extract l W64 | sse2 -> vector_int_unpack_sse l W64 x y
| otherwise -> sorry "Please enable the -msse2 flag"
+ -- SIMD NCG TODO: W32, W16, W8
+ MO_V_Extract {} -> needLlvm mop
MO_VF_Add l w | avx -> vector_float_op_avx VA_Add l w x y
| sse && w == W32 -> vector_float_op_sse VA_Add l w x y
@@ -1311,10 +1353,38 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
| otherwise
-> sorry "Please enable the -mavx or -msse flag"
- MO_VF_Insert {} -> incorrectOperands
+ -- SIMD NCG TODO: integer vector operations
+ MO_V_Shuffle {} -> needLlvm mop
+ MO_V_Add {} -> needLlvm mop
+ MO_V_Sub {} -> needLlvm mop
+ MO_V_Mul {} -> needLlvm mop
+ MO_VS_Quot {} -> needLlvm mop
+ MO_VS_Rem {} -> needLlvm mop
+ MO_VU_Quot {} -> needLlvm mop
+ MO_VU_Rem {} -> needLlvm mop
+
+ -- Unary MachOps
+ MO_S_Neg {} -> incorrectOperands
+ MO_F_Neg {} -> incorrectOperands
+ MO_Not {} -> incorrectOperands
+ MO_SF_Round {} -> incorrectOperands
+ MO_FS_Truncate {} -> incorrectOperands
+ MO_SS_Conv {} -> incorrectOperands
+ MO_XX_Conv {} -> incorrectOperands
+ MO_FF_Conv {} -> incorrectOperands
+ MO_UU_Conv {} -> incorrectOperands
+ MO_WF_Bitcast {} -> incorrectOperands
+ MO_FW_Bitcast {} -> incorrectOperands
+ MO_RelaxedRead {} -> incorrectOperands
+ MO_AlignmentCheck {} -> incorrectOperands
+ MO_VS_Neg {} -> incorrectOperands
+ MO_VF_Neg {} -> incorrectOperands
+
+ -- Ternary MachOps
+ MO_FMA {} -> incorrectOperands
+ MO_V_Insert {} -> incorrectOperands
+ MO_VF_Insert {} -> incorrectOperands
-
- _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
where
--------------------
triv_op width instr = trivialCode width op (Just op) x y
@@ -1511,8 +1581,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
(reg1, exp1) <- getSomeReg expr1
(reg2, exp2) <- getSomeReg expr2
let format = case w of
- W32 -> VecFormat l FmtFloat W32
- W64 -> VecFormat l FmtDouble W64
+ W32 -> VecFormat l FmtFloat
+ W64 -> VecFormat l FmtDouble
_ -> pprPanic "Operation not supported for width " (ppr w)
code dst = case op of
VA_Add -> arithInstr VADD
@@ -1535,8 +1605,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
(reg1, exp1) <- getSomeReg expr1
(reg2, exp2) <- getSomeReg expr2
let format = case w of
- W32 -> VecFormat l FmtFloat W32
- W64 -> VecFormat l FmtDouble W64
+ W32 -> VecFormat l FmtFloat
+ W64 -> VecFormat l FmtDouble
_ -> pprPanic "Operation not supported for width " (ppr w)
code dst = case op of
VA_Add -> arithInstr ADD
@@ -1559,26 +1629,26 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
vector_float_unpack l W32 expr (CmmLit lit)
= do
(r, exp) <- getSomeReg expr
- let format = VecFormat l FmtFloat W32
+ let format = VecFormat l FmtFloat
imm = litToImm lit
code dst
= case lit of
- CmmInt 0 _ -> exp `snocOL` (MOV FF32 (OpReg r) (OpReg dst))
+ CmmInt 0 _ -> exp `snocOL` (MOVSD FF32 (OpReg r) (OpReg dst))
CmmInt _ _ -> exp `snocOL` (VPSHUFD format imm (OpReg r) dst)
_ -> panic "Error in offset while unpacking"
return (Any format code)
vector_float_unpack l W64 expr (CmmLit lit)
= do
(r, exp) <- getSomeReg expr
- let format = VecFormat l FmtDouble W64
+ let format = VecFormat l FmtDouble
addr = spRel platform 0
code dst
= case lit of
CmmInt 0 _ -> exp `snocOL`
- (MOV FF64 (OpReg r) (OpReg dst))
+ (MOVSD FF64 (OpReg r) (OpReg dst))
CmmInt 1 _ -> exp `snocOL`
(MOVH format (OpReg r) (OpAddr addr)) `snocOL`
- (MOV FF64 (OpAddr addr) (OpReg dst))
+ (MOVSD FF64 (OpAddr addr) (OpReg dst))
-- SIMD NCG TODO: avoid going via the stack here?
_ -> panic "Error in offset while unpacking"
return (Any format code)
@@ -1594,7 +1664,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
vector_float_unpack_sse l W32 expr (CmmLit lit)
= do
(r,exp) <- getSomeReg expr
- let format = VecFormat l FmtFloat W32
+ let format = VecFormat l FmtFloat
imm = litToImm lit
code dst
= case lit of
@@ -1614,7 +1684,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
= do
fn <- getAnyReg expr1
(r', exp) <- getSomeReg expr2
- let f = VecFormat len FmtFloat W32
+ let f = VecFormat len FmtFloat
addr = spRel platform 0
in return $ Any f (\r -> exp `appOL`
(fn r) `snocOL`
@@ -1624,7 +1694,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
= do
fn <- getAnyReg expr1
(r', exp) <- getSomeReg expr2
- let f = VecFormat len FmtDouble W64
+ let f = VecFormat len FmtDouble
addr = spRel platform 0
in return $ Any f (\r -> exp `appOL`
(fn r) `snocOL`
@@ -1643,15 +1713,15 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
= do
fn <- getAnyReg expr1 -- destination
(r, exp) <- getSomeReg expr2 -- source
- let f = VecFormat len FmtFloat W32
+ let f = VecFormat len FmtFloat
addr = spRel platform 0
code dst = exp `appOL`
(fn dst) `snocOL`
(MOVU f (OpReg r) (OpAddr addr)) `snocOL`
(insertps 0) `snocOL`
- (insertps 8) `snocOL`
(insertps 16) `snocOL`
- (insertps 24)
+ (insertps 32) `snocOL`
+ (insertps 48)
where
insertps off =
INSERTPS f (litToImm $ CmmInt off W32) (OpAddr addr) dst
@@ -1659,7 +1729,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
in return $ Any f code
vector_float_broadcast_sse _ _ c _
= pprPanic "Broadcast not supported for : " (pdoc platform c)
-
+
vector_int_broadcast :: Length
-> Width
-> CmmExpr
@@ -1669,10 +1739,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
= do
fn <- getAnyReg expr1
(val, exp) <- getSomeReg expr2
- let fmt = VecFormat len FmtInt64 W64
+ let fmt = VecFormat len FmtInt64
return $ Any fmt (\dst -> exp `appOL`
(fn dst) `snocOL`
- (MOV fmt (OpReg val) (OpReg dst)) `snocOL`
+ (MOV II64 (OpReg val) (OpReg dst)) `snocOL`
(PUNPCKLQDQ fmt (OpReg dst) dst))
vector_int_broadcast _ _ c _
= pprPanic "Broadcast not supported for : " (pdoc platform c)
@@ -1686,7 +1756,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
vector_int_unpack_sse l at 2 W64 expr (CmmLit lit)
= do
(r, exp) <- getSomeReg expr
- let fmt = VecFormat l FmtInt64 W64
+ let fmt = VecFormat l FmtInt64
tmp <- getNewRegNat fmt
let code dst
= case lit of
@@ -1700,14 +1770,11 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
vector_int_unpack_sse _ w c e
= pprPanic "Unpack not supported for : " (pdoc platform c $$ pdoc platform e $$ ppr w)
- vector_shuffle_int :: Length -> Width -> CmmExpr -> CmmExpr -> [Int] -> NatM Register
- vector_shuffle_int = undefined
-
vector_shuffle_float :: Length -> Width -> CmmExpr -> CmmExpr -> [Int] -> NatM Register
vector_shuffle_float l w v1 v2 is = do
(r1, exp1) <- getSomeReg v1
(r2, exp2) <- getSomeReg v2
- let fmt = VecFormat l (if w == W32 then FmtFloat else FmtDouble) w
+ let fmt = VecFormat l (if w == W32 then FmtFloat else FmtDouble)
code dst
= exp1 `appOL` (exp2 `appOL` shuffleInstructions fmt r1 r2 is dst)
return (Any fmt code)
@@ -1715,7 +1782,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
shuffleInstructions :: Format -> Reg -> Reg -> [Int] -> Reg -> OrdList Instr
shuffleInstructions fmt v1 v2 is dst =
case fmt of
- VecFormat 2 FmtDouble _ ->
+ VecFormat 2 FmtDouble ->
case is of
[i1, i2] -> case (i1, i2) of
(0,0) -> unitOL (VSHUFPD fmt (ImmInt 0b00) (OpReg v1) v1 dst)
@@ -1736,7 +1803,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
(3,1) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v1) v2 dst)
_ -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 3" (ppr is)
_ -> pprPanic "vector shuffle: wrong number of indices (expected 2)" (ppr is)
- VecFormat 4 FmtFloat _ ->
+ VecFormat 4 FmtFloat ->
case is of
-- indices 0 <= i <= 7
[i1, i2, i3, i4]
@@ -1815,7 +1882,7 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
= do
fn <- getAnyReg vecExpr
(r, exp) <- getSomeReg valExpr
- let fmt = VecFormat len FmtFloat W32
+ let fmt = VecFormat len FmtFloat
imm = litToImm offset
code dst = exp `appOL`
(fn dst) `snocOL`
@@ -1826,17 +1893,17 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
= do
(valReg, valExp) <- getSomeReg valExpr
(vecReg, vecExp) <- getSomeReg vecExpr
- let fmt = VecFormat len FmtDouble W64
+ let fmt = VecFormat len FmtDouble
code dst
= case offset of
-- TODO: why not just index by element rather than by byte?
CmmInt 0 _ -> valExp `appOL`
vecExp `snocOL`
- (MOV fmt (OpReg valReg) (OpReg dst)) `snocOL`
+ (MOVSD FF64 (OpReg valReg) (OpReg dst)) `snocOL`
(SHUFPD fmt (ImmInt 0b00) (OpReg vecReg) dst)
- CmmInt 8 _ -> valExp `appOL`
+ CmmInt 16 _ -> valExp `appOL`
vecExp `snocOL`
- (MOV fmt (OpReg vecReg) (OpReg dst)) `snocOL`
+ (MOVSD FF64 (OpReg vecReg) (OpReg dst)) `snocOL`
(SHUFPD fmt (ImmInt 0b00) (OpReg valReg) dst)
_ -> pprPanic "MO_VF_Insert DoubleX2: unsupported offset" (ppr offset)
in return $ Any fmt code
@@ -1865,7 +1932,7 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
pprTraceM "vecExpr:" (pdoc platform vecExpr)
(valReg, valExp) <- getSomeReg valExpr
(vecReg, vecExp) <- getSomeReg vecExpr
- let fmt = VecFormat len FmtInt64 W64
+ let fmt = VecFormat len FmtInt64
tmp <- getNewRegNat fmt
pprTraceM "tmp:" (ppr tmp)
let code dst
@@ -1875,11 +1942,11 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
(MOVHLPS fmt (OpReg vecReg) tmp) `snocOL`
(MOV II64 (OpReg valReg) (OpReg dst)) `snocOL`
(PUNPCKLQDQ fmt (OpReg tmp) dst)
- CmmInt 8 _ -> valExp `appOL`
- vecExp `snocOL`
- (MOV fmt (OpReg vecReg) (OpReg dst)) `snocOL`
- (MOV II64 (OpReg valReg) (OpReg tmp)) `snocOL`
- (PUNPCKLQDQ fmt (OpReg tmp) dst)
+ CmmInt 16 _ -> valExp `appOL`
+ vecExp `snocOL`
+ (MOV fmt (OpReg vecReg) (OpReg dst)) `snocOL`
+ (MOV II64 (OpReg valReg) (OpReg tmp)) `snocOL`
+ (PUNPCKLQDQ fmt (OpReg tmp) dst)
_ -> pprPanic "MO_V_Insert Int64X2: unsupported offset" (ppr offset)
in return $ Any fmt code
vector_int_insert_sse len width _ _ offset
@@ -2024,13 +2091,16 @@ getAnyReg expr = do
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code) = return code
-anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
+anyReg (Fixed rep reg fcode) = do
+ platform <- getPlatform
+ return (\dst -> fcode `snocOL` mkRegRegMoveInstr platform rep reg dst)
-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
+ platform <- getPlatform
is32Bit <- is32BitPlatform
if is32Bit
then do r <- getRegister expr
@@ -2042,7 +2112,7 @@ getByteReg expr = do
| isVirtualReg reg -> return (reg,code)
| otherwise -> do
tmp <- getNewRegNat rep
- return (tmp, code `snocOL` reg2reg rep reg tmp)
+ return (tmp, code `snocOL` mkRegRegMoveInstr platform rep reg tmp)
-- ToDo: could optimise slightly by checking for
-- byte-addressable real registers, but that will
-- happen very rarely if at all.
@@ -2063,18 +2133,10 @@ getNonClobberedReg expr = do
| reg `elem` instrClobberedRegs platform
-> do
tmp <- getNewRegNat rep
- return (tmp, code `snocOL` reg2reg rep reg tmp)
+ return (tmp, code `snocOL` mkRegRegMoveInstr platform rep reg tmp)
| otherwise ->
return (reg, code)
-reg2reg :: Format -> Reg -> Reg -> Instr
-reg2reg format@(VecFormat _ FmtFloat W32) src dst
- = VMOVU format (OpReg src) (OpReg dst)
-reg2reg format@(VecFormat _ FmtDouble W64) src dst
- = VMOVU format (OpReg src) (OpReg dst)
-reg2reg format src dst
- = MOV format (OpReg src) (OpReg dst)
-
--------------------------------------------------------------------------------
-- | Convert a 'CmmExpr' representing a memory address into an 'Amode'.
@@ -3427,7 +3489,7 @@ genCCall64 addr conv dest_regs args = do
tmp <- getNewRegNat arg_fmt
let
code' = code `appOL` arg_code tmp
- acode' = acode `snocOL` reg2reg arg_fmt tmp r
+ acode' = acode `snocOL` mkRegRegMoveInstr platform arg_fmt tmp r
return (code',acode')
arg_rep = cmmExprType platform arg
=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -370,6 +370,9 @@ data Instr
| MOVL Format Operand Operand
| MOVH Format Operand Operand
| MOVA Format Operand Operand
+ | MOVDQU Format Operand Operand
+ | VMOVDQU Format Operand Operand
+ | MOVSD Format Operand Operand
-- logic operations
| VPXOR Format Reg Reg Reg
@@ -526,6 +529,10 @@ regUsageOfInstr platform instr
MOVA fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
MOVL fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
MOVH fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
+ MOVDQU fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
+ VMOVDQU fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
+ MOVSD fmt src dst -> mkRU fmt (use_R src []) (use_R dst [])
+
VPXOR fmt s1 s2 dst -> mkRU fmt [s1,s2] [dst]
VADD fmt s1 s2 dst -> mkRU fmt ((use_R s1 []) ++ [s2]) [dst]
@@ -739,6 +746,10 @@ patchRegsOfInstr instr env
MOVA fmt src dst -> MOVA fmt (patchOp src) (patchOp dst)
MOVL fmt src dst -> MOVL fmt (patchOp src) (patchOp dst)
MOVH fmt src dst -> MOVH fmt (patchOp src) (patchOp dst)
+ MOVDQU fmt src dst -> MOVDQU fmt (patchOp src) (patchOp dst)
+ VMOVDQU fmt src dst -> VMOVDQU fmt (patchOp src) (patchOp dst)
+ MOVSD fmt src dst -> MOVSD fmt (patchOp src) (patchOp dst)
+
VPXOR fmt s1 s2 dst -> VPXOR fmt (env s1) (env s2) (env dst)
VADD fmt s1 s2 dst -> VADD fmt (patchOp s1) (env s2) (env dst)
@@ -763,7 +774,7 @@ patchRegsOfInstr instr env
-> PSLLDQ fmt (patchOp off) (env dst)
PSRLDQ fmt off dst
-> PSRLDQ fmt (patchOp off) (env dst)
-
+
MOVHLPS fmt src dst
-> MOVHLPS fmt (patchOp src) (env dst)
PUNPCKLQDQ fmt src dst
@@ -945,11 +956,20 @@ mkRegRegMoveInstr
-> Reg
-> Reg
-> Instr
-
-mkRegRegMoveInstr _platform fmt src dst =
- case fmt of
- VecFormat {} -> MOVU fmt (OpReg src) (OpReg dst)
- _ -> MOV fmt (OpReg src) (OpReg dst)
+mkRegRegMoveInstr _platform fmt@(VecFormat _ s) src dst
+ | isIntScalarFormat s
+ = if widthInBytes (formatToWidth fmt) <= 128
+ then MOVDQU fmt (OpReg src) (OpReg dst)
+ else VMOVDQU fmt (OpReg src) (OpReg dst)
+ | otherwise
+ = if widthInBytes (formatToWidth fmt) <= 128
+ then MOVU fmt (OpReg src) (OpReg dst)
+ else VMOVU fmt (OpReg src) (OpReg dst)
+mkRegRegMoveInstr _platform fmt src dst
+ | isFloatFormat fmt
+ = MOVSD fmt (OpReg src) (OpReg dst)
+ | otherwise
+ = MOV fmt (OpReg src) (OpReg dst)
-- | Check whether an instruction represents a reg-reg move.
-- The register allocator attempts to eliminate reg->reg moves whenever it can,
@@ -959,8 +979,24 @@ takeRegRegMoveInstr
:: Instr
-> Maybe (Reg,Reg)
-takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
- = Just (r1,r2)
+takeRegRegMoveInstr (MOV fmt (OpReg r1) (OpReg r2))
+ -- MOV zeroes the upper part of vector registers,
+ -- so it is not a real "move" in that case.
+ | not (isVecFormat fmt)
+ = Just (r1,r2)
+takeRegRegMoveInstr (MOVSD fmt (OpReg r1) (OpReg r2))
+ | not (isVecFormat fmt)
+ = Just (r1,r2)
+takeRegRegMoveInstr (MOVA _ (OpReg r1) (OpReg r2))
+ = Just (r1, r2)
+takeRegRegMoveInstr (MOVU _ (OpReg r1) (OpReg r2))
+ = Just (r1, r2)
+takeRegRegMoveInstr (VMOVU _ (OpReg r1) (OpReg r2))
+ = Just (r1, r2)
+takeRegRegMoveInstr (MOVDQU _ (OpReg r1) (OpReg r2))
+ = Just (r1, r2)
+takeRegRegMoveInstr (VMOVDQU _ (OpReg r1) (OpReg r2))
+ = Just (r1, r2)
takeRegRegMoveInstr _ = Nothing
=====================================
compiler/GHC/CmmToAsm/X86/Ppr.hs
=====================================
@@ -431,13 +431,13 @@ pprFormat x = case x of
II64 -> text "q"
FF32 -> text "ss" -- "scalar single-precision float" (SSE2)
FF64 -> text "sd" -- "scalar double-precision float" (SSE2)
- VecFormat _ FmtFloat W32 -> text "ps"
- VecFormat _ FmtDouble W64 -> text "pd"
+ VecFormat _ FmtFloat -> text "ps"
+ VecFormat _ FmtDouble -> text "pd"
-- TODO: this is shady because it only works for certain instructions
- VecFormat _ FmtInt8 W8 -> text "b"
- VecFormat _ FmtInt16 W16 -> text "w"
- VecFormat _ FmtInt32 W32 -> text "l"
- VecFormat _ FmtInt64 W64 -> text "q"
+ VecFormat _ FmtInt8 -> text "b"
+ VecFormat _ FmtInt16 -> text "w"
+ VecFormat _ FmtInt32 -> text "l"
+ VecFormat _ FmtInt64 -> text "q"
pprFormat_x87 :: IsLine doc => Format -> doc
pprFormat_x87 x = case x of
@@ -715,9 +715,9 @@ pprInstr platform i = case i of
XOR FF64 src dst
-> pprOpOp (text "xorpd") FF64 src dst
- XOR format@(VecFormat _ sfmt _) src dst | isIntScalarFormat sfmt
+ XOR format@(VecFormat _ sfmt) src dst | isIntScalarFormat sfmt
-> pprOpOp (text "pxor") format src dst
-
+
XOR format src dst
-> pprFormatOpOp (text "xor") format src dst
@@ -968,6 +968,14 @@ pprInstr platform i = case i of
-> pprFormatOpOp (text "movl") format from to
MOVH format from to
-> pprFormatOpOp (text "movh") format from to
+
+ MOVDQU format from to
+ -> pprOpOp (text "movdqu") format from to
+ VMOVDQU format from to
+ -> pprOpOp (text "vmovdqu") format from to
+ MOVSD format from to
+ -> pprOpOp (text "movsd") format from to
+
VPXOR format s1 s2 dst
-> pprXor (text "vpxor") format s1 s2 dst
VEXTRACT format offset from to
@@ -1045,9 +1053,9 @@ pprInstr platform i = case i of
pprBroadcastFormat :: Format -> Line doc
pprBroadcastFormat x
= case x of
- VecFormat _ FmtFloat W32 -> text "ss"
- VecFormat _ FmtDouble W64 -> text "sd"
- -- TODO: Add Ints and remove panic
+ VecFormat _ FmtFloat -> text "ss"
+ VecFormat _ FmtDouble -> text "sd"
+ -- SIMD NCG TODO: Add Ints and remove panic
VecFormat {} -> panic "Incorrect width"
_ -> panic "Scalar Format invading vector operation"
@@ -1061,7 +1069,6 @@ pprInstr platform i = case i of
pprOperand platform format op1
]
-
pprFormatOp_ :: Line doc -> Format -> Operand -> doc
pprFormatOp_ name format op1
= line $ hcat [
@@ -1153,7 +1160,7 @@ pprInstr platform i = case i of
comma,
pprReg platform (archWordFormat (target32Bit platform)) reg2
]
-
+
pprOpReg :: Line doc -> Format -> Operand -> Reg -> doc
pprOpReg name format op reg
= line $ hcat [
=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -1521,6 +1521,7 @@ genMachOp _ op [x] = case op of
MO_VU_Rem _ _ -> panicOp
MO_VF_Broadcast _ _ -> panicOp
+ MO_V_Broadcast _ _ -> panicOp
MO_VF_Insert _ _ -> panicOp
MO_VF_Extract _ _ -> panicOp
@@ -1722,6 +1723,7 @@ genMachOp_slow opt op [x, y] = case op of
MO_VS_Neg {} -> panicOp
+ MO_V_Broadcast {} -> panicOp
MO_VF_Broadcast {} -> panicOp
MO_VF_Insert {} -> panicOp
=====================================
compiler/GHC/Platform/Reg/Class.hs
=====================================
@@ -11,7 +11,7 @@ import GHC.Prelude
import GHC.Utils.Outputable as Outputable
import GHC.Types.Unique
-import GHC.Builtin.Uniques
+import GHC.Builtin.Uniques ( mkRegClassUnique )
-- | The class of a register.
=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -2653,7 +2653,7 @@ doVecPackOp ty z es res = do
where
-- vector indices are always 32-bits
-- TODO: consider indexing by element rather than by byte
- iLit = CmmLit (CmmInt ((toInteger i) * 8) W32)
+ iLit = CmmLit (CmmInt ((toInteger i) * 16) W32)
len :: Length
len = vecLength ty
=====================================
testsuite/tests/simd/should_run/Simd009c.hs
=====================================
@@ -13,6 +13,7 @@ module Simd009c where
import Control.Monad ( unless )
import Data.Foldable ( for_ )
import GHC.Exts
+import GHC.Prim
import Language.Haskell.TH ( CodeQ )
import Language.Haskell.TH.Syntax ( Lift(liftTyped) )
=====================================
testsuite/tests/simd/should_run/all.T
=====================================
@@ -1,6 +1,7 @@
# N.B. Only the X86 NCG and LLVM backends support SIMD operations for now.
setTestOpts([ unless(arch('x86_64'), only_ways(llvm_ways))
, when(unregisterised(), skip)
+ , js_skip, when(arch('wasm32'), skip)
])
test('simd000', [], compile_and_run, [''])
@@ -12,5 +13,5 @@ test('simd005', [], compile_and_run, [''])
test('simd006', [], compile_and_run, [''])
test('simd007', [], compile_and_run, [''])
test('simd008', [], compile_and_run, [''])
-test('simd009', [extra_files(['Simd009b.hs', 'Simd009c.hs'])], multimod_compile_and_run, ['simd009', ''])
+test('simd009', [req_th, extra_files(['Simd009b.hs', 'Simd009c.hs'])], multimod_compile_and_run, ['simd009', ''])
test('simd010', [], compile_and_run, [''])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/db9a19a6e10287e60619751c76710e80aae5687a...68d896f4f1b1db0dcaa263503910de475906a72f
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/db9a19a6e10287e60619751c76710e80aae5687a...68d896f4f1b1db0dcaa263503910de475906a72f
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240613/79ecb6d8/attachment-0001.html>
More information about the ghc-commits
mailing list