[Git][ghc/ghc][wip/ncg-simd] SIMD: cleanup
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Fri Jun 14 13:50:35 UTC 2024
sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC
Commits:
6669eee7 by sheaf at 2024-06-14T15:50:19+02:00
SIMD: cleanup
- - - - -
18 changed files:
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/PPC/Instr.hs
- compiler/GHC/CmmToAsm/PPC/Ppr.hs
- compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
- compiler/GHC/CmmToAsm/Reg/Liveness.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Ppr.hs
- compiler/GHC/CmmToAsm/X86/RegInfo.hs
- compiler/GHC/CmmToAsm/X86/Regs.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/Llvm/Ppr.hs
- compiler/GHC/Llvm/Syntax.hs
- compiler/GHC/Platform/Reg.hs
- compiler/GHC/StgToCmm/Prim.hs
- compiler/GHC/Types/Unique/FM.hs
Changes:
=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -151,7 +151,9 @@ regUsageOfInstr platform instr = case instr of
-- registers as well, as they show up.
usage (src, dst) = RU (map (,II64) $ filter (interesting platform) src)
(map (,II64) $ filter (interesting platform) dst)
- -- SIMD NCG TODO: remove this hack
+ -- SIMD NCG TODO: the format here is used for register spilling/unspilling.
+ -- As the AArch64 NCG does not currently support SIMD registers,
+ -- we simply use II64 format for all instructions.
regAddr :: AddrMode -> [Reg]
regAddr (AddrRegReg r1 r2) = [r1, r2]
=====================================
compiler/GHC/CmmToAsm/PPC/Instr.hs
=====================================
@@ -393,7 +393,9 @@ regUsageOfInstr platform instr
where
usage (src, dst) = RU (map (,II64) $ filter (interesting platform) src)
(map (,II64) $ filter (interesting platform) dst)
- -- SIMD NCG TODO: remove this hack
+ -- SIMD NCG TODO: the format here is used for register spilling/unspilling.
+ -- As the PowerPC NCG does not currently support SIMD registers,
+ -- we simply use II64 format for all instructions.
regAddr (AddrRegReg r1 r2) = [r1, r2]
regAddr (AddrRegImm r1 _) = [r1]
=====================================
compiler/GHC/CmmToAsm/PPC/Ppr.hs
=====================================
@@ -196,12 +196,12 @@ pprReg :: forall doc. IsLine doc => Reg -> doc
pprReg r
= case r of
- RegReal (RealRegSingle i) -> ppr_reg_no i
- RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u
- RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u
- RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
- RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
- RegVirtual (VirtualRegVec u) -> text "%vVec_" <> pprUniqueAlways u
+ RegReal (RealRegSingle i) -> ppr_reg_no i
+ RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u
+ RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u
+ RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
+ RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
+ RegVirtual (VirtualRegV128 u) -> text "%vV128_" <> pprUniqueAlways u
where
ppr_reg_no :: Int -> doc
=====================================
compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs
=====================================
@@ -168,7 +168,6 @@ cleanForward platform blockId assoc acc (li1 : li2 : instrs)
cleanForward platform blockId assoc acc
$ li1 : LiveInstr (mkRegRegMoveInstr platform fmt reg1 reg2) Nothing
: instrs
- -- SIMD NCG TODO: is this "fmt" correct?
cleanForward platform blockId assoc acc (li@(LiveInstr i1 _) : instrs)
| Just (r1, r2) <- takeRegRegMoveInstr platform i1
@@ -249,7 +248,6 @@ cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot reg fmt) _)
return ( assoc'
, Just $ LiveInstr (mkRegRegMoveInstr platform fmt reg2 reg) Nothing)
- -- SIMD NCG TODO: is this fmt correct?
-- Gotta keep this instr.
| otherwise
=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -871,14 +871,12 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
freeRegs <- getFreeRegsR
let regclass = classOfVirtualReg r
freeRegs_thisClass = frGetFreeRegs platform regclass freeRegs :: [RealReg]
- -- SIMD NCG TODO: this is not the right thing to be doing,
- -- and is indicative we should not use Format but a more
- -- trimmed down datatype that only keeps track of e.g.
- -- how many stack slots something uses up.
vr_fmt = case r of
- VirtualRegVec {} -> VecFormat 2 FmtDouble
- -- SIMD NCG TODO: handle 256 and 512 by adding
- -- new virtual register constructors.
+ VirtualRegV128 {} -> VecFormat 2 FmtDouble
+ -- It doesn't really matter whether we use e.g. v2f64 or v4f32
+ -- or v4i32 etc here. This is perhaps a sign that 'Format'
+ -- is not the right type to use here, but that is a battle
+ -- for another day.
_ -> II64
-- Can we put the variable into a register it already was?
=====================================
compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
=====================================
@@ -55,9 +55,7 @@ getStackSlotFor fs@(StackMap _ reserved) _fmt regUnique
getStackSlotFor (StackMap freeSlot reserved) fmt regUnique =
let
- nbSlots = case fmt of
- VecFormat {} -> 2 -- SIMD NCG TODO: panic for unsupported vectors
- _ -> 1
+ nbSlots = formatInBytes fmt `div` 8
in
(StackMap (freeSlot+nbSlots) (addToUFM reserved regUnique freeSlot), freeSlot)
=====================================
compiler/GHC/CmmToAsm/Reg/Liveness.hs
=====================================
@@ -64,7 +64,6 @@ import Data.List (mapAccumL, partition)
import Data.Maybe
import Data.IntSet (IntSet)
import GHC.CmmToAsm.Format
-import GHC.Types.Unique (Uniquable)
-----------------------------------------------------------------------------
type RegSet = UniqSet Reg
@@ -681,10 +680,6 @@ patchRegsLiveInstr patchF li
, liveDieWrite = mapKeysUFM patchF $ liveDieWrite live })
-- See Note [Unique Determinism and code generation]
--- SIMD NCG TODO: move this to Unique.FM module
-mapKeysUFM :: Uniquable a => (t -> a) -> UniqFM key (t, b) -> UniqFM a (a, b)
-mapKeysUFM f m = listToUFM $ map ( \ (r, fmt) -> let r' = f r in (r', (r', fmt)) ) $ nonDetEltsUFM m
-
--------------------------------------------------------------------------------
-- | Convert a NatCmmDecl to a LiveCmmDecl, with liveness information
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1,4 +1,5 @@
{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NondecreasingIndentation #-}
@@ -1298,10 +1299,14 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_S_Shr rep -> shift_code rep SAR x y {-False-}
MO_VF_Shuffle l w is
- | avx
- -> vector_shuffle_float l w x y is
+ | l * widthInBytes w == 128
+ -> if
+ | avx
+ -> vector_shuffle_float l w x y is
+ | otherwise
+ -> sorry "Please enable the -mavx flag"
| otherwise
- -> sorry "Please enable the -mavx flag"
+ -> sorry "Please use -fllvm for wide shuffle instructions"
MO_VF_Broadcast l W32 | avx -> vector_float_broadcast_avx l W32 x y
| sse4_1 -> vector_float_broadcast_sse l W32 x y
@@ -1641,15 +1646,12 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
= do
(r, exp) <- getSomeReg expr
let format = VecFormat l FmtDouble
- addr = spRel platform 0
code dst
= case lit of
CmmInt 0 _ -> exp `snocOL`
(MOVSD FF64 (OpReg r) (OpReg dst))
CmmInt 1 _ -> exp `snocOL`
- (MOVH format (OpReg r) (OpAddr addr)) `snocOL`
- (MOVSD FF64 (OpAddr addr) (OpReg dst))
- -- SIMD NCG TODO: avoid going via the stack here?
+ (MOVHLPS format (OpReg r) dst)
_ -> panic "Error in offset while unpacking"
return (Any format code)
vector_float_unpack _ w c e
@@ -1676,10 +1678,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
= pprPanic "Unpack not supported for : " (pdoc platform c $$ pdoc platform e $$ ppr w)
-----------------------
vector_float_broadcast_avx :: Length
- -> Width
- -> CmmExpr
- -> CmmExpr
- -> NatM Register
+ -> Width
+ -> CmmExpr
+ -> CmmExpr
+ -> NatM Register
vector_float_broadcast_avx len W32 expr1 expr2
= do
fn <- getAnyReg expr1
@@ -1731,10 +1733,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
= pprPanic "Broadcast not supported for : " (pdoc platform c)
vector_int_broadcast :: Length
- -> Width
- -> CmmExpr
- -> CmmExpr
- -> NatM Register
+ -> Width
+ -> CmmExpr
+ -> CmmExpr
+ -> NatM Register
vector_int_broadcast len W64 expr1 expr2
= do
fn <- getAnyReg expr1
@@ -1749,10 +1751,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
-----------------------
vector_int_unpack_sse :: Length
- -> Width
- -> CmmExpr
- -> CmmExpr
- -> NatM Register
+ -> Width
+ -> CmmExpr
+ -> CmmExpr
+ -> NatM Register
vector_int_unpack_sse l at 2 W64 expr (CmmLit lit)
= do
(r, exp) <- getSomeReg expr
@@ -1803,9 +1805,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
(3,1) -> unitOL (VSHUFPD fmt (ImmInt 0b11) (OpReg v1) v2 dst)
_ -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 3" (ppr is)
_ -> pprPanic "vector shuffle: wrong number of indices (expected 2)" (ppr is)
- VecFormat 4 FmtFloat ->
+ VecFormat 4 FmtFloat
+ -- indices 0 <= i <= 7
+ | all ( (>= 0) <&&> (<= 7) ) is ->
case is of
- -- indices 0 <= i <= 7
[i1, i2, i3, i4]
| all ( <= 3 ) is
, let imm = i1 + i2 `shiftL` 2 + i3 `shiftL` 4 + i4 `shiftL` 6
@@ -1842,6 +1845,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
`snocOL`
(INSERTPS fmt (ImmInt $ insertImm i4 3) (OpReg $ vec i4) dst)
_ -> pprPanic "vector shuffle: wrong number of indices (expected 4)" (ppr is)
+ | otherwise
+ -> pprPanic "vector shuffle: indices out of bounds 0 <= i <= 7" (ppr is)
_ ->
pprPanic "vector shuffle: unsupported format" (ppr fmt)
@@ -1882,12 +1887,12 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
-> CmmExpr
-> NatM Register
-- FloatX4
- vector_float_insert len at 4 W32 vecExpr valExpr (CmmLit offset)
+ vector_float_insert len at 4 W32 vecExpr valExpr (CmmLit (CmmInt offset _))
= do
fn <- getAnyReg vecExpr
(r, exp) <- getSomeReg valExpr
let fmt = VecFormat len FmtFloat
- imm = litToImm offset
+ imm = litToImm (CmmInt (offset `shiftL` 4) W32)
code dst = exp `appOL`
(fn dst) `snocOL`
(INSERTPS fmt imm (OpReg r) dst)
@@ -1900,15 +1905,14 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
let fmt = VecFormat len FmtDouble
code dst
= case offset of
- -- TODO: why not just index by element rather than by byte?
- CmmInt 0 _ -> valExp `appOL`
- vecExp `snocOL`
- (MOVSD FF64 (OpReg valReg) (OpReg dst)) `snocOL`
- (SHUFPD fmt (ImmInt 0b00) (OpReg vecReg) dst)
- CmmInt 16 _ -> valExp `appOL`
- vecExp `snocOL`
- (MOVSD FF64 (OpReg vecReg) (OpReg dst)) `snocOL`
- (SHUFPD fmt (ImmInt 0b00) (OpReg valReg) dst)
+ CmmInt 0 _ -> valExp `appOL`
+ vecExp `snocOL`
+ (MOVSD FF64 (OpReg valReg) (OpReg dst)) `snocOL`
+ (SHUFPD fmt (ImmInt 0b00) (OpReg vecReg) dst)
+ CmmInt 1 _ -> valExp `appOL`
+ vecExp `snocOL`
+ (MOVSD FF64 (OpReg vecReg) (OpReg dst)) `snocOL`
+ (SHUFPD fmt (ImmInt 0b00) (OpReg valReg) dst)
_ -> pprPanic "MO_VF_Insert DoubleX2: unsupported offset" (ppr offset)
in return $ Any fmt code
-- For DoubleX4: use VSHUFPD.
@@ -1941,16 +1945,16 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
pprTraceM "tmp:" (ppr tmp)
let code dst
= case offset of
- CmmInt 0 _ -> valExp `appOL`
- vecExp `snocOL`
- (MOVHLPS fmt (OpReg vecReg) tmp) `snocOL`
- (MOV II64 (OpReg valReg) (OpReg dst)) `snocOL`
- (PUNPCKLQDQ fmt (OpReg tmp) dst)
- CmmInt 16 _ -> valExp `appOL`
- vecExp `snocOL`
- (MOV fmt (OpReg vecReg) (OpReg dst)) `snocOL`
- (MOV II64 (OpReg valReg) (OpReg tmp)) `snocOL`
- (PUNPCKLQDQ fmt (OpReg tmp) dst)
+ CmmInt 0 _ -> valExp `appOL`
+ vecExp `snocOL`
+ (MOVHLPS fmt (OpReg vecReg) tmp) `snocOL`
+ (MOV II64 (OpReg valReg) (OpReg dst)) `snocOL`
+ (PUNPCKLQDQ fmt (OpReg tmp) dst)
+ CmmInt 1 _ -> valExp `appOL`
+ vecExp `snocOL`
+ (MOV fmt (OpReg vecReg) (OpReg dst)) `snocOL`
+ (MOV II64 (OpReg valReg) (OpReg tmp)) `snocOL`
+ (PUNPCKLQDQ fmt (OpReg tmp) dst)
_ -> pprPanic "MO_V_Insert Int64X2: unsupported offset" (ppr offset)
in return $ Any fmt code
vector_int_insert_sse len width _ _ offset
=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -391,8 +391,6 @@ data Instr
| VSHUFPS Format Imm Operand Reg Reg
| SHUFPD Format Imm Operand Reg
| VSHUFPD Format Imm Operand Reg Reg
- -- SIMD NCG TODO: don't store the Format (or only what we need)
- -- in order to emit these instructions.
| MOVHLPS Format Operand Reg
| PUNPCKLQDQ Format Operand Reg
@@ -874,9 +872,11 @@ mkSpillInstr config reg fmt delta slot
= let off = spillSlotToOffset platform slot - delta
in case fmt of
VecFormat {}
- -- SIMD NCG TODO: panic for unsupported VecFormats (& same in mkLoadInstr)
+ | formatInBytes fmt > 16
+ -> [VMOVU fmt (OpReg reg) (OpAddr (spRel platform off))]
+ | otherwise
-> [MOVU fmt (OpReg reg) (OpAddr (spRel platform off))]
- -- NB: not using MOVA because we have no guarantees about the stack
+ -- NB: not using MOVA, because we have no guarantees about the stack
-- being sufficiently aligned, including even numbered stack slots.
_ -> [MOV fmt (OpReg reg) (OpAddr (spRel platform off))]
where platform = ncgPlatform config
@@ -894,9 +894,11 @@ mkLoadInstr config reg fmt delta slot
= let off = spillSlotToOffset platform slot - delta
in case fmt of
VecFormat {}
- -- SIMD NCG TODO: panic for unsupported VecFormats (& same in mkLoadInstr)
+ | formatInBytes fmt > 16
+ -> [VMOVU fmt (OpAddr (spRel platform off)) (OpReg reg)]
+ | otherwise
-> [MOVU fmt (OpAddr (spRel platform off)) (OpReg reg)]
- -- NB: not using MOVA because we have no guarantees about the stack
+ -- NB: not using MOVA, because we have no guarantees about the stack
-- being sufficiently aligned, including even numbered stack slots.
_ -> [MOV fmt (OpAddr (spRel platform off)) (OpReg reg)]
@@ -985,8 +987,9 @@ takeRegRegMoveInstr platform (MOV fmt (OpReg r1) (OpReg r2))
-- MOV zeroes the upper part of vector registers,
-- so it is not a real "move" in that case.
| not (isVecFormat fmt)
- -- Don't eliminate a move between e.g. RAX and XMM,
- -- even though we might be using XMM to store a scalar integer value.
+ -- Don't eliminate a move between e.g. RAX and XMM:
+ -- even though we might be using XMM to store a scalar integer value,
+ -- some instructions only support XMM registers.
, targetClassOfReg platform r1 == targetClassOfReg platform r2
= Just (r1,r2)
takeRegRegMoveInstr _ (MOVSD fmt (OpReg r1) (OpReg r2))
=====================================
compiler/GHC/CmmToAsm/X86/Ppr.hs
=====================================
@@ -295,7 +295,7 @@ pprReg platform f r
RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u
RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
- RegVirtual (VirtualRegVec u) -> text "%vVec_" <> pprUniqueAlways u
+ RegVirtual (VirtualRegV128 u) -> text "%vVec_" <> pprUniqueAlways u
where
ppr32_reg_no :: Format -> Int -> doc
@@ -1051,13 +1051,15 @@ pprInstr platform i = case i of
char '\t' <> name <> pprBroadcastFormat format <> space
pprBroadcastFormat :: Format -> Line doc
- pprBroadcastFormat x
- = case x of
- VecFormat _ FmtFloat -> text "ss"
- VecFormat _ FmtDouble -> text "sd"
- -- SIMD NCG TODO: Add Ints and remove panic
- VecFormat {} -> panic "Incorrect width"
- _ -> panic "Scalar Format invading vector operation"
+ pprBroadcastFormat (VecFormat _ f)
+ = case f of
+ FmtFloat -> text "ss"
+ FmtDouble -> text "sd"
+ FmtInt8 -> text "b"
+ FmtInt16 -> text "w"
+ FmtInt32 -> text "d"
+ FmtInt64 -> text "q"
+ pprBroadcastFormat _ = panic "Scalar Format invading vector operation"
pprFormatImmOp :: Line doc -> Format -> Imm -> Operand -> doc
pprFormatImmOp name format imm op1
=====================================
compiler/GHC/CmmToAsm/X86/RegInfo.hs
=====================================
@@ -31,7 +31,7 @@ mkVirtualReg u format
FF64 -> VirtualRegD u
--TODO:
-- Add VirtualRegAVX and inspect VecFormat and allocate
- VecFormat {} -> VirtualRegVec u
+ VecFormat {} -> VirtualRegV128 u
_other -> VirtualRegI u
regDotColor :: Platform -> RealReg -> SDoc
=====================================
compiler/GHC/CmmToAsm/X86/Regs.hs
=====================================
@@ -81,7 +81,7 @@ virtualRegSqueeze cls vr
-> case vr of
VirtualRegD{} -> 1
VirtualRegF{} -> 0
- VirtualRegVec{} -> 1
+ VirtualRegV128{} -> 1
_other -> 0
=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -1850,16 +1850,13 @@ genShuffleOp :: [Int] -> CmmExpr -> CmmExpr -> LlvmM ExprData
genShuffleOp is x y = runExprData $ do
vx <- exprToVarW x
vy <- exprToVarW y
- mask <- exprToVarW $ CmmLit $ CmmVec $ map ((`CmmInt` W32) . fromIntegral) is
let tx = getVarType vx
ty = getVarType vy
Panic.massertPpr
(tx == ty)
(vcat [ text "shuffle: mismatched arg types"
, ppLlvmType tx, ppLlvmType ty ])
- let fname = fsLit "shufflevector"
- fptr <- liftExprData $ getInstrinct fname ty [tx, ty]
- doExprW tx $ Call StdCall fptr [vx, vy, mask] [ReadNone, NoUnwind]
+ doExprW tx $ Shuffle vx vy is
-- | Generate code for a fused multiply-add operation.
genFmaOp :: CmmExpr -> CmmExpr -> CmmExpr -> LlvmM ExprData
=====================================
compiler/GHC/Llvm/Ppr.hs
=====================================
@@ -281,6 +281,7 @@ ppLlvmExpression opts expr
Extract vec idx -> ppExtract opts vec idx
ExtractV struct idx -> ppExtractV opts struct idx
Insert vec elt idx -> ppInsert opts vec elt idx
+ Shuffle v1 v2 idxs -> ppShuffle opts v1 v2 idxs
GetElemPtr inb ptr indexes -> ppGetElementPtr opts inb ptr indexes
Load ptr align -> ppLoad opts ptr align
ALoad ord st ptr -> ppALoad opts ord st ptr
@@ -577,6 +578,15 @@ ppInsert opts vec elt idx =
{-# SPECIALIZE ppInsert :: LlvmCgConfig -> LlvmVar -> LlvmVar -> LlvmVar -> SDoc #-}
{-# SPECIALIZE ppInsert :: LlvmCgConfig -> LlvmVar -> LlvmVar -> LlvmVar -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
+ppShuffle :: IsLine doc => LlvmCgConfig -> LlvmVar -> LlvmVar -> [Int] -> doc
+ppShuffle opts v1 v2 idxs =
+ text "shufflevector"
+ <+> ppLlvmType (getVarType v1) <+> ppName opts v1 <> comma
+ <+> ppLlvmType (getVarType v2) <+> ppName opts v2 <> comma
+ <+> ppLlvmType (LMVector (length idxs) (LMInt 32)) <+> ppLit opts (LMVectorLit $ map ((`LMIntLit` (LMInt 32)) . fromIntegral) idxs)
+{-# SPECIALIZE ppShuffle :: LlvmCgConfig -> LlvmVar -> LlvmVar -> [Int] -> SDoc #-}
+{-# SPECIALIZE ppShuffle :: LlvmCgConfig -> LlvmVar -> LlvmVar -> [Int] -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
+
ppMetaAnnotExpr :: IsLine doc => LlvmCgConfig -> [MetaAnnot] -> LlvmExpression -> doc
ppMetaAnnotExpr opts meta expr =
ppLlvmExpression opts expr <> ppMetaAnnots opts meta
=====================================
compiler/GHC/Llvm/Syntax.hs
=====================================
@@ -237,6 +237,10 @@ data LlvmExpression
-}
| Insert LlvmVar LlvmVar LlvmVar
+ {- | Shuffle two vectors into a destination vector using given indices
+ -}
+ | Shuffle LlvmVar LlvmVar [Int]
+
{- |
Allocate amount * sizeof(tp) bytes on the heap
* tp: LlvmType to reserve room for
=====================================
compiler/GHC/Platform/Reg.hs
=====================================
@@ -1,3 +1,5 @@
+{-# LANGUAGE MagicHash #-}
+
-- | An architecture independent description of a register.
-- This needs to stay architecture independent because it is used
-- by NCGMonad and the register allocators, which are shared
@@ -27,6 +29,7 @@ module GHC.Platform.Reg (
where
import GHC.Prelude
+import GHC.Exts ( Int(I#), dataToTag# )
import GHC.Utils.Outputable
import GHC.Utils.Panic
@@ -53,66 +56,44 @@ type RegNo
-- Virtual regs can be of either class, so that info is attached.
--
data VirtualReg
- = VirtualRegI {-# UNPACK #-} !Unique
- | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
- | VirtualRegF {-# UNPACK #-} !Unique
- | VirtualRegD {-# UNPACK #-} !Unique
- | VirtualRegVec {-# UNPACK #-} !Unique
- deriving (Eq, Show)
-
--- This is laborious, but necessary. We can't derive Ord because
--- Unique doesn't have an Ord instance. Note nonDetCmpUnique in the
--- implementation. See Note [No Ord for Unique]
+ -- | Integer virtual register
+ = VirtualRegI { virtualRegUnique :: {-# UNPACK #-} !Unique }
+ -- | High part of 2-word virtual register
+ | VirtualRegHi { virtualRegUnique :: {-# UNPACK #-} !Unique }
+ -- | Float virtual register
+ | VirtualRegF { virtualRegUnique :: {-# UNPACK #-} !Unique }
+ -- | Double virtual register
+ | VirtualRegD { virtualRegUnique :: {-# UNPACK #-} !Unique }
+ -- | 128-bit wide vector virtual register
+ | VirtualRegV128 { virtualRegUnique :: {-# UNPACK #-} !Unique }
+ deriving (Eq, Show)
+
+-- We can't derive Ord, because Unique doesn't have an Ord instance.
+-- Note nonDetCmpUnique in the implementation. See Note [No Ord for Unique].
-- This is non-deterministic but we do not currently support deterministic
-- code-generation. See Note [Unique Determinism and code generation]
instance Ord VirtualReg where
- compare (VirtualRegI a) (VirtualRegI b) = nonDetCmpUnique a b
- compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b
- compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b
- compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b
- compare (VirtualRegVec a) (VirtualRegVec b) = nonDetCmpUnique a b
-
- compare VirtualRegI{} _ = LT
- compare _ VirtualRegI{} = GT
- compare VirtualRegHi{} _ = LT
- compare _ VirtualRegHi{} = GT
- compare VirtualRegF{} _ = LT
- compare _ VirtualRegF{} = GT
- compare VirtualRegVec{} _ = LT
- compare _ VirtualRegVec{} = GT
-
+ compare vr1 vr2 =
+ case compare (I# (dataToTag# vr1)) (I# (dataToTag# vr2)) of
+ LT -> LT
+ GT -> GT
+ EQ -> nonDetCmpUnique (virtualRegUnique vr1) (virtualRegUnique vr2)
instance Uniquable VirtualReg where
- getUnique reg
- = case reg of
- VirtualRegI u -> u
- VirtualRegHi u -> u
- VirtualRegF u -> u
- VirtualRegD u -> u
- VirtualRegVec u -> u
+ getUnique = virtualRegUnique
instance Outputable VirtualReg where
ppr reg
= case reg of
- VirtualRegI u -> text "%vI_" <> pprUniqueAlways u
- VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u
- -- this code is kinda wrong on x86
- -- because float and double occupy the same register set
- -- namely SSE2 register xmm0 .. xmm15
- VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u
- VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u
- VirtualRegVec u -> text "%vVec_" <> pprUniqueAlways u
-
+ VirtualRegI u -> text "%vI_" <> pprUniqueAlways u
+ VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u
+ VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u
+ VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u
+ VirtualRegV128 u -> text "%vV128_" <> pprUniqueAlways u
renameVirtualReg :: Unique -> VirtualReg -> VirtualReg
-renameVirtualReg u r
- = case r of
- VirtualRegI _ -> VirtualRegI u
- VirtualRegHi _ -> VirtualRegHi u
- VirtualRegF _ -> VirtualRegF u
- VirtualRegD _ -> VirtualRegD u
- VirtualRegVec _ -> VirtualRegVec u
+renameVirtualReg u r = r { virtualRegUnique = u }
classOfVirtualReg :: VirtualReg -> RegClass
classOfVirtualReg vr
@@ -121,7 +102,7 @@ classOfVirtualReg vr
VirtualRegHi{} -> RcInteger
VirtualRegF{} -> RcFloatOrVector
VirtualRegD{} -> RcFloatOrVector
- VirtualRegVec{} -> RcFloatOrVector
+ VirtualRegV128{} -> RcFloatOrVector
-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -2651,7 +2651,7 @@ doVecPackOp ty z es res = do
vecPack src [] _ =
emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
- -- SIMD NCG TODO (optional): it should be possible to emit better code
+ -- SIMD NCG TODO: it should be possible to emit better code
-- for "pack" than doing a bunch of vector insertions in a row.
vecPack src (e : es) i = do
dst <- newTemp ty
@@ -2663,8 +2663,7 @@ doVecPackOp ty z es res = do
vecPack dst es (i + 1)
where
-- vector indices are always 32-bits
- -- TODO: consider indexing by element rather than by byte
- iLit = CmmLit (CmmInt ((toInteger i) * 16) W32)
+ iLit = CmmLit (CmmInt (toInteger i) W32)
len :: Length
len = vecLength ty
=====================================
compiler/GHC/Types/Unique/FM.hs
=====================================
@@ -71,6 +71,7 @@ module GHC.Types.Unique.FM (
nonDetStrictFoldUFM_Directly,
anyUFM, allUFM, seqEltsUFM,
mapUFM, mapUFM_Directly, strictMapUFM,
+ mapKeysUFM,
mapMaybeUFM, mapMaybeWithKeyUFM,
elemUFM, elemUFM_Directly,
filterUFM, filterUFM_Directly, partitionUFM,
@@ -397,6 +398,10 @@ mapMaybeWithKeyUFM f (UFM m) = UFM (M.mapMaybeWithKey (f . mkUniqueGrimily) m)
mapUFM_Directly :: (Unique -> elt1 -> elt2) -> UniqFM key elt1 -> UniqFM key elt2
mapUFM_Directly f (UFM m) = UFM (M.mapWithKey (f . mkUniqueGrimily) m)
+-- | Map over the keys in a 'UniqFM'.
+mapKeysUFM :: Uniquable key' => (key -> key') -> UniqFM key (key, b) -> UniqFM key' (key', b)
+mapKeysUFM f m = listToUFM $ map ( \ (r, fmt) -> let r' = f r in (r', (r', fmt)) ) $ nonDetEltsUFM m
+
strictMapUFM :: (a -> b) -> UniqFM k a -> UniqFM k b
strictMapUFM f (UFM a) = UFM $ MS.map f a
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6669eee7d5d242044fc0b17ff72c42755c4f4778
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6669eee7d5d242044fc0b17ff72c42755c4f4778
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240614/349833d8/attachment-0001.html>
More information about the ghc-commits
mailing list