[Git][ghc/ghc][wip/supersven/riscv-vectors] 4 commits: simd001 green
Sven Tennie (@supersven)
gitlab at gitlab.haskell.org
Sun Nov 10 15:38:59 UTC 2024
Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC
Commits:
558ed8e4 by Sven Tennie at 2024-11-02T10:46:28+01:00
simd001 green
- - - - -
03f2767a by Sven Tennie at 2024-11-02T12:22:36+01:00
simd003 green
- - - - -
a9b9a340 by Sven Tennie at 2024-11-02T13:19:05+01:00
simd006 green
- - - - -
0682aed3 by Sven Tennie at 2024-11-10T16:37:14+01:00
simd007 green
- - - - -
5 changed files:
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
- compiler/GHC/CmmToAsm/RV64/Regs.hs
- testsuite/tests/simd/should_run/all.T
Changes:
=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -364,15 +364,11 @@ stmtToInstrs stmt = do
genCCall target result_regs args
CmmComment s -> pure (unitOL (COMMENT (ftext s)))
CmmTick {} -> pure nilOL
- CmmAssign reg src
- | isFloatType ty -> assignReg_FltCode format reg src
- | otherwise -> assignReg_IntCode format reg src
+ CmmAssign reg src -> assignReg format reg src
where
ty = cmmRegType reg
format = cmmTypeFormat ty
- CmmStore addr src _alignment
- | isFloatType ty -> assignMem_FltCode format addr src
- | otherwise -> assignMem_IntCode format addr src
+ CmmStore addr src _alignment -> assignMem format addr src
where
ty = cmmExprType platform src
format = cmmTypeFormat ty
@@ -662,9 +658,21 @@ getRegister' config plat expr =
let format = cmmTypeFormat rep
width = typeWidth rep
Amode addr addr_code <- getAmode plat width mem
- case width of
- w
- | (w <= W64) || isVecFormat format ->
+ case (width, format) of
+ (_w, f)
+ | VecFormat l vf <- f ->
+ pure
+ ( Any
+ format
+ ( \dst ->
+ unitOL (COMMENT (text "XXX here")) `appOL`
+ addr_code `snocOL`
+ annExpr expr
+ (LDRU format (OpReg width dst) (OpAddr addr))
+ )
+ )
+ (w, _f)
+ | w <= W64 ->
-- Load without sign-extension. See Note [Signed arithmetic on RISCV64]
pure
( Any
@@ -828,20 +836,29 @@ getRegister' config plat expr =
--TODO: MO_V_Broadcast with immediate: If the right value is a literal,
-- it may use vmv.v.i (simpler)
MO_V_Broadcast length w -> do
- (reg_idx, format_idx, code_idx) <- getSomeReg e
- let w_idx = formatToWidth format_idx
+ (reg_val, format_val, code_val) <- getSomeReg e
+ let w_val = formatToWidth format_val
pure $ Any (intFormat w) $ \dst ->
- code_idx `snocOL`
- annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
- VMV (OpReg w dst) (OpReg w_idx reg_idx)
+ code_val `snocOL`
+ annExpr expr
+ (VMV (VecFormat length (intScalarFormat w)) (OpReg w dst) (OpReg w_val reg_val))
MO_VF_Broadcast length w -> do
- (reg_idx, format_idx, code_idx) <- getSomeReg e
- let w_idx = formatToWidth format_idx
+ (reg_val, format_val, code_val) <- getSomeReg e
+ let w_val = formatToWidth format_val
pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
- code_idx `snocOL`
- annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
- VMV (OpReg w dst) (OpReg w_idx reg_idx)
+ code_val `snocOL`
+ annExpr expr
+ (VMV (VecFormat length (floatScalarFormat w)) (OpReg w dst) (OpReg w_val reg_val))
+
+ -- TODO: NO MO_V_Neg? Why?
+ MO_VF_Neg length w -> do
+ (reg_v, format_v, code_v) <- getSomeReg e
+ let w_v = formatToWidth format_v
+ pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
+ code_v `snocOL`
+ annExpr expr
+ (VNEG (VecFormat length (floatScalarFormat w))(OpReg w dst) (OpReg w_v reg_v))
x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr)
where
@@ -1123,6 +1140,17 @@ getRegister' config plat expr =
`appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)
)
+ vecOp length w op = do
+ (reg_x, format_x, code_x) <- getSomeReg x
+ (reg_y, format_y, code_y) <- getSomeReg y
+ massertPpr (isVecFormat format_x && isVecFormat format_y) $
+ text "vecOp: non-vector operand. operands: " <+> ppr format_x <+> ppr format_y
+ pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
+ code_x `appOL`
+ code_y `snocOL`
+ annExpr expr
+ (op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
+
case op of
-- Integer operations
-- Add/Sub should only be Integer Options.
@@ -1174,6 +1202,7 @@ getRegister' config plat expr =
MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y))
MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y))
+ -- TODO: Use vecOp here
MO_VF_Extract length w -> do
(reg_v, format_v, code_v) <- getSomeReg x
(reg_idx, format_idx, code_idx) <- getSomeReg y
@@ -1184,15 +1213,27 @@ getRegister' config plat expr =
code_v `appOL`
code_idx `snocOL`
-- Setup
- -- vsetivli zero, 1, e32, m1, ta, ma
-- TODO: Use width
- annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
+ annExpr expr
-- Move selected element to index 0
-- vslidedown.vi v8, v9, 2
- VSLIDEDOWN (OpReg width_v tmp) (OpReg width_v reg_v) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
+ (VSLIDEDOWN (VecFormat length (floatScalarFormat w)) (OpReg width_v tmp) (OpReg width_v reg_v) (OpReg (formatToWidth format_idx) reg_idx)) `snocOL`
-- Move to float register
-- vmv.x.s a0, v8
- VMV (OpReg w dst) (OpReg (formatToWidth tmpFormat) tmp)
+ VMV (VecFormat length (floatScalarFormat w))(OpReg w dst) (OpReg (formatToWidth tmpFormat) tmp)
+ MO_VF_Add length w -> vecOp length w (\d x y -> (VADD (VecFormat length (floatScalarFormat w)) d x y))
+ MO_VF_Sub length w -> vecOp length w (\d x y -> (VSUB (VecFormat length (floatScalarFormat w)) d x y))
+ MO_VF_Mul length w -> vecOp length w (\d x y -> (VMUL (VecFormat length (floatScalarFormat w)) d x y))
+ MO_VF_Quot length w -> vecOp length w (\d x y -> (VQUOT (VecFormat length (floatScalarFormat w)) d x y))
+
+ -- See https://godbolt.org/z/PvcWKMKoW
+ MO_VS_Min length w -> vecOp length w (\d x y -> (VSMIN (VecFormat length (intScalarFormat w)) d x y))
+ MO_VS_Max length w -> vecOp length w (\d x y -> (VSMAX (VecFormat length (intScalarFormat w)) d x y))
+ MO_VU_Min length w -> vecOp length w (\d x y -> (VUMIN (VecFormat length (intScalarFormat w)) d x y))
+ MO_VU_Max length w -> vecOp length w (\d x y -> (VUMAX (VecFormat length (intScalarFormat w)) d x y))
+ MO_VF_Min length w -> vecOp length w (\d x y -> (VFMIN (VecFormat length (floatScalarFormat w)) d x y))
+ MO_VF_Max length w -> vecOp length w (\d x y -> (VFMAX (VecFormat length (floatScalarFormat w)) d x y))
+
_e -> panic $ "Missing operation " ++ show expr
@@ -1264,16 +1305,16 @@ getRegister' config plat expr =
code_f `appOL`
code_idx `appOL`
code_l `snocOL`
- annExpr expr (VSETIVLI zeroReg (fromIntegral length) w M1 TA MA) `snocOL`
+ annExpr expr
-- Build mask for index
-- 1. fill elements with index numbers
-- TODO: The Width is made up
- VID (OpReg W8 v0Reg) (OpReg (formatToWidth format_l) reg_l) `snocOL`
+ (VID (VecFormat length (intScalarFormat w)) (OpReg W8 v0Reg) (OpReg (formatToWidth format_l) reg_l)) `snocOL`
-- 2. Splat value into tmp vector
- VMV (OpReg w tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
+ VMV (VecFormat length (floatScalarFormat w)) (OpReg w tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
-- 3. Merge with mask -> set element at index
- VMSEQ (OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
- VMERGE (OpReg w dst) (OpReg (formatToWidth format_v) reg_v) (OpReg w tmp) (OpReg W8 v0Reg)
+ VMSEQ (VecFormat length (floatScalarFormat w)) (OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
+ VMERGE (VecFormat length (floatScalarFormat w)) (OpReg w dst) (OpReg (formatToWidth format_v) reg_v) (OpReg w tmp) (OpReg W8 v0Reg)
_ ->
pprPanic "getRegister' (unhandled ternary CmmMachOp): "
@@ -1532,18 +1573,18 @@ getAmode _platform _ (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')])
| fitsIn12bitImm off =
do
(reg, _format, code) <- getSomeReg expr
- return $ Amode (AddrRegImm reg (ImmInteger off)) code
+ return $ Amode (AddrRegImm reg (ImmInteger off)) $ COMMENT (text "getAmode generic" <+> (text . show) expr) `consOL` code
getAmode _platform _ (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')])
| fitsIn12bitImm (-off) =
do
(reg, _format, code) <- getSomeReg expr
- return $ Amode (AddrRegImm reg (ImmInteger (-off))) code
+ return $ Amode (AddrRegImm reg (ImmInteger (-off))) $ COMMENT (text "getAmode generic" <+> (text . show) expr) `consOL` code
-- Generic case
getAmode _platform _ expr =
do
(reg, _format, code) <- getSomeReg expr
- return $ Amode (AddrReg reg) code
+ return $ Amode (AddrReg reg) $ COMMENT (text "getAmode generic" <+> (text . show) expr) `consOL` code
-- -----------------------------------------------------------------------------
-- Generating assignments
@@ -1557,11 +1598,8 @@ getAmode _platform _ expr =
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).
-assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-assignMem_IntCode rep addrE srcE =
+assignMem :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem rep addrE srcE =
do
(src_reg, _format, code) <- getSomeReg srcE
platform <- getPlatform
@@ -1573,7 +1611,8 @@ assignMem_IntCode rep addrE srcE =
`snocOL` STR rep (OpReg w src_reg) (OpAddr addr)
)
-assignReg_IntCode _ reg src =
+assignReg :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg _ reg src =
do
platform <- getPlatform
let dst = getRegisterReg platform reg
@@ -1588,12 +1627,6 @@ assignReg_IntCode _ reg src =
`snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg)
)
--- Let's treat Floating point stuff
--- as integer code for now. Opaque.
-assignMem_FltCode = assignMem_IntCode
-
-assignReg_FltCode = assignReg_IntCode
-
-- -----------------------------------------------------------------------------
-- Jumps
-- AArch64 has 26bits for targets, whereas RiscV only has 20.
@@ -2336,19 +2369,30 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks =
CSET {} -> 2
STR {} -> 1
LDR {} -> 3
- LDRU {} -> 1
+ LDRU {} -> 2
FENCE {} -> 1
FCVT {} -> 1
FABS {} -> 1
FMIN {} -> 1
FMAX {} -> 1
FMA {} -> 1
- VMV {} -> 1
- VID {} -> 1
- VMSEQ {} -> 1
- VMERGE {} -> 1
- VSLIDEDOWN {} -> 1
+ VMV {} -> 2
+ VID {} -> 2
+ VMSEQ {} -> 2
+ VMERGE {} -> 2
+ VSLIDEDOWN {} -> 2
VSETIVLI {} -> 1
+ VNEG {} -> 2
+ VADD {} -> 2
+ VSUB {} -> 2
+ VMUL {} -> 2
+ VQUOT {} -> 2
+ VSMIN {} -> 2
+ VSMAX {} -> 2
+ VUMIN {} -> 2
+ VUMAX {} -> 2
+ VFMIN {} -> 2
+ VFMAX {} -> 2
-- estimate the subsituted size for jumps to lables
-- jumps to registers have size 1
BCOND {} -> long_bc_jump_size
=====================================
compiler/GHC/CmmToAsm/RV64/Instr.hs
=====================================
@@ -109,12 +109,24 @@ regUsageOfInstr platform instr = case instr of
FABS dst src -> usage (regOp src, regOp dst)
FMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
FMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
- VMV dst src1 -> usage (regOp src1, regOp dst)
- VID dst src1 -> usage (regOp src1, regOp dst)
- VMSEQ dst src op -> usage (regOp src ++ regOp op, regOp dst)
- VMERGE dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
- VSLIDEDOWN dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
+ VMV fmt dst src1 -> usage (regOp src1, regOp dst)
+ VID fmt dst src1 -> usage (regOp src1, regOp dst)
+ VMSEQ fmt dst src op -> usage (regOp src ++ regOp op, regOp dst)
+ VMERGE fmt dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
+ VSLIDEDOWN fmt dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
+ -- WARNING: VSETIVLI is a special case. It changes the interpretation of all vector registers!
VSETIVLI dst _ _ _ _ _ -> usage ([], [dst])
+ VNEG fmt dst src1 -> usage (regOp src1, regOp dst)
+ VADD fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ VSUB fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ VMUL fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ VQUOT fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ VSMIN fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ VSMAX fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ VUMIN fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ VUMAX fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ VFMIN fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ VFMAX fmt dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
FMA _ dst src1 src2 src3 ->
usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
_ -> panic $ "regUsageOfInstr: " ++ instrCon instr
@@ -216,12 +228,23 @@ patchRegsOfInstr instr env = case instr of
FABS o1 o2 -> FABS (patchOp o1) (patchOp o2)
FMIN o1 o2 o3 -> FMIN (patchOp o1) (patchOp o2) (patchOp o3)
FMAX o1 o2 o3 -> FMAX (patchOp o1) (patchOp o2) (patchOp o3)
- VMV o1 o2 -> VMV (patchOp o1) (patchOp o2)
- VID o1 o2 -> VID (patchOp o1) (patchOp o2)
- VMSEQ o1 o2 o3 -> VMSEQ (patchOp o1) (patchOp o2) (patchOp o3)
- VMERGE o1 o2 o3 o4 -> VMERGE (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
- VSLIDEDOWN o1 o2 o3 -> VSLIDEDOWN (patchOp o1) (patchOp o2) (patchOp o3)
+ VMV fmt o1 o2 -> VMV fmt (patchOp o1) (patchOp o2)
+ VID fmt o1 o2 -> VID fmt (patchOp o1) (patchOp o2)
+ VMSEQ fmt o1 o2 o3 -> VMSEQ fmt (patchOp o1) (patchOp o2) (patchOp o3)
+ VMERGE fmt o1 o2 o3 o4 -> VMERGE fmt (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
+ VSLIDEDOWN fmt o1 o2 o3 -> VSLIDEDOWN fmt (patchOp o1) (patchOp o2) (patchOp o3)
VSETIVLI o1 o2 o3 o4 o5 o6 -> VSETIVLI (env o1) o2 o3 o4 o5 o6
+ VNEG fmt o1 o2 -> VNEG fmt (patchOp o1) (patchOp o2)
+ VADD fmt o1 o2 o3 -> VADD fmt (patchOp o1) (patchOp o2) (patchOp o3)
+ VSUB fmt o1 o2 o3 -> VSUB fmt (patchOp o1) (patchOp o2) (patchOp o3)
+ VMUL fmt o1 o2 o3 -> VMUL fmt (patchOp o1) (patchOp o2) (patchOp o3)
+ VQUOT fmt o1 o2 o3 -> VQUOT fmt (patchOp o1) (patchOp o2) (patchOp o3)
+ VSMIN fmt o1 o2 o3 -> VSMIN fmt (patchOp o1) (patchOp o2) (patchOp o3)
+ VSMAX fmt o1 o2 o3 -> VSMAX fmt (patchOp o1) (patchOp o2) (patchOp o3)
+ VUMIN fmt o1 o2 o3 -> VUMIN fmt (patchOp o1) (patchOp o2) (patchOp o3)
+ VUMAX fmt o1 o2 o3 -> VUMAX fmt (patchOp o1) (patchOp o2) (patchOp o3)
+ VFMIN fmt o1 o2 o3 -> VFMIN fmt (patchOp o1) (patchOp o2) (patchOp o3)
+ VFMAX fmt o1 o2 o3 -> VFMAX fmt (patchOp o1) (patchOp o2) (patchOp o3)
FMA s o1 o2 o3 o4 ->
FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
_ -> panic $ "patchRegsOfInstr: " ++ instrCon instr
@@ -319,21 +342,23 @@ mkSpillInstr ::
-- | spill slot to use
Int ->
[Instr]
-mkSpillInstr _config (RegWithFormat reg _fmt) delta slot =
+mkSpillInstr _config (RegWithFormat reg fmt) delta slot =
case off - delta of
- imm | fitsIn12bitImm imm -> [mkStrSpImm imm]
+ imm | fitsIn12bitImm imm && not (isVecFormat fmt) -> [mkStrSpImm imm]
imm ->
[ movImmToTmp imm,
addSpToTmp,
mkStrTmp
]
where
- fmt = case reg of
- RegReal (RealRegSingle n) | n < d0RegNo -> II64
- _ -> FF64
+ fmt'
+ | isVecFormat fmt
+ = fmt
+ | otherwise
+ = scalarMoveFormat fmt
mkStrSpImm imm =
ANN (text "Spill@" <> int (off - delta))
- $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
+ $ STR fmt' (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
movImmToTmp imm =
ANN (text "Spill: TMP <- " <> int imm)
$ MOV tmp (OpImm (ImmInt imm))
@@ -342,7 +367,7 @@ mkSpillInstr _config (RegWithFormat reg _fmt) delta slot =
$ ADD tmp tmp sp
mkStrTmp =
ANN (text "Spill@" <> int (off - delta))
- $ STR fmt (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
+ $ STR fmt' (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
off = spillSlotToOffset slot
@@ -356,21 +381,23 @@ mkLoadInstr ::
-- | spill slot to use
Int ->
[Instr]
-mkLoadInstr _config (RegWithFormat reg _fmt) delta slot =
+mkLoadInstr _config (RegWithFormat reg fmt) delta slot =
case off - delta of
- imm | fitsIn12bitImm imm -> [mkLdrSpImm imm]
+ imm | fitsIn12bitImm imm && not (isVecFormat fmt) -> [mkLdrSpImm imm]
imm ->
[ movImmToTmp imm,
addSpToTmp,
mkLdrTmp
]
where
- fmt = case reg of
- RegReal (RealRegSingle n) | n < d0RegNo -> II64
- _ -> FF64
+ fmt'
+ | isVecFormat fmt
+ = fmt
+ | otherwise
+ = scalarMoveFormat fmt
mkLdrSpImm imm =
ANN (text "Reload@" <> int (off - delta))
- $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
+ $ LDR fmt' (OpReg W64 reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
movImmToTmp imm =
ANN (text "Reload: TMP <- " <> int imm)
$ MOV tmp (OpImm (ImmInt imm))
@@ -379,10 +406,16 @@ mkLoadInstr _config (RegWithFormat reg _fmt) delta slot =
$ ADD tmp tmp sp
mkLdrTmp =
ANN (text "Reload@" <> int (off - delta))
- $ LDR fmt (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
+ $ LDR fmt' (OpReg W64 reg) (OpAddr (AddrReg tmpReg))
off = spillSlotToOffset slot
+scalarMoveFormat :: Format -> Format
+scalarMoveFormat fmt
+ | isFloatFormat fmt = FF64
+ | otherwise = II64
+
+
-- | See if this instruction is telling us the current C stack delta
takeDeltaInstr :: Instr -> Maybe Int
takeDeltaInstr (ANN _ i) = takeDeltaInstr i
@@ -638,12 +671,23 @@ data Instr
FMA FMASign Operand Operand Operand Operand
-- TODO: Care about the variants (<instr>.x.y) -> sum type
- | VMV Operand Operand
- | VID Operand Operand
- | VMSEQ Operand Operand Operand
- | VMERGE Operand Operand Operand Operand
- | VSLIDEDOWN Operand Operand Operand
+ | VMV Format Operand Operand
+ | VID Format Operand Operand
+ | VMSEQ Format Operand Operand Operand
+ | VMERGE Format Operand Operand Operand Operand
+ | VSLIDEDOWN Format Operand Operand Operand
| VSETIVLI Reg Word Width VectorGrouping TailAgnosticFlag MaskAgnosticFlag
+ | VNEG Format Operand Operand
+ | VADD Format Operand Operand Operand
+ | VSUB Format Operand Operand Operand
+ | VMUL Format Operand Operand Operand
+ | VQUOT Format Operand Operand Operand
+ | VSMIN Format Operand Operand Operand
+ | VSMAX Format Operand Operand Operand
+ | VUMIN Format Operand Operand Operand
+ | VUMAX Format Operand Operand Operand
+ | VFMIN Format Operand Operand Operand
+ | VFMAX Format Operand Operand Operand
-- | Operand of a FENCE instruction (@r@, @w@ or @rw@)
data FenceType = FenceRead | FenceWrite | FenceReadWrite
@@ -714,6 +758,17 @@ instrCon i =
VMERGE {} -> "VMERGE"
VSLIDEDOWN {} -> "VSLIDEDOWN"
VSETIVLI {} -> "VSETIVLI"
+ VNEG {} -> "VNEG"
+ VADD {} -> "VADD"
+ VSUB {} -> "VSUB"
+ VMUL {} -> "VMUL"
+ VQUOT {} -> "VQUOT"
+ VSMIN {} -> "VSMIN"
+ VSMAX {} -> "VSMAX"
+ VUMIN {} -> "VUMIN"
+ VUMAX {} -> "VUMAX"
+ VFMIN {} -> "VFMIN"
+ VFMAX {} -> "VFMAX"
FMA variant _ _ _ _ ->
case variant of
FMAdd -> "FMADD"
@@ -725,6 +780,7 @@ data Target
= TBlock BlockId
| TReg Reg
+-- TODO: OpReg should carry the format, not only the width. This would unify OpReg and OpVecReg.
data Operand
= -- | register
OpReg Width Reg
=====================================
compiler/GHC/CmmToAsm/RV64/Ppr.hs
=====================================
@@ -59,7 +59,6 @@ pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
-- elimination, it might be the target of a goto.
( if platformHasSubsectionsViaSymbols platform
then -- See Note [Subsections Via Symbols]
-
line
$ text "\t.long "
<+> pprAsmLabel platform info_lbl
@@ -655,9 +654,12 @@ pprInstr platform instr = case instr of
STR II64 o1 o2 -> op2 (text "\tsd") o1 o2
STR FF32 o1 o2 -> op2 (text "\tfsw") o1 o2
STR FF64 o1 o2 -> op2 (text "\tfsd") o1 o2
- STR (VecFormat 2 FmtFloat) o1 o2@(OpAddr _) -> op2 (text "\tvse32.v") o1 o2
- STR (VecFormat 2 FmtDouble) o1 o2@(OpAddr _) -> op2 (text "\tvse64.v") o1 o2
- STR f o1 o2 -> pprPanic "Unsupported store" ((text . show) f <+> pprOp platform o1 <+> pprOp platform o2)
+ STR fmt@(VecFormat _ FmtInt8) o1 o2 -> configVec fmt $$ op2 (text "\tvse8.v") o1 o2
+ STR fmt@(VecFormat _ FmtInt16) o1 o2 -> configVec fmt $$ op2 (text "\tvse16.v") o1 o2
+ STR fmt@(VecFormat _ FmtInt32) o1 o2 -> configVec fmt $$ op2 (text "\tvse32.v") o1 o2
+ STR fmt@(VecFormat _ FmtInt64) o1 o2 -> configVec fmt $$ op2 (text "\tvse64.v") o1 o2
+ STR fmt@(VecFormat _ FmtFloat) o1 o2 -> configVec fmt $$ op2 (text "\tvse32.v") o1 o2
+ STR fmt@(VecFormat _ FmtDouble) o1 o2 -> configVec fmt $$ op2 (text "\tvse64.v") o1 o2
LDR _f o1 (OpImm (ImmIndex lbl off)) ->
lines_
[ text "\tla" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl,
@@ -671,6 +673,12 @@ pprInstr platform instr = case instr of
LDR II64 o1 o2 -> op2 (text "\tld") o1 o2
LDR FF32 o1 o2 -> op2 (text "\tflw") o1 o2
LDR FF64 o1 o2 -> op2 (text "\tfld") o1 o2
+ LDR fmt@(VecFormat _ FmtInt8) o1 o2 -> configVec fmt $$ op2 (text "\tvle8.v") o1 o2
+ LDR fmt@(VecFormat _ FmtInt16) o1 o2 -> configVec fmt $$ op2 (text "\tvle16.v") o1 o2
+ LDR fmt@(VecFormat _ FmtInt32) o1 o2 -> configVec fmt $$ op2 (text "\tvle32.v") o1 o2
+ LDR fmt@(VecFormat _ FmtInt64) o1 o2 -> configVec fmt $$ op2 (text "\tvle64.v") o1 o2
+ LDR fmt@(VecFormat _ FmtFloat) o1 o2 -> configVec fmt $$ op2 (text "\tvle32.v") o1 o2
+ LDR fmt@(VecFormat _ FmtDouble) o1 o2 -> configVec fmt $$ op2 (text "\tvle64.v") o1 o2
LDRU II8 o1 o2 -> op2 (text "\tlbu") o1 o2
LDRU II16 o1 o2 -> op2 (text "\tlhu") o1 o2
LDRU II32 o1 o2 -> op2 (text "\tlwu") o1 o2
@@ -681,8 +689,12 @@ pprInstr platform instr = case instr of
LDRU FF64 o1 o2@(OpAddr (AddrReg _)) -> op2 (text "\tfld") o1 o2
LDRU FF64 o1 o2@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tfld") o1 o2
-- vectors
- LDRU (VecFormat 2 FmtFloat) o1 o2@(OpAddr _) -> op2 (text "\tvle32.v") o1 o2
- LDRU (VecFormat 2 FmtDouble) o1 o2@(OpAddr _) -> op2 (text "\tvle64.v") o1 o2
+ LDRU fmt@(VecFormat _ FmtInt8) o1 o2 -> configVec fmt $$ op2 (text "\tvle8.v") o1 o2
+ LDRU fmt@(VecFormat _ FmtInt16) o1 o2 -> configVec fmt $$ op2 (text "\tvle16.v") o1 o2
+ LDRU fmt@(VecFormat _ FmtInt32) o1 o2 -> configVec fmt $$ op2 (text "\tvle32.v") o1 o2
+ LDRU fmt@(VecFormat _ FmtInt64) o1 o2 -> configVec fmt $$ op2 (text "\tvle64.v") o1 o2
+ LDRU fmt@(VecFormat _ FmtFloat) o1 o2 -> configVec fmt $$ op2 (text "\tvle32.v") o1 o2
+ LDRU fmt@(VecFormat _ FmtDouble) o1 o2 -> configVec fmt $$ op2 (text "\tvle64.v") o1 o2
LDRU f o1 o2 -> pprPanic "Unsupported unsigned load" ((text . show) f <+> pprOp platform o1 <+> pprOp platform o2)
FENCE r w -> line $ text "\tfence" <+> pprFenceType r <> char ',' <+> pprFenceType w
FCVT FloatToFloat o1@(OpReg W32 _) o2@(OpReg W64 _) -> op2 (text "\tfcvt.s.d") o1 o2
@@ -706,10 +718,12 @@ pprInstr platform instr = case instr of
$ line (pprOp platform o1 <> text "->" <> pprOp platform o2)
FABS o1 o2 | isSingleOp o2 -> op2 (text "\tfabs.s") o1 o2
FABS o1 o2 | isDoubleOp o2 -> op2 (text "\tfabs.d") o1 o2
- FMIN o1 o2 o3 | isSingleOp o1 -> op3 (text "\tfmin.s") o1 o2 o3
- | isDoubleOp o2 -> op3 (text "\tfmin.d") o1 o2 o3
- FMAX o1 o2 o3 | isSingleOp o1 -> op3 (text "\tfmax.s") o1 o2 o3
- | isDoubleOp o2 -> op3 (text "\tfmax.d") o1 o2 o3
+ FMIN o1 o2 o3
+ | isSingleOp o1 -> op3 (text "\tfmin.s") o1 o2 o3
+ | isDoubleOp o2 -> op3 (text "\tfmin.d") o1 o2 o3
+ FMAX o1 o2 o3
+ | isSingleOp o1 -> op3 (text "\tfmax.s") o1 o2 o3
+ | isDoubleOp o2 -> op3 (text "\tfmax.d") o1 o2 o3
FMA variant d r1 r2 r3 ->
let fma = case variant of
FMAdd -> text "\tfmadd" <> dot <> floatPrecission d
@@ -717,21 +731,44 @@ pprInstr platform instr = case instr of
FNMAdd -> text "\tfnmadd" <> dot <> floatPrecission d
FNMSub -> text "\tfnmsub" <> dot <> floatPrecission d
in op4 fma d r1 r2 r3
-
- VMV o1@(OpReg w _) o2 | isFloatOp o1 && isVectorOp o2 -> op2 (text "\tvfmv" <> dot <> text "f" <> dot <> text "s") o1 o2
- VMV o1@(OpReg _w _) o2 | isFloatOp o2 -> op2 (text "\tvfmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "f") o1 o2
- VMV o1 o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> opToVInstrSuffix o2) o1 o2
+ VMV fmt o1@(OpReg w _) o2 | isFloatOp o1 && isVectorOp o2 -> configVec fmt $$ op2 (text "\tvfmv" <> dot <> text "f" <> dot <> text "s") o1 o2
+ VMV fmt o1@(OpReg _w _) o2 | isFloatOp o2 -> configVec fmt $$ op2 (text "\tvfmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "f") o1 o2
+ VMV fmt o1 o2 -> configVec fmt $$ op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> opToVInstrSuffix o2) o1 o2
-- TODO: Remove o2 from constructor
- VID o1 _o2 -> op1 (text "\tvid.v") o1
+ VID fmt o1 _o2 -> configVec fmt $$ op1 (text "\tvid.v") o1
-- TODO: This expects int register as third operand: Generalize by calculating
-- the instruction suffix (".vx")
- VMSEQ o1 o2 o3 -> op3 (text "\tvmseq.vx") o1 o2 o3
+ VMSEQ fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmseq.vx") o1 o2 o3
-- TODO: All operands need to be vector registers. Make this more general or
-- validate this constraint.
- VMERGE o1 o2 o3 o4 -> op4 (text "\tvmerge.vvm") o1 o2 o3 o4
- VSLIDEDOWN o1 o2 o3 -> op3 (text "\tvslidedown.vx") o1 o2 o3
- VSETIVLI dst len width grouping ta ma -> line $
- text "\tvsetivli" <+> pprReg W64 dst <> comma <+> (text.show) len <> comma <+> pprVWidth width <> comma <+> pprGrouping grouping <> comma <+> pprTA ta <> comma <+> pprMasking ma
+ VMERGE fmt o1 o2 o3 o4 -> configVec fmt $$ op4 (text "\tvmerge.vvm") o1 o2 o3 o4
+ VSLIDEDOWN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvslidedown.vx") o1 o2 o3
+ -- TODO: Use configVec, adjust VSETIVLI to contain only format?
+ VSETIVLI dst len width grouping ta ma ->
+ line
+ $ text "\tvsetivli"
+ <+> pprReg W64 dst
+ <> comma
+ <+> (text . show) len
+ <> comma
+ <+> pprVWidth width
+ <> comma
+ <+> pprGrouping grouping
+ <> comma
+ <+> pprTA ta
+ <> comma
+ <+> pprMasking ma
+ VNEG fmt o1 o2 -> configVec fmt $$ op2 (text "\tvfneg.v") o1 o2
+ VADD fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfadd.vv") o1 o2 o3
+ VSUB fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfsub.vv") o1 o2 o3
+ VMUL fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfmul.vv") o1 o2 o3
+ VQUOT fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfdiv.vv") o1 o2 o3
+ VSMIN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmin.vv") o1 o2 o3
+ VSMAX fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmax.vv") o1 o2 o3
+ VUMIN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvminu.vv") o1 o2 o3
+ VUMAX fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmaxu.vv") o1 o2 o3
+ VFMIN fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfmin.vv") o1 o2 o3
+ VFMAX fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvfmax.vv") o1 o2 o3
instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr
where
op1 op o1 = line $ op <+> pprOp platform o1
@@ -749,7 +786,7 @@ pprInstr platform instr = case instr of
pprTA TA = text "ta"
pprTA TU = text "tu"
- pprVWidth :: IsLine doc => Width -> doc
+ pprVWidth :: (IsLine doc) => Width -> doc
pprVWidth W8 = text "e8"
pprVWidth W16 = text "e16"
pprVWidth W32 = text "e32"
@@ -767,17 +804,22 @@ pprInstr platform instr = case instr of
pprMasking MA = text "ma"
pprMasking MU = text "mu"
- opToVInstrSuffix :: IsLine doc => Operand -> doc
+ opToVInstrSuffix :: (IsLine doc) => Operand -> doc
opToVInstrSuffix op | isIntOp op = text "x"
opToVInstrSuffix op | isFloatOp op = text "f"
opToVInstrSuffix op | isVectorOp op = text "v"
opToVInstrSuffix op = pprPanic "Unsupported operand for vector instruction" (pprOp platform op)
- floatWidthSuffix :: IsLine doc => Width -> doc
+ floatWidthSuffix :: (IsLine doc) => Width -> doc
floatWidthSuffix W32 = text "s"
floatWidthSuffix W64 = text "d"
floatWidthSuffix w = pprPanic "Unsupported floating point vector operation width" (ppr w)
+ configVec :: (IsDoc doc) => Format -> doc
+ configVec (VecFormat length fmt) =
+ pprInstr platform (VSETIVLI zeroReg (fromIntegral length) ((formatToWidth . scalarFormatFormat) fmt) M1 TA MA)
+ configVec fmt = pprPanic "Unsupported vector configuration" ((text . show) fmt)
+
floatOpPrecision :: Platform -> Operand -> Operand -> String
floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isSingleOp l && isSingleOp r = "s" -- single precision
floatOpPrecision _p l r | isFloatOp l && isFloatOp r && isDoubleOp l && isDoubleOp r = "d" -- double precision
=====================================
compiler/GHC/CmmToAsm/RV64/Regs.hs
=====================================
@@ -144,6 +144,10 @@ allGpArgRegs = map regSingle [a0RegNo .. a7RegNo]
allFpArgRegs :: [Reg]
allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo]
+-- | Literally all general vector registers (no status registers)
+allVecRegs :: [Reg]
+allVecRegs = map regSingle [v0RegNo .. v31RegNo]
+
-- * Addressing modes
-- | Addressing modes
=====================================
testsuite/tests/simd/should_run/all.T
=====================================
@@ -26,14 +26,14 @@ setTestOpts(
# TODO: Revert debug trace dumps
test('simd000', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
-test('simd001', [], compile_and_run, [''])
-test('simd002', [], compile_and_run, [''])
-test('simd003', [], compile_and_run, [''])
-test('simd004', [], compile_and_run, ['-O2'])
-test('simd005', [], compile_and_run, [''])
-test('simd006', [], compile_and_run, [''])
-test('simd007', [], compile_and_run, [''])
-test('simd008', [], compile_and_run, [''])
+test('simd001', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd002', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd003', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd004', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm -O2'])
+test('simd005', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd006', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd007', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
+test('simd008', [], compile_and_run, ['-opta=-march=rv64gv -dppr-debug -ddump-to-file -ddump-asm'])
test('simd009', [ req_th
, extra_files(['Simd009b.hs', 'Simd009c.hs'])
, unless(have_cpu_feature('avx'), skip)
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e8d84f6a448d91eebe48c130b6c66a9ee0ffb9fd...0682aed34baf0f7a4767d1d1571982851086f7dc
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e8d84f6a448d91eebe48c130b6c66a9ee0ffb9fd...0682aed34baf0f7a4767d1d1571982851086f7dc
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241110/fbe15279/attachment-0001.html>
More information about the ghc-commits
mailing list