[Git][ghc/ghc][wip/supersven/riscv-vectors] 6 commits: Add documentation
Sven Tennie (@supersven)
gitlab at gitlab.haskell.org
Sun Jan 5 17:29:55 UTC 2025
Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC
Commits:
33e480c7 by Sven Tennie at 2024-12-31T11:23:06+01:00
Add documentation
- - - - -
828b3660 by Sven Tennie at 2024-12-31T17:37:13+01:00
Cleanup CodeGen
- - - - -
a6edfc38 by Sven Tennie at 2024-12-31T18:03:39+01:00
Add TODOs
- - - - -
3a426a77 by Sven Tennie at 2025-01-05T18:05:59+01:00
Add comment about freeReg for [X,Y,Z]MM
- - - - -
97755f41 by Sven Tennie at 2025-01-05T18:07:32+01:00
Define MAX_REAL_YMM_REG and MAX_REAL_ZMM_REG
- - - - -
91169343 by Sven Tennie at 2025-01-05T18:28:30+01:00
VID needs only one register; fix MO_V_Broadcast; refactor MO_VF_Insert
and MO_V_Insert
- - - - -
6 changed files:
- compiler/CodeGen.Platform.h
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
- docs/users_guide/using.rst
- rts/include/stg/MachRegs.h
Changes:
=====================================
compiler/CodeGen.Platform.h
=====================================
@@ -1253,6 +1253,8 @@ freeReg REG_D5 = False
freeReg REG_D6 = False
# endif
+-- N.B. XMM* YMM* and ZMM* refer to the same registers on RISCV64. Thus,
+-- defining freeReg for one vector register size is good enough.
# if defined(REG_XMM1)
freeReg REG_XMM1 = False
# endif
=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -660,14 +660,16 @@ getRegister' config plat expr =
Amode addr addr_code <- getAmode plat width mem
case (width, format) of
(_w, f)
- | VecFormat l vf <- f ->
+ | isVecFormat f ->
+ -- TODO: Check for configured vectorMinBits
pure
( Any
format
( \dst ->
- unitOL (COMMENT (text "XXX here")) `appOL`
addr_code `snocOL`
annExpr expr
+ -- We pattern match on the format in the pretty-printer.
+ -- So, we can simply emit LDRU here for all vectors.
(LDRU format (OpReg width dst) (OpAddr addr))
)
)
@@ -682,22 +684,7 @@ getRegister' config plat expr =
`snocOL` LDRU format (OpReg width dst) (OpAddr addr)
)
)
- -- TODO: Load vector - instructions VLW, VLB, VLH, ... Encode in ppr of LDRU?
- -- riscv64-unknown-linux-gnu-ghc: panic! (the 'impossible' happened)
- -- GHC version 9.13.20241013:
- -- Width too big! Cannot load: W128
- -- Fx2V128[Sp + 8]
- -- Call stack:
- -- CallStack (from HasCallStack):
- -- callStackDoc, called at compiler/GHC/Utils/Panic.hs:190:37 in ghc-9.13-inplace:GHC.Utils.Panic
- -- pprPanic, called at compiler/GHC/CmmToAsm/RV64/CodeGen.hs:678:11 in ghc-9.13-inplace:GHC.CmmToAsm.RV64.CodeGen
- -- CallStack (from HasCallStack):
- -- panic, called at compiler/GHC/Utils/Error.hs:507:29 in ghc-9.13-inplace:GHC.Utils.Error
-
- -- Fx2V128 -> cat= Float, length = 2, widthInBits = 128
-
- _ ->
- pprPanic ("Width too big! Cannot load: " ++ show width) (pdoc plat expr)
+ _ -> pprPanic ("Width too big! Cannot load: " ++ show width) (pdoc plat expr)
CmmStackSlot _ _ ->
pprPanic "getRegister' (CmmStackSlot): " (pdoc plat expr)
CmmReg reg ->
@@ -839,7 +826,7 @@ getRegister' config plat expr =
MO_V_Broadcast length w -> do
(reg_val, format_val, code_val) <- getSomeReg e
let w_val = formatToWidth format_val
- pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
+ pure $ Any (vecFormat (cmmVec length (cmmBits w))) $ \dst ->
code_val `snocOL`
annExpr expr
(VMV (VecFormat length (intScalarFormat w)) (OpReg w dst) (OpReg w_val reg_val))
@@ -1324,53 +1311,9 @@ getRegister' config plat expr =
-- ret
--
-- https://godbolt.org/z/sEG8MrM8P
- MO_VF_Insert length w ->
- do
- (reg_v, format_v, code_v) <- getSomeReg x
- (reg_f, format_f, code_f) <- getFloatReg y
- (reg_idx, format_idx, code_idx) <- getSomeReg z
- (reg_l, format_l, code_l) <- getSomeReg (CmmLit (CmmInt (toInteger length) W64))
- tmp <- getNewRegNat (VecFormat length (floatScalarFormat w))
- let targetFormat = VecFormat length (floatScalarFormat w)
- pure $ Any targetFormat $ \dst ->
- code_v `appOL`
- code_f `appOL`
- code_idx `appOL`
- code_l `snocOL`
- annExpr expr
- -- Build mask for index
- -- 1. fill elements with index numbers
- -- TODO: The Width is made up
- (VID (VecFormat length (intScalarFormat w)) (OpReg W8 v0Reg) (OpReg (formatToWidth format_l) reg_l)) `snocOL`
- -- 2. Splat value into tmp vector
- VMV (VecFormat length (floatScalarFormat w)) (OpReg w tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
- -- 3. Merge with mask -> set element at index
- VMSEQ (VecFormat length (floatScalarFormat w)) (OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
- VMERGE (VecFormat length (floatScalarFormat w)) (OpReg w dst) (OpReg (formatToWidth format_v) reg_v) (OpReg w tmp) (OpReg W8 v0Reg)
- -- TODO: Duplication with MO_VF_Insert
- MO_V_Insert length w ->
- do
- (reg_v, format_v, code_v) <- getSomeReg x
- (reg_f, format_f, code_f) <- getSomeReg y
- (reg_idx, format_idx, code_idx) <- getSomeReg z
- (reg_l, format_l, code_l) <- getSomeReg (CmmLit (CmmInt (toInteger length) W64))
- tmp <- getNewRegNat (VecFormat length (intScalarFormat w))
- let targetFormat = VecFormat length (intScalarFormat w)
- pure $ Any targetFormat $ \dst ->
- code_v `appOL`
- code_f `appOL`
- code_idx `appOL`
- code_l `snocOL`
- annExpr expr
- -- Build mask for index
- -- 1. fill elements with index numbers
- -- TODO: The Width is made up
- (VID (VecFormat length (intScalarFormat w)) (OpReg W8 v0Reg) (OpReg (formatToWidth format_l) reg_l)) `snocOL`
- -- 2. Splat value into tmp vector
- VMV (VecFormat length (intScalarFormat w)) (OpReg w tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
- -- 3. Merge with mask -> set element at index
- VMSEQ (VecFormat length (intScalarFormat w)) (OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
- VMERGE (VecFormat length (intScalarFormat w)) (OpReg w dst) (OpReg (formatToWidth format_v) reg_v) (OpReg w tmp) (OpReg W8 v0Reg)
+ MO_VF_Insert length width ->vecInsert floatScalarFormat length width
+
+ MO_V_Insert length width -> vecInsert intScalarFormat length width
_ ->
pprPanic "getRegister' (unhandled ternary CmmMachOp): "
@@ -1378,6 +1321,31 @@ getRegister' config plat expr =
<+> text "in"
<+> pdoc plat expr
where
+ vecInsert :: (Width -> ScalarFormat) -> Int -> Width -> NatM Register
+ vecInsert widthToScalarFormat length width =
+ do
+ let targetVecFormat = VecFormat length (widthToScalarFormat width)
+ (reg_v, format_v, code_v) <- getSomeReg x
+ (reg_f, format_f, code_f) <- getSomeReg y
+ (reg_idx, format_idx, code_idx) <- getSomeReg z
+ tmp <- getNewRegNat targetVecFormat
+ pure $ Any targetVecFormat $ \dst ->
+ code_v `appOL`
+ code_f `appOL`
+ code_idx `snocOL`
+ annExpr expr
+ -- 1. fill elements with index numbers
+ -- TODO: The Width is made up
+ -- TODO: Is it safe to use v0 (default mask register) here? Instructions may be shuffled around...
+ -- Can we use an explicitly fetched register as mask (depends on instructions)?
+ (VID targetVecFormat (OpReg W8 v0Reg)) `snocOL`
+ -- 2. Build mask
+ VMSEQ targetVecFormat(OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
+ -- 3. Splat value into tmp vector
+ VMV targetVecFormat (OpReg width tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
+ -- 4. Merge with mask -> set element at index
+ VMERGE targetVecFormat (OpReg width dst) (OpReg (formatToWidth format_v) reg_v) (OpReg width tmp) (OpReg W8 v0Reg)
+
float3Op w op = do
(reg_fx, format_x, code_fx) <- getFloatReg x
(reg_fy, format_y, code_fy) <- getFloatReg y
@@ -1474,6 +1442,18 @@ getRegister' config plat expr =
unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 1)))
)
+-- TODO: Missing MachOps:
+-- - MO_V_Add
+-- - MO_V_Sub
+-- - MO_V_Mul
+-- - MO_VS_Quot
+-- - MO_VS_Rem
+-- - MO_VS_Neg
+-- - MO_VU_Quot
+-- - MO_VU_Rem
+-- - MO_V_Shuffle
+-- - MO_VF_Shuffle
+
-- | Instructions to sign-extend the value in the given register from width @w@
-- up to width @w'@.
signExtendReg :: Width -> Width -> Reg -> NatM (Reg, OrdList Instr)
=====================================
compiler/GHC/CmmToAsm/RV64/Instr.hs
=====================================
@@ -110,7 +110,7 @@ regUsageOfInstr platform instr = case instr of
FMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
FMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
VMV fmt dst src1 -> usage (regOp src1, regOp dst)
- VID fmt dst src1 -> usage (regOp src1, regOp dst)
+ VID fmt dst -> usage ([], regOp dst)
VMSEQ fmt dst src op -> usage (regOp src ++ regOp op, regOp dst)
VMERGE fmt dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
VSLIDEDOWN fmt dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
@@ -231,7 +231,7 @@ patchRegsOfInstr instr env = case instr of
FMIN o1 o2 o3 -> FMIN (patchOp o1) (patchOp o2) (patchOp o3)
FMAX o1 o2 o3 -> FMAX (patchOp o1) (patchOp o2) (patchOp o3)
VMV fmt o1 o2 -> VMV fmt (patchOp o1) (patchOp o2)
- VID fmt o1 o2 -> VID fmt (patchOp o1) (patchOp o2)
+ VID fmt o1 -> VID fmt (patchOp o1)
VMSEQ fmt o1 o2 o3 -> VMSEQ fmt (patchOp o1) (patchOp o2) (patchOp o3)
VMERGE fmt o1 o2 o3 o4 -> VMERGE fmt (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
VSLIDEDOWN fmt o1 o2 o3 -> VSLIDEDOWN fmt (patchOp o1) (patchOp o2) (patchOp o3)
@@ -676,7 +676,7 @@ data Instr
-- TODO: Care about the variants (<instr>.x.y) -> sum type
| VMV Format Operand Operand
- | VID Format Operand Operand
+ | VID Format Operand
| VMSEQ Format Operand Operand Operand
| VMERGE Format Operand Operand Operand Operand
| VSLIDEDOWN Format Operand Operand Operand
=====================================
compiler/GHC/CmmToAsm/RV64/Ppr.hs
=====================================
@@ -750,7 +750,7 @@ pprInstr platform instr = case instr of
| isVectorRegOp o1 && isVectorRegOp o2 -> configVec fmt $$ op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "v") o1 o2
| True -> pprPanic "RV64.pprInstr - impossible vector move (VMV)" (pprOp platform o1 <+> pprOp platform o2 <+> text "fmt" <> colon <> (text . show) fmt)
-- TODO: Remove o2 from constructor
- VID fmt o1 _o2 -> configVec fmt $$ op1 (text "\tvid.v") o1
+ VID fmt o1 -> configVec fmt $$ op1 (text "\tvid.v") o1
-- TODO: This expects int register as third operand: Generalize by calculating
-- the instruction suffix (".vx")
VMSEQ fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmseq.vx") o1 o2 o3
=====================================
docs/users_guide/using.rst
=====================================
@@ -1743,6 +1743,37 @@ Some flags only make sense for particular target platforms.
multiply-add, which might perform non-IEEE-compliant software emulation on
some platforms (depending on the implementation of the C standard library).
+.. ghc-flag:: -mvector-min-width-bits
+ :shortdesc: (RISC-V NCG only) Minimal width of vector registers in bits
+ :type: dynamic
+ :category: platform-options
+
+ :default: Not set, which implies that no vector registers are used.
+
+ :since: 9.12.2
+
+ Though the RISC-V ISA supports dynamic configuration of vector registers via
+ "grouping", the GHC register allocators need to know what they can manage.
+ In RISC-V terms, the value is the smallest VLEN of the machines the program
+ should be executed on.
+
+ Possible values are ``128``, ``256`` and ``512`` (these values are due to
+ internal alignment with other architectures).
+
+ The question of "How big are my vector registers?" frequently appears in
+ other architectures as well. So, it could be that this parameter will be
+ useful for other architectures in future GHC versions. Right now, it is only
+ used by the native code generator (NCG) of RISC-V 64.
+
+ GHC only supports the "'V' Standard Extension for Vector Operations, Version
+ 1.0" as defined in "The RISC-V Instruction Set Manual, Volume I".
+ Vendor-specific extensions and older standards are not supported. (There are
+ still many machines with older standard implementations around. If in doubt,
+ check your machine specification.)
+
+ This flag also enables vector operation support in the RISC-V NCG. Some
+ programs with vector-specific code cannot be compiled without it.
+
Haddock
-------
=====================================
rts/include/stg/MachRegs.h
=====================================
@@ -219,6 +219,42 @@
# endif
#endif
+#if !defined(MAX_REAL_YMM_REG)
+# if defined(REG_YMM6)
+# define MAX_REAL_YMM_REG 6
+# elif defined(REG_YMM5)
+# define MAX_REAL_YMM_REG 5
+# elif defined(REG_YMM4)
+# define MAX_REAL_YMM_REG 4
+# elif defined(REG_YMM3)
+# define MAX_REAL_YMM_REG 3
+# elif defined(REG_YMM2)
+# define MAX_REAL_YMM_REG 2
+# elif defined(REG_YMM1)
+# define MAX_REAL_YMM_REG 1
+# else
+# define MAX_REAL_YMM_REG 0
+# endif
+#endif
+
+#if !defined(MAX_REAL_ZMM_REG)
+# if defined(REG_ZMM6)
+# define MAX_REAL_ZMM_REG 6
+# elif defined(REG_ZMM5)
+# define MAX_REAL_ZMM_REG 5
+# elif defined(REG_ZMM4)
+# define MAX_REAL_ZMM_REG 4
+# elif defined(REG_ZMM3)
+# define MAX_REAL_ZMM_REG 3
+# elif defined(REG_ZMM2)
+# define MAX_REAL_ZMM_REG 2
+# elif defined(REG_ZMM1)
+# define MAX_REAL_ZMM_REG 1
+# else
+# define MAX_REAL_ZMM_REG 0
+# endif
+#endif
+
/* define NO_ARG_REGS if we have no argument registers at all (we can
* optimise certain code paths using this predicate).
*/
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/158c705ba6815789b47819ce3df93d5240758d94...91169343d138c1afd81ee9aaa04711b9659d731b
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/158c705ba6815789b47819ce3df93d5240758d94...91169343d138c1afd81ee9aaa04711b9659d731b
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250105/c45006f9/attachment-0001.html>
More information about the ghc-commits
mailing list