[Git][ghc/ghc][wip/supersven/riscv-vectors] 6 commits: Add documentation

Sven Tennie (@supersven) gitlab at gitlab.haskell.org
Sun Jan 5 17:29:55 UTC 2025



Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC


Commits:
33e480c7 by Sven Tennie at 2024-12-31T11:23:06+01:00
Add documentation

- - - - -
828b3660 by Sven Tennie at 2024-12-31T17:37:13+01:00
Cleanup CodeGen

- - - - -
a6edfc38 by Sven Tennie at 2024-12-31T18:03:39+01:00
Add TODOs

- - - - -
3a426a77 by Sven Tennie at 2025-01-05T18:05:59+01:00
Add comment about freeReg for [X,Y,Z]MM

- - - - -
97755f41 by Sven Tennie at 2025-01-05T18:07:32+01:00
Define MAX_REAL_YMM_REG and MAX_REAL_ZMM_REG

- - - - -
91169343 by Sven Tennie at 2025-01-05T18:28:30+01:00
VID needs only one register; fix MO_V_Broadcast; refactor MO_VF_Insert
and MO_V_Insert

- - - - -


6 changed files:

- compiler/CodeGen.Platform.h
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
- docs/users_guide/using.rst
- rts/include/stg/MachRegs.h


Changes:

=====================================
compiler/CodeGen.Platform.h
=====================================
@@ -1253,6 +1253,8 @@ freeReg REG_D5    = False
 freeReg REG_D6    = False
 # endif
 
+-- N.B. XMM*, YMM* and ZMM* refer to the same registers on RISCV64. Thus,
+-- defining freeReg for one vector register size is sufficient.
 # if defined(REG_XMM1)
 freeReg REG_XMM1    = False
 # endif


=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -660,14 +660,16 @@ getRegister' config plat expr =
       Amode addr addr_code <- getAmode plat width mem
       case (width, format) of
         (_w, f)
-          | VecFormat l vf <- f ->
+          | isVecFormat f ->
+              -- TODO: Check for configured vectorMinBits
               pure
                 ( Any
                     format
                     ( \dst ->
-                        unitOL (COMMENT (text "XXX here")) `appOL`
                         addr_code `snocOL`
                           annExpr expr
+                            -- The pretty-printer pattern matches on the format,
+                            -- so we can simply emit LDRU here for all vectors.
                             (LDRU format (OpReg width dst) (OpAddr addr))
                     )
                 )
@@ -682,22 +684,7 @@ getRegister' config plat expr =
                           `snocOL` LDRU format (OpReg width dst) (OpAddr addr)
                     )
                 )
-        -- TODO: Load vector - instructions VLW, VLB, VLH, ... Encode in ppr of LDRU?
-        -- riscv64-unknown-linux-gnu-ghc: panic! (the 'impossible' happened)
-        -- GHC version 9.13.20241013:
-        --       Width too big! Cannot load: W128
-        -- Fx2V128[Sp + 8]
-        -- Call stack:
-        --     CallStack (from HasCallStack):
-        --       callStackDoc, called at compiler/GHC/Utils/Panic.hs:190:37 in ghc-9.13-inplace:GHC.Utils.Panic
-        --       pprPanic, called at compiler/GHC/CmmToAsm/RV64/CodeGen.hs:678:11 in ghc-9.13-inplace:GHC.CmmToAsm.RV64.CodeGen
-        -- CallStack (from HasCallStack):
-        --   panic, called at compiler/GHC/Utils/Error.hs:507:29 in ghc-9.13-inplace:GHC.Utils.Error
-
-        -- Fx2V128 -> cat= Float, length = 2, widthInBits = 128
-
-        _ ->
-          pprPanic ("Width too big! Cannot load: " ++ show width) (pdoc plat expr)
+        _ -> pprPanic ("Width too big! Cannot load: " ++ show width) (pdoc plat expr)
     CmmStackSlot _ _ ->
       pprPanic "getRegister' (CmmStackSlot): " (pdoc plat expr)
     CmmReg reg ->
@@ -839,7 +826,7 @@ getRegister' config plat expr =
         MO_V_Broadcast length w -> do
           (reg_val, format_val, code_val) <- getSomeReg e
           let w_val = formatToWidth format_val
-          pure $ Any (vecFormat (cmmVec length (cmmFloat w))) $ \dst ->
+          pure $ Any (vecFormat (cmmVec length (cmmBits w))) $ \dst ->
             code_val `snocOL`
             annExpr expr
               (VMV (VecFormat length (intScalarFormat w)) (OpReg w dst) (OpReg w_val reg_val))
@@ -1324,53 +1311,9 @@ getRegister' config plat expr =
         --   ret
         --
         -- https://godbolt.org/z/sEG8MrM8P
-        MO_VF_Insert length w ->
-          do
-            (reg_v, format_v, code_v) <- getSomeReg x
-            (reg_f, format_f, code_f) <- getFloatReg y
-            (reg_idx, format_idx, code_idx) <- getSomeReg z
-            (reg_l, format_l, code_l) <- getSomeReg (CmmLit (CmmInt (toInteger length) W64))
-            tmp <- getNewRegNat (VecFormat length (floatScalarFormat w))
-            let targetFormat = VecFormat length (floatScalarFormat w)
-            pure $ Any targetFormat $ \dst ->
-              code_v `appOL`
-              code_f `appOL`
-              code_idx `appOL`
-              code_l `snocOL`
-              annExpr expr
-              -- Build mask for index
-              -- 1. fill elements with index numbers
-              -- TODO: The Width is made up
-               (VID (VecFormat length (intScalarFormat w)) (OpReg W8 v0Reg) (OpReg (formatToWidth format_l) reg_l)) `snocOL`
-              -- 2. Splat value into tmp vector
-              VMV (VecFormat length (floatScalarFormat w)) (OpReg w tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
-              -- 3. Merge with mask -> set element at index
-              VMSEQ (VecFormat length (floatScalarFormat w)) (OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
-              VMERGE (VecFormat length (floatScalarFormat w)) (OpReg w dst) (OpReg (formatToWidth format_v) reg_v)  (OpReg w tmp) (OpReg W8 v0Reg)
-        -- TODO: Duplication with MO_VF_Insert
-        MO_V_Insert length w ->
-          do
-            (reg_v, format_v, code_v) <- getSomeReg x
-            (reg_f, format_f, code_f) <- getSomeReg y
-            (reg_idx, format_idx, code_idx) <- getSomeReg z
-            (reg_l, format_l, code_l) <- getSomeReg (CmmLit (CmmInt (toInteger length) W64))
-            tmp <- getNewRegNat (VecFormat length (intScalarFormat w))
-            let targetFormat = VecFormat length (intScalarFormat w)
-            pure $ Any targetFormat $ \dst ->
-              code_v `appOL`
-              code_f `appOL`
-              code_idx `appOL`
-              code_l `snocOL`
-              annExpr expr
-              -- Build mask for index
-              -- 1. fill elements with index numbers
-              -- TODO: The Width is made up
-               (VID (VecFormat length (intScalarFormat w)) (OpReg W8 v0Reg) (OpReg (formatToWidth format_l) reg_l)) `snocOL`
-              -- 2. Splat value into tmp vector
-              VMV (VecFormat length (intScalarFormat w)) (OpReg w tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
-              -- 3. Merge with mask -> set element at index
-              VMSEQ (VecFormat length (intScalarFormat w)) (OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
-              VMERGE (VecFormat length (intScalarFormat w)) (OpReg w dst) (OpReg (formatToWidth format_v) reg_v)  (OpReg w tmp) (OpReg W8 v0Reg)
+        MO_VF_Insert length width ->vecInsert floatScalarFormat length width
+
+        MO_V_Insert length width -> vecInsert intScalarFormat length width
 
         _ ->
           pprPanic "getRegister' (unhandled ternary CmmMachOp): "
@@ -1378,6 +1321,31 @@ getRegister' config plat expr =
             <+> text "in"
             <+> pdoc plat expr
       where
+        vecInsert :: (Width -> ScalarFormat) -> Int -> Width -> NatM Register
+        vecInsert widthToScalarFormat length width =
+            do
+              let targetVecFormat = VecFormat length (widthToScalarFormat width)
+              (reg_v, format_v, code_v) <- getSomeReg x
+              (reg_f, format_f, code_f) <- getSomeReg y
+              (reg_idx, format_idx, code_idx) <- getSomeReg z
+              tmp <- getNewRegNat targetVecFormat
+              pure $ Any targetVecFormat $ \dst ->
+                code_v `appOL`
+                code_f `appOL`
+                code_idx `snocOL`
+                annExpr expr
+                -- 1. fill elements with index numbers
+                -- TODO: The Width is made up
+                -- TODO: Is it safe to use v0 (default mask register) here? Instructions may be shuffled around...
+                -- Can we use an explicitly fetched register as mask (depends on instructions)?
+                 (VID targetVecFormat (OpReg W8 v0Reg)) `snocOL`
+                -- 2. Build mask
+                VMSEQ  targetVecFormat(OpReg W8 v0Reg) (OpReg W8 v0Reg) (OpReg (formatToWidth format_idx) reg_idx) `snocOL`
+                -- 3. Splat value into tmp vector
+                VMV targetVecFormat (OpReg width tmp) (OpReg (formatToWidth format_f) reg_f) `snocOL`
+                -- 4. Merge with mask -> set element at index
+                VMERGE targetVecFormat (OpReg width dst) (OpReg (formatToWidth format_v) reg_v) (OpReg width tmp) (OpReg W8 v0Reg)
+
         float3Op w op = do
           (reg_fx, format_x, code_fx) <- getFloatReg x
           (reg_fy, format_y, code_fy) <- getFloatReg y
@@ -1474,6 +1442,18 @@ getRegister' config plat expr =
                       unitOL $ annExpr expr (ADD (OpReg w dst) zero (OpImm (ImmInt 1)))
                   )
 
+-- TODO: Missing MachOps:
+-- - MO_V_Add
+-- - MO_V_Sub
+-- - MO_V_Mul
+-- - MO_VS_Quot
+-- - MO_VS_Rem
+-- - MO_VS_Neg
+-- - MO_VU_Quot
+-- - MO_VU_Rem
+-- - MO_V_Shuffle
+-- - MO_VF_Shuffle
+
 -- | Instructions to sign-extend the value in the given register from width @w@
 -- up to width @w'@.
 signExtendReg :: Width -> Width -> Reg -> NatM (Reg, OrdList Instr)


=====================================
compiler/GHC/CmmToAsm/RV64/Instr.hs
=====================================
@@ -110,7 +110,7 @@ regUsageOfInstr platform instr = case instr of
   FMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
   FMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
   VMV fmt dst src1 -> usage (regOp src1, regOp dst)
-  VID fmt dst src1 -> usage (regOp src1, regOp dst)
+  VID fmt dst -> usage ([], regOp dst)
   VMSEQ fmt dst src op -> usage (regOp src ++ regOp op, regOp dst)
   VMERGE fmt dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
   VSLIDEDOWN fmt dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
@@ -231,7 +231,7 @@ patchRegsOfInstr instr env = case instr of
   FMIN o1 o2 o3 -> FMIN (patchOp o1) (patchOp o2) (patchOp o3)
   FMAX o1 o2 o3 -> FMAX (patchOp o1) (patchOp o2) (patchOp o3)
   VMV fmt o1 o2 -> VMV fmt (patchOp o1) (patchOp o2)
-  VID fmt o1 o2 -> VID fmt (patchOp o1) (patchOp o2)
+  VID fmt o1 -> VID fmt (patchOp o1)
   VMSEQ fmt o1 o2 o3 -> VMSEQ fmt (patchOp o1) (patchOp o2) (patchOp o3)
   VMERGE fmt o1 o2 o3 o4 -> VMERGE fmt (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
   VSLIDEDOWN fmt o1 o2 o3 -> VSLIDEDOWN fmt (patchOp o1) (patchOp o2) (patchOp o3)
@@ -676,7 +676,7 @@ data Instr
 
   -- TODO: Care about the variants (<instr>.x.y) -> sum type
   | VMV Format Operand Operand
-  | VID Format Operand Operand
+  | VID Format Operand 
   | VMSEQ Format Operand Operand Operand
   | VMERGE Format Operand Operand Operand Operand
   | VSLIDEDOWN Format Operand Operand Operand


=====================================
compiler/GHC/CmmToAsm/RV64/Ppr.hs
=====================================
@@ -750,7 +750,7 @@ pprInstr platform instr = case instr of
                 | isVectorRegOp o1 && isVectorRegOp o2 -> configVec fmt $$ op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "v") o1 o2
                 | True -> pprPanic "RV64.pprInstr - impossible vector move (VMV)" (pprOp platform o1 <+> pprOp platform o2 <+> text "fmt" <> colon <> (text . show) fmt)
   -- TODO: Remove o2 from constructor
-  VID fmt o1 _o2 -> configVec fmt $$ op1 (text "\tvid.v") o1
+  VID fmt o1 -> configVec fmt $$ op1 (text "\tvid.v") o1
   -- TODO: This expects int register as third operand: Generalize by calculating
   -- the instruction suffix (".vx")
   VMSEQ fmt o1 o2 o3 -> configVec fmt $$ op3 (text "\tvmseq.vx") o1 o2 o3


=====================================
docs/users_guide/using.rst
=====================================
@@ -1743,6 +1743,37 @@ Some flags only make sense for particular target platforms.
     multiply-add, which might perform non-IEEE-compliant software emulation on
     some platforms (depending on the implementation of the C standard library).
 
+.. ghc-flag:: -mvector-min-width-bits
+   :shortdesc: (RISC-V NCG only) Minimal width of vector registers in bits
+   :type: dynamic
+   :category: platform-options
+
+   :default: Not set, which implies that no vector registers are used.
+
+   :since: 9.12.2
+
+   Though the RISC-V ISA supports dynamic configuration of vector registers via
+   "grouping", the GHC register allocators need to know which register width
+   they can rely on. In RISC-V terms, the value is the smallest VLEN among the
+   machines the program should be executed on.
+
+   Possible values are ``128``, ``256`` and ``512`` (these values are due to
+   internal alignment with other architectures).
+
+   The question of "How big are my vector registers?" frequently arises on
+   other architectures as well, so this parameter may become useful for other
+   architectures in future GHC versions. Right now, it is only used by the
+   native code generator (NCG) for RISC-V 64.
+
+   GHC only supports the "'V' Standard Extension for Vector Operations, Version
+   1.0" as defined in "The RISC-V Instruction Set Manual, Volume I". Vendor-
+   specific extensions and older standards are not supported. (There are still
+   many machines with older standard implementations around. If in doubt, check
+   your machine specification.)
+
+   This flag also enables vector operation support in the RISC-V NCG. Some
+   programs with vector-specific code cannot be compiled without it.
+
 Haddock
 -------
 


=====================================
rts/include/stg/MachRegs.h
=====================================
@@ -219,6 +219,42 @@
 #  endif
 #endif
 
+#if !defined(MAX_REAL_YMM_REG)
+#  if   defined(REG_YMM6)
+#  define MAX_REAL_YMM_REG 6
+#  elif defined(REG_YMM5)
+#  define MAX_REAL_YMM_REG 5
+#  elif defined(REG_YMM4)
+#  define MAX_REAL_YMM_REG 4
+#  elif defined(REG_YMM3)
+#  define MAX_REAL_YMM_REG 3
+#  elif defined(REG_YMM2)
+#  define MAX_REAL_YMM_REG 2
+#  elif defined(REG_YMM1)
+#  define MAX_REAL_YMM_REG 1
+#  else
+#  define MAX_REAL_YMM_REG 0
+#  endif
+#endif
+
+#if !defined(MAX_REAL_ZMM_REG)
+#  if   defined(REG_ZMM6)
+#  define MAX_REAL_ZMM_REG 6
+#  elif defined(REG_ZMM5)
+#  define MAX_REAL_ZMM_REG 5
+#  elif defined(REG_ZMM4)
+#  define MAX_REAL_ZMM_REG 4
+#  elif defined(REG_ZMM3)
+#  define MAX_REAL_ZMM_REG 3
+#  elif defined(REG_ZMM2)
+#  define MAX_REAL_ZMM_REG 2
+#  elif defined(REG_ZMM1)
+#  define MAX_REAL_ZMM_REG 1
+#  else
+#  define MAX_REAL_ZMM_REG 0
+#  endif
+#endif
+
 /* define NO_ARG_REGS if we have no argument registers at all (we can
  * optimise certain code paths using this predicate).
  */



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/158c705ba6815789b47819ce3df93d5240758d94...91169343d138c1afd81ee9aaa04711b9659d731b

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/158c705ba6815789b47819ce3df93d5240758d94...91169343d138c1afd81ee9aaa04711b9659d731b
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250105/c45006f9/attachment-0001.html>


More information about the ghc-commits mailing list